14 Replies Latest reply on Jan 29, 2010 12:21 PM by tali

    2D Reduce Kernel does not work !!!!!

    tali
      Strange error in 2D reduce kernel !!!!!

      Hi 

      I am implementing a sparse vector multiply routine to do  a simple y=Ax.

      Due to size limitations of 8192 in 1D streams, i have to extend the code to 2D streams. 

      This is where i get the problem.

      I get my final y-vector by reducing the data in a tempStream<>.

       

      The problem is that the result in my reduce data is sometimes corrupted for certain sizes of matrix.

      However I have tried to manually reduce the stream on the CPU in a simple for-loop and then the result if OK. 

      I cannot belive that the Brook+ 2D kernel has this strange problem. 

      I have attached the code, any help from any one will be of great help as i am stucked with it for days.............

       

       

      //--------------------------------------------------------------------- The KERNELS //--------------------------------------------------------------------- kernel void spmv_gather_2D(int C[][], float x[][], int Max_Stream_Width, out float result<>) { int2 i = instance().xy; // result<> will be a 2D stream of size > Max_Stream_Width // C is also a 2D stream int k = C[i.y][i.x]; // index into C // this will return some index greater than Max_Stream_Width // so int2 k2d; // to index into the 2d X-stream k2d.x = k % Max_Stream_Width; k2d.y = k / Max_Stream_Width; // finally, the gather result result = x[k2d.y][k2d.x]; } kernel void spmv_mul_2D(float a<>, float x<>, out float c<>) { c = a * x; } reduce void spmv_reduce_2D(float temp<>, reduce float y<>) { y += temp; } //--------------------------------------------------------------------- main.cpp //--------------------------------------------------------------------- #include <stdio.h> #include <stdlib.h> #include <time.h> #include <math.h> #include "spmv.h" //include the brook generated header //#include "brookgenfiles/spmv.h" //include the brook generated header //------------------------------------------------------------------------------------------- // prototypes //------------------------------------------------------------------------------------------- void printSpM(int N, int *R, int *C, float *A); void sprand(int nrows,int ncols,int nnz, int R[],int C[],float A[], int*); void SpMV(int nrows, int ncols, int *R, int *C, float *A, float *x, float *y); void saveToFile(int nrows, int*R, int*C, float*A, float*y); void prepareTestData(int nnz, int *R, int * C, float* A); void printMatrix(float* A, int* C, int*R, int N, int M); void outputGPUData(float* A, int* C, int*R, float *x, float* newA, int* newC, float *newX, int N, int nMaxWidth ); void prepareGPUData(float *A, int *C, int* R, float *newA, int *newC, int nnz, int nrows, int iMaxWidth ); // stream creation int* create2DFloatStream(::brook::Stream<float>** stream, int MAX_STREAM_SIZE, float** data, int dataLen); int* create2DFloatStream(::brook::Stream<float>** stream, int MAX_STREAM_SIZE, int streamLen); int* create2DIntStream(::brook::Stream<int>** stream, int MAX_STREAM_SIZE, int** data, int dataLen); int* create2DIntStream(::brook::Stream<int>** stream, int MAX_STREAM_SIZE, int streamLen); //-------------------------------------------------------------------------------------------- // Main //-------------------------------------------------------------------------------------------- int main(){ //real data // int ncols = 10000; // int nrows = 10000; // int nnz = (nrows * ncols)*((25.0f)/100.0f) ; //25% sparsity 25 000 000 // int MAX_STREAM_SIZE = 8192; // set it to 8192 = 8*1024 = 8 Kb // Error Data 1 int ncols = 100; int nrows = 100; int nnz = 7500; //75% sparsity int MAX_STREAM_SIZE = ncols/2; // set it to 8192 = 8*1024 = 8 Kb // Test Data 1 // int ncols = 5; // int nrows = 5; // int nnz = 11; //25% sparsity // int MAX_STREAM_SIZE = 4; // set it to 8192 = 8*1024 = 8 Kb //Sparse Matrix Vectors- Single Precision int N, *R, *C; float *A, *x, *y; //timers clock_t start, finish; //allocate memory N = nrows*ncols; R = (int*) malloc((nrows+1) * sizeof (int)); //Equal to no. of rows +1 C = (int*) malloc(nnz * sizeof (int)); //Equal to no. of non-zero elements A = (float*) malloc(nnz * sizeof (float)); //Equal to no. of non-zero elements x = (float*) malloc(ncols * sizeof (float)); //multiplier vector x y = (float*) malloc(nrows * sizeof (float)); //resultant vector //initialise vector x to all ones for(int i=0; i<ncols; i++){ x[i] = (float)(i+1); } //---------------------------------------------------------------------------------------- // Create a random sparse matrix //---------------------------------------------------------------------------------------- // int MAX_ROW_WIDTH = 3; //max no. of non-zeros in a single row sprand(nrows,ncols, nnz, R, C, A, &MAX_ROW_WIDTH); // prepareTestData(nnz, R, C, A); // printMatrix(A, C, R, nrows, ncols); //---------------------------------------------------------------------------------------- // CPU EXECUTION //---------------------------------------------------------------------------------------- //measure time int trials = 100; start =clock(); for(int i=0; i< trials; i++){ //do: y = Ax SpMV(nrows, ncols, R, C, A, x, y); } finish =clock(); double timeA= (double)(finish-start)*1000.0 / CLOCKS_PER_SEC; trials = 200; start =clock(); for(int i=0; i< trials; i++){ //do: y = Ax SpMV(nrows, ncols, R, C, A, x, y); } finish =clock(); double timeB= (double)(finish-start)*1000.0 / CLOCKS_PER_SEC; double runtime = (timeB-timeA)/100; printf("CPU TIME: %.2f milliseconds \n",runtime); //----------------------------------------------------------------------------------------- // Change Max. Stream Width allowed from 8192 to some multiple // of MAX_ROW_WIDTH of the matrix //----------------------------------------------------------------------------------------- int multiple = MAX_STREAM_SIZE / MAX_ROW_WIDTH; if(multiple == 0) multiple =1; // never have it zero MAX_STREAM_SIZE = multiple * MAX_ROW_WIDTH; printf("\nMAX_ROW_WIDTH : %d \n",MAX_ROW_WIDTH); printf("max. 1D stream size: %d \n", MAX_STREAM_SIZE); //----------------------------------------------------------------------------------- // Prepare the Padded GPU Data //----------------------------------------------------------------------------------- // prepare the new Compressed Row data for the streams unsigned int iStreamLen = (MAX_ROW_WIDTH * nrows); unsigned int iXLen = (ncols); unsigned int iResultLen = (nrows); // array data to be copied over to the streams float *newA = (float*) malloc( iStreamLen * sizeof (float)); int *newC = (int*) malloc( iStreamLen * sizeof (int)); // prepare GPU data by padding in zeros for both A, C arrays. prepareGPUData(A, C, R, newA, newC, nnz, nrows, MAX_ROW_WIDTH ); // create a copy of the original x[] vector - // This vector will be passed down to the 2D stream X // and copied data will be lost and replaced. // We need to save x[] for testign later. //float *X_old = (float*) malloc( ncols * sizeof (float)); //for(int i=0; i<ncols; i++){ // X_old[i] = x[i]; //} //----------------------------------------------------------------------------------- // 2D Stream Declarartions //----------------------------------------------------------------------------------- // Stream A //int* dim; //::brook::Stream<float> *AStream; //dim = create2DFloatStream(&AStream, MAX_STREAM_SIZE, &newA /*data*/, iStreamLen/*data length*/); //printf("\nstream A< %d, %d > \n",dim[0], dim[1]); // Stream C //::brook::Stream<int> *CStream; //dim = create2DIntStream(&CStream, MAX_STREAM_SIZE, &newC /*data*/, iStreamLen/*data length*/); //printf("stream C< %d, %d > \n",dim[0], dim[1]); // Stream Temp //::brook::Stream<float> *tempStream; //dim = create2DFloatStream(&tempStream, MAX_STREAM_SIZE, iStreamLen/*stream length*/); //printf("stream Temp< %d, %d > \n",dim[0], dim[1]); //int temp_W = dim[0]; //int temp_H = dim[1]; // Stream X // ::brook::Stream<float> *xStream; // dim = create2DFloatStream(&xStream, MAX_STREAM_SIZE, iStreamLen/*stream length*/); // printf("stream X< %d, %d > \n",dim[0], dim[1]); // float *newX = (float*) malloc( dim[0]*dim[1] * sizeof (float)); // int xLen = dim[0]*dim[1]; //---------------------------------- //Streams A, C , X , Temp //---------------------------------- unsigned int height = iStreamLen /MAX_STREAM_SIZE; if(iStreamLen%MAX_STREAM_SIZE > 0) height++; unsigned int width = MAX_STREAM_SIZE; // 2D-stream declaraion unsigned int dim[2] = {width, height}; ::brook::Stream<float> AStream(2/*rank*/, dim /*dimension*/); ::brook::Stream<int> CStream(2/*rank*/, dim /*dimension*/); ::brook::Stream<float> tempStream(2/*rank*/, dim /*dimension*/); ::brook::Stream<float> xStream(2/*rank*/, dim /*dimension*/); //save dimensions of temp stream unsigned int temp_W = width; unsigned int temp_H = height; // print data printf("\n Stream A<%d, %d> ", dim[0], dim[1]); printf("\n Stream C<%d, %d> ", dim[0], dim[1]); printf("\n Stream Temp<%d, %d> ", dim[0], dim[1]); printf("\n Stream X<%d, %d> ", dim[0], dim[1]); float* A_data = (float*) malloc(width* height* sizeof (float) ); int* C_data = (int*) malloc(width* height* sizeof (int) ); for(int i=0; i<(width* height) ; i++) { if(i< iStreamLen){ A_data[i] = newA[i]; C_data[i] = newC[i]; } else{ A_data[i] = 0; C_data[i] = 0; } } // free the old array data free(newA); free(newC); //---------------------------------- //Streams xOLD - old data //---------------------------------- height = ncols /MAX_STREAM_SIZE; if(ncols%MAX_STREAM_SIZE > 0) height++; width = MAX_STREAM_SIZE; // 2D-stream declaraion unsigned int dimX[2] = {width, height}; ::brook::Stream<float> xOldStream(2/*rank*/, dimX /*dimension*/); printf("\n Stream xOLD<%d, %d> ", dimX[0], dimX[1]); float* x_data = (float*) malloc(width* height* sizeof (float) ); for(int i=0; i<(width* height) ; i++) { if(i< ncols){ x_data[i] = x[i]; } else{ x_data[i] = 0; } } // Stream XOLD - contains the original x-vector as in the CPU code // ::brook::Stream<float> *xOldStream; // dim = create2DFloatStream(&xOldStream, MAX_STREAM_SIZE, &X_old /*data*/, ncols/*data length*/); // printf("stream X_old< %d, %d > \n",dim[0], dim[1]); //---------------------------------- //Stream Y //---------------------------------- int Y_w = temp_W / MAX_ROW_WIDTH; // calc. width of the reduce stream. int Y_h = temp_H; // same as the temp stream's which will be reduced unsigned int dimY[2]={Y_w, Y_h}; ::brook::Stream<float> yStream(2, dimY); //_Y = new ::brook::Stream<float>(2, dimY); printf("\n Stream Y< %d, %d > \n",dimY[0], dimY[1]); // Initialise the Y-Stream to all zeros float *newY = (float*) malloc( Y_w * Y_h * sizeof(float) ); for(int i =0; i<(Y_w*Y_h) ; i++ ){ newY[i] =0; } //---------------------------------------------------------- // GPU EXECUTION //---------------------------------------------------------- // read data into GPU streams AStream.read(A_data); CStream.read(C_data); xOldStream.read(x_data); yStream.read(newY); printf("\nCalling the kernels....",dimY[0], dimY[1]); trials = 100; start =clock(); for(int i=0; i< trials; i++){ // KERNEL: Gather //spmv_gather_2D(*(CStream), *(xOldStream), MAX_STREAM_SIZE ,*(xStream) ); spmv_gather_2D(CStream, xOldStream, MAX_STREAM_SIZE ,xStream ); // KERNEL: Multiplication //spmv_mul_2D( *(AStream), *(xStream), *(tempStream) ); spmv_mul_2D( AStream, xStream, tempStream ); // KERNEL: REDUCE //spmv_reduce_2D( *(tempStream), yStream ); spmv_reduce_2D( tempStream, yStream ); // download data yStream.write(newY); } finish =clock(); timeA= (double)(finish-start)*1000.0 / CLOCKS_PER_SEC; trials = 200; start =clock(); for(int i=0; i< trials; i++){ // KERNEL: Gather //spmv_gather_2D(*(CStream), *(xOldStream), MAX_STREAM_SIZE ,*(xStream) ); spmv_gather_2D(CStream, xOldStream, MAX_STREAM_SIZE ,xStream ); // KERNEL: Multiplication //spmv_mul_2D( *(AStream), *(xStream), *(tempStream) ); spmv_mul_2D( AStream, xStream, tempStream ); // KERNEL: REDUCE //spmv_reduce_2D( *(tempStream), yStream ); spmv_reduce_2D( tempStream, yStream ); // download data yStream.write(newY); } finish =clock(); timeB= (double)(finish-start)*1000.0 / CLOCKS_PER_SEC; runtime = (timeB-timeA)/100; printf("\nGPU run-time: %.2f milliseconds \n",runtime); //---------------------------------------------------------- // Error Checking //---------------------------------------------------------- // ERROR in Reduce Kernel float error = 0; for(int i=0; i<(nrows); i++){ float e = (y[i] - newY[i]); if(e<0) // make it positive e = e* (-1); error += (e); } printf("\nTotal error in Reduce-kernel = %.f \n", error ); // Compare data in stream A with original A // for(int i=0; i< nrows; i++){ // for(int j=R[i], offset=0; j<R[i+1]; j++, offset++){ // int indNewA = i* MAX_ROW_WIDTH + offset; // printf("\n A[%d, %d]=%.f , newA[%d, %d]=%.f \n", i, C[j], A[j], i, offset, newA[indNewA]); // } // } // Do Manual Reduce float *temp = (float*) malloc( temp_W * temp_H * sizeof (float)); tempStream.write(temp); // Manual Reduce for(int i=0; i< nrows; i++){ newY[i] = 0; for(int k=0; k<MAX_ROW_WIDTH; k++){ int indNewA = i* MAX_ROW_WIDTH + k; newY[i] += temp[indNewA]; } } // Error : Manual Reduce error = 0; for(int i=0; i<(nrows); i++){ float e = (y[i] - newY[i]); if(e<0) // make it positive e = e* (-1); error += (e); } printf("\nError in Manual Reduce= %.f \n", error ); /* // Error in Gather Kernel xStream.write(newX); float errGather =0; //error in gather step for(int i=0; i<xLen; i++){ //printf("X[ %d ]=%.2f \n",i,newX[i]-x[newC[i]]); errGather += (newX[i]-x[newC[i]]); } printf("\nError in gather step=%.2f \n",errGather); // Error in Multiplication Kernel error = 0; for(int i=0; i< nrows; i++){ for(int j=R[i], offset=0; j<R[i+1]; j++, offset++){ int indNewA = i* MAX_ROW_WIDTH + offset; //printf("\n A[%d, %d]=%.f , newA[%d, %d]=%.f \n", i, C[j], A[j], i, offset, newA[indNewA]); //printf("\n temp[%d, %d]=%.f , newA*newX[%d, %d]=%.f \n", i, C[j], A[j]*x[C[j]], i, offset, newA[indNewA]*newX[indNewA]); //printf("\n temp[%d, %d]=%.f , temp_stream[%d, %d]=%.f \n", i, C[j], A[j]*x[C[j]], i, offset, temp[indNewA]); //printf("\n temp_error[%d, %d]=%.f \n", i, C[j], ( (A[j]*x[C[j]]) - temp[indNewA]) ); error += ( (A[j]*x[C[j]]) - temp[indNewA] ); } } free(temp); printf("\nTotal error in the Mul-kernel = %.f \n", error ); */ if(yStream.error()) printf("Y.errorLog()= %s \n", yStream.errorLog() ); //---------------------------------------------------------- // deallocate memory //---------------------------------------------------------- free(A_data); free(C_data); free(newY); free(y); free(x); free(R); free(C); free(A); return 0; } //////////////////////////////////////////////////////////////////////////////// //! //! \brief Create a 2D float-stream if the required stream-size exceeds 8192 //! but still less than 8192*8192 = 67108864 //! But we will pass the max size to vary between 0 and 8192. //////////////////////////////////////////////////////////////////////////////// int* create2DFloatStream(::brook::Stream<float>** stream, // stream handle to return int MAX_STREAM_SIZE, // max stream size allowed int streamLen) // minimum length of desired stream { // calculate the width, height of the new stream. unsigned int height = streamLen/MAX_STREAM_SIZE; if(streamLen%MAX_STREAM_SIZE > 0) height ++; unsigned int width = MAX_STREAM_SIZE; // unsigned int width = ceil(sqrt(streamLen)); // unsigned int height = width; unsigned int dim[2] = {width, height}; // create the new brook stream now (*stream) = new ::brook::Stream<float>(2/*rank*/, dim/*dimension*/ ); // return the dimensions of the created stream int *dimension = new int[2]; dimension[0] = dim[0]; dimension[1] = dim[1]; return dimension; } //----------------------------------------------------------------------------------------------- int* create2DIntStream(::brook::Stream<int>** stream, // stream handle to return int MAX_STREAM_SIZE, // max stream size allowed int streamLen) // minimum length of desired stream { // calculate the width, height of the new stream. // unsigned int width = ceil(sqrt(streamLen)); // unsigned int height = width; unsigned int height = streamLen/MAX_STREAM_SIZE; if(streamLen%MAX_STREAM_SIZE > 0) height ++; unsigned int width = MAX_STREAM_SIZE; unsigned int dim[2] = {width, height}; // create the new brook stream now (*stream) = new ::brook::Stream<int>(2/*rank*/, dim/*dimension*/ ); // return the dimensions of the created stream int *dimension = new int[2]; dimension[0] = dim[0]; dimension[1] = dim[1]; return dimension; } //////////////////////////////////////////////////////////////////////////////// //! //! \brief Create a 2D float-stream if the required stream-size exceeds 8192 //! but still less than 8192*8192 = 67108864 //! But we will pass the max size to vary between 0 and 8192. //! The actual array data to be copied might not be a multiple of 8192. //! Hence we have to pad Zeros in the last row of the 2D stream. //! This function allows that automatically. //////////////////////////////////////////////////////////////////////////////// int* create2DFloatStream(::brook::Stream<float>** stream, // stream handle to return int MAX_STREAM_SIZE, // max stream size allowed float** data, // data to copy int dataLen) // data length { // make the width, height of the 2D stream to be equal so that the stream is square. // unsigned int width = ceil(sqrt(dataLen)); // unsigned int height = width; // calculate the width, height of the new stream. unsigned int height = dataLen/MAX_STREAM_SIZE; if(dataLen%MAX_STREAM_SIZE > 0) height ++; unsigned int width = MAX_STREAM_SIZE; unsigned int dim[2] = {width, height}; // create the new stream data float* newData = (float*) malloc( width*height*sizeof(float) ); // copy data into new-data array, pad with zero in the last row float *ptr = *(data); for(int i=0; i<(width*height); i++){ if( i < dataLen ){ newData[i] = ptr[i]; } else newData[i] = 0; // pad Zero } free(ptr); // delete old data *(data) = newData; // set new data as the reference // create the new brook stream now (*stream) = new ::brook::Stream<float>(2/*rank*/, dim/*dimension*/ ); // read the data into the stream (*stream)->read(newData); // return the dimensions of the created stream int *dimension = new int[2]; dimension[0] = dim[0]; dimension[1] = dim[1]; return dimension; } //////////////////////////////////////////////////////////////////////////////// //! //! \brief Create a 2D int-stream if the required stream-size exceeds 8192 //! but still less than 8192*8192 = 67108864 //! But we will pass the max size to vary between 0 and 8192. //! The actual array data to be copied might not be a multiple of 8192. //! Hence we have to pad Zeros in the last row of the 2D stream. //! This function allows that automatically. //////////////////////////////////////////////////////////////////////////////// int* create2DIntStream(::brook::Stream<int>** stream, // stream handle to return int MAX_STREAM_SIZE, // max stream size allowed int** data, // data to copy int dataLen) // data length { // calculate the width, height of the new stream. unsigned int height = dataLen/MAX_STREAM_SIZE; if(dataLen%MAX_STREAM_SIZE > 0) height ++; unsigned int width = MAX_STREAM_SIZE; unsigned int dim[2] = {width, height}; // create the new stream data int* newData = (int*) malloc( width*height*sizeof(int) ); // copy data into new-data array, pad with zero in the last row int * ptr = *(data); for(int i=0; i<(width*height); i++){ if( i < dataLen ){ newData[i] = ptr[i]; } else newData[i] = 0; // pad Zero } free(ptr); // delete old data *(data) = newData; // set new data as the reference // create the new brook stream now (*stream) = new ::brook::Stream<int>(2/*rank*/, dim/*dimension*/ ); // read the data into the stream (*stream)->read(newData); // return the dimensions of the created stream int *dimension = new int[2]; dimension[0] = dim[0]; dimension[1] = dim[1]; return dimension; } //////////////////////////////////////////////////////////////////////////////// //! //! \brief Prepare data to be copied to the GPU streams. //! \param A array containing all non zeros of matrix A, as in CSR format //! \param C array containing the column indexes in compressed row format //! \param R array containing the row indexes in compressed row format //! \param newA new array A, with size adjusted for streams //! \param newC new array C, with size adjusted for streams //! \param nNonZeros total no. of non-zeros in A //! \param N no. of rows in A //! \param nMaxWidth max no. of non-zeros present in any single row of A //////////////////////////////////////////////////////////////////////////////// void prepareGPUData(float* A, int* C, int* R, float* newA, int* newC, int nNonZeros, int N, int nMaxWidth ){ for( int i=0; i< N; i++){ int offset =0; for(int j=R[i]; j<R[i+1]; j++){ newA[nMaxWidth*i+offset] = A[j]; newC[nMaxWidth*i+offset] = C[j]; offset++; } // pad the rest of the area with zeros while(offset < nMaxWidth){ newA[nMaxWidth*i+offset] = 0.0f; newC[nMaxWidth*i+offset] = 0; offset++; } //newR[i]= i*nMaxWidth; } } //////////////////////////////////////////////////////////////////////////////// //! //! \brief Implement sparse y = Ax //! //////////////////////////////////////////////////////////////////////////////// void SpMV(int nrows, int ncols, int *R, int *C, float *A, float *x, float *y){ /* Slower version for(int i = 0; i<nrows; i++){ for(int j = R[i]; j<R[i+1]; j++){ y[i]+= ( A[j] * x[ C[j] ]); } } */ //improved 1 float temp = 0; for(int i = 0; i<nrows; i++){ temp = 0; for(int j = R[i]; j<R[i+1]; j++){ temp += ( A[j] * x[ C[j] ]); } y[i] = temp; } } //print the sparse matrix void printSpM(int N, int *R, int *C, float *A){ for(int i = 0; i<N; i++) for(int j = R[i]; j<R[i+1]; j++){ printf("Element (%d,%d) has the value %f\n",i,C[j],A[j]); } } //Create a random sparse matrix void sprand(int nrows,int ncols,int nnz, int R[],int C[],float A[], int* maxWidth) { int i,j,k,n; double r; n = nrows*ncols; k = 0; int counter = 1; // to count the max no. of non-zeros in a single row. int iMaxWidth = 0; int iRowWidth = 0; for(i = 0; i<nrows; i++) { R[i] = k; iRowWidth = 0; for(j = 0; j<ncols; j++) { r = rand() / (double) RAND_MAX; if(r*(n-(i*ncols+j)) < (nnz-k)) { C[k] = j; A[k] = counter++; /* You can also put a random number as data */ k = k+1; // save the max width of non-zeros in a single row. iRowWidth++; if(iRowWidth > iMaxWidth) iMaxWidth = iRowWidth; } } } R[nrows] = k; *(maxWidth) = iMaxWidth; } void saveToFile(int nrows, int*R, int*C, float*A, float*y){ //Save the sparse matrix to a file to be read by Matlab FILE *file; file = fopen("c:\\matrix.txt","w"); //w = write, a+ = append for(int i=0; i<nrows; i++){ for(int j=R[i]; j<R[i+1]; j++){ //%_f for double precision //%f for single-precision fprintf(file,"%d %d %f\n", i+1, C[j]+1, A[j]);//format:{row# col# value} } } fclose(file); /*done!*/ FILE *filey; //save the vector y to file filey = fopen("c:\\y.txt","w"); //w = write, a+ = append for(int i=0; i<nrows; i++){ fprintf(filey,"%f\n", y[i]);//format:{row# col# value} } fclose(filey); /*done!*/ } void outputGPUData(float* A, int* C, int*R, float *x, float* newA, int* newC, float *newX, int N, int nMaxWidth ){ for( int i=0; i< N; i++){ int offset =0; for(int j=R[i]; j<R[i+1]; j++){ printf("newA[%d]=%f , A[%d]=%f \n",(nMaxWidth*i+offset), newA[nMaxWidth*i+offset],j, A[j] ); printf("newC[%d]=%d , C[%d]=%d \n",(nMaxWidth*i+offset), newC[nMaxWidth*i+offset],j, C[j] ); printf("newX[%d]=%f , x[%d]=%f \n",(nMaxWidth*i+offset), newX[(nMaxWidth*i+offset)],C[j], x[C[j]] ); offset++; } // pad the rest of the area with zeros while(offset < nMaxWidth){ printf("newA[%d]=%f \n",(nMaxWidth*i+offset), newA[nMaxWidth*i+offset] ); printf("newC[%d]=%d \n",(nMaxWidth*i+offset), newC[nMaxWidth*i+offset] ); printf("newX[%d]=%f \n",(nMaxWidth*i+offset), newX[(nMaxWidth*i+offset)] ); offset++; } printf("\n"); } } void printMatrix(float* A, int* C, int*R, int N, int M){ for( int i=0; i< N; i++){ for(int j=0; j<M; j++){ bool flag = true; for(int k = R[i]; k<R[i+1]; k++){ if(j == C[k]){ printf("%.0f ", A[k] ); flag =false; break; } } if(flag) printf("%d ", 0 ); } printf("\n"); } } void prepareTestData(int nnz, int *R, int * C, float* A){ for(int i=0; i<nnz; i++){ A[i] = (i+1); } C[0]=1; C[1]=3; C[2]=0; C[3]=1; C[4]=3; C[5]=1; C[6]=2; C[7]=3; C[8]=2; C[9]=3; C[10]=4; R[0]=0; R[1]=2; R[2]=5; R[3]=8; R[4]=10; R[5]=11; }