4 Replies Latest reply on Nov 26, 2009 3:41 PM by riza.guntur

    Wrong scatter tutorial in samples

    riza.guntur

      I see

      // Output stream for scatter is always 1D and float4
          unsigned int streamSizeScatter[] = {height, width};
          brook::Stream<float> outputStream(2, streamSizeScatter);

      Isn't that 2D? Why does the comment say it has to be 1D and float4?

        • Wrong scatter tutorial in samples
          gaurav.garg

          It looks like this comment is wrong. These limitations existed before the Brook+ 1.3 release.

            • Wrong scatter tutorial in samples
              riza.guntur

              By the way, I found that the sample produces incorrect results when width != height.

              I tried to correct it, but had no luck.

              Here is my code:

              // Scatter/transpose sample driver.
              //
              // Fills a width x height float buffer, copies it into a rank-2 Brook+
              // stream, runs the scatterTransposeGPU kernel into a second rank-2 stream,
              // reads the result back, computes the same transpose on the CPU and
              // compares the two with verify(). Prints the input, the GPU result and
              // the CPU result, followed by "Passed." or "Failed.". Always returns 0.
              //
              // fillBuffer, printBuffer, verify and scatterTransposeGPU are defined
              // outside this function.
              int
              main(int argc, char* argv[])
              {
                  // Specifying the width and height of the 2D buffer
                  const unsigned int width = 10;
                  const unsigned int height = 5;

                  //--------------------------------------------------------------------------
                  // Creating and initializing the input buffer
                  //--------------------------------------------------------------------------

                  // Creating an input buffer
                  float* inputBuffer = new float[width * height];

                  // Initializing the input buffer such that
                  // input(i,j) = i*width + j
                  fillBuffer(inputBuffer, width, height);

                  // Printing input buffer
                  fprintf(stdout, "Input buffer:\n");
                  printBuffer(inputBuffer, width, 0, 0, width, height);

                  //--------------------------------------------------------------------------
                  // Creating the input stream and copying data from input buffer
                  //--------------------------------------------------------------------------

                  // Specifying the size of the 2D stream
                  // NOTE(review): the dimension order expected by brook::Stream is not
                  // visible here; the input uses {width, height} while the output below
                  // uses {height, width} -- confirm against the Brook+ documentation.
                  unsigned int streamSize[] = {width, height};

                  // Specifying the rank of the stream
                  unsigned int rank = 2;

                  // Create a rank-2 float stream with the dimensions given in streamSize
                  // (width and height as set above, not a fixed 64x64)
                  brook::Stream<float> inputStream(rank, streamSize);

                  // Copying data from input buffer to input stream
                  inputStream.read(inputBuffer);

                  //--------------------------------------------------------------------------
                  // Creating the output stream
                  //--------------------------------------------------------------------------
                 
                  // Output stream for the scatter: rank 2, float elements, with the
                  // two dimensions swapped relative to the input stream.
                  // (The earlier "always 1D and float4" remark does not describe this code.)
                  unsigned int streamSizeScatter[] = {height, width};
                  brook::Stream<float> outputStream(2, streamSizeScatter);

                  //--------------------------------------------------------------------------
                  // Executing kernel and copying back data
                  //--------------------------------------------------------------------------

                  // Calling the kernel on the input and output streams.
                  // The execution domain is set to start at offset (0,0,0,0) with
                  // size (height, width, 1, 1) before the launch.
                  scatterTransposeGPU.domainOffset(uint4(0,0,0,0));
                  scatterTransposeGPU.domainSize(uint4(height,width,1,1));
                  scatterTransposeGPU(inputStream, outputStream);

                  // Creating an output buffer for the GPU result and a zero-initialized
                  // buffer for the CPU reference result
                  float* outputBuffer = new float[width * height];
                  float* cpuOutputBuffer = new float[width * height];
                  memset(cpuOutputBuffer, 0, width * height * sizeof(float));

                  // Copying data from output stream to output buffer
                  outputStream.write(outputBuffer);

                  // Check error on stream
                  if(outputStream.error())
                  {
                      // Print error Log associated to stream
                      fprintf(stdout, "%s\n", outputStream.errorLog());
                  }

                  fprintf(stdout, "Transpose:GPU Result\n");
                  printBuffer(outputBuffer, height, 0, 0, height, width);
                  // NOTE(review): this call repeats the previous line with identical
                  // arguments, so the GPU result is printed twice.
                  printBuffer(outputBuffer, height, 0, 0, height, width);

                  // creating CPU Result:
                  // element (i, j) of the input (row stride = width) is written to
                  // element (j, i) of the output (row stride = height)
                  for(unsigned int i = 0; i < height; i++)
                  {
                      for(unsigned int j = 0; j < width; j++)
                      {
                          cpuOutputBuffer[j*height + i] = inputBuffer[i*width + j];
                      }
                  }

                  // Printing CPU Result
                  fprintf(stdout, "Transpose:CPU Result\n");
                  printBuffer(cpuOutputBuffer, height, 0, 0, height, width);

                  //--------------------------------------------------------------------------
                  // Checking whether the result is correct or not
                  //--------------------------------------------------------------------------

                  // verify() is treated as returning true when the CPU and GPU buffers match
                  if(!verify(cpuOutputBuffer, outputBuffer, height, width))
                  {  
                      fprintf(stdout, "Failed.\n");
                  }
                  else
                  {       
                      fprintf(stdout, "Passed.\n");
                  }

                  //--------------------------------------------------------------------------
                  // Cleaning up
                  //--------------------------------------------------------------------------
                 
                  // Release the three host buffers allocated with new[] above
                  delete[] inputBuffer;
                  delete[] outputBuffer;
                  delete[] cpuOutputBuffer;
                  return 0;
              }

              The output I get is:

              C:\Program Files\Brook+_1.4.1_beta\samples\bin\CPP\xp_x86_32>scatter_stream_kernel.exe
              Input buffer:
                0       1       2       3       4       5       6       7       8       9
               10      11      12      13      14      15      16      17      18      19
               20      21      22      23      24      25      26      27      28      29
               30      31      32      33      34      35      36      37      38      39
               40      41      42      43      44      45      46      47      48      49

              Transpose:GPU Result
                0      10      20      30      40
                1      11      21      31      41
                2      12      22      32      42
                3      13      23      33      43
                4      14      24      34      44
                4      24      44      64      84
                5      25      45      65      85
                6      26      46      66      86
                7      27      47      67      87
                8      28      48      68      88

                0      10      20      30      40
                1      11      21      31      41
                2      12      22      32      42
                3      13      23      33      43
                4      14      24      34      44
                4      24      44      64      84
                5      25      45      65      85
                6      26      46      66      86
                7      27      47      67      87
                8      28      48      68      88

              Transpose:CPU Result
                0      10      20      30      40
                1      11      21      31      41
                2      12      22      32      42
                3      13      23      33      43
                4      14      24      34      44
                5      15      25      35      45
                6      16      26      36      46
                7      17      27      37      47
                8      18      28      38      48
                9      19      29      39      49

              Failed.

              What is wrong here?

                • Wrong scatter tutorial in samples
                  gaurav.garg

                  I am able to reproduce this issue. I am sure this is a recent issue and it used to work with Catalyst 9.2. I will file the bug in SF.

                    • Wrong scatter tutorial in samples
                      riza.guntur

                      It used to work with Cat 9.2?

                      I reverted to 9.2 and the result is still the same:

                      Input buffer:
                        0       1       2       3       4       5       6       7       8       9      10      11      12      13      14      15
                       16      17      18      19      20      21      22      23      24      25      26      27      28      29      30      31
                       32      33      34      35      36      37      38      39      40      41      42      43      44      45      46      47
                       48      49      50      51      52      53      54      55      56      57      58      59      60      61      62      63
                       64      65      66      67      68      69      70      71      72      73      74      75      76      77      78      79
                       80      81      82      83      84      85      86      87      88      89      90      91      92      93      94      95
                       96      97      98      99     100     101     102     103     104     105     106     107     108     109     110     111
                      112     113     114     115     116     117     118     119     120     121     122     123     124     125     126     127

                      Transpose:GPU Result
                        0      16      32      48      64      80      96     112
                        1      17      33      49      65      81      97     113
                        2      18      34      50      66      82      98     114
                        3      19      35      51      67      83      99     115
                        4      20      36      52      68      84     100     116
                        5      21      37      53      69      85     101     117
                        6      22      38      54      70      86     102     118
                        7      23      39      55      71      87     103     119
                        0      -2       0       0       0       0       0       0
                        0       0       0       0       0       0       0       0
                        0      64       4      68     128     192     132     196
                      2048    2112    2052    2116    2176    2240    2180    2244
                      2056    2120    2060    2124    2184    2248    2188    2252
                        8      72      12      76     136     200     140     204
                      1040    1104    1044    1108    1168    1232    1172    1236
                      3088    3152    3092    3156    3216    3280    3220    3284

                      Transpose:CPU Result
                        0      16      32      48      64      80      96     112
                        1      17      33      49      65      81      97     113
                        2      18      34      50      66      82      98     114
                        3      19      35      51      67      83      99     115
                        4      20      36      52      68      84     100     116
                        5      21      37      53      69      85     101     117
                        6      22      38      54      70      86     102     118
                        7      23      39      55      71      87     103     119
                        8      24      40      56      72      88     104     120
                        9      25      41      57      73      89     105     121
                       10      26      42      58      74      90     106     122
                       11      27      43      59      75      91     107     123
                       12      28      44      60      76      92     108     124
                       13      29      45      61      77      93     109     125
                       14      30      46      62      78      94     110     126
                       15      31      47      63      79      95     111     127