rexiaoyu

Problem with simple matrix addition

Discussion created by rexiaoyu on Aug 5, 2009
Latest reply on Aug 14, 2009 by rexiaoyu

I declare two 16x16 matrices, each stored as a 1D array, and add them together. In the kernel, I make each thread process N elements (N = 1, 2, 4, ...). When N is less than 16 it works fine, but when N reaches 16 some kind of runtime error happens. I cannot figure out why.

The code is shown below (main.cpp and locate.br):

main.cpp:

 

#include <stdio.h>

#include <stdlib.h>

#include "brookgenfiles/locate.h"

 

using namespace brook;

#define SIZE 16

#define SIZE2 256

 

/*
 * Print a square len x len matrix to stdout.
 *
 * m is read as a flat array in which element (row, col) lives at
 * m[row * len + col]. Every value is written with "%f, " and each row is
 * terminated by a newline. Nothing is printed when len <= 0.
 */
void printMatrix(int len, float m[])
{
    for (int row = 0; row < len; ++row)
    {
        /* First element of the current row inside the flat array. */
        const float *rowStart = m + row * len;

        for (int col = 0; col < len; ++col)
        {
            printf("%f, ", rowStart[col]);
        }

        printf("\n");
    }
}

/*
 * Host driver: fills two SIZE2-element arrays, uploads them to Brook+
 * streams, runs the blockAdd kernel and prints the result as a SIZE x SIZE
 * matrix.
 *
 * Returns 0 on success, 1 if the output stream reports an error.
 */
int main()
{
    // Number of consecutive elements each kernel thread processes.
    // Must be kept equal to `len` in the blockAdd kernel (locate.br).
    const unsigned int elemsPerThread = 16;

    // Host-side input arrays (a, b) and output array (c).
    float a[SIZE2];
    float b[SIZE2];
    float c[SIZE2];

    int i;
    for (i = 0; i < SIZE2; i++)
    {
        // Element-wise initialisation; assigning to the bare array names
        // (`a = 1.0;`) does not compile.
        a[i] = 1.0f;
        b[i] = 2.0f;
    }

    // One-dimensional streams holding SIZE2 floats each.
    unsigned int msize = SIZE2;
    Stream<float> sa(1, &msize);
    Stream<float> sb(1, &msize);
    Stream<float> sc(1, &msize);

    sa.read(a);
    sb.read(b);

    // Each thread handles elemsPerThread elements starting at
    // tid * elemsPerThread, so only SIZE2 / elemsPerThread threads are needed.
    // Launching SIZE2 threads made threads with
    // tid >= SIZE2 / elemsPerThread index past the end of the streams.
    // NOTE(review): the kernel is declared with GroupSize(64, 1, 1); confirm
    // that the Brook+ runtime accepts a domain smaller than the group size.
    uint4 domainSize = uint4(SIZE2 / elemsPerThread, 1, 1, 1);
    blockAdd.domainSize(domainSize);
    blockAdd(sa, sb, sc);

    // Copy the result back to host memory, then check the stream for errors.
    sc.write(c);
    if (sc.error())
    {
        printf("Error occured! %s\n", sc.errorLog());
        return 1;
    }

    printMatrix(SIZE, c);

    // Wait for a key press before exiting.
    getchar();
    return 0;
}



locate.br:

 

// Element-wise addition c = a + b in which each thread handles a contiguous
// block of `len` elements.
//
// Thread `tid` reads a[] and b[] and writes c[] at indices
// [tid * len, tid * len + len - 1]. The kernel performs no bounds checking,
// so the host must launch at most (stream length / len) threads; with more
// threads than that, the higher-numbered threads index past the end of the
// streams.
Attribute[GroupSize(64, 1, 1)]
kernel void
blockAdd(float a[], float b[], out float c[])
{
    int tid = instance().x;

    // Number of elements processed by every thread (1, 2, 4, 8, 16, ...).
    int len = 16;

    // Index of the first element owned by this thread.
    int start = tid * len;
    int i;

    for (i = 0; i < len; i++)
    {
        c[start + i] = a[start + i] + b[start + i];
    }
}



Outcomes