I'm trying to implement the AES cipher in the Brook+ language. However, I've run into strange behavior with this code (most of the algorithm is currently commented out and not included; only the AddRoundKey routine is shown here):
//data - source stream; reverse - encrypt or decrypt; res - resulting stream; AES* - tables, used in other algorithm's routines
// Brook+ kernel implementing (currently) only the AES AddRoundKey step:
// each 16-byte block is XORed with every round key of the expanded key.
//   data    - input stream; one uint4 (16 bytes, big-endian within each lane) per AES block
//   key     - gather array holding the 176-byte expanded key (11 round keys x 16 bytes)
//   reverse - 0 = encrypt order (rounds 0..10), nonzero = decrypt order (rounds 10..0);
//             since AddRoundKey is a pure XOR, the two orders are mathematically identical
//   res     - output stream
//   AES*    - lookup tables used by the commented-out remaining AES routines
kernel void brCrypt_kernel(uint4 data<>, uchar key[], int reverse, out uint4 res<>, uchar AEST[], uchar AESU[], uchar AESAlogtable[], uchar AESLogtable[], uchar AESRcon[])
{
// i, j, k, t, b1..b4 are only used by the commented-out SubBytes/ShiftRows/MixColumns code.
unsigned int i,j,k,n;
uchar t;
uchar b1,b2,b3,b4;
uchar4 rdd0 ,rdd1,rdd2,rdd3,keyt;
// Unpack each 32-bit word into four bytes, most significant byte first
// (x = byte 0 of the block, w = byte 3).
rdd0 = uchar4((data.x>>24) % 256, (data.x>>16) % 256, (data.x>>8) % 256, data.x % 256);
rdd1 = uchar4((data.y>>24) % 256, (data.y>>16) % 256, (data.y>>8) % 256, data.y % 256);
rdd2 = uchar4((data.z>>24) % 256, (data.z>>16) % 256, (data.z>>8) % 256, data.z % 256);
rdd3 = uchar4((data.w>>24) % 256, (data.w>>16) % 256, (data.w>>8) % 256, data.w % 256);
keyt = uchar4(0,0,0,0);
if(!reverse)
{
// --- Encrypt: round key 0 (the original 16 key bytes) ---
keyt = uchar4(key[0],key[1],key[2],key[3]);
rdd0 = rdd0 ^ keyt;
keyt = uchar4(key[4],key[5],key[6],key[7]);
rdd1 = rdd1 ^ keyt;
keyt = uchar4(key[8],key[9],key[10],key[11]);
rdd2 = rdd2 ^ keyt;
keyt = uchar4(key[12],key[13],key[14],key[15]);
rdd3 = rdd3 ^ keyt;
// --- Encrypt: round keys 1..10, XORed in ascending order ---
for(n=1;n<=(uint)10;n++)
{
keyt = uchar4(key[16*n+0],key[16*n+1],key[16*n+2],key[16*n+3]);
rdd0 = rdd0 ^ keyt;
keyt = uchar4(key[16*n+4],key[16*n+5],key[16*n+6],key[16*n+7]);
rdd1 = rdd1 ^ keyt;
keyt = uchar4(key[16*n+8],key[16*n+9],key[16*n+10],key[16*n+11]);
rdd2 = rdd2 ^ keyt;
keyt = uchar4(key[16*n+12],key[16*n+13],key[16*n+14],key[16*n+15]);
rdd3 = rdd3 ^ keyt;
}
}
else
{
// --- Decrypt: same XORs applied in the opposite order (10..1, then 0) ---
// NOTE(review): the CAL-vs-CPU mismatch reported in this thread shows up only
// when this loop executes. A likely suspect is reading a uchar gather array
// (key[16*n+...]) with a runtime-computed index on the CAL backend -- byte-wide
// gather streams are a known weak spot there. To confirm, try packing the key
// into a uint stream (4 bytes per element) or fully unrolling this loop.
for(n=(uint)10;n>=(uint)1;n--)
{
keyt = uchar4(key[16*n+0],key[16*n+1],key[16*n+2],key[16*n+3]);
rdd0 = rdd0 ^ keyt;
keyt = uchar4(key[16*n+4],key[16*n+5],key[16*n+6],key[16*n+7]);
rdd1 = rdd1 ^ keyt;
keyt = uchar4(key[16*n+8],key[16*n+9],key[16*n+10],key[16*n+11]);
rdd2 = rdd2 ^ keyt;
keyt = uchar4(key[16*n+12],key[16*n+13],key[16*n+14],key[16*n+15]);
rdd3 = rdd3 ^ keyt;
}
// Round key 0 is applied last when decrypting.
keyt = uchar4(key[0],key[1],key[2],key[3]);
rdd0 = rdd0 ^ keyt;
keyt = uchar4(key[4],key[5],key[6],key[7]);
rdd1 = rdd1 ^ keyt;
keyt = uchar4(key[8],key[9],key[10],key[11]);
rdd2 = rdd2 ^ keyt;
keyt = uchar4(key[12],key[13],key[14],key[15]);
rdd3 = rdd3 ^ keyt;
}
// Repack the four bytes of each lane into a 32-bit word (inverse of the
// unpacking above: .x becomes the most significant byte).
res.x = rdd0.w + (uint)256*(rdd0.z + (uint)256*(rdd0.y + (uint)256*rdd0.x));
res.y = rdd1.w + (uint)256*(rdd1.z + (uint)256*(rdd1.y + (uint)256*rdd1.x));
res.z = rdd2.w + (uint)256*(rdd2.z + (uint)256*(rdd2.y + (uint)256*rdd2.x));
res.w = rdd3.w + (uint)256*(rdd3.z + (uint)256*(rdd3.y + (uint)256*rdd3.x));
}
I call the function first to encrypt data, and then to decrypt it. With the CAL backend there are some mismatches between the source and the decrypted string, which depend on the position within the string and on the number of for-loop iterations (if the iteration count is set to zero, everything is OK; with more than zero iterations the mismatched bytes randomly change their positions). However, when running the same binary with the CPU backend, it produces the same encrypted string but decodes it correctly. Please help me understand what the error is — I'm sick and tired of trying to figure it out. Thanks in advance.
Sorry for my bad English.
Is it possible for you to post your runtime code?
Function which calls kernel:
// Runs the GPU kernel over odata, writing the result to ores.
//   ores      - output buffer; receives ceil(odatasize/16)*16 bytes (padded size!)
//   odata     - input bytes (treated as read-only; not modified)
//   odatasize - number of input bytes
//   okey      - AES key; only the first 16 bytes are used
//   reverse   - false = encrypt, true = decrypt
// Returns the number of bytes written to ores.
int crypt(unsigned char *ores, unsigned char *odata, const unsigned int odatasize, const unsigned char *okey, bool reverse)
{
// Number of 16-byte AES blocks, rounded up (integer ceiling; no FP needed).
uint datasize = (odatasize + 15) / 16;
uint ii = 256;
unsigned char key[176]; // 11 round keys x 16 bytes
memcpy(key,okey,16);
brAESExpandKey(key, AEST, AESRcon); // CPU-only function; fills key[16..175]
// BUG FIX: the data stream consumes datasize*16 bytes, but odata may be
// shorter when odatasize is not a multiple of 16 -- stage the input in a
// zero-padded buffer instead of reading past the end of the caller's array.
// The trailing () value-initializes (zero-fills) the allocation.
unsigned char *padded = new unsigned char[datasize * 16]();
memcpy(padded, odata, odatasize);
Stream<uint4> data(1,&datasize);
Stream<uint4> res(1,&datasize);
Stream<uchar> sAEST(1,&ii), sAESU(1,&ii), sAESAlogtable(1,&ii), sAESLogtable(1,&ii), sAESRcon(1,&ii);
ii = 176;
Stream<uchar> gkey(1,&ii);
sAEST.read(AEST);
sAESU.read(AESU);
sAESAlogtable.read(AESAlogtable);
sAESLogtable.read(AESLogtable);
sAESRcon.read(AESRcon);
data.read(padded);
gkey.read(key);
brCrypt_kernel(data,gkey,(reverse?1:0),res,sAEST,sAESU,sAESAlogtable, sAESLogtable, sAESRcon);
// NOTE: writes datasize*16 bytes -- the caller's ores must be padded to a
// multiple of 16 bytes.
res.write (ores);
delete[] padded;
return datasize * 16;
}
And here is main():
int main()
{
// 32 bytes of plaintext: 31 visible characters + the explicit '\0'.
unsigned char *a = (unsigned char *)"cbddefghijklmnoZabcdefghijklmno\0";
unsigned char *key = (unsigned char *)"JakaOkfejaofjoajsf"; // only first 16 bytes are used
// BUG FIX: crypt() fills exactly 32 bytes with no NUL terminator, so
// printing a 32-byte array with cout << read past the end of the buffer.
// One extra zero-initialized byte guarantees termination.
unsigned char b[33] = {0}, c[33] = {0};
crypt(b,a,32,key,false);
crypt(c,b,32,key,true);
// NOTE(review): the ciphertext in b may contain embedded '\0' or
// unprintable bytes, so the middle line can look truncated or garbled --
// that is expected for raw cipher output.
cout << a << endl << b <<endl << c<<endl;
return 0;
}
In CPU mode it prints:
cbddefghijklmnoZabcdefghijklmno
?????R????Z???8(?????R????Z???8
cbddefghijklmnoZabcdefghijklmno
And in CAL mode:
cbddefghijklmnoZabcdefghijklmno
?????R????Z???8(?????R????Z???8
caddeeghijklmnoZaacdeeghijklmno
Do you have any ideas/recommendations? I've tried running my code on two different computers (though the OS and driver versions were the same: Win XP x64 SP2, Catalyst 9.4), and on one computer with two different OSes (XP x64 and Kubuntu 8.04), with the same effect: the encryption results on both the CPU and CAL backends are identical (and correct), while decryption on the CAL backend returns a wrong result.