Hi, I recently ran some tests on an HD 7970 card, simply running the MD5 hash function in OpenCL on it. I found that the result is wrong (the same code runs fine on an NVIDIA card).
It becomes correct if I build with the -cl-opt-disable option (i.e. with compiler optimizations turned off). Does anyone have similar experience? Is it a bug in the compiler? I have been using OpenCL 1.2.
A simplified version of the kernel code is like this:
// Simplified MD5 kernel used to reproduce a suspected compiler-optimization
// bug: result[3] comes out wrong when byte 0 of vals[0] is built from the
// kernel argument v1, but correct when a compile-time constant is used.
//
// NOTE(review): `vals` is never declared in this snippet — presumably a
// private unsigned int message-word array in the full kernel (TODO confirm);
// as shown the code does not compile.
// NOTE(review): FF is presumably the MD5 round-1 step macro from RFC 1321,
// defined elsewhere — confirm against the full source.
// Several parameters (correctPass, cudaCharSet, wordLength, charSetLen,
// v2..v4, cudaBrute) are unused in this cut-down version.
__kernel void crack(__global unsigned char * correctPass, __global unsigned char *cudaCharSet, int wordLength, int charSetLen, unsigned int v1, unsigned int v2, unsigned int v3, unsigned int v4, __constant unsigned char *cudaBrute, __global unsigned int* result)
{
// MD5 initial state words A, B, C, D (standard RFC 1321 constants).
const unsigned int a0 = 0x67452301;
const unsigned int b0 = 0xEFCDAB89;
const unsigned int c0 = 0x98BADCFE;
const unsigned int d0 = 0x10325476;
// Unused in this simplified version (likely the candidate-word length
// in the full kernel).
int length = 3;
// Working MD5 state for this chunk.
unsigned int a = 0;
unsigned int b = 0;
unsigned int c = 0;
unsigned int d = 0;
// Pack the first 32-bit message word, little-endian, one byte at a time:
// byte 0 comes from the kernel argument v1 — this is the assignment that
// triggers the wrong result; using a constant here makes it correct.
vals[0] |= ((unsigned int)v1) << ((0 % 4) * 8); //(97+((int)(get_group_id(0))%26)) << ((0 % 4) * 8);
vals[0] |= (97+1%26) << ((1 % 4) * 8);
vals[0] |= (97+2) << ((2 % 4) * 8);
// 0x80 in the top byte — the MD5 "append a 1 bit" padding marker for a
// 3-byte message.
vals[0] |= 0x80 << ((3 % 4) * 8);
//Initialize hash value for this chunk:
a = a0;
b = b0;
c = c0;
d = d0;
// MD5 round-1 per-step rotation amounts.
#define S11 7
#define S12 12
#define S13 17
#define S14 22
// First three round-1 steps; the decimal constants are the MD5 sine-table
// values T[1..3] (3614090360 == 0xD76AA478, etc.). Only step 1 consumes
// the message word; steps 2-3 use 0 in this cut-down repro.
FF ( a, b, c, d, vals[0], S11, 3614090360); /* 1 */
FF ( d, a, b, c, 0, S12, 3905402710); /* 2 */
FF ( c, d, a, b, 0, S13, 606105819); /* 3 */
// Intermediate values written out so the host can inspect where the
// miscompiled value first appears. `inter` and `test2` are unused stores
// left over from narrowing down the repro.
unsigned int inter = ~c;
//FF ( b, c, d, a, 0, S14, 3250441966); /* 4 */
result[2]=a&~c;
unsigned int test2 = ~c&a&0x80000000;
// result[3] is the value observed to be wrong under optimization.
result[3] = (d&c) | result[2];
}
//////////////////////////////
The value of result[3] is wrong. If I use a constant value instead of "v1" in the first assignment to vals[0], the result is correct.