Ceq

Bug when indexing gather arrays with integer values?

Discussion created by Ceq on Aug 31, 2009
Latest reply on Sep 3, 2009 by riza.guntur

It looks like there is a bug when branching is combined with integer values used to index gather streams; using float values works fine. Changing the "if ... else" structure into two separate "if" statements also fixes the problem.

Complete test case:

/*
vSrc = {
     0.0  1.0  2.0  3.0  4.0  5.0  6.0  7.0
     8.0  9.0 10.0 11.0 12.0 13.0 14.0 15.0 }

vInd = {
     -10   -9    6  -11    7    5   -3   13
       2    4   13  -12    6    9    9    1 }

- Non-negative indirections copy the data from vSrc at the position given by the indirection.
- Negative indirections copy the data from vSrc directly (same position as the output element).
- The output is tagged by adding 0.1f or 0.2f so the path that was taken can be identified.

Right output using "if(ins >= 0) dst = ..."
vOut = {
     0.1  1.1  6.2  3.1  7.2  5.2  6.1 13.2
     2.2  4.2 13.2 11.1  6.2  9.2  9.2  1.2 }

Wrong output using "else dst = ..."
vOut = {
     0.1  1.1  4.2  3.1  4.2  4.2  6.1 12.2
     2.2  4.2 12.2 11.1  6.2  9.2  8.2  0.2 }
*/


// Dumps `len` floats from `v` to stdout, labelled with `name`.
// Values are laid out eight per row, each printed with width 4 and one decimal.
void vfprint(const char *name, const float *v, const int len) {
    int idx = 0;

    printf("%s = { ", name);
    while (idx < len) {
        // Every eighth element starts a fresh, indented row.
        if ((idx % 8) == 0) {
            printf("\n    ");
        }
        printf("%4.1f ", v[idx]);
        idx++;
    }
    printf("}\n");
}


// Dumps `len` ints from `v` to stdout, labelled with `name`.
// Values are laid out eight per row, each printed with width 4.
void viprint(const char *name, const int *v, const int len) {
    int idx = 0;

    printf("%s = { ", name);
    while (idx < len) {
        // Every eighth element starts a fresh, indented row.
        if ((idx % 8) == 0) {
            printf("\n    ");
        }
        printf("%4i ", v[idx]);
        idx++;
    }
    printf("}\n");
}


// Minimal reproduction kernel for the reported if/else gather problem.
//
//   src  - 2D gather array that is read at computed positions.
//   ins  - per-element integer indirection (a flattened 1D index into src).
//   dst  - per-element output.
//   DIMX - row width used to unflatten `ins` into a 2D position.
//
// For ins < 0 the output is src at the element's own position plus 0.1f;
// otherwise it is src at the unflattened `ins` position plus 0.2f. The
// fractional part therefore tells which branch produced each output.
//
// NOTE(review): the if/else form below is kept exactly as posted on purpose;
// it is the construct being reported as producing wrong results.
kernel void
KBUG(float src[][], int ins<>, out float dst<>, int DIMX)
{
    // 2D position of this kernel instance (the 2D -> 1D conversion below is
    // left disabled; only the 2D form is used).
    int2 pos2D = instance().xy;
    // int pos = pos2D.x + DIMX * pos2D.y;

    // Unflatten the 1D indirection: row = floor(ins / DIMX), and the column
    // is what remains after removing row * DIMX.
    int2 ins2D;
    ins2D.y = (int)floor((float)ins / (float)DIMX);
    ins2D.x = ins - ins2D.y * DIMX;

    // Negative indirection: direct copy from the element's own position.
    if(ins <  0) dst = src[pos2D.y][pos2D.x] + 0.1f;
    // if(ins >= 0) dst = src[ins2D.y][ins2D.x] + 0.2f; // <--- *** THIS WORKS OK ***
    // Non-negative indirection: gather through the unflattened index.
    else dst = src[ins2D.y][ins2D.x] + 0.2f;      // <--- *** THIS FAILS ***
}


// Host driver for the test case: fills a source array and an indirection
// array, runs KBUG over them through streams, and prints the source,
// the indirections and the kernel output. argc/argv are not used.
int main(int argc, char* argv[ ] ) {
    // Test size
    const int SIZE_X = 4;
    const int SIZE_Y = 4;
    const int SIZE = SIZE_X * SIZE_Y;

    // CPU arrays
    float vSrc[SIZE];
    int   vInd[SIZE];
    float vOut[SIZE];

    // Init
    // Fixed seed so every run on the same C library uses the same indirections.
    int i;
    srand(0);
    for(i = 0; i < SIZE; i++) {
        // Source holds its own index, so an output value reveals where it was read from.
        vSrc[i ] = (float)i;
        // Indirections fall in [-16, 15]: a mix of negative (direct copy)
        // and non-negative (gather) entries.
        vInd[i ] = (rand() % 32) - 16;
        vOut[i ] = 0;
    }

    // Streams are confined to this inner scope.
    // NOTE(review): presumably so they are released before the results are printed - confirm.
    {
        // Stream arrays
        // Declared SIZE_Y x SIZE_X, matching the flat host arrays of SIZE elements.
        float sSrc<SIZE_Y, SIZE_X>;
        int   sInd<SIZE_Y, SIZE_X>;
        float sOut<SIZE_Y, SIZE_X>;

        // Load
        // Host arrays -> input streams.
        streamRead(sSrc, vSrc);
        streamRead(sInd, vInd);

        // Launch kernel
        // SIZE_X is passed as DIMX, the row width used to unflatten indirections.
        KBUG(sSrc, sInd, sOut, SIZE_X);

        // Write
        // Output stream -> host array.
        streamWrite(sOut, vOut);
    }

    // Print output
    vfprint("vSrc", vSrc, SIZE);
    viprint("vInd", vInd, SIZE);
    vfprint("vOut", vOut, SIZE);

    return 0;
}

Brook 1.4, Catalyst 9.8, Radeon 4850, WinXP x64, MSVC 2005

Outcomes