5 Replies Latest reply on Mar 10, 2009 7:10 PM by MicahVillmow

    wrong result by using CAL

    licoah

      I have written a .cpp file as follows:

       float* A;
          float* res;

          A = new float[4];
          A[0] = 0.1f;
          A[1] = 0.2f;
          A[2] = 0.3f;
          A[3] = 0.4f;
         
          //  Initialize CAL, a CALdevice and a CALcontext.
          CALdevice device = 0;
          CALcontext ctx = 0;
          if (!Initialize(&device, &ctx, 0))
          {
              return 0;
          }

          //  Compile, Link and load the shader into a CALmodule.
          CALmodule module;
          if (!SetupKernel(ILKernel, &module, &ctx))
          {
              return 0;
          }

          CALmem iMem;
          CALmem oMem;
          CALresource iRes;
          CALresource oRes;
         
          Data Inputs;
          Data Outputs;
         
          //Allocate memory
          Inputs.DataSize = sizeof(CALfloat);
          Inputs.Width = 2;
          Inputs.Height = 2;
          Inputs.ComponentSize = 1;
          int num = 2 * 2 * Inputs.DataSize ;
          Inputs.c_data = new CALchar

      ;
          memset((void*)Inputs.c_data, 0, num);
          memcpy(Inputs.f_data, A, Inputs.ComponentSize * Inputs.DataSize * Inputs.Height * Inputs.Width);

          Outputs.DataSize = sizeof(CALfloat);
          Outputs.Width = 2;
          Outputs.Height = 2;
          Outputs.ComponentSize = 1;
          num = 2 * 2 * Outputs.DataSize ;
          Outputs.c_data = new CALchar
      ;
          memset((void*)Outputs.c_data, 0, num);

          //Allocate resources
          CALresult r = calResAllocLocal2D(&iRes, device, Inputs.Width, Inputs.Height, CAL_FORMAT_FLOAT_2, 0);
          if(r != CAL_RESULT_OK)
          {
              fprintf(stderr, "AllocateRes: There is an error in calResAllocLocal2D\n");
              return 0;
          }
          r = calCtxGetMem(&iMem, ctx, iRes);
          if(r != CAL_RESULT_OK)
          {
              fprintf(stderr, "AllocateRes: There is an error in calCtxGetMem\n");
              return 0;
          }

          r = calResAllocLocal2D(&oRes, device, Outputs.Width, Outputs.Height, CAL_FORMAT_FLOAT_2, 0);
          if(r != CAL_RESULT_OK)
          {
              fprintf(stderr, "AllocateRes: There is an error in calResAllocLocal2D\n");
              return 0;
          }
          r = calCtxGetMem(&oMem, ctx, oRes);
          if(r != CAL_RESULT_OK)
          {
              fprintf(stderr, "AllocateRes: There is an error in calCtxGetMem\n");
              return 0;
          }


          //Bind name
          CALname progName = 0;
          CALchar buffer[10];
          //input
          sprintf(buffer, "i%d", 0);
          r = calModuleGetName(&progName, ctx, module, buffer);
          if(r != CAL_RESULT_OK)
          {
              fprintf(stderr, "BindIOName: input: Error string is %s\n",calGetErrorString());
              fprintf(stderr, "Failing name binding was %s\n", buffer);
              return 0;
          }
          r = calCtxSetMem(ctx, progName, iMem);
          if(r != CAL_RESULT_OK)
          {
              fprintf(stderr, "BindIOName: input: Error string is %s\n",calGetErrorString());
              fprintf(stderr, "Error string is %s\n",calGetErrorString());
              return 0;
          }
         
          //output
          sprintf(buffer, "o%d", 0);
          r = calModuleGetName(&progName, ctx, module, buffer);
          if(r != CAL_RESULT_OK)
          {
              fprintf(stderr, "BindIOName: output: Error string is %s\n",calGetErrorString());
              fprintf(stderr, "Failing name binding was %s\n", buffer);
              return 0;
          }
          r = calCtxSetMem(ctx, progName, oMem);
          if(r != CAL_RESULT_OK)
          {
              fprintf(stderr, "BindIOName: output: Error string is %s\n",calGetErrorString());
              fprintf(stderr, "Error string is %s\n",calGetErrorString());
              return 0;
          }

          //Copy data to GPU
          CALuint pitch;
          CALchar* ptr;
          r = calResMap((CALvoid**)&ptr, &pitch, iRes, 0);
          if(r != CAL_RESULT_OK)
          {
              fprintf(stderr, "CopyDataToGPU: calResMap: Error string is %s\n", calGetErrorString());
              return 0;
          }
          memcpy( ptr, Inputs.c_data, Inputs.Width * Inputs.Height * Inputs.DataSize * Inputs.ComponentSize);
          r = calResUnmap(iRes);
          if(r != CAL_RESULT_OK)
          {
              fprintf(stderr, "CopyDataToGPU: calResUnmap: Error string is %s\n", calGetErrorString());
              return 0;
          }
         
          /******************/
          //Execute kernel
          /******************/
          CALfunc func;
          r = CAL_RESULT_ERROR;
          if (calModuleGetEntry(&func, ctx, module, "main") != CAL_RESULT_OK)
          {
              fprintf(stderr, "There was an error finding the program entry point.\n");
              fprintf(stderr, "Error string is %s\n", calGetErrorString());
              return 0;
          }
         
          //Setup a computation domain
          CALdomain rect;
          rect.x = 0;
          rect.y = 0;
          rect.width = 2;
          rect.height = 2;
         
         
          //Execute the program
          CALevent event = 0;
          r = calCtxRunProgram(&event, ctx, func, &rect);
          if (r != CAL_RESULT_OK)
          {
              fprintf(stderr, "There was an error running the program, Error code: %d.\n", r);
              fprintf(stderr, "Error string is %s\n", calGetErrorString());
              return 0;
          }
         
          //Wait for the last run to complete.
          while(calCtxIsEventDone(ctx, event) == CAL_RESULT_PENDING);

          /*************/
          //Copy data from GPU
          /**************/
          r = calResMap((CALvoid**)&ptr, &pitch, oRes, 0);
          if(r != CAL_RESULT_OK)
          {
              fprintf(stderr, "CopyDataToGPU: calResMap: input: Error string is %s\n", calGetErrorString());
              return 0;
          }
          memcpy(Outputs.c_data, ptr, Outputs.Width * Outputs.Height * Outputs.DataSize * Outputs.ComponentSize);
          r = calResUnmap(oRes);
          if(r != CAL_RESULT_OK)
          {
              fprintf(stderr, "CopyDataToGPU: calResUnmap: input: Error string is %s\n", calGetErrorString());
              return 0;
          }
         
          cout<<"1    "<<Outputs.f_data[0]<<endl;
          cout<<"2    "<<Outputs.f_data[1]<<endl;
          cout<<"3    "<<Outputs.f_data[2]<<endl;
          cout<<"4    "<<Outputs.f_data[3]<<endl;
          return 0;

       

      The kernel is:

      // Pass-through kernel: each output stream element is assigned the
      // corresponding input stream element.
      kernel
      void test(float a<>, out float res<>) {
          res = a;
      }

      I get this result:

      1    0.1
      2    0
      3    0.3
      4    0

      The Data type and some of the functions are from sample.h.

      Can somebody help me?

        • wrong result by using CAL
          licoah

          I changed CAL_FORMAT_FLOAT_2 to CAL_FORMAT_FLOAT_1

          and got this result:

          1    0.1
          2    0.2
          3    -2.87352e-16
          4    -2.87352e-16

            • wrong result by using CAL
              MicahVillmow

              Please post the IL code that is producing the wrong results.

                • wrong result by using CAL
                  licoah

                  // IL source for the pass-through kernel, stored as one string built from
                  // adjacent literals (one IL statement per literal). Flow of the code below:
                  // main copies the position v0.xy into r272 and calls func 38, then func 0.
                  // func 38 samples resource 0 at that position (through func 2), keeps the
                  // x component and fills y/z/w from literal l0; func 0 writes the result
                  // to output o0.
                  const CALchar ILKernel[] =
                          "il_ps_2_0\n"
                          // Literal pool; only l0 (all zero bits) is referenced by the
                          // instructions below.
                          "dcl_literal l0,0x00000000,0x00000000,0x00000000,0x00000000\n"
                          "dcl_literal l1,0x00000001,0x00000001,0x00000001,0x00000001\n"
                          "dcl_literal l2,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF\n"
                          "dcl_literal l3,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF,0x7FFFFFFF\n"
                          "dcl_literal l4,0x7F800000,0x7F800000,0x7F800000,0x7F800000\n"
                          "dcl_literal l5,0x80000000,0x80000000,0x80000000,0x80000000\n"
                          "dcl_literal l6,0x3E9A209B,0x3E9A209B,0x3E9A209B,0x3E9A209B\n"
                          "dcl_literal l7,0x3F317218,0x3F317218,0x3F317218,0x3F317218\n"
                          "dcl_literal l8,0x40490FDB,0x40490FDB,0x40490FDB,0x40490FDB\n"
                          "dcl_literal l9,0x3FC90FDB,0x3FC90FDB,0x3FC90FDB,0x3FC90FDB\n"
                          "dcl_literal l10,0x00000003,0x00000003,0x00000003,0x00000003\n"
                          "dcl_literal l11,0x00000002,0x00000002,0x00000002,0x00000002\n"
                          // Declarations: output o0, input resource 0 (2D, unnormalized
                          // coordinates, float components) and the position input v0.xy.
                          "dcl_output_usage(generic) o0.xyzw\n"
                          "dcl_resource_id(0)_type(2d,unnorm)_fmtx(float)_fmty(float)_fmtz(float)_fmtw(float)\n"
                          "dcl_input_position_interp(linear_noperspective) v0.xy__\n"
                          // Main body: stash the position, run the kernel (func 38), then
                          // store the result (func 0).
                          "mov r272.xy__,v0.xyzw\n"
                          "call 38 \n"
                          "call 0 \n"
                          "endmain\n"
                          "\n"
                          // func 0: write r271 to the output register.
                          "func 0\n"
                          "mov o0.xyzw,r271.xyzw\n"
                          "ret\n"
                          "\n"
                          // func 2: if r17.x equals l0.x (zero), sample resource 0 at
                          // r18.xy into r19; then return r19.x in r16.x.
                          "func 2\n"
                          "ieq r0.x___,r17.x000,l0.x000\n"
                          "if_logicalnz r0.x000\n"
                          "sample_resource(0)_sampler(0) r19.xyzw,r18.xy00\n"
                          "endif\n"
                          "mov r16.x___,r19.x000\n"
                          "ret_dyn\n"
                          "ret\n"
                          "\n"
                          // func 37: copy r269.x to r270.x.
                          "func 37\n"
                          "mov r270.x___,r269.x000\n"
                          "ret\n"
                          "\n"
                          // func 38: fetch the input at the current position via func 2,
                          // pass it through func 37, and build r271 = (value, 0, 0, 0).
                          "func 38\n"
                          "mov r276.xy__,r272.xy00\n"
                          "mov r17.x___,l0.x000\n"
                          "mov r18.xy__,r276.xy00\n"
                          "call 2 \n"
                          "mov r279.x___,r16.x000\n"
                          "mov r274.x___,r279.x000\n"
                          "mov r269.x___,r274.x000\n"
                          "call 37 \n"
                          "mov r275.x___,r270.x000\n"
                          "mov r277.x___,r275.x000\n"
                          "mov r277._y__,l0.0x00\n"
                          "mov r277.__z_,l0.00x0\n"
                          "mov r277.___w,l0.000x\n"
                          "mov r273.xyzw,r277.xyzw\n"
                          "mov r271.xyzw,r273.xyzw\n"
                          "ret_dyn\n"
                          "ret\n"
                          "\n"
                          "end\n";