Here is a code example which is mostly equivalent to the hellocal example in the AMD Stream Computing SDK. Note that this is useGPU 1.0; it supports all CAL features except AllocateRemote, which could easily be added. useGPU 2.0 is ready to use and provides only two classes for working with the GPU - mem and kernel; that version has no multi-GPU support yet. I'll post some examples soon.
#include "useGPU.h"
#include "cal.h"
#include "calcl.h"
#include <cstdio>
#include <string>
#include <utility>
#include <vector>
using namespace std;
// IL source text of the sample kernel; main() passes it to useGPU::CreateImage.
// Read instruction by instruction, it samples resource 0 at v0.xy into r0,
// computes r0.x - v0.x into r2.x and writes that value to o0.x.
// The literal l0 is declared but no instruction in this program refers to it.
std::string programIL(
    "il_ps_2_0\n"                                   // program header
    "dcl_input_interp(linear) v0.xy\n"              // input v0.xy
    "dcl_output_generic o0\n"                       // output o0
    "dcl_resource_id(0)_type(2d,unnorm)_fmtx(float)_fmty(float)_fmtz(float)_fmtw(float)\n"
    "sample_resource(0)_sampler(0) r0, v0.xy\n"     // r0 <- resource 0 at v0.xy
    "dcl_literal l0, 0x3F000000, 0x3F000000, 0x3F000000, 0x3F000000\n"
    "sub r2.x, r0.x, v0.x\n"                        // r2.x <- r0.x - v0.x
    "mov o0.x, r2.x\n"                              // o0.x <- r2.x
    "ret_dyn\n"
    "end\n");
/**
 * Sample driver: uploads a 256x256 float buffer filled with 256, runs the
 * kernel in programIL over it through useGPU, and prints the top-left 8x8
 * corner of the output buffer.
 *
 * @return 0 on success, 1 if useGPU::GetResult returned a null pointer.
 */
int main()
{
    const int kDim = 256;      // width and height of the input and output buffers
    const int kPreview = 8;    // rows/columns of the result that get printed

    useGPU g;

    // Fill the input with a constant so the kernel output is easy to eyeball.
    float idata[kDim][kDim];
    for (int row = 0; row < kDim; ++row)
    {
        for (int col = 0; col < kDim; ++col)
        {
            idata[row][col] = 256.0f;
        }
    }

    // Pass the address of the first element so that both Allocate calls hand
    // over a plain float*, matching the odata call below.
    // NOTE(review): the leading 0 is presumably a device index - confirm in useGPU.h.
    g.Allocate(0, &idata[0][0], std::make_pair(kDim, kDim), CAL_FORMAT_FLOAT_1);

    // A null pointer is passed for the second buffer; presumably this requests
    // an output resource with no initial contents - verify against useGPU.
    float* odata = NULL;
    g.Allocate(0, odata, std::make_pair(kDim, kDim), CAL_FORMAT_FLOAT_1);

    g.CreateImage(programIL, 0);

    // Pair each index with a symbol name used by the IL program.
    // NOTE(review): indices look like the order of the Allocate calls - confirm.
    std::vector< std::pair<int, std::string> > params;
    params.push_back(std::make_pair(0, "i0"));
    params.push_back(std::make_pair(1, "o0"));

    CALevent e = g.Execute(0, 0, 1, params);

    // Spin until the event is no longer reported as pending.
    while (g.Wait(0, e) == CAL_RESULT_PENDING)
    {
    }

    void* ret = g.GetResult(1);
    if (ret == NULL)
    {
        fprintf(stderr, "GetResult returned no data\n");
        return 1;
    }
    odata = static_cast<float*>(ret);

    // Indexing assumes the result is tightly packed with kDim floats per row
    // - TODO confirm against what GetResult actually returns.
    for (int row = 0; row < kPreview; ++row)
    {
        for (int col = 0; col < kPreview; ++col)
        {
            printf("%.1f ", odata[row * kDim + col]);
        }
        printf("\n");
    }

    getchar();   // keep the console window open until a key is pressed
    return 0;
}