25 Replies Latest reply on Nov 22, 2011 5:28 AM by nareshsankapelly

    BUG: can't get value by read_imagef() at DXGI_FORMAT_R8G8B8A8_UNORM (OpenCL / ID3D10Texture2D interop)

    selva_c

      I wanna use OpenCL/ID3D10Texture2D interoperability.

      But AMD APP SDK don't  have any sample. Does anyone know what how to interop or sample code.

      I wrote a code to interop to my project, but I didn't read date from ID3D10Texture2D by read_imagef().

      please. (sorry for my poorish english..)

        • BUG: can't get value by read_imagef() at DXGI_FORMAT_R8G8B8A8_UNORM (OpenCL / ID3D10Texture2D interop)
          selva_c

          I tyied to get sample by read_imagef with DXGI_FORMAT_R16G16B16A16_FLOAT ID3D10Texture2D, then I could get correct value. but, I could not get correct value (which was always zero) with DXGI_FORMAT_R8G8B8A8_UNORM ID3D10Texture2D by same way.

          I think it is APP SDK's BUG.

            • BUG: can't get value by read_imagef() at DXGI_FORMAT_R8G8B8A8_UNORM (OpenCL / ID3D10Texture2D interop)
              selva_c

              are they any AMD's stuff that watch this forums ??

              does AMD have any bag tracking tool ?

                  • BUG: can't get value by read_imagef() at DXGI_FORMAT_R8G8B8A8_UNORM (OpenCL / ID3D10Texture2D interop)
                    selva_c

                    I updated with latest 11-8 ccc driver. but, this BUG which is getting incorrect value from read_imagef() at  DXGI_FORMAT_R8G8B8A8_UNORM have not been fixed, yet.

                      • BUG: can't get value by read_imagef() at DXGI_FORMAT_R8G8B8A8_UNORM (OpenCL / ID3D10Texture2D interop)
                        genaganna

                         

                        Originally posted by: selva_c I updated with latest 11-8 ccc driver. but, this BUG which is getting incorrect value from read_imagef() at  DXGI_FORMAT_R8G8B8A8_UNORM have not been fixed, yet.

                        Could you please copy your code to reproduce this case?

                          • BUG: can't get value by read_imagef() at DXGI_FORMAT_R8G8B8A8_UNORM (OpenCL / ID3D10Texture2D interop)
                            selva_c

                            Could you please copy your code to reproduce this case?

                            sorry. I don't have any simple reproduce code. I think that I don't have to present any code because every code which have ID3D10Texture2D with DXGI_FORMAT_R8G8B8A8_UNORM and read_imagef() reproduce it.

                            (I tryed it on my project & modified NVIDIA sample code)

                            > to AMD staff

                            Please, try & fix them.

                            or Please Let me ask. How do I write code?

                              • BUG: can't get value by read_imagef() at DXGI_FORMAT_R8G8B8A8_UNORM (OpenCL / ID3D10Texture2D interop)
                                genaganna

                                 

                                Originally posted by: selva_c


                                Could you please copy your code to reproduce this case?

                                sorry. I don't have any simple reproduce code. I think that I don't have to present any code because every code which have ID3D10Texture2D with DXGI_FORMAT_R8G8B8A8_UNORM and read_imagef() reproduce it.

                                (I tryed it on my project & modified NVIDIA sample code)

                                > to AMD staff

                                Please, try & fix them.

                                or Please Let me ask. How do I write code?

                                Selva_c,

                                I hope you are talking about oclSimpleD3D10Texture. I am able to run this on cypress without any problem.  Could you please tell what you have modified in this sample to reproduce.

                                  • BUG: can't get value by read_imagef() at DXGI_FORMAT_R8G8B8A8_UNORM (OpenCL / ID3D10Texture2D interop)
                                    selva_c

                                    * oclSimpleD3D10Texture.h

                                    /* * Copyright 1993-2010 NVIDIA Corporation. All rights reserved. * * Please refer to the NVIDIA end user license agreement (EULA) associated * with this source code for terms and conditions that govern your use of * this software. Any use, reproduction, disclosure, or distribution of * this software and related documentation outside the terms of the EULA * is strictly prohibited. * */ //----------------------------------------------------------------------------- // Global variables //----------------------------------------------------------------------------- #define MAX_EPSILON 10 #define D3D10_SHARING_EXTENSION "cl_nv_d3d10_sharing" static char *SDK_name = "simpleD3D10Texture"; ID3D10Device* g_pd3dDevice = NULL; // Our rendering device IDXGISwapChain* g_pSwapChain = NULL; // The swap chain of the window ID3D10RenderTargetView* g_pSwapChainRTV = NULL; //The Render target view on the swap chain ( used for clear) ID3D10RasterizerState* g_pRasterState = NULL; ID3D10InputLayout* g_pInputLayout = NULL; ID3D10Effect* g_pSimpleEffect = NULL; ID3D10EffectTechnique* g_pSimpleTechnique = NULL; ID3D10EffectVectorVariable* g_pvQuadRect = NULL; ID3D10EffectScalarVariable* g_pUseCase = NULL; ID3D10EffectShaderResourceVariable* g_pTexture2D = NULL; ID3D10EffectShaderResourceVariable* g_pTexture3D = NULL; ID3D10EffectShaderResourceVariable* g_pTextureCube = NULL; static const char g_simpleEffectSrc[] = "float4 g_vQuadRect; \n" \ "int g_UseCase; \n" \ "Texture2D g_Texture2D; \n" \ "Texture3D g_Texture3D; \n" \ "TextureCube g_TextureCube; \n" \ "\n" \ "SamplerState samLinear{ \n" \ " Filter = MIN_MAG_LINEAR_MIP_POINT; \n" \ "};\n" \ "\n" \ "struct Fragment{ \n" \ " float4 Pos : SV_POSITION;\n" \ " float3 Tex : TEXCOORD0; };\n" \ "\n" \ "Fragment VS( uint vertexId : SV_VertexID )\n" \ "{\n" \ " Fragment f;\n" \ " f.Tex = float3( 0.f, 0.f, 0.f); \n"\ " if (vertexId == 1) f.Tex.x = 1.f; \n"\ " else if (vertexId == 2) f.Tex.y = 1.f; \n"\ " else if (vertexId == 3) f.Tex.xy = float2(1.f, 1.f); \n"\ " \n" \ " f.Pos = float4( g_vQuadRect.xy + f.Tex * g_vQuadRect.zw, 0, 1);\n" \ " \n" \ " if (g_UseCase == 1) { \n"\ " if (vertexId == 1) f.Tex.z = 0.5f; \n"\ " else if (vertexId == 2) f.Tex.z = 0.5f; \n"\ " else if (vertexId == 3) f.Tex.z = 1.f; \n"\ " } \n" \ " else if (g_UseCase >= 2) { \n"\ " f.Tex.xy = f.Tex.xy * 2.f - 1.f; \n"\ " } \n" \ " return f;\n" \ "}\n" \ "\n" \ "float4 PS( Fragment f ) : SV_Target\n" \ "{\n" \ " if (g_UseCase == 0) return g_Texture2D.Sample( samLinear, f.Tex.xy ); \n" \ " else if (g_UseCase == 1) return g_Texture3D.Sample( samLinear, f.Tex ); \n" \ " else if (g_UseCase == 2) return g_TextureCube.Sample( samLinear, float3(f.Tex.xy, 1.0) ); \n" \ " else if (g_UseCase == 3) return g_TextureCube.Sample( samLinear, float3(f.Tex.xy, -1.0) ); \n" \ " else if (g_UseCase == 4) return g_TextureCube.Sample( samLinear, float3(1.0, f.Tex.xy) ); \n" \ " else if (g_UseCase == 5) return g_TextureCube.Sample( samLinear, float3(-1.0, f.Tex.xy) ); \n" \ " else if (g_UseCase == 6) return g_TextureCube.Sample( samLinear, float3(f.Tex.x, 1.0, f.Tex.y) ); \n" \ " else if (g_UseCase == 7) return g_TextureCube.Sample( samLinear, float3(f.Tex.x, -1.0, f.Tex.y) ); \n" \ " else return float4(f.Tex, 1);\n" \ "}\n" \ "\n" \ "technique10 Render\n" \ "{\n" \ " pass P0\n" \ " {\n" \ " SetVertexShader( CompileShader( vs_4_0, VS() ) );\n" \ " SetGeometryShader( NULL );\n" \ " SetPixelShader( CompileShader( ps_4_0, PS() ) );\n" \ " }\n" \ "}\n" \ "\n"; // testing/tracing function used pervasively in tests. if the condition is unsatisfied // then spew and fail the function immediately (doing no cleanup) #define AssertOrQuit(x) \ if (!(x)) \ { \ shrLog("Assert unsatisfied in %s at %s:%d\n", __FUNCTION__, __FILE__, __LINE__); \ return 1; \ } clGetDeviceIDsFromD3D10KHR_fn clGetDeviceIDsFromD3D10KHR = NULL; clCreateFromD3D10BufferKHR_fn clCreateFromD3D10BufferKHR = NULL; clCreateFromD3D10Texture2DKHR_fn clCreateFromD3D10Texture2DKHR = NULL; clCreateFromD3D10Texture3DKHR_fn clCreateFromD3D10Texture3DKHR = NULL; clEnqueueAcquireD3D10ObjectsKHR_fn clEnqueueAcquireD3D10ObjectsKHR = NULL; clEnqueueReleaseD3D10ObjectsKHR_fn clEnqueueReleaseD3D10ObjectsKHR = NULL; #define INITPFN(x) \ x = (x ## _fn)clGetExtensionFunctionAddress(#x);\ if(!x) { shrLog("failed getting " #x); Cleanup(EXIT_FAILURE); } // CL objects cl_context cxGPUContext; cl_command_queue cqCommandQueue; cl_device_id device; cl_uint uiNumDevsUsed = 1; // Number of devices used in this sample cl_program cpProgram_tex2d; cl_program cpProgram_texcube; cl_program cpProgram_texvolume; cl_kernel ckKernel_tex2d; cl_kernel ckKernel_texcube; cl_kernel ckKernel_texvolume; size_t szGlobalWorkSize[2]; size_t szLocalWorkSize[2]; cl_mem cl_pbos[2] = {0,0}; cl_int ciErrNum; // Timer and fps vars int iFrameCount = 0; // FPS count for averaging int iFrameTrigger = 90; // FPS trigger for sampling int iFramesPerSec = 0; // frames per second int iTestSets = 3; // # of loop set retriggers before auto exit when bNoPrompt = shrTrue // app configuration parms const char* cProcessor [] = {"OpenCL GPU", "Host C++ CPU"}; int iProcFlag = 0; // 0 = GPU, 1 = CPU shrBOOL bNoPrompt = shrFALSE; // false = normal GL loop, true = Finite period of GL loop (a few seconds) shrBOOL bQATest = shrFALSE; // false = normal GL loop, true = run No-GL test sequence int g_iFrameToCompare = 10; bool g_bDone = false; bool g_bPassed = true; unsigned int g_iAdapter; D3DADAPTER_IDENTIFIER9 g_adapter_id; D3DDISPLAYMODE g_d3ddm; D3DPRESENT_PARAMETERS g_d3dpp; bool g_bWindowed = true; const unsigned int g_WindowWidth = 720; const unsigned int g_WindowHeight = 720; // Data structure for 2D texture shared between DX9 and CL struct { ID3D10Texture2D *pTexture; ID3D10ShaderResourceView *pSRView; cl_mem clTexture; cl_mem clMem; unsigned int pitch; unsigned int width; unsigned int height; } g_texture_2d; // Data structure for cube texture shared between DX9 and CL struct { ID3D10Texture2D *pTexture; ID3D10ShaderResourceView *pSRView; cl_mem clTexture[6]; cl_mem clMem[6]; unsigned int pitch; unsigned int size; } g_texture_cube; // Data structure for volume textures shared between DX9 and CL struct { ID3D10Texture3D *pTexture; ID3D10ShaderResourceView *pSRView; cl_mem clTexture; cl_mem clMem; unsigned int pitch; unsigned int pitchslice; unsigned int width; unsigned int height; unsigned int depth; } g_texture_vol;

                                    • BUG: can't get value by read_imagef() at DXGI_FORMAT_R8G8B8A8_UNORM (OpenCL / ID3D10Texture2D interop)
                                      selva_c

                                      * oclSimpleD3D10Texture.cpp

                                      /* * Copyright 1993-2010 NVIDIA Corporation. All rights reserved. * * Please refer to the NVIDIA end user license agreement (EULA) associated * with this source code for terms and conditions that govern your use of * this software. Any use, reproduction, disclosure, or distribution of * this software and related documentation outside the terms of the EULA * is strictly prohibited. * */ // this define tells to use intermediate buffer // the direct write to the texture doesn't seem to work... for now... //#define USE_STAGING_BUFFER #include <stdlib.h> #include <stdio.h> #include <string.h> #ifdef _WIN32 #define WINDOWS_LEAN_AND_MEAN #include <windows.h> #endif #include <mmsystem.h> // D3D10 includes #include "dynlink_d3d10.h" // OpenCL includes #include <oclUtils.h> #include <shrQATest.h> #include <CL/cl_ext.h> // Project specific includes #include "cl_d3d10.h" #include "oclSimpleD3D10Texture.h" #include "rendercheck_d3d10.h" int *pArgc = NULL; char **pArgv = NULL; //----------------------------------------------------------------------------- // Forward declarations //----------------------------------------------------------------------------- HRESULT InitD3D10( HWND hWnd, bool &noD3DAvailable ); HRESULT InitCL(int argc, const char** argv); HRESULT InitTextures( ); HRESULT ReleaseTextures(); HRESULT DeviceLostHandler(); LRESULT WINAPI MsgProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam); void RunKernels(); void DrawScene(); void RunCL(); void TriggerFPSUpdate(); void Cleanup(int iExitCode=0); void (*pCleanup)(int) = &Cleanup; void TestNoDX9(); //----------------------------------------------------------------------------- // Program main //----------------------------------------------------------------------------- int main(int argc, char** argv) { pArgc = &argc; pArgv = argv; shrQAStart(argc, argv); // start logs shrSetLogFileName ("oclSimpleD3D10Texture.txt"); shrLog("%s Starting...\n\n", argv[0]); bool bCheckD3D10 = dynlinkLoadD3D10API(); // If D3D10 is not present, print an error message and then quit if (!bCheckD3D10) { printf("%s did not detect a D3D10 device, exiting...\n", SDK_name); dynlinkUnloadD3D10API(); // Cleanup and leave Cleanup (EXIT_SUCCESS); } // process command line arguments if (argc > 1) { bQATest = shrCheckCmdLineFlag(argc, (const char **)argv, "qatest"); bNoPrompt = shrCheckCmdLineFlag(argc, (const char **)argv, "noprompt"); } // // create window // // Register the window class WNDCLASSEX wc = { sizeof(WNDCLASSEX), CS_CLASSDC, MsgProc, 0L, 0L, GetModuleHandle(NULL), NULL, NULL, NULL, NULL, "OpenCL/D3D10 Texture InterOP", NULL }; RegisterClassEx( &wc ); int xBorder = ::GetSystemMetrics(SM_CXSIZEFRAME); int yMenu = ::GetSystemMetrics(SM_CYMENU); int yBorder = ::GetSystemMetrics(SM_CYSIZEFRAME); // Create the application's window (padding by window border for uniform BB sizes across OSs) HWND hWnd = CreateWindow( wc.lpszClassName, "OpenCL/D3D10 Texture InterOP", WS_OVERLAPPEDWINDOW, 0, 0, g_WindowWidth + 2*xBorder, g_WindowHeight+ 2*yBorder+yMenu, NULL, NULL, wc.hInstance, NULL ); ShowWindow(hWnd, SW_SHOWDEFAULT); UpdateWindow(hWnd); // init fps timer shrDeltaT (1); bool noD3DAvailable; HRESULT hr = InitD3D10(hWnd, noD3DAvailable); // let's give-up if D3D failed. But we will write "succeed" if(FAILED(hr)) { // Unregister windows class UnregisterClass( wc.lpszClassName, wc.hInstance ); // // and exit with SUCCESS if the reason is unavailability // Cleanup(noD3DAvailable ? EXIT_SUCCESS : EXIT_FAILURE); } if(FAILED(InitCL(argc, (const char **)argv)) || FAILED(InitTextures())) { Cleanup(EXIT_FAILURE); } // // the main loop // while(false == g_bDone) { RunCL(); DrawScene(); // // handle I/O // MSG msg; ZeroMemory( &msg, sizeof(msg) ); while( msg.message!=WM_QUIT ) { if( PeekMessage( &msg, NULL, 0U, 0U, PM_REMOVE ) ) { TranslateMessage( &msg ); DispatchMessage( &msg ); } else { RunCL(); DrawScene(); if(bQATest) { for(int count=0;count<g_iFrameToCompare;count++) { RunCL(); DrawScene(); } const char *ref_image_path = "ref_oclSimpleD3D10Texture.ppm"; const char *cur_image_path = "oclSimpleD3D10Texture.ppm"; // Save a reference of our current test run image CheckRenderD3D10::ActiveRenderTargetToPPM(g_pd3dDevice,cur_image_path); // compare to offical reference image, printing PASS or FAIL. g_bPassed = CheckRenderD3D10::PPMvsPPM(cur_image_path,ref_image_path,argv[0],MAX_EPSILON, 0.15f); PostQuitMessage(0); g_bDone = true; } } } }; // Unregister windows class UnregisterClass( wc.lpszClassName, wc.hInstance ); // Cleanup and leave Cleanup (g_bPassed ? EXIT_SUCCESS : EXIT_FAILURE); } //----------------------------------------------------------------------------- // Name: TriggerFPSUpdate() // Desc: Triggers reset of fps vars at transition //----------------------------------------------------------------------------- void TriggerFPSUpdate() { iFrameCount = 0; shrDeltaT(1); iFramesPerSec = 1; iFrameTrigger = 2; } //----------------------------------------------------------------------------- // Name: InitD3D10() // Desc: Initializes Direct3D //----------------------------------------------------------------------------- HRESULT InitD3D10(HWND hWnd, bool &noD3DAvailable) { HRESULT hr = S_OK; noD3DAvailable = false; // Select our adapter IDXGIAdapter* pCLCapableAdapter = NULL; { // iterate through the candidate adapters IDXGIFactory *pFactory; hr = sFnPtr_CreateDXGIFactory(__uuidof(IDXGIFactory), (void**)(&pFactory) ); if(FAILED(hr)) { noD3DAvailable = true; return hr; } for (UINT adapter = 0; !pCLCapableAdapter; ++adapter) { // get a candidate DXGI adapter IDXGIAdapter* pAdapter = NULL; hr = pFactory->EnumAdapters(adapter, &pAdapter); if (FAILED(hr)) { break; } // TODO: check here if the adapter is ok for CL { // if so, mark it as the one against which to create our d3d10 device pCLCapableAdapter = pAdapter; break; } pAdapter->Release(); } pFactory->Release(); } if(!pCLCapableAdapter) if(FAILED(hr)) { noD3DAvailable = true; return E_FAIL; } // Set up the structure used to create the device and swapchain DXGI_SWAP_CHAIN_DESC sd; ZeroMemory( &sd, sizeof(sd) ); sd.BufferCount = 1; sd.BufferDesc.Width = g_WindowWidth; sd.BufferDesc.Height = g_WindowHeight; sd.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; sd.BufferDesc.RefreshRate.Numerator = 60; sd.BufferDesc.RefreshRate.Denominator = 1; sd.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; sd.OutputWindow = hWnd; sd.SampleDesc.Count = 1; sd.SampleDesc.Quality = 0; sd.Windowed = TRUE; // Create device and swapchain hr = sFnPtr_D3D10CreateDeviceAndSwapChain1( pCLCapableAdapter, D3D10_DRIVER_TYPE_HARDWARE, NULL, 0, D3D10_FEATURE_LEVEL_10_0, D3D10_1_SDK_VERSION, &sd, &g_pSwapChain, (ID3D10Device1**)&g_pd3dDevice); if(FAILED(hr)) { noD3DAvailable = true; return hr; } pCLCapableAdapter->Release(); pCLCapableAdapter = NULL; // Create a render target view of the swapchain ID3D10Texture2D* pBuffer; hr = g_pSwapChain->GetBuffer( 0, __uuidof( ID3D10Texture2D ), (LPVOID*)&pBuffer); if(FAILED(hr)) return hr; hr = g_pd3dDevice->CreateRenderTargetView(pBuffer, NULL, &g_pSwapChainRTV); pBuffer->Release(); if(FAILED(hr)) return hr; g_pd3dDevice->OMSetRenderTargets(1, &g_pSwapChainRTV, NULL); // Setup the viewport D3D10_VIEWPORT vp; vp.Width = g_WindowWidth; vp.Height = g_WindowHeight; vp.MinDepth = 0.0f; vp.MaxDepth = 1.0f; vp.TopLeftX = 0; vp.TopLeftY = 0; g_pd3dDevice->RSSetViewports( 1, &vp ); // Setup the effect { ID3D10Blob* pCompiledEffect; ID3D10Blob* pErrors = NULL; hr = sFnPtr_D3D10CompileEffectFromMemory( (void*)g_simpleEffectSrc, sizeof(g_simpleEffectSrc), NULL, NULL, // pDefines NULL, // pIncludes 0, // HLSL flags 0, // FXFlags &pCompiledEffect, &pErrors); if( pErrors ) { LPVOID l_pError = NULL; l_pError = pErrors->GetBufferPointer(); // then cast to a char* to see it in the locals window shrLog("Compilation error: \n %s", (char*) l_pError); } if(FAILED(hr)) return hr; hr = sFnPtr_D3D10CreateEffectFromMemory( pCompiledEffect->GetBufferPointer(), pCompiledEffect->GetBufferSize(), 0, // FXFlags g_pd3dDevice, NULL, &g_pSimpleEffect); pCompiledEffect->Release(); g_pSimpleTechnique = g_pSimpleEffect->GetTechniqueByName( "Render" ); g_pvQuadRect = g_pSimpleEffect->GetVariableByName("g_vQuadRect")->AsVector(); g_pUseCase = g_pSimpleEffect->GetVariableByName("g_UseCase")->AsScalar(); g_pTexture2D = g_pSimpleEffect->GetVariableByName("g_Texture2D")->AsShaderResource(); g_pTexture3D = g_pSimpleEffect->GetVariableByName("g_Texture3D")->AsShaderResource(); g_pTextureCube = g_pSimpleEffect->GetVariableByName("g_TextureCube")->AsShaderResource(); // Setup no Input Layout g_pd3dDevice->IASetInputLayout(0); g_pd3dDevice->IASetPrimitiveTopology( D3D10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP ); } D3D10_RASTERIZER_DESC rasterizerState; rasterizerState.FillMode = D3D10_FILL_SOLID; rasterizerState.CullMode = D3D10_CULL_FRONT; rasterizerState.FrontCounterClockwise = false; rasterizerState.DepthBias = false; rasterizerState.DepthBiasClamp = 0; rasterizerState.SlopeScaledDepthBias = 0; rasterizerState.DepthClipEnable = false; rasterizerState.ScissorEnable = false; rasterizerState.MultisampleEnable = false; rasterizerState.AntialiasedLineEnable = false; g_pd3dDevice->CreateRasterizerState( &rasterizerState, &g_pRasterState ); g_pd3dDevice->RSSetState( g_pRasterState ); return S_OK; } //----------------------------------------------------------------------------- // Name: CreateKernelProgram() // Desc: Creates OpenCL program and kernel instances //----------------------------------------------------------------------------- HRESULT CreateKernelProgram( const char *exepath, const char *clName, const char *clPtx, const char *kernelEntryPoint, cl_program &cpProgram, cl_kernel &ckKernel ) { // Program Setup size_t program_length; const char* source_path = shrFindFilePath(clName, exepath); char *source = oclLoadProgSource(source_path, "", &program_length); oclCheckErrorEX(source != NULL, shrTRUE, pCleanup); // create the program cpProgram = clCreateProgramWithSource(cxGPUContext, 1,(const char **) &source, &program_length, &ciErrNum); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); free(source); // build the program #ifdef USE_STAGING_BUFFER static char *opts = "-cl-fast-relaxed-math -DUSE_STAGING_BUFFER"; #else static char *opts = "-cl-fast-relaxed-math"; #endif ciErrNum = clBuildProgram(cpProgram, 0, NULL, opts, NULL, NULL); if (ciErrNum != CL_SUCCESS) { // write out standard error, Build Log and PTX, then cleanup and exit shrLogEx(LOGBOTH | ERRORMSG, ciErrNum, STDERROR); oclLogBuildInfo(cpProgram, oclGetFirstDev(cxGPUContext)); oclLogPtx(cpProgram, oclGetFirstDev(cxGPUContext), clPtx); Cleanup(EXIT_FAILURE); } // create the kernel ckKernel = clCreateKernel(cpProgram, kernelEntryPoint, &ciErrNum); if (!ckKernel) { Cleanup(EXIT_FAILURE); } // set the args values return ciErrNum ? E_FAIL : S_OK; } //----------------------------------------------------------------------------- // Name: InitCL() // Desc: Get platform and devices and create context and queues //----------------------------------------------------------------------------- HRESULT InitCL(int argc, const char** argv) { cl_platform_id cpPlatform; //Get the NVIDIA platform ciErrNum = oclGetPlatformID(&cpPlatform); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); // // Initialize extension functions for D3D10 // INITPFN(clGetDeviceIDsFromD3D10KHR); INITPFN(clCreateFromD3D10BufferKHR); INITPFN(clCreateFromD3D10Texture2DKHR); INITPFN(clCreateFromD3D10Texture3DKHR); INITPFN(clEnqueueAcquireD3D10ObjectsKHR); INITPFN(clEnqueueReleaseD3D10ObjectsKHR); // Query the OpenCL device that would be good for the current D3D device // We need to take the one that is on the same Gfx card. // Get the device ids for the adapter cl_device_id cdDevice; cl_uint num_devices = 0; ciErrNum = clGetDeviceIDsFromD3D10KHR( cpPlatform, CL_D3D10_DEVICE_KHR, g_pd3dDevice, CL_PREFERRED_DEVICES_FOR_D3D10_KHR, 1, &cdDevice, &num_devices); if (ciErrNum == -1) { shrLog("No OpenCL device available that supports D3D10, exiting...\n"); Cleanup (EXIT_SUCCESS); } else { oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); } cl_context_properties props[] = { CL_CONTEXT_D3D10_DEVICE_KHR, (cl_context_properties)g_pd3dDevice, CL_CONTEXT_PLATFORM, (cl_context_properties)cpPlatform, 0 }; cxGPUContext = clCreateContext(props, 1, &cdDevice, NULL, NULL, &ciErrNum); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); // Log device used shrLog("Device: "); oclPrintDevName(LOGBOTH, cdDevice); shrLog("\n"); // create a command-queue cqCommandQueue = clCreateCommandQueue(cxGPUContext, cdDevice, 0, &ciErrNum); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); CreateKernelProgram(argv[0], "texture_2d.cl", "texture_2d.ptx", "cl_kernel_texture_2d", cpProgram_tex2d, ckKernel_tex2d); CreateKernelProgram(argv[0], "texture_cube.cl", "texture_cube.ptx", "cl_kernel_texture_cube", cpProgram_texcube, ckKernel_texcube); CreateKernelProgram(argv[0], "texture_volume.cl", "texture_volume.ptx", "cl_kernel_texture_volume", cpProgram_texvolume, ckKernel_texvolume); return S_OK; } //----------------------------------------------------------------------------- // Name: InitTextures() // Desc: Initializes Direct3D Textures (allocation and initialization) //----------------------------------------------------------------------------- HRESULT InitTextures() { // // create the D3D resources we'll be using // // 2D texture { g_texture_2d.width = 256; g_texture_2d.pitch = g_texture_2d.width; // for now, let's set pitch == to width g_texture_2d.height = 256; D3D10_TEXTURE2D_DESC desc; ZeroMemory( &desc, sizeof(D3D10_TEXTURE2D_DESC) ); desc.Width = g_texture_2d.width; desc.Height = g_texture_2d.height; desc.MipLevels = 1; desc.ArraySize = 1; desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; desc.SampleDesc.Count = 1; desc.Usage = D3D10_USAGE_DEFAULT; desc.BindFlags = D3D10_BIND_SHADER_RESOURCE; if (FAILED(g_pd3dDevice->CreateTexture2D( &desc, NULL, &g_texture_2d.pTexture))) return E_FAIL; if (FAILED(g_pd3dDevice->CreateShaderResourceView(g_texture_2d.pTexture, NULL, &g_texture_2d.pSRView)) ) return E_FAIL; g_pTexture2D->SetResource( g_texture_2d.pSRView ); // Create the OpenCL part g_texture_2d.clTexture = clCreateFromD3D10Texture2DKHR( cxGPUContext, 0, g_texture_2d.pTexture, 0, &ciErrNum); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); #ifdef USE_STAGING_BUFFER // Memory Setup : allocate 4 bytes (RGBA) pixels // Create the intermediate buffers in which OpenCL will do the rendering // then we will blit the result back to the texture that we will have mapped to OpenCL area g_texture_2d.clMem = clCreateBuffer(cxGPUContext, CL_MEM_READ_ONLY, 4 * g_texture_2d.pitch * g_texture_2d.height, NULL, &ciErrNum); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); #endif } // 3D texture { g_texture_vol.width = 64; g_texture_vol.height = 64; g_texture_vol.depth = 64; g_texture_vol.pitch = g_texture_vol.width; g_texture_vol.pitchslice = g_texture_vol.pitch * g_texture_vol.height; D3D10_TEXTURE3D_DESC desc; ZeroMemory( &desc, sizeof(D3D10_TEXTURE3D_DESC) ); desc.Width = g_texture_vol.width; desc.Height = g_texture_vol.height; desc.Depth = g_texture_vol.depth; desc.MipLevels = 1; desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; desc.Usage = D3D10_USAGE_DEFAULT; desc.BindFlags = D3D10_BIND_SHADER_RESOURCE; if (FAILED(g_pd3dDevice->CreateTexture3D( &desc, NULL, &g_texture_vol.pTexture))) return E_FAIL; if (FAILED(g_pd3dDevice->CreateShaderResourceView(g_texture_vol.pTexture, NULL, &g_texture_vol.pSRView)) ) return E_FAIL; g_pTexture3D->SetResource( g_texture_vol.pSRView ); g_texture_vol.clTexture = clCreateFromD3D10Texture3DKHR( cxGPUContext, 0, g_texture_vol.pTexture, 0, //Miplevel &ciErrNum); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); // Create the staging buffer for the volume texture because it is impossible to directly write into it g_texture_vol.clMem = clCreateBuffer(cxGPUContext, CL_MEM_READ_ONLY, 4 * g_texture_vol.pitch * g_texture_vol.height * g_texture_vol.depth, NULL, &ciErrNum); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); } // cube texture { g_texture_cube.size = 64; g_texture_cube.pitch = g_texture_cube.size; D3D10_TEXTURE2D_DESC desc; ZeroMemory( &desc, sizeof(D3D10_TEXTURE2D_DESC) ); desc.Width = g_texture_cube.size; desc.Height = g_texture_cube.size; desc.MipLevels = 1; desc.ArraySize = 6; desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM; desc.SampleDesc.Count = 1; desc.Usage = D3D10_USAGE_DEFAULT; desc.BindFlags = D3D10_BIND_SHADER_RESOURCE; desc.MiscFlags = D3D10_RESOURCE_MISC_TEXTURECUBE ; if (FAILED(g_pd3dDevice->CreateTexture2D( &desc, NULL, &g_texture_cube.pTexture))) return E_FAIL; D3D10_SHADER_RESOURCE_VIEW_DESC SRVDesc; ZeroMemory( &SRVDesc, sizeof(SRVDesc) ); SRVDesc.Format = desc.Format; SRVDesc.ViewDimension = D3D10_SRV_DIMENSION_TEXTURECUBE; SRVDesc.TextureCube.MipLevels = desc.MipLevels; SRVDesc.TextureCube.MostDetailedMip = 0; if (FAILED(g_pd3dDevice->CreateShaderResourceView(g_texture_cube.pTexture, &SRVDesc, &g_texture_cube.pSRView)) ) return E_FAIL; g_pTextureCube->SetResource( g_texture_cube.pSRView ); // Create the OpenCL part for(int i=0; i<6; i++) { g_texture_cube.clTexture[i] = clCreateFromD3D10Texture2DKHR( cxGPUContext, 0, g_texture_cube.pTexture, (D3DCUBEMAP_FACES)i, // face &ciErrNum); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); #ifdef USE_STAGING_BUFFER g_texture_cube.clMem[i] = clCreateBuffer(cxGPUContext, CL_MEM_READ_ONLY, 4 * g_texture_cube.pitch * g_texture_cube.size, NULL, &ciErrNum); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); #endif } } return S_OK; } //----------------------------------------------------------------------------- // Name: ReleaseTextures() // Desc: Release Direct3D Textures (free-ing) //----------------------------------------------------------------------------- HRESULT ReleaseTextures() { // // clean up Direct3D // { if (g_texture_2d.pSRView != NULL) g_texture_2d.pSRView->Release(); if (g_texture_2d.pTexture != NULL) g_texture_2d.pTexture->Release(); if (g_texture_cube.pSRView != NULL) g_texture_cube.pSRView->Release(); if (g_texture_cube.pTexture != NULL) g_texture_cube.pTexture->Release(); if (g_texture_vol.pSRView != NULL) g_texture_vol.pSRView->Release(); if (g_texture_vol.pSRView != NULL) g_texture_vol.pTexture->Release(); if (g_pInputLayout != NULL) g_pInputLayout->Release(); if (g_pSimpleEffect != NULL) g_pSimpleEffect->Release(); if (g_pSwapChainRTV != NULL) g_pSwapChainRTV->Release(); if (g_pSwapChain != NULL) g_pSwapChain->Release(); } return S_OK; } //----------------------------------------------------------------------------- // Name: AcquireTexturesForOpenCL() // Desc: Acquire textures for OpenCL //----------------------------------------------------------------------------- void AcquireTexturesForOpenCL() { cl_event event; cl_mem memToAcquire[6+1+1]; memToAcquire[0] = g_texture_2d.clTexture; memToAcquire[1] = g_texture_vol.clTexture; memToAcquire[2] = g_texture_cube.clTexture[0]; memToAcquire[3] = g_texture_cube.clTexture[1]; memToAcquire[4] = g_texture_cube.clTexture[2]; memToAcquire[5] = g_texture_cube.clTexture[3]; memToAcquire[6] = g_texture_cube.clTexture[4]; memToAcquire[7] = g_texture_cube.clTexture[5]; // do the acquire ciErrNum = clEnqueueAcquireD3D10ObjectsKHR( cqCommandQueue, 6 + 1 + 1, //cube map + tex2d + volume texture memToAcquire, 0, NULL, &event); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); // make sure the event type is correct cl_uint eventType = 0; ciErrNum = clGetEventInfo( event, CL_EVENT_COMMAND_TYPE, sizeof(eventType), &eventType, NULL); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); if(eventType != CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR) { shrLog("event type is not CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR !\n"); } ciErrNum = clReleaseEvent(event); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); } //----------------------------------------------------------------------------- // Name: ReleaseTexturesFromOpenCL() // Desc: Release Textures from OpenCL //----------------------------------------------------------------------------- void ReleaseTexturesFromOpenCL() { cl_event event; cl_mem memToAcquire[6+1+1]; memToAcquire[0] = g_texture_2d.clTexture; memToAcquire[1] = g_texture_vol.clTexture; memToAcquire[2] = g_texture_cube.clTexture[0]; memToAcquire[3] = g_texture_cube.clTexture[1]; memToAcquire[4] = g_texture_cube.clTexture[2]; memToAcquire[5] = g_texture_cube.clTexture[3]; memToAcquire[6] = g_texture_cube.clTexture[4]; memToAcquire[7] = g_texture_cube.clTexture[5]; // do the acquire ciErrNum = clEnqueueReleaseD3D10ObjectsKHR( cqCommandQueue, 6 + 1 + 1, //cube map + tex2d + volume texture memToAcquire, 0, NULL, &event); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); // make sure the event type is correct cl_uint eventType = 0; ciErrNum = clGetEventInfo( event, CL_EVENT_COMMAND_TYPE, sizeof(eventType), &eventType, NULL); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); if(eventType != CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR) { shrLog("event type is not CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR !\n"); } ciErrNum = clReleaseEvent(event); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); } //----------------------------------------------------------------------------- //! Run the CL part of the computation //----------------------------------------------------------------------------- void RunKernels() { static float t = 0.0f; // ---------------------------------------------------------------- // populate the 2d texture { // set global and local work item dimensions szLocalWorkSize[0] = 16; szLocalWorkSize[1] = 16; szGlobalWorkSize[0] = shrRoundUp((int)szLocalWorkSize[0], g_texture_2d.pitch); szGlobalWorkSize[1] = shrRoundUp((int)szLocalWorkSize[1], g_texture_2d.height); // set the args values #ifdef USE_STAGING_BUFFER ciErrNum |= clSetKernelArg(ckKernel_tex2d, 0, sizeof(g_texture_2d.clMem), (void *) &(g_texture_2d.clMem)); #else ciErrNum |= clSetKernelArg(ckKernel_tex2d, 0, sizeof(g_texture_2d.clTexture), (void *) &(g_texture_2d.clTexture)); #endif ciErrNum |= clSetKernelArg(ckKernel_tex2d, 1, sizeof(g_texture_2d.clTexture), (void *) &(g_texture_2d.clTexture)); ciErrNum |= clSetKernelArg(ckKernel_tex2d, 2, sizeof(g_texture_2d.width), &g_texture_2d.width); ciErrNum |= clSetKernelArg(ckKernel_tex2d, 3, sizeof(g_texture_2d.height), &g_texture_2d.height); ciErrNum |= clSetKernelArg(ckKernel_tex2d, 4, sizeof(g_texture_2d.pitch), &g_texture_2d.pitch); ciErrNum |= clSetKernelArg(ckKernel_tex2d, 5, sizeof(t), &t); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); // launch computation kernel ciErrNum |= clEnqueueNDRangeKernel(cqCommandQueue, ckKernel_tex2d, 2, NULL, szGlobalWorkSize, szLocalWorkSize, 0, NULL, NULL); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); #ifdef USE_STAGING_BUFFER size_t dst[3] = { 0, 0, 0}; size_t region[3] = { g_texture_2d.width, g_texture_2d.height, 1}; ciErrNum |= clEnqueueCopyBufferToImage(cqCommandQueue, g_texture_2d.clMem /* src_buffer */, g_texture_2d.clTexture /* dst_image */, 0 /* src_offset */, dst /* dst_origin[3] */, region /* region[3] */, 0 /* num_events_in_wait_list */, NULL /* event_wait_list */, NULL /* event */); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); #endif } // ---------------------------------------------------------------- // populate the volume texture { // set global and local work item dimensions szLocalWorkSize[0] = 16; szLocalWorkSize[1] = 16; szGlobalWorkSize[0] = shrRoundUp((int)szLocalWorkSize[0], g_texture_vol.pitch); szGlobalWorkSize[1] = shrRoundUp((int)szLocalWorkSize[1], g_texture_vol.height); // set the args values ciErrNum |= clSetKernelArg(ckKernel_texvolume, 0, sizeof(g_texture_vol.clTexture), (void *) &(g_texture_vol.clTexture)); ciErrNum |= clSetKernelArg(ckKernel_texvolume, 1, sizeof(g_texture_vol.width), &g_texture_vol.width); ciErrNum |= clSetKernelArg(ckKernel_texvolume, 2, sizeof(g_texture_vol.height), &g_texture_vol.height); ciErrNum |= clSetKernelArg(ckKernel_texvolume, 3, sizeof(g_texture_vol.depth), &g_texture_vol.depth); ciErrNum |= clSetKernelArg(ckKernel_texvolume, 4, sizeof(g_texture_vol.pitch), &g_texture_vol.pitch); ciErrNum |= clSetKernelArg(ckKernel_texvolume, 5, sizeof(g_texture_vol.pitchslice), &g_texture_vol.pitchslice); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); // launch computation kernel ciErrNum |= clEnqueueNDRangeKernel(cqCommandQueue, ckKernel_texvolume, 2, NULL, szGlobalWorkSize, szLocalWorkSize, 0, NULL, NULL); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); } // ---------------------------------------------------------------- // populate the faces of the cube map for (int face = 0; face < 6; ++face) { // set global and local work item dimensions szLocalWorkSize[0] = 16; szLocalWorkSize[1] = 16; szGlobalWorkSize[0] = shrRoundUp((int)szLocalWorkSize[0], g_texture_cube.pitch); szGlobalWorkSize[1] = shrRoundUp((int)szLocalWorkSize[1], g_texture_cube.size); // set the args values #ifdef USE_STAGING_BUFFER ciErrNum |= clSetKernelArg(ckKernel_texcube, 0, sizeof(g_texture_cube.clMem[face]), (void *) &(g_texture_cube.clMem[face])); #else ciErrNum |= clSetKernelArg(ckKernel_texcube, 0, sizeof(g_texture_cube.clTexture[face]), (void *) &(g_texture_cube.clTexture[face])); #endif ciErrNum |= clSetKernelArg(ckKernel_texcube, 1, sizeof(g_texture_cube.size), &g_texture_cube.size); ciErrNum |= clSetKernelArg(ckKernel_texcube, 2, sizeof(g_texture_cube.pitch), &g_texture_cube.pitch); ciErrNum |= clSetKernelArg(ckKernel_texcube, 3, sizeof(int), &face); ciErrNum |= clSetKernelArg(ckKernel_texcube, 4, sizeof(t), &t); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); // launch computation kernel ciErrNum |= clEnqueueNDRangeKernel(cqCommandQueue, ckKernel_texcube, 2, NULL, szGlobalWorkSize, szLocalWorkSize, 0, NULL, NULL); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); #ifdef USE_STAGING_BUFFER size_t dst[3] = { 0, 0, 0}; size_t region[3] = { g_texture_cube.size, g_texture_cube.size, 1}; ciErrNum |= clEnqueueCopyBufferToImage(cqCommandQueue, g_texture_cube.clMem[face]/* src_buffer */, g_texture_cube.clTexture[face]/* dst_image */, 0 /* src_offset */, dst /* dst_origin[3] */, region /* region[3] */, 0 /* num_events_in_wait_list */, NULL /* event_wait_list */, NULL /* event */); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); #endif } t += 0.1f; } //----------------------------------------------------------------------------- //! RestoreContextResources // - this function restores all of the OpenCL/D3D resources and contexts //----------------------------------------------------------------------------- HRESULT RestoreContextResources() { // Reinitialize D3D10 resources, CL resources/contexts InitCL(0, NULL); InitTextures(); return S_OK; } //----------------------------------------------------------------------------- //! Draw the final result on the screen //----------------------------------------------------------------------------- void DrawScene() { // Clear the backbuffer to a black color float ClearColor[4] = {0.5f, 0.5f, 0.6f, 1.0f}; g_pd3dDevice->ClearRenderTargetView( g_pSwapChainRTV, ClearColor); // // draw the 2d texture // g_pUseCase->SetInt( 0 ); float quadRect[4] = { -0.9f, -0.9f, 0.7f , 0.7f }; g_pvQuadRect->SetFloatVector( (float* ) &quadRect); g_pSimpleTechnique->GetPassByIndex(0)->Apply(0); g_pd3dDevice->Draw( 4, 0 ); // // draw a slice the 3d texture // g_pUseCase->SetInt( 1 ); quadRect[1] = 0.1f; g_pvQuadRect->SetFloatVector( (float* ) &quadRect); g_pSimpleTechnique->GetPassByIndex(0)->Apply(0); g_pd3dDevice->Draw( 4, 0 ); // // draw the 6 faces of the cube texture // float faceRect[4] = { -0.1f, -0.9f, 0.5f, 0.5f }; for ( int f = 0; f < 6; f++ ) { if (f == 3) { faceRect[0] += 0.55f ; faceRect[1] = -0.9f ; } g_pUseCase->SetInt( 2 + f ); g_pvQuadRect->SetFloatVector( (float* ) &faceRect); g_pSimpleTechnique->GetPassByIndex(0)->Apply(0); g_pd3dDevice->Draw( 4, 0 ); faceRect[1] += 0.6f ; } // Present the backbuffer contents to the display g_pSwapChain->Present( 0, 0); } //----------------------------------------------------------------------------- // Name: Cleanup() // Desc: Releases all previously initialized objects //----------------------------------------------------------------------------- void Cleanup(int iExitCode) { // Cleanup allocated objects shrLog("\nStarting Cleanup...\n\n"); if(ckKernel_tex2d)clReleaseKernel(ckKernel_tex2d); if(ckKernel_texcube)clReleaseKernel(ckKernel_texcube); if(ckKernel_texvolume)clReleaseKernel(ckKernel_texvolume); if(cpProgram_tex2d)clReleaseProgram(cpProgram_tex2d); if(cpProgram_texcube)clReleaseProgram(cpProgram_texcube); if(cpProgram_texvolume)clReleaseProgram(cpProgram_texvolume); if(cqCommandQueue)clReleaseCommandQueue(cqCommandQueue); if(cxGPUContext)clReleaseContext(cxGPUContext); //... TODO: add more cleanup // release the D3D resources we created ReleaseTextures(); if (g_pd3dDevice != NULL) g_pd3dDevice->Release(); dynlinkUnloadD3D10API(); // finalize logs and leave shrQAFinish2(bQATest, *pArgc, (const char **)pArgv, (iExitCode == EXIT_SUCCESS) ? QA_PASSED : QA_FAILED); shrLogEx(LOGBOTH | CLOSELOG, 0, "Exiting...\n"); if (!( bNoPrompt || bQATest )) { if (iExitCode != EXIT_SUCCESS) { printf("Press <Enter> to Quit\n"); #ifdef WIN32 getchar(); #endif } } exit (iExitCode); } //----------------------------------------------------------------------------- // Name: RunCL() // Desc: Launches the CL kernels to fill in the texture data //----------------------------------------------------------------------------- void RunCL() { // // map the resources we've registered so we can access them in cl // - it is most efficient to map and unmap all resources in a single call, // and to have the map/unmap calls be the boundary between using the GPU // for Direct3D and cl // // // Transfer ownership from D3D to OpenCL // AcquireTexturesForOpenCL(); // // run kernels which will populate the contents of those textures // RunKernels(); // // give back the ownership to D3D // ReleaseTexturesFromOpenCL(); } //----------------------------------------------------------------------------- // Name: MsgProc() // Desc: The window's message handler //----------------------------------------------------------------------------- static LRESULT WINAPI MsgProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam) { switch(msg) { case WM_KEYDOWN: if(wParam==VK_ESCAPE) { g_bDone = true; Cleanup(); PostQuitMessage(0); return 0; } break; case WM_DESTROY: g_bDone = true; Cleanup(); PostQuitMessage(0); return 0; case WM_PAINT: ValidateRect(hWnd, NULL); return 0; } return DefWindowProc(hWnd, msg, wParam, lParam); }

                                      • BUG: can't get value by read_imagef() at DXGI_FORMAT_R8G8B8A8_UNORM (OpenCL / ID3D10Texture2D interop)
                                        selva_c

                                        * texture_volume.cl

                                        /* * Copyright 1993-2010 NVIDIA Corporation. All rights reserved. * * Please refer to the NVIDIA end user license agreement (EULA) associated * with this source code for terms and conditions that govern your use of * this software. Any use, reproduction, disclosure, or distribution of * this software and related documentation outside the terms of the EULA * is strictly prohibited. * */ #pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable /* * Paint a 3D texture with a gradient in X (blue) and Z (green), and have every * other Z slice have full red. */ __kernel void cl_kernel_texture_volume( __write_only image3d_t texOut, uint width, uint height, uint depth, uint pitch, uint pitchSlice ) { const int tx = get_local_id(0); // Cuda equivalent : threadIdx.x const int ty = get_local_id(1); // Cuda equivalent : threadIdx.y const int x = get_global_id(0); // Cuda equivalent : blockIdx.x*bw + tx const int y = get_global_id(1); // Cuda equivalent : blockIdx.y*bh + ty // in the case where, due to quantization into grids, we have // more threads than pixels, skip the threads which don't // correspond to valid pixels if (x >= width || y >= height) return; // walk across the Z slices of this texture. it should be noted that // this is far from optimal data access. float4 pixel; int4 pos = 0; pos.x = x; pos.y = y; for (pos.z = 0; pos.z < depth; ++pos.z) { // get a pointer to this pixel pixel.x = (float)x / (float)(pitch - 1); pixel.y = (float)y / (float)(height - 1); pixel.z = (float)pos.z / (float)(depth - 1); pixel.w = 1.0; write_imagef( texOut, pos, pixel); } }

                                        • BUG: can't get value by read_imagef() at DXGI_FORMAT_R8G8B8A8_UNORM (OpenCL / ID3D10Texture2D interop)
                                          selva_c

                                          please replace them which oclSimpleD3D10Texture.h, oclSimpleD3D10Texture.cpp and texture_volume.cl  in original oclSimpleD3D10Texture sample to above code, for reproducing.

                                            • BUG: can't get value by read_imagef() at DXGI_FORMAT_R8G8B8A8_UNORM (OpenCL / ID3D10Texture2D interop)
                                              genaganna

                                               

                                              Originally posted by: selva_c please replace them which oclSimpleD3D10Texture.h, oclSimpleD3D10Texture.cpp and texture_volume.cl  in original oclSimpleD3D10Texture sample to above code, for reproducing.

                                              You have talked about Texture2D and new you have given Texture3D.

                                              Are you facing texture2D or texture3D issue?

                                                • BUG: can't get value by read_imagef() at DXGI_FORMAT_R8G8B8A8_UNORM (OpenCL / ID3D10Texture2D interop)
                                                  selva_c

                                                  You have talked about Texture2D and new you have given Texture3D.

                                                  Are you facing texture2D or texture3D issue?

                                                  why could you test above code, firstly ....

                                                  I wanna use interop with texture2D but same thing happend with texture3D, too.

                                                  anyway, would you try to execute sample program & analys source code ?

                                                    • BUG: can't get value by read_imagef() at DXGI_FORMAT_R8G8B8A8_UNORM (OpenCL / ID3D10Texture2D interop)
                                                      genaganna

                                                       

                                                      Originally posted by: selva_c

                                                      why could you test above code, firstly ....

                                                      I wanna use interop with texture2D but same thing happend with texture3D, too.

                                                      anyway, would you try to execute sample program & analys source code ?

                                                      I can write also based on your explanation. My idea is "It would be good if you share test case you have already".

                                                       .cpp was not copied completely.  I will write myself and let you know.

                                                      It would be good if you tell what modifications you did for oclSimpleD3D10Texture.

                                                        • BUG: can't get value by read_imagef() at DXGI_FORMAT_R8G8B8A8_UNORM (OpenCL / ID3D10Texture2D interop)
                                                          selva_c

                                                          I can write also based on your explanation. My idea is "It would be good if you share test case you have already".

                                                           .cpp was not copied completely.  I will write myself and let you know.

                                                          It would be good if you tell what modifications you did for oclSimpleD3D10Texture.

                                                          I'm sorry for missing check pasting code of oclSimpleD3D10Texture.cpp.

                                                          I pasted diff format in below. Please, try it & check the different when switch replacing all DXGI_FORMAT_R8G8B8A8_UNORM to DXGI_FORMAT_R16G16B16A16_FLOAT in oclSimpleD3D10Texture.cpp

                                                          --- oclSimpleD3D10Texture.cpp 2011-08-09 16:35:52.000000000 +0900 +++ oclSimpleD3D10TextureKHR.cpp 2011-10-04 13:05:02.000000000 +0900 @@ -28,10 +28,10 @@ // OpenCL includes #include <oclUtils.h> #include <shrQATest.h> -#include <CL/cl_d3d10_ext.h> #include <CL/cl_ext.h> // Project specific includes +#include "cl_d3d10.h" #include "oclSimpleD3D10Texture.h" #include "rendercheck_d3d10.h" @@ -257,15 +257,16 @@ sd.Windowed = TRUE; // Create device and swapchain - hr = sFnPtr_D3D10CreateDeviceAndSwapChain( + hr = sFnPtr_D3D10CreateDeviceAndSwapChain1( pCLCapableAdapter, D3D10_DRIVER_TYPE_HARDWARE, NULL, 0, - D3D10_SDK_VERSION, + D3D10_FEATURE_LEVEL_10_0, + D3D10_1_SDK_VERSION, &sd, &g_pSwapChain, - &g_pd3dDevice); + (ID3D10Device1**)&g_pd3dDevice); if(FAILED(hr)) { noD3DAvailable = true; @@ -425,12 +426,12 @@ // // Initialize extension functions for D3D10 // - INITPFN(clGetDeviceIDsFromD3D10NV); - INITPFN(clCreateFromD3D10BufferNV); - INITPFN(clCreateFromD3D10Texture2DNV); - INITPFN(clCreateFromD3D10Texture3DNV); - INITPFN(clEnqueueAcquireD3D10ObjectsNV); - INITPFN(clEnqueueReleaseD3D10ObjectsNV); + INITPFN(clGetDeviceIDsFromD3D10KHR); + INITPFN(clCreateFromD3D10BufferKHR); + INITPFN(clCreateFromD3D10Texture2DKHR); + INITPFN(clCreateFromD3D10Texture3DKHR); + INITPFN(clEnqueueAcquireD3D10ObjectsKHR); + INITPFN(clEnqueueReleaseD3D10ObjectsKHR); // Query the OpenCL device that would be good for the current D3D device // We need to take the one that is on the same Gfx card. @@ -439,11 +440,11 @@ cl_device_id cdDevice; cl_uint num_devices = 0; - ciErrNum = clGetDeviceIDsFromD3D10NV( + ciErrNum = clGetDeviceIDsFromD3D10KHR( cpPlatform, - CL_D3D10_DEVICE_NV, + CL_D3D10_DEVICE_KHR, g_pd3dDevice, - CL_PREFERRED_DEVICES_FOR_D3D10_NV, + CL_PREFERRED_DEVICES_FOR_D3D10_KHR, 1, &cdDevice, &num_devices); @@ -457,7 +458,7 @@ cl_context_properties props[] = { - CL_CONTEXT_D3D10_DEVICE_NV, (cl_context_properties)g_pd3dDevice, + CL_CONTEXT_D3D10_DEVICE_KHR, (cl_context_properties)g_pd3dDevice, CL_CONTEXT_PLATFORM, (cl_context_properties)cpPlatform, 0 }; @@ -515,7 +516,7 @@ g_pTexture2D->SetResource( g_texture_2d.pSRView ); // Create the OpenCL part - g_texture_2d.clTexture = clCreateFromD3D10Texture2DNV( + g_texture_2d.clTexture = clCreateFromD3D10Texture2DKHR( cxGPUContext, 0, g_texture_2d.pTexture, @@ -556,7 +557,7 @@ return E_FAIL; g_pTexture3D->SetResource( g_texture_vol.pSRView ); - g_texture_vol.clTexture = clCreateFromD3D10Texture3DNV( + g_texture_vol.clTexture = clCreateFromD3D10Texture3DKHR( cxGPUContext, 0, g_texture_vol.pTexture, @@ -603,7 +604,7 @@ // Create the OpenCL part for(int i=0; i<6; i++) { - g_texture_cube.clTexture[i] = clCreateFromD3D10Texture2DNV( + g_texture_cube.clTexture[i] = clCreateFromD3D10Texture2DKHR( cxGPUContext, 0, g_texture_cube.pTexture, @@ -663,7 +664,7 @@ memToAcquire[6] = g_texture_cube.clTexture[4]; memToAcquire[7] = g_texture_cube.clTexture[5]; // do the acquire - ciErrNum = clEnqueueAcquireD3D10ObjectsNV( + ciErrNum = clEnqueueAcquireD3D10ObjectsKHR( cqCommandQueue, 6 + 1 + 1, //cube map + tex2d + volume texture memToAcquire, @@ -681,9 +682,9 @@ &eventType, NULL); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); - if(eventType != CL_COMMAND_ACQUIRE_D3D10_OBJECTS_NV) + if(eventType != CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR) { - shrLog("event type is not CL_COMMAND_ACQUIRE_D3D10_OBJECTS_NV !\n"); + shrLog("event type is not CL_COMMAND_ACQUIRE_D3D10_OBJECTS_KHR !\n"); } ciErrNum = clReleaseEvent(event); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); @@ -706,7 +707,7 @@ memToAcquire[6] = g_texture_cube.clTexture[4]; memToAcquire[7] = g_texture_cube.clTexture[5]; // do the acquire - ciErrNum = clEnqueueReleaseD3D10ObjectsNV( + ciErrNum = clEnqueueReleaseD3D10ObjectsKHR( cqCommandQueue, 6 + 1 + 1, //cube map + tex2d + volume texture memToAcquire, @@ -724,9 +725,9 @@ &eventType, NULL); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); - if(eventType != CL_COMMAND_RELEASE_D3D10_OBJECTS_NV) + if(eventType != CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR) { - shrLog("event type is not CL_COMMAND_RELEASE_D3D10_OBJECTS_NV !\n"); + shrLog("event type is not CL_COMMAND_RELEASE_D3D10_OBJECTS_KHR !\n"); } ciErrNum = clReleaseEvent(event); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); @@ -792,7 +793,7 @@ szGlobalWorkSize[1] = shrRoundUp((int)szLocalWorkSize[1], g_texture_vol.height); // set the args values - ciErrNum |= clSetKernelArg(ckKernel_texvolume, 0, sizeof(g_texture_vol.clMem), (void *) &(g_texture_vol.clMem)); + ciErrNum |= clSetKernelArg(ckKernel_texvolume, 0, sizeof(g_texture_vol.clTexture), (void *) &(g_texture_vol.clTexture)); ciErrNum |= clSetKernelArg(ckKernel_texvolume, 1, sizeof(g_texture_vol.width), &g_texture_vol.width); ciErrNum |= clSetKernelArg(ckKernel_texvolume, 2, sizeof(g_texture_vol.height), &g_texture_vol.height); ciErrNum |= clSetKernelArg(ckKernel_texvolume, 3, sizeof(g_texture_vol.depth), &g_texture_vol.depth); @@ -806,20 +807,6 @@ 0, NULL, NULL); oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); - //// ONLY staging buffer works, for volume texture - //// do the copy here - size_t dst[3] = { 0, 0, 0}; - size_t region[3] = { g_texture_vol.width, g_texture_vol.height, g_texture_vol.depth}; - ciErrNum |= clEnqueueCopyBufferToImage(cqCommandQueue, - g_texture_vol.clMem /* src_buffer */, - g_texture_vol.clTexture /* dst_image */, - 0 /* src_offset */, - dst /* dst_origin[3] */, - region /* region[3] */, - 0 /* num_events_in_wait_list */, - NULL /* event_wait_list */, - NULL /* event */); - oclCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); } // ----------------------------------------------------------------

                                                              • BUG: can't get value by read_imagef() at DXGI_FORMAT_R8G8B8A8_UNORM (OpenCL / ID3D10Texture2D interop)
                                                                genaganna

                                                                 

                                                                Originally posted by: selva_c was this probrem reproduced with above code ?

                                                                Thank you for reporting issue.

                                                                I am able to reproduce the issue. Looks like there is some issue with image.

                                                                enable USE_STAGING_BUFFER to run with buffers.

                                                                  • BUG: can't get value by read_imagef() at DXGI_FORMAT_R8G8B8A8_UNORM (OpenCL / ID3D10Texture2D interop)
                                                                    selva_c

                                                                    I am able to reproduce the issue. Looks like there is some issue with image.

                                                                    when is this issue going to be solved ?

                                                                    enable USE_STAGING_BUFFER to run with buffers.

                                                                    I don't wanna have any overhead, so I can't enable that define.

                                                                      • BUG: can't get value by read_imagef() at DXGI_FORMAT_R8G8B8A8_UNORM (OpenCL / ID3D10Texture2D interop)
                                                                        selva_c

                                                                         

                                                                        Originally posted by: selva_c


                                                                        I am able to reproduce the issue. Looks like there is some issue with image.

                                                                        when is this issue going to be solved ?

                                                                        enable USE_STAGING_BUFFER to run with buffers.

                                                                        I don't wanna have any overhead, so I can't enable that define.

                                                                         please, don't leave it...

                                                                            • BUG: can't get value by read_imagef() at DXGI_FORMAT_R8G8B8A8_UNORM (OpenCL / ID3D10Texture2D interop)
                                                                              genaganna

                                                                               

                                                                              Originally posted by: selva_c why is AMD's support such too BAD ??

                                                                               

                                                                              We are looking the issue. I can't tell you the time lines of fix.

                                                                              • BUG: can't get value by read_imagef() at DXGI_FORMAT_R8G8B8A8_UNORM (OpenCL / ID3D10Texture2D interop)
                                                                                nareshsankapelly

                                                                                Hi Selva_C,

                                                                                The problem with the sample is that it is using same image object for both reading and writing, which is not possible with AMD GPUs.

                                                                                The work around for this is to use one temparary image object as output image object and after the completion of kernel execution, copy back this image to the actual image object using clEnqueueCopyImage.

                                                                                For example:

                                                                                Please replace RunKernels() function by following code.

                                                                                 

                                                                                 

                                                                                 

                                                                                 

                                                                                void RunKernels() { static float t = 0.0f; // ---------------------------------------------------------------- // populate the 2d texture { // set global and local work item dimensions szLocalWorkSize[0] = initWorkGroupSize; szLocalWorkSize[1] = initWorkGroupSize; szGlobalWorkSize[0] = shrRoundUp((int)szLocalWorkSize[0], g_texture_2d.pitch); szGlobalWorkSize[1] = shrRoundUp((int)szLocalWorkSize[1], g_texture_2d.height); //clCreateBuffer(cxGPUContext, CL_MEM_READ_ONLY, 4 * g_texture_2d.pitch * g_texture_2d.height, NULL, &ciErrNum); cl_image_format imageFormat; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_RGBA; cl_mem tempTexture = clCreateImage2D(cxGPUContext, CL_MEM_WRITE_ONLY, &imageFormat, g_texture_2d.width, g_texture_2d.height, 0, NULL, &ciErrNum); shrCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); // set the args values if(bImages == shrFALSE) ciErrNum |= clSetKernelArg(ckKernel_tex2d, 0, sizeof(g_texture_2d.clMem), (void *) &(g_texture_2d.clMem)); else #if 1 ciErrNum |= clSetKernelArg(ckKernel_tex2d, 0, sizeof(tempTexture), (void *) &(tempTexture)); #else ciErrNum |= clSetKernelArg(ckKernel_tex2d, 0, sizeof(g_texture_2d.clTexture), (void *) &(g_texture_2d.clTexture)); #endif ciErrNum |= clSetKernelArg(ckKernel_tex2d, 1, sizeof(g_texture_2d.clTexture), (void *) &(g_texture_2d.clTexture)); ciErrNum |= clSetKernelArg(ckKernel_tex2d, 2, sizeof(g_texture_2d.width), &g_texture_2d.width); ciErrNum |= clSetKernelArg(ckKernel_tex2d, 3, sizeof(g_texture_2d.height), &g_texture_2d.height); ciErrNum |= clSetKernelArg(ckKernel_tex2d, 4, sizeof(g_texture_2d.pitch), &g_texture_2d.pitch); ciErrNum |= clSetKernelArg(ckKernel_tex2d, 5, sizeof(t), &t); shrCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); // launch computation kernel ciErrNum |= clEnqueueNDRangeKernel(cqCommandQueue, ckKernel_tex2d, 2, NULL, szGlobalWorkSize, szLocalWorkSize, 0, NULL, NULL); shrCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); ciErrNum = clFinish(cqCommandQueue); shrCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); if(bImages == shrFALSE) { #if 0 cl_uint* ptr = (cl_uint*)clEnqueueMapBuffer(cqCommandQueue, g_texture_2d.clMem, CL_TRUE, CL_MAP_READ, 0, g_texture_2d.height * g_texture_2d.pitch * 4, 0, NULL, NULL, &ciErrNum); shrCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); size_t dst[3] = { 0, 0, 0}; size_t region[3] = { g_texture_2d.width, g_texture_2d.height, 1}; ciErrNum = clEnqueueWriteImage(cqCommandQueue, g_texture_2d.clTexture, CL_TRUE, dst, region, g_texture_2d.width * 4, 0, ptr, 0, NULL, NULL); shrCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); ciErrNum = clEnqueueUnmapMemObject(cqCommandQueue, g_texture_2d.clMem, (void*)ptr, 0, NULL, NULL); shrCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); #else size_t dst[3] = { 0, 0, 0}; size_t region[3] = { g_texture_2d.width, g_texture_2d.height, 1}; ciErrNum |= clEnqueueCopyBufferToImage(cqCommandQueue, g_texture_2d.clMem /* src_buffer */, g_texture_2d.clTexture /* dst_image */, 0 /* src_offset */, dst /* dst_origin[3] */, region /* region[3] */, 0 /* num_events_in_wait_list */, NULL /* event_wait_list */, NULL /* event */); shrCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); #endif } else { size_t origin[3] = {0,0,0}; size_t region[3] = {g_texture_2d.width, g_texture_2d.height, 1}; ciErrNum |= clEnqueueCopyImage(cqCommandQueue, tempTexture, g_texture_2d.clTexture, origin, origin, region, 0, NULL, NULL); shrCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); } ciErrNum = clFinish(cqCommandQueue); shrCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); } // ---------------------------------------------------------------- // populate the volume texture { // set global and local work item dimensions szLocalWorkSize[0] = initWorkGroupSize; szLocalWorkSize[1] = initWorkGroupSize; szGlobalWorkSize[0] = shrRoundUp((int)szLocalWorkSize[0], g_texture_vol.pitch); szGlobalWorkSize[1] = shrRoundUp((int)szLocalWorkSize[1], g_texture_vol.height); // set the args values ciErrNum |= clSetKernelArg(ckKernel_texvolume, 0, sizeof(g_texture_vol.clMem), (void *) &(g_texture_vol.clMem)); ciErrNum |= clSetKernelArg(ckKernel_texvolume, 1, sizeof(g_texture_vol.width), &g_texture_vol.width); ciErrNum |= clSetKernelArg(ckKernel_texvolume, 2, sizeof(g_texture_vol.height), &g_texture_vol.height); ciErrNum |= clSetKernelArg(ckKernel_texvolume, 3, sizeof(g_texture_vol.depth), &g_texture_vol.depth); ciErrNum |= clSetKernelArg(ckKernel_texvolume, 4, sizeof(g_texture_vol.pitch), &g_texture_vol.pitch); ciErrNum |= clSetKernelArg(ckKernel_texvolume, 5, sizeof(g_texture_vol.pitchslice), &g_texture_vol.pitchslice); shrCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); // launch computation kernel ciErrNum |= clEnqueueNDRangeKernel(cqCommandQueue, ckKernel_texvolume, 2, NULL, szGlobalWorkSize, szLocalWorkSize, 0, NULL, NULL); shrCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); ciErrNum = clFinish(cqCommandQueue); shrCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); //// ONLY staging buffer works, for volume texture //// do the copy here size_t dst[3] = { 0, 0, 0}; size_t region[3] = { g_texture_vol.width, g_texture_vol.height, g_texture_vol.depth}; ciErrNum |= clEnqueueCopyBufferToImage(cqCommandQueue, g_texture_vol.clMem /* src_buffer */, g_texture_vol.clTexture /* dst_image */, 0 /* src_offset */, dst /* dst_origin[3] */, region /* region[3] */, 0 /* num_events_in_wait_list */, NULL /* event_wait_list */, NULL /* event */); shrCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); ciErrNum = clFinish(cqCommandQueue); shrCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); } cl_mem tempFaceTexture[6]; // ---------------------------------------------------------------- // populate the faces of the cube map for (int face = 0; face < 6; ++face) { // set global and local work item dimensions szLocalWorkSize[0] = initWorkGroupSize; szLocalWorkSize[1] = initWorkGroupSize; szGlobalWorkSize[0] = shrRoundUp((int)szLocalWorkSize[0], g_texture_cube.pitch); szGlobalWorkSize[1] = shrRoundUp((int)szLocalWorkSize[1], g_texture_cube.size); cl_image_format imageFormat; imageFormat.image_channel_data_type = CL_UNORM_INT8; imageFormat.image_channel_order = CL_RGBA; tempFaceTexture[face] = clCreateImage2D(cxGPUContext, CL_MEM_WRITE_ONLY, &imageFormat, g_texture_cube.size, g_texture_cube.size, 0, NULL, &ciErrNum); shrCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); // set the args values if(bImages == shrFALSE) ciErrNum |= clSetKernelArg(ckKernel_texcube, 0, sizeof(g_texture_cube.clMem[face]), (void *) &(g_texture_cube.clMem[face])); else #if 0 ciErrNum |= clSetKernelArg(ckKernel_texcube, 0, sizeof(g_texture_cube.clTexture[face]), (void *) &(g_texture_cube.clTexture[face])); #else ciErrNum |= clSetKernelArg(ckKernel_texcube, 0, sizeof(tempFaceTexture[face]), (void *) &(tempFaceTexture[face])); #endif ciErrNum |= clSetKernelArg(ckKernel_texcube, 1, sizeof(g_texture_cube.size), &g_texture_cube.size); ciErrNum |= clSetKernelArg(ckKernel_texcube, 2, sizeof(g_texture_cube.pitch), &g_texture_cube.pitch); ciErrNum |= clSetKernelArg(ckKernel_texcube, 3, sizeof(int), &face); ciErrNum |= clSetKernelArg(ckKernel_texcube, 4, sizeof(t), &t); shrCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); // launch computation kernel ciErrNum |= clEnqueueNDRangeKernel(cqCommandQueue, ckKernel_texcube, 2, NULL, szGlobalWorkSize, szLocalWorkSize, 0, NULL, NULL); shrCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); ciErrNum = clFinish(cqCommandQueue); shrCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); if(bImages == shrFALSE) { size_t dst[3] = { 0, 0, 0}; size_t region[3] = { g_texture_cube.size, g_texture_cube.size, 1}; ciErrNum |= clEnqueueCopyBufferToImage(cqCommandQueue, g_texture_cube.clMem[face]/* src_buffer */, g_texture_cube.clTexture[face]/* dst_image */, 0 /* src_offset */, dst /* dst_origin[3] */, region /* region[3] */, 0 /* num_events_in_wait_list */, NULL /* event_wait_list */, NULL /* event */); shrCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); } else { size_t origin[3] = {0,0,0}; size_t region[3] = {g_texture_cube.size, g_texture_cube.size, 1}; ciErrNum |= clEnqueueCopyImage(cqCommandQueue, tempFaceTexture[face], g_texture_cube.clTexture[face], origin, origin, region, 0, NULL, NULL); shrCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); } ciErrNum = clFinish(cqCommandQueue); shrCheckErrorEX(ciErrNum, CL_SUCCESS, pCleanup); } t += 0.1f; }