96 Replies Latest reply on Nov 4, 2013 1:33 PM by kylen Branched to a new discussion.

    What's wrong with this file?





      #ifndef __KERNEL_TYPES_H__

      #define __KERNEL_TYPES_H__



      #include "kernel_math.h"



      #include "svm/svm_types.h"






      /* constants */

      /* compile-time size constants */
      #define OBJECT_SIZE        16
      #define LIGHT_SIZE         4
      #define FILTER_TABLE_SIZE  256
      #define RAMP_TABLE_SIZE    256



      /* device capabilities */

      /* Each device back end switches on the shading feature groups it supports.
       * Every conditional is closed by its own #endif so the three device blocks
       * stay independent instead of nesting inside one another. */
      #ifdef __KERNEL_CPU__
      #define __KERNEL_SHADING__
      #define __KERNEL_ADV_SHADING__
      #endif

      #ifdef __KERNEL_CUDA__
      #define __KERNEL_SHADING__
      /* advanced shading only when __CUDA_ARCH__ is at least 200 */
      #if __CUDA_ARCH__ >= 200
      #define __KERNEL_ADV_SHADING__
      #endif
      #endif

      #ifdef __KERNEL_OPENCL__
      #define __KERNEL_SHADING__
      #define __KERNEL_ADV_SHADING__
      #endif




      /* kernel features */

      /* defined unconditionally, whatever the device */
      #define __SOBOL__
      #define __INSTANCING__
      #define __DPDU__
      #define __UV__
      #define __BACKGROUND__
      #define __CAUSTICS_TRICKS__
      #define __VISIBILITY_FLAG__
      #define __RAY_DIFFERENTIALS__
      #define __CAMERA_CLIPPING__
      #define __INTERSECTION_REFINE__
      #define __CLAMP_SAMPLE__

      /* enabled only when the device block defined __KERNEL_SHADING__ */
      #ifdef __KERNEL_SHADING__
      #define __SVM__
      #define __EMISSION__
      #define __TEXTURES__
      #define __HOLDOUT__
      #endif

      /* enabled only when the device block defined __KERNEL_ADV_SHADING__ */
      #ifdef __KERNEL_ADV_SHADING__
      #define __MULTI_CLOSURE__
      #define __TRANSPARENT_SHADOWS__
      #define __PASSES__
      #define __BACKGROUND_MIS__
      #define __AO__
      #endif

      /* NOTE(review): these five are defined for every device in this copy of the
       * file; confirm that enabling them unconditionally is intended. */
      #define __MULTI_LIGHT__
      #define __OSL__
      #define __SOBOL_FULL_SCREEN__
      #define __MODIFY_TP__
      #define __QBVH__



      /* Shader Evaluation */



      /* NOTE(review): the enumerator list and the closing "};" of this enum are
       * absent from this copy of the file, so the declaration is unterminated.
       * Restore them from the original header before compiling. */
      enum ShaderEvalType {






      /* Path Tracing */



      /* Two numbering groups share this enum. The first runs 0..3 and is followed
       * by PRNG_BASE_NUM = 4; the second restarts at 0, runs 0..7 and is followed
       * by PRNG_BOUNCE_NUM = 8. Values therefore repeat between the groups. */
      enum PathTraceDimension {
                PRNG_FILTER_U = 0,
                PRNG_FILTER_V = 1,
                PRNG_LENS_U = 2,
                PRNG_LENS_V = 3,
                PRNG_BASE_NUM = 4,

                PRNG_BSDF_U = 0,
                PRNG_BSDF_V = 1,
                PRNG_BSDF = 2,
                PRNG_LIGHT = 3,
                PRNG_LIGHT_U = 4,
                PRNG_LIGHT_V = 5,
                PRNG_LIGHT_F = 6,
                PRNG_TERMINATE = 7,
                PRNG_BOUNCE_NUM = 8
      };




      /* these flag values correspond exactly to OSL defaults, so be careful not to

      * change this, or if you do, set the "raytypes" shading system attribute with

      * your own new ray types and bitflag values.


      * for ray visibility tests in BVH traversal, the upper 20 bits are used for

      * layer visibility tests. */



      /* Ray type bit flags, one bit per ray type. */
      enum PathRayFlag {
                PATH_RAY_CAMERA = 1,
                PATH_RAY_REFLECT = 2,
                PATH_RAY_TRANSMIT = 4,
                PATH_RAY_DIFFUSE = 8,
                PATH_RAY_GLOSSY = 16,
                PATH_RAY_SINGULAR = 32,
                PATH_RAY_TRANSPARENT = 64,

                PATH_RAY_SHADOW_OPAQUE = 128,
                PATH_RAY_SHADOW_TRANSPARENT = 256,
                /* NOTE(review): upstream Cycles is believed to define a combined
                 * PATH_RAY_SHADOW entry at this point; it is absent from this copy
                 * and has not been re-added here - confirm against the original. */

                PATH_RAY_MIS_SKIP = 512,

                /* OR of all ten single-bit flags above (1023) */
                PATH_RAY_ALL = (1|2|4|8|16|32|64|128|256|512),

                /* 32 - 20 = 12, i.e. the shift that leaves the upper 20 bits */
                PATH_RAY_LAYER_SHIFT = (32-20)
      };




      /* Closure Label */



      /* Closure labels: LABEL_NONE is zero, every other label is a distinct bit. */
      typedef enum ClosureLabel {
                LABEL_NONE        = 0,
                LABEL_CAMERA      = (1 << 0),
                LABEL_LIGHT       = (1 << 1),
                LABEL_BACKGROUND  = (1 << 2),
                LABEL_TRANSMIT    = (1 << 3),
                LABEL_REFLECT     = (1 << 4),
                LABEL_VOLUME      = (1 << 5),
                LABEL_OBJECT      = (1 << 6),
                LABEL_DIFFUSE     = (1 << 7),
                LABEL_GLOSSY      = (1 << 8),
                LABEL_SINGULAR    = (1 << 9),
                LABEL_TRANSPARENT = (1 << 10),
                LABEL_STOP        = (1 << 11)
      } ClosureLabel;



      /* Render Passes */



      /* Render pass bits. PASS_NONE is zero; every other pass is a single bit.
       * Bit 2 (value 4) is not assigned to any pass. */
      typedef enum PassType {
                PASS_NONE                  = 0,
                PASS_COMBINED              = (1 << 0),
                PASS_DEPTH                 = (1 << 1),
                PASS_NORMAL                = (1 << 3),
                PASS_UV                    = (1 << 4),
                PASS_OBJECT_ID             = (1 << 5),
                PASS_MATERIAL_ID           = (1 << 6),
                PASS_DIFFUSE_COLOR         = (1 << 7),
                PASS_GLOSSY_COLOR          = (1 << 8),
                PASS_TRANSMISSION_COLOR    = (1 << 9),
                PASS_DIFFUSE_INDIRECT      = (1 << 10),
                PASS_GLOSSY_INDIRECT       = (1 << 11),
                PASS_TRANSMISSION_INDIRECT = (1 << 12),
                PASS_DIFFUSE_DIRECT        = (1 << 13),
                PASS_GLOSSY_DIRECT         = (1 << 14),
                PASS_TRANSMISSION_DIRECT   = (1 << 15),
                PASS_EMISSION              = (1 << 16),
                PASS_BACKGROUND            = (1 << 17),
                PASS_AO                    = (1 << 18),
                PASS_SHADOW                = (1 << 19)
      } PassType;

      /* every bit set */
      #define PASS_ALL (~0)



      #ifdef __PASSES__



      typedef float3 PathThroughput;



      typedef struct PathRadiance {

                int use_light_pass;



                float3 emission;

                float3 background;

                float3 ao;



                float3 indirect;

                float3 direct_throughput;

                float3 direct_emission;



                float3 color_diffuse;

                float3 color_glossy;

                float3 color_transmission;



                float3 direct_diffuse;

                float3 direct_glossy;

                float3 direct_transmission;



                float3 indirect_diffuse;

                float3 indirect_glossy;

                float3 indirect_transmission;



                float4 shadow;

      } PathRadiance;



      typedef struct BsdfEval {

                int use_light_pass;



                float3 diffuse;

                float3 glossy;

                float3 transmission;

                float3 transparent;

      } BsdfEval;






      typedef float3 PathThroughput;

      typedef float3 PathRadiance;

      typedef float3 BsdfEval;






      /* Shader Flag */



      /* Flags held in the top three bits of an int.
       *
       * SHADER_SMOOTH_NORMAL occupies the sign bit. Writing it as (1 << 31) shifts
       * a signed int into its sign bit, which is undefined behaviour in C, so the
       * shift is done on an unsigned value and converted back to int.
       *
       * NOTE(review): the trailing comma after SHADER_AREA_LIGHT, unlike the other
       * enums in this file, suggests a further enumerator followed here and was
       * lost from this copy - confirm against the original header. */
      typedef enum ShaderFlag {
                SHADER_SMOOTH_NORMAL = (int)(1u << 31),
                SHADER_CAST_SHADOW = (1 << 30),
                SHADER_AREA_LIGHT = (1 << 29),
      } ShaderFlag;



      /* Light Type */



      /* NOTE(review): no enumerators appear between the braces in this copy of the
       * file, which is not valid C. The list appears to have been lost and must be
       * restored from the original header. */
      typedef enum LightType {






      } LightType;



      /* Camera Type */



      /* NOTE(review): the enumerator list and the closing "};" of this enum are
       * absent from this copy of the file. Restore them from the original header
       * before compiling. */
      enum CameraType {







      /* Differential */



      /* float3 differential: one float3 for dx, one for dy */
      typedef struct differential3 {
                float3 dx, dy;
      } differential3;



      /* scalar differential: one float for dx, one for dy */
      typedef struct differential {
                float dx, dy;
      } differential;



      /* Ray */



      /* Ray with members P, D and t. The two differential members exist only when
       * __RAY_DIFFERENTIALS__ is defined; the conditional is closed inside the
       * struct so it cannot swallow the declarations that follow. */
      typedef struct Ray {
                float3 P;
                float3 D;
                float t;

      #ifdef __RAY_DIFFERENTIALS__
                differential3 dP;
                differential3 dD;
      #endif
      } Ray;



      /* Intersection */



      /* Intersection record: three floats (t, u, v) followed by two ints
       * (prim, object). */
      typedef struct Intersection {
                float t;
                float u;
                float v;

                int prim;
                int object;
      } Intersection;



      /* Attributes */



      /* NOTE(review): no enumerators appear between the braces in this copy of the
       * file, which is not valid C. The list appears to have been lost and must be
       * restored from the original header. */
      typedef enum AttributeElement {






      } AttributeElement;



      /* Closure data */



      /* capacity of the closure array in ShaderData */
      #define MAX_CLOSURE 8

      /* One closure: its type and weight, plus optional members.
       *  - sample_weight exists only with __MULTI_CLOSURE__.
       *  - with __OSL__ the closure carries a prim pointer; otherwise it carries
       *    the two floats data0 and data1.
       * Each conditional is terminated inside the struct. */
      typedef struct ShaderClosure {
                ClosureType type;
                float3 weight;

      #ifdef __MULTI_CLOSURE__
                float sample_weight;
      #endif

      #ifdef __OSL__
                void *prim;
      #else
                float data0;
                float data1;
      #endif

      } ShaderClosure;



      /* Shader Data


      * Main shader state at a point on the surface or in a volume. All coordinates

      * are in world space. */



      /* Bit flags stored in ShaderData.flag. */
      enum ShaderDataFlag {
                /* runtime flags */
                SD_BACKFACING = 1,              /* backside of surface? */
                SD_EMISSION = 2,                /* have emissive closure? */
                SD_BSDF = 4,                    /* have bsdf closure? */
                SD_BSDF_HAS_EVAL = 8,           /* have non-singular bsdf closure? */
                SD_BSDF_GLOSSY = 16,            /* have glossy bsdf */
                SD_HOLDOUT = 32,                /* have holdout closure? */
                SD_VOLUME = 64,                 /* have volume closure? */

                /* shader flags */
                SD_SAMPLE_AS_LIGHT = 128,           /* direct light sample */
                SD_HAS_SURFACE_TRANSPARENT = 256,   /* has surface transparency */
                SD_HAS_VOLUME = 512,                /* has volume shader */
                SD_HOMOGENEOUS_VOLUME = 1024        /* has homogeneous volume */
      };




      typedef struct ShaderData {

                /* position */

                float3 P;

                /* smooth normal for shading */

                float3 N;

                /* true geometric normal */

                float3 Ng;

                /* view/incoming direction */

                float3 I;

                /* shader id */

                int shader;

                /* booleans describing shader, see ShaderDataFlag */

                int flag;



                /* primitive id if there is one, ~0 otherwise */

                int prim;

                /* parametric coordinates

                 * - barycentric weights for triangles */

                float u, v;

                /* object id if there is one, ~0 otherwise */

                int object;



      #ifdef __RAY_DIFFERENTIALS__

                /* differential of P. these are orthogonal to Ng, not N */

                differential3 dP;

                /* differential of I */

                differential3 dI;

                /* differential of u, v */

                differential du;

                differential dv;


      #ifdef __DPDU__

                /* differential of P w.r.t. parametric coordinates. note that dPdu is

                 * not readily suitable as a tangent for shading on triangles. */

                float3 dPdu, dPdv;




      #ifdef __MULTI_CLOSURE__

                /* Closure data, we store a fixed array of closures */

                ShaderClosure closure[MAX_CLOSURE];

                int num_closure;

                float randb_closure;


                /* Closure data, with a single sampled closure for low memory usage */

                ShaderClosure closure;




      #ifdef __OSL__

                /* OSL context */

                void *osl_ctx;


      } ShaderData;



      /* Constant Kernel Data


      * These structs are passed from CPU to various devices, and the struct layout

      * must match exactly. Structs are padded to ensure 16 byte alignment, and we

      * do not use float3 because its size may not be the same on all devices. */



      /* Camera block of the constant kernel data. Member order is part of the
       * layout shared between host and device and must not change. */
      typedef struct KernelCamera {
                /* type, followed by three pad ints */
                int type;
                int pad1;
                int pad2;
                int pad3;

                /* matrices */
                Transform cameratoworld;
                Transform rastertocamera;

                /* differentials */
                float4 dx;
                float4 dy;

                /* depth of field (four floats) */
                float aperturesize;
                float blades;
                float bladesrotation;
                float focaldistance;

                /* motion blur (two floats) */
                float shutteropen;
                float shutterclose;

                /* clipping (two floats) */
                float nearclip;
                float cliplength;

                /* more matrices */
                Transform screentoworld;
                Transform rastertoworld;
                Transform ndctoworld;
                Transform worldtoscreen;
                Transform worldtoraster;
                Transform worldtondc;
                Transform worldtocamera;
      } KernelCamera;



      /* Film block of the constant kernel data: one float followed by 23 ints,
       * declared four members per row. Member order must not change. */
      typedef struct KernelFilm {
                float exposure;
                int pass_flag, pass_stride, use_light_pass;

                /* pass_pad completes this row of four */
                int pass_combined, pass_depth, pass_normal, pass_pad;

                int pass_uv, pass_object_id, pass_material_id, pass_diffuse_color;

                int pass_glossy_color, pass_transmission_color;
                int pass_diffuse_indirect, pass_glossy_indirect;

                int pass_transmission_indirect, pass_diffuse_direct;
                int pass_glossy_direct, pass_transmission_direct;

                int pass_emission, pass_background, pass_ao, pass_shadow;
      } KernelFilm;



      /* Background block of the constant kernel data: two ints, then two floats. */
      typedef struct KernelBackground {
                /* only shader index */
                int shader;
                int transparent;

                /* ambient occlusion */
                float ao_factor, ao_distance;
      } KernelBackground;



      /* Sun/sky block of the constant kernel data: 24 floats in total, with the
       * pad members filling each group. Member order must not change. */
      typedef struct KernelSunSky {
                /* sun direction in spherical and cartesian */
                float theta;
                float phi;
                float pad3;
                float pad4;

                /* perez function parameters */
                float zenith_Y;
                float zenith_x;
                float zenith_y;
                float pad2;
                float perez_Y[5];
                float perez_x[5];
                float perez_y[5];
                float pad5;
      } KernelSunSky;



      /* Integrator block of the constant kernel data: 20 four-byte members
       * (17 ints, 3 floats). Member order must not change. */
      typedef struct KernelIntegrator {
                /* emission */
                int use_direct_light, use_ambient_occlusion;
                int num_distribution, num_all_lights;
                float pdf_triangles, pdf_lights;
                int pdf_background_res;

                /* bounces */
                int min_bounce, max_bounce;
                int max_diffuse_bounce, max_glossy_bounce, max_transmission_bounce;

                /* transparent */
                int transparent_min_bounce, transparent_max_bounce;
                int transparent_shadows;

                /* caustics */
                int no_caustics;

                /* seed */
                int seed;

                /* render layer */
                int layer_flag;

                /* clamp */
                float sample_clamp;

                /* padding */
                int pad;
      } KernelIntegrator;



      /* BVH block of the constant kernel data: four ints, the last two padding. */
      typedef struct KernelBVH {
                /* root node */
                int root;
                int attributes_map_stride;
                int pad1;
                int pad2;
      } KernelBVH;



      /* Aggregate of the constant kernel data blocks, one member per block.
       * Member order must not change. */
      typedef struct KernelData {
                KernelCamera     cam;
                KernelFilm       film;
                KernelBackground background;
                KernelSunSky     sunsky;
                KernelIntegrator integrator;
                KernelBVH        bvh;
      } KernelData;






      #endif /*  __KERNEL_TYPES_H__ */

        • Re: What's wrong with this file?

          where can we download blender cycles? Maybe it's easier to answer your question after we know how this file causes trouble.

            • Re: What's wrong with this file?

              Thanks for your response, this is the folder with the kernel file(s)...found in the folder called kernel

                • Re: What's wrong with this file?

                  Hi Nazim,


                  I am able to compile the "kernel.cl" and create a kernel object (clCreateKernel) for "kernel_ocl_path_trace".

                  I see that one of the kernels is commented in "kernel.cl" (kernel_ocl_shader). Hope this is intended.



                  I am on a windows 7 64-bit system with AMD Cayman GPU.

                  But the application is 32-bit only.

                  AMD APP 2.8 (32-bit) is installed on my machine

                  The driver version I see in device manager is

                  The driver packaging version I see in AMD control center is 9.002-120928m-xxxxxxxx

                  Catalyst version 12.10



                  While compiling kernel.cl, the maximum memory usage of the application was ~525MB.

                  How much memory do you have on your machines?

                  I will post the exact code snippets that I used in the next post.


                  Best Regards,

                  Work Item 6

                    • Re: What's wrong with this file?

                      Here is a copy of the host code that I used to compile in Windows using Visual Studio 2010. Note that OCL is a helper C++ class that I use for internal use. It just avoids the hassle of complex initialization.


                      #include <Windows.h>
                      #include <WinBase.h>
                      #include <ocl.hpp>
                      #include <iostream>
                      int main()
                           ocl::platform_t platformType = ocl::platform_t::amd;
                           ocl::device_t deviceType = ocl::device_t::gpu;
                           ocl O(platformType, deviceType);
                           HANDLE fileHandle = CreateFile("kernel.cl", GENERIC_READ, FILE_SHARE_READ, NULL, 
                                                                   OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
                           HANDLE mapHandle = CreateFileMapping(fileHandle, NULL, PAGE_READONLY, 0, 0, NULL);
                           LPVOID fileContent = MapViewOfFile(mapHandle, FILE_MAP_READ, 0, 0, 0);
                           DWORD size = GetFileSize(fileHandle, NULL);
                            cl_int err = O.buildKernel((const char*)fileContent, "kernel_ocl_path_trace", "-I." );
                            std::cout << "Build Kernel Returned " << err << std::endl;
                            return 0;


                      The buildKernel in OCL wrapper does this:

                          int err;
                          char* buildLog = NULL;
                          size_t buildLogSize = 0;
                          program = clCreateProgramWithSource(context, 1, &kernelString, NULL, &err);
                          if(err != CL_SUCCESS)
                              #ifndef PRINT_MUTE
                                  std::cerr << " ocl: couldn't create program from the provided kernel string " << err << std::endl;
                              return err;
                          err = clBuildProgram(program, 1, &device_id, buildOptions, NULL, NULL);
                          if(err != CL_SUCCESS)
                              clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &buildLogSize);
                              buildLog = (char*)(::malloc)(buildLogSize+1);
                              #ifndef PRINT_MUTE
                                  std::cerr << " ocl: Failed to build program executable Build log Size:" << buildLogSize << "\tBuild Log:\n";
                              clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, NULL);
                              #ifndef PRINT_MUTE
                                  std::cerr << buildLog << std::endl;
                              return err;
                          kernel = clCreateKernel(program, kernelFuncName, &err);
                          if ( (!kernel) || (err != CL_SUCCESS) )
                              #ifndef PRINT_MUTE
                                  std::cerr << "ocl: kernel could not be created " << err << std::endl;
                              return err;
                          return err;
                • Re: What's wrong with this file?

                  Hia, the developer of blender cycles, has responded to these changes...


                  If you compile it like this, most features will be disabled, just

                  giving you an ambient occlusion style display without any materials.

                  This is known to work on AMD cards, no surprises there. But if you

                  enable more features (with e.g. __KERNEL_OPENCL_AMD__), that's when

                  you run into problems.

                    • Re: What's wrong with this file?

                      Hello Nazim,


                      Will the steps provided by Nou be sufficient to reproduce this problem?

                      Please help me reproduce the problem. I will definitely work with you to get this resolved.

                      Is the behaviour observed in Linux or windows? Or both?

                      32 or 64bit? or both?


                      By __KERNEL_OPENCL_AMD__, Do you mean a "Compiler" option that I can enable in my previous repro attempt?



                      Best Regards,

                      Workitem 6

                        • Re: What's wrong with this file?



                          Here's the link to the full application...


                          please download the blender 2.64a version...for either linux or windows

                          On either platform this problem re-occurs...

                          once downloaded unzip open "blender.exe"

                          on the bar at the top change blender internal(drop down menu) to cycles

                          press spacebar on keyboard type "user preferences"

                          go to the "System" tab (see screenshot: steps.png)

                          change the device from "CPU" to "OPENCL"



                          ONCE YOU CLICK RENDER WAIT...


                          (HOPE YOU'RE USING WINDOWS VISTA OR ABOVE)

                            • Re: What's wrong with this file?

                              Thanks for the detailed steps. I will first confirm the repro and then will forward the issue to the correct people. Thanks!

                                • Re: What's wrong with this file?



                                  I want to confirm issues with OpenCL on Ubuntu x64 with HD7950 Tahiti card.

                                  My specs:

                                  FX-8350 CPU

                                  Sapphire HD7950 OC boost

                                  32GB RAM

                                  Ubuntu 12.10 Linux 64bit

                                  Latest SVN version of blender


                                  Compiling the kernel also takes a long time and a lot of memory here. An additional issue is the startup time of the compiled OpenCL kernel and its memory usage (my memory usage jumps to 30 GB of RAM before the render starts) until it actually starts doing something. After that, rendering is fast enough, but together with the loading/startup time it takes longer than a CPU render.

                                  Very simple scene takes 1 minute to render on CPU and 4 minutes on GPU !!!!


                                  When you read blogs and forums about the Blender + OpenCL issue, the conclusion of the Blender development team is that OpenCL is unusable for our purposes. This way AMD is losing a considerable number of customers to NVidia. Blender is very popular software that is also used in professional 3D production, and all of those guys buy NVidia cards to get GPU acceleration.


                                  I do not know if kernel is poorly written or it is problem of OpenCL compiler or AMD driver, but "official" statement from Blender devs is AMD compiler doesn't work for bigger kernels + some other minor reasons. Very disappointing for all of us AMD guys out there.


                                  If you need some help from me (additional tests or more detailed report) I am willing to invest some time to make this work. Just don't have experience with OpenCL.

                                • Re: What's wrong with this file?

                                  Hi Nazim,


                                  I could reproduce the problem on my Windows 64 setup.

                                  I clicked on "view more details" in the windows error message and got crash details.

                                  See the excerpt below. The fault module indicates the OpenCL runtime.


                                  Will forward this to an appropriate team for a detailed analysis. btw, when blender crashed, it was taking around 770MB of RAM.

                                  My system has 4GB RAM in it.... and there were still free pages left



                                  Problem signature:


                                  Problem Event Name: APPCRASH

                                  Application Name: blender.exe

                                  Application Version:

                                  Application Timestamp: 5074808c

                                  Fault Module Name: amdocl64.dll

                                  Fault Module Version: 10.0.1016.4

                                  Fault Module Timestamp: 5065fc35

                                  Exception Code: 40000015

                                  Exception Offset: 0000000000f53679

                                  OS Version: 6.1.7600.


                                    • Re: What's wrong with this file?

                                      Hi at my place the OpenCL kernel is working. Problem is it takes long time to compile and takes 31 GB of RAM out of 32. I need to compile without any other software on (as I do not have swap on my system). But after compilation it seems to be working just fine.

                                        • Re: What's wrong with this file?


                                          I tried this on a machine with 4GB of RAM... I followed the repro case given by Nazim (the one with screenshots).

                                          I was not using any external blend file. Just the default one that Blender starts with (in line with Nazim's steps for repro)

                                          The application consumed some 800MB of RAM before crashing and there was still some free memory left.

                                          I am using 12.10 Cat Driver on Windows 7, 64-bit machine.


                                          AMD's engineering team is already aware of this issue and are tracking the same. FYI.
                                          At the moment, there is no known workaround.

                                          However, if the Blender team is open to optimizing their code for AMD HW, the engg team may be able to suggest some changes. 

                                          For example, the VRay App that suffers from a similar issue got some massive improvements by moving to native functions. 

                                          Hope this helps,

                                • Re: What's wrong with this file?

                                  Hello ladies and gentlemen,

                                  thank you for your interest in this issue.

                                  @developer the amount of RAM used by Blender Cycles exceeds the 512 MB you mention

                                  @underhood can you take a couple of screen shots: task manager, user preferences and other things that may justify blender cycles working

                                  @nou if AMD are able to create a patch for this software can they do this please...

                                  that would be awsome...


                                  PS I've recently bought a GeForce card (I didn't want to, but had to because of some work). Would it be possible to use NVIDIA's OpenCL implementation/compiler with AMD GPUs, i.e. so that I could utilize the power of all my OpenCL GPUs...

                                  PPS with the latest drivers (13.1 CCC) a new error comes up: "insufficient private resources". With this new error the AMD compiler aborts compilation halfway through.

                                  PSSS can opencl use windows 7 swap partitions...

                                  and finally, I will be posting a video which will further clarify the steps to reproduce the issue

                                  THANK YOU

                                    • Re: What's wrong with this file?

                                      PS I've recently bought a GeForce card (I didn't want to, but had to because of some work). Would it be possible to use NVIDIA's OpenCL implementation/compiler with AMD GPUs, i.e. so that I could utilize the power of all my OpenCL GPUs...


                                      NVIDIA and AMD GPUs cannot be accessed using just NVIDIA OpenCL SDK. They will only show up in their own platforms, if an OpenCL implementation is found for them.

                                      • Re: What's wrong with this file?


                                        I think the issue that Underhood is talking about is different from what you are mentioning.

                                        The issue that I reproduced is by following your steps listed in one of your prev replies (with screenshots)


                                        Also, I dont quite understand what you meant by "OpenCL using swap partitions". If you are just talking about the memory usage - The Swap is managed by OS and is not visible to applications and libraries. So, openCL will not directly deal with Swap

                                          • Re: What's wrong with this file?

                                            Hi, I know swap is a system thing. I just don't use swap on my system as I have 32 GB of RAM. When Linux finds out it is out of memory, the OOM killer kicks in and kills the process that is using the most RAM (in this case the AMD OpenCL compiler). On my system it was using 31 GB of RAM, so I had to close all other apps to have enough memory.


                                            Anyway, since updating to the latest stable Linux driver from the AMD website it crashes regardless, so basically with the new driver it is not working at all.


                                            I will investigate more when I am back on my main workstation pc as i travel a lot

                                        • Re: What's wrong with this file?

                                          Bumping in hopes of a resolution for this inexcusable situation. Cycles has been out for over a year and somehow amd still hasn't been able to update a silly driver to support some subset of the API. Just plain sad. If this issue is not resolved by the time nvidia 700 series come out, I'll have no choice but to change. This is not a threat nor a tantrum, I am just expressing how I feel here, I would very much like to continue with ATI, I just wont have a choice.

                                          Anyway, BUMP for great justice

                                          • Re: What's wrong with this file?

                                            this is working here without any problem

                                            • Re: What's wrong with this file?


                                              I believe the problem occurs due to limitations in the current driver from AMD because the Blender Cycles render using Nvidia OpenCL almost as well as CUDA.



                                              Germano Cavalcante

                                                • Re: What's wrong with this file?


                                                  I have just got access to a test-driver. Will be posting an update soon. Thanks for your patience.

                                                    • Re: What's wrong with this file?

                                                      AMD really needs to fix this, as the developer of LuxRender found a similar issue when he implemented recursive procedural materials in LuxRender.

                                                      • Re: What's wrong with this file?

                                                        Since we are on the subject, I think I may help you debug this issue once and for all:


                                                        The bug is about an OpenCL implementation on a Render Engine (Cycles) in Blender


                                                        Versions of Blender that have this bug:

                                                        Blender 2.60

                                                        Blender 2.61

                                                        Blender 2.62

                                                        Blender 2.63

                                                        Blender 2.64

                                                        Download of Blender: Official Blender Releases

                                                        In version 2.65 and probably on the next release too, OpenCL is / will be deactivated because of this bug.


                                                        Operating systems known to have this bug:

                                                        Windows 8: x86, x86_64

                                                        Windows 7: x86, x86_64

                                                        Windows XP:  x86, x86_64

                                                        Ubuntu 12.04 LTS:  x86, x86_64

                                                        Ubuntu 12.10:  x86, x86_64

                                                        Mac OSX


                                                        Thread on Blender forums about this bug:

                                                        A good news for AMD/ATI Graphic Cards Owners

                                                        Cycles improvements on AMD with Public Driver Release


                                                        File to test the bug:
                                                        Mike Pan BMW Scene
                                                        But any file should reproduce the bug. Even the startup cube when set to render in Cycles


                                                        Hardware that the bug shows:

                                                        AMD / ATI GPU cards OpenCL


                                                        Notice that this bug DOES NOT APPEAR on the NVidia GPU OpenCL implementation, nor on the Intel/AMD CPU OpenCL implementations.


                                                        Latest driver tested that show the bug:

                                                        Catalyst Software Suite 13.1 for any OS

                                                        Catalyst Software Suite 13.2 Beta 6 for any OS


                                                        Steps to reproduce Bug #1:

                                                        Open the desired blender scene;

                                                        On the top middle, change from Blender Render to Cycles Render;

                                                        On the top left, go to File > User preferences > System;

                                                        Choose OpenCL; (now you can either close this windows or save)

                                                        On the left middle, change the feature set from Supported to Experimental (now you are compiling with OpenCL);

                                                        Right below, change CPU to GPU Compute;

                                                        Click on Render, a little above;


                                                        Description of Bug #1 (May be more than only one bug, don't know):

                                                        Long time to compile the OpenCL kernel.

                                                        RAM usage going to the maximum, even crashing the program.

                                                        Scene not rendering (Black frame);

                                                        Sometimes, a scene may render (Depends on the situation - Read the forums mentioned above) but the colors are wrong.

                                                        OpenCL kernel compiling for each frame (If its an animation).

                                                        OpenCL kernel compiling each time you click on render.

                                                        A long wait between the OpenCL kernel finishing and the start of the render (I have no idea what this might be, but it only happens on AMD GPUs).


                                                        Bug #2:

                                                        Textures do not work on AMD GPUs with OpenCL.

                                                        This bug is more difficult to reproduce, but nevertheless it's there. Textures do not work with AMD/ATI but do work with other OpenCL implementations.


                                                        I hope this info helps you guys solve these issues. Blender is a very powerful open-source engine and it would be nice to see some support from your end to make it work properly on your hardware. If you have any questions, I'm here to answer.

                                                          • Re: What's wrong with this file?

                                                            From the preliminary test of the bootleg driver, I hear from the testers that mikepan is working without crashing.

                                                            I will confirm this again tomorrow (its a local holiday here today)

                                                            Textures stuff is new. we need a repro case for that...

                                                              • Re: What's wrong with this file?

                                                                Hi himanshu!


                                                                I've done a little research. The texture problems were related to older versions (2.62 and before), and since 2.63 and above do not work with the AMD/ATI OpenCL compilers at all (a workaround is possible here, but it does not include the corrections from 2.63), I can't say for sure that textures aren't working. Maybe you could test with your driver.


                                                                Simple image texture on the default cube, rendered with Cycles on OpenCL for AMD. It's quite simple and there's a lot of tutorials out there. Remember to test on versions 2.63 or up.


                                                                We all appreciate your efforts very much!





                                                              • Re: What's wrong with this file?

                                                                Hi mo92,

                                                                I guess we are still stuck with the large memory requirements of the Blender software. Although the software is no longer crashing (with internal drivers), it is giving errors during OpenCL compilation.


                                                                I ran the blender software without BMW file. I checked the commandline window showing the log. It can be opened from the window menu in the blender. It gives "OpenCL compilation failed : Insufficient Private Resources!"


                                                                I ran the Blender with Mike’s BMW file, and the program gives the same error again.


                                                                I have not been able to see the rendered BMW using OpenCL. Blender always shows black screen, which looks reasonable as kernel compilation itself is failing. We are still working on solving this issue. Appreciate your patience on this.

                                                                  • Re: What's wrong with this file?

                                                                    This also happens with the AMD drivers available to the public. The software does not crash if you wait until the end. It gives "OpenCL compilation failed : Insufficient Private Resources!" too.

                                                                    • Re: What's wrong with this file?

                                                                      Thank you and the Engineers behind, looking to fix these issues.


                                                                      My guess would be a memory leak in the OpenCL compiler. Perhaps it would be easier to debug the compiler by looking into the Blender code, compiling it, and deactivating it piece by piece. Someone among the Blender developers should be able to provide assistance, if required.


                                                                      If you, or any of the Engineers could get in touch with the main Cycles developer, Brecht van Lommel, it might be easier to debug also.


                                                                      I can't state my appreciation for what you are doing for us. Thank you!





                                                                        • Re: What's wrong with this file?

                                                                          Hi Marcelo,


                                                                          You are most welcome. We can't thank you all enough for extending support and, more importantly, for your patience.

                                                                          I will venture into blender forums and see if I can connect the developer with the engineering team.

                                                                          May be, they could solve this easily.


                                                                          I have one last question:

                                                                          1. I know Blender is written for CUDA as well. But, Have you ever compiled this for NVIDIA's OpenCL platform?

                                                                              Can you share your results?



                                                                            • Re: What's wrong with this file?

                                                                              Hi Himanshu,

                                                                              The threads below show the status and compare the rendering speed between CUDA and Nvidia's OpenCL on various platforms.

                                                                              It may be useful for research.







                                                                              • Re: What's wrong with this file?

                                                                                Sorry, I no longer have nVidia GPU cards.


                                                                                Have you successfully contacted Brecht? Thanks!

                                                                                  • Re: What's wrong with this file?

                                                                                    Yet to contact Brecht. Will do. Thanks.

                                                                                      • Re: What's wrong with this file?

                                                                                        Have passed on Brecht's contact details to the engineering team looking into this issue.


                                                                                        I think the team understands the reason why the compiler is running out of resources.

                                                                                        They will touch base with Brecht, if needed.



                                                                                          • Re: What's wrong with this file?

                                                                                            (Forgive me i'm a native french speaker)


                                                                                            Hi Himanshu, thanks a lot for the efforts made to make Cycles work on AMD GPUs. Maybe AMD engineers think 3D rendering isn't a good or big business, and Blender is open source. But there are many reasons to try to make things work well for this software.


                                                                                            1/ Many guys who work with 3D software are gamers who pay a lot of dollars for high GPU performance (I am a gamer, and many guys I know in computer graphics are gamers too).


                                                                                            2/ For consumers, AMD is an OpenCL prophet with its APU and HSA promotion. It is sad to see AMD work badly with OpenCL when Nvidia and Intel work.


                                                                                            3/ AMD GCN architecture devices are very good for computing, but hardware optimisation is not enough; driver support is needed.


                                                                                            4/ Many small studios are waiting for AMD OpenCL support in Blender Cycles to build new configs. My studio too.


                                                                                            PLEASE PLEASE deliver US!


                                                                                            thank a lot Himanshu

                                                                                              • Re: What's wrong with this file?

                                                                                                Hi Sharly,

                                                                                                As I understand it, AMD engineers share the same technical view that someone shared above (a lot of inlining...).

                                                                                                There is an internal problem report and people are working on it.

                                                                                                So, the issue is being looked at now and we are not going to leave you guys in the cold.

                                                                                                The URL of this thread is under track and I will get back and post an update --whenever it is available.

                                                                                                Things will turn around.

                                                                                                Meantime, we truly appreciate your support and patience on this,

                                                                                                  • Re: What's wrong with this file?

                                                                                                    Thanks a lot Himanshu.


                                                                                                    Today I did some research to understand in more depth why AMD cards don't work with many complex renderers:









                                                                                                    The reason seems to be more than a simple OpenCL driver bug (optimisation). The hardware, too, needs more improvement, and OpenCL is more difficult to set up than CUDA.


                                                                                                    look at the last answer down of the post from "grimm"




                                                                                                    Brecht said that they need more hardware optimisation to:




                                                                                                    << We will need major driver or hardware improvements to get full cycles support on AMD hardware>>


                                                                                                    If one day all these renderers are fully supported by AMD, I will celebrate with a lot of CG on my PC.


                                                                                                    Wait and see maybe all will be fix soon.

                                                                                                      • Re: What's wrong with this file?
                                                                                                        The reason seems to be more than a simple OpenCL driver bug (optimisation). The hardware, too, needs more improvement, and OpenCL is more difficult to set up than CUDA.


                                                                                                        Is there a reason why you say "hardware improvement" is needed?

                                                                                                        I don't see NVIDIA hardware radically different from AMD hardware.

                                                                                                        Except for dynamic parallelism, possibly aimed at imbalanced workloads, I don't see any major differences.

                                                                                                        In fact, AMD hardware delivers much more GFLOPs and memory bandwidth than NVIDIA.


                                                                                                        Please share with us any hardware improvements that you think are necessary from AMD's side.


                                                                                                        The big kernel and function inlining are the chief problems that result in the "insufficient private resources" message.

                                                                                                        People are working to fix this. If this is fixed, I have very little doubt about AMD OpenCL running these renderers as well as anybody else, if not better.

                                                                                                          • Re: What's wrong with this file?

                                                                                                            I also believe no hardware improvements are needed.


                                                                                                            AMD's approach is very good at the hardware level. On a personal note: for me, it is always more stable.


                                                                                                            As for its processing power on GPGPU: in applications where OpenCL does work (simpler ones), results are as good as nVidia and Intel, if not better, as said. This can be verified easily with LuxMark and other apps. And that is while keeping good performance at the graphical level (gaming) too.


                                                                                                            AMD's issue is the way their OpenCL compiler works, from my point of view.


                                                                                                            I don't know if Himanshu is not telling us, but what I believe is that for the AMD driver to be able to handle complex OpenCL code, like Cycles and other render engines, it's going to need MAJOR changes, things that *might* take months.



                                                                                                            From my personal experience, AMD has better hardware, drivers and support than any other company out there. I think that the issue with Cycles is now being passed on to the proper support staff and engineers, as Himanshu tells us. That's why it could have gone so long without being fixed.


                                                                                                            Please keep us updated about the status on this matter Himanshu. I can guarantee you there are tons of people out there wanting this to happen


                                                                                                            Regards, Marcelo.

                                                                                                            • Re: What's wrong with this file?

                                                                                                              Hello guy ! I'm fine!

                                                                                                              But I see that months have passed and there is no news about our issue, so I want to suggest something.

                                                                                                              To help us and the whole CG community while we wait, we would like to get a little info about how, day after day, people are working to find a solution. Small things, like comments on the work in progress.


                                                                                                              Info about the collaboration between the AMD devs and the BF (Blender Foundation), some news about prototype tests, or smaller things like a roadmap.


                                                                                                              Please don't worry about this; I only want to trust in AMD's future.

                                                                                                                • Re: What's wrong with this file?


                                                                                                                  I can guarantee you that active work is happening here. Fixing cycles involves work at both OpenCL compiler level and also in layers beneath it.

                                                                                                                  The work is pretty involved and will take a fair amount of time.


                                                                                                                  I have not got any timelines from AMD engineers. But it looks like this is going to take a while.

                                                                                                                  Please bear with us.

                                                                                                              • Re: What's wrong with this file?

                                                                                                                As I understand it, the problem is that all function calls are inlined. But a CALL instruction exists: http://developer.amd.com/wordpress/media/2012/10/R600-R700-Evergreen_Assembly_Language_Format.pdf so it is just a compiler problem.

                                                                                                                  • Re: What's wrong with this file?

                                                                                                                    I'm very happy if it's only a matter of OpenCL code, very happy! Thank you guys for the clarification.


                                                                                                                    I'm a newbie when it comes to hardware architecture, and my first opinion about AMD devices since GCN is that they have solid hardware but some small driver-support issues that could radically change how people see them on the market. However, in my previous post I simply reported BRECHT VAN LOMMEL's words and the link from the wiki.

                                                                                                                    Thanks for helping us.

                                                                                                                      • Re: What's wrong with this file?

                                                                                                                        Hey there,
                                                                                                                        I read this thread and I'm very happy that something is being done about this problem. I'm planning to buy a graphics card in the near future and I would love to see the AMD drivers working with Cycles. I'm curiously watching this thread every day. If the compiler works with Cycles, I'm definitely going for an AMD card.
                                                                                                                        Just wanted to let you know that there are other people out there craving for a solution! Thanks for the efforts!

                                                                                                                          • Re: What's wrong with this file?

                                                                                                                            Hi All,

                                                                                                                            AMD Engineers are working ******* making blender (and many other similar softwares) to work on AMD GPUs. I will post here  if there is any progress.

                                                                                                                              • Re: What's wrong with this file?

                                                                                                                                I don't know if helps but I want to tell my experience with the Latest Beta Driver 13.3

                                                                                                                                The card AMD Radeon HD 7570 no longer detects OpenCL

                                                                                                                                The Intel (R) HD Graphics 4000 is now detecting OpenCL (instead of AMD)

                                                                                                                                Smalllux GPU (another renderer that works with OpenCL) is still working perfectly.

                                                                                                                                Attached a picture with the strangeness.

                                                                                                                                OpenCL Error 2,1.png

                                                                                                                                  • Re: What's wrong with this file?

                                                                                                                                    Hi Germano,

                                                                                                                                    Thanks for reporting. I have forwarded the issue to the driver team. Hopefully, it will get fixed.

                                                                                                                                      • Re: What's wrong with this file?

                                                                                                                                        If this gets fixed, will Blender Cycles work on present AMD cards or will it anyway only be fixed in the next generation?

                                                                                                                                          • Re: What's wrong with this file?

                                                                                                                                            As I understand from AMD engineers, this is basically software-stack issue. So, most likely that after the fix, things should work.

                                                                                                                                            But I cannot guarantee this.

                                                                                                                                              • Re: What's wrong with this file?

                                                                                                                                                Could you please give us an update on progress made so far, if any? It's been almost a month since the last update now, and I would like to know more.

                                                                                                                                                • Re: What's wrong with this file?

                                                                                                                                                  Great news, friends! After many try-and-retry sessions, moving from one render engine to another, I finally found the holy grail of lightspeed rendering on AMD GCN GPUs. Of course I tried Indigo RT, LuxRender, and finally SLG. But who's the winner?


                                                                                                                                                  I ran the same scene under each render engine, and for the same quality (a less noisy image) I got:


                                                                                                                                                  Luxrender : 8 hours ( in hybridpath cpu+gpu) with sharp fireflies


                                                                                                                                                  INDIGO RT: 6 hours (hybridpath too)


                                                                                                                                                  SmallluxGPU 3.0:  5 min ( pathocl mod  Full Gpu)


                                                                                                                                                  I7 2600/HD 7950 royalking/16gb ddr3.


                                                                                                                                                  All the tests were made with only one of the GPUs, because I must change my motherboard and my power supply before installing the second HD 7950 in the beast.

                                                                                                                                                  I will post rendered images and a tutorial about SLG later, but I think every AMD user must try it. Very awesome.



                                                                                                                                                  For example, the HD 7950 is 6x faster than my Leadtek GTX 560 (non-Ti) and 15x faster than my i7 2600.

                                                                                                                                                    • Re: What's wrong with this file?

                                                                                                                                                      I love you man.


                                                                                                                                                      Go here: http://www.luxrender.net/en_GB/standalone


                                                                                                                                                      Download the OpenCL archive for your operating system. Extract that somewhere you can easily find it, you'll need to find it again in Blender.

                                                                                                                                                      Now open up Blender. "File" -> "User Preferences" (Control + Alt + U) -> "Addons" tab -> "Install from file" (at the bottom).

                                                                                                                                                      Browse to the folder you just extracted, select the zip file.

                                                                                                                                                      My version is called "LuxBlend26_1.2.1_64bit_OpenCL.zip"

                                                                                                                                                      Now make sure it is enabled by making sure that it is ticked. If it does not show up automagically, click on the "Render" button at the left. "LuxRender" will show up in the list.

                                                                                                                                                      When that is done, do the same, but with "render_smallluxgpu.py". Make sure that is ticked as well.

                                                                                                                                                      Next, in Blender, at the top, next to the "Scene" textbox, change the render engine to "SmallLuxGPU"

                                                                                                                                                      At the right, in the "Render" tab, under "Full path to SmallLuxGPU's executable", enter the path to "LuxRender_64_OpenCL\slg3.exe".

                                                                                                                                                      Next, under "Full path where the scene is exported", enter the path for the exported scenes to be stored. Entering "//" here will store the exported files in the directory of your Blender .blend.

                                                                                                                                                      If the "Rendering Type" is not yet set to "PathOCL", make sure it is. You could also try other "Rendering Types".

                                                                                                                                                      Now render to your heart's content!


                                                                                                                                                      Should you get any error messages, make sure you have installed the AMD APP SDK, which you can find here: http://developer.amd.com/tools-and-sdks/heterogeneous-computing/amd-accelerated-parallel-processing-app-sdk/


                                                                                                                                                      Any other error messages, you can Google around.


                                                                                                                                                      Edit: you have to add a sun lamp though: Shift+A -> Lamp -> Sun (S). In the "Object Data" on the right (sun icon), make sure "Sky" is enabled.

                                                                                                            • Re: What's wrong with this file?

                                                                                                              I really hope this issue gets resolved soon. I'm getting ready to replace my AMD Radeon HD 4870 (it's getting long in the tooth). One of the things that will affect my next purchase of a video card will be Cycles support (I might be forced to go with NVIDIA, and I don't like that idea). Anyway, thanks for all the effort so far, guys!

                                                                                                                • Re: What's wrong with this file?

                                                                                                                  Oh my god, if only this gets fixed soon; imagine what an event it will be for the CG community, for it will be the time to build a beast powered by AMD. What a great deal; I dream about it every day.


                                                                                                                  1 W8000    for    display  large wide heavy 3D scene from blender an 3DSMAX

                                                                                                                  2 HD8990   for    very very fast rendering  on cycles octane indigo arion LUXRENDER oh my god

                                                                                                                  1 cpu            the best of AMD


                                                                                                                  My employer is waiting for this to change every config in our studio.

                                                                                                                    • Re: What's wrong with this file?

                                                                                                                      Finally I've got two HD 7950s from Club 3D for LuxRender, to give AMD devices a chance. I hope that I will never regret it. I'm already in a testing stage and will send feedback later. If this experiment outperforms my old Blender + Cycles pipeline, it will be great.


                                                                                                                      blender+luxrender under : I7 2600 /16 gb ddr3/2X HD 7950

                                                                                                                      • Re: What's wrong with this file?

                                                                                                                        Hi, I'm new here but I've been following this thread since summer.


                                                                                                                        To sharlybg, I've noticed that you keep mentioning Octane Render, and it seems to me you are waiting for AMD to fix their problems with OpenCL so you can use Octane. Is my assumption correct?


                                                                                                                        I'm sorry, but AMD cannot do anything for you right now in that matter.  Octane Render is CUDA software, and it requires an NVIDIA card.


                                                                                                                        There are three ways you can get Octane to work on your AMD GPUs. One is if AMD decided to support CUDA, which I don't think will happen.  The second is if somebody makes a working Windows build of gpuocelot, but it looks like they are having problems building for Windows.  Does anybody here know about it?


                                                                                                                        And the last way is if Octane devs at Otoy decide to support OpenCL. 

                                                                                                                      • Re: What's wrong with this file?

                                                                                                                        You don't have to wait too long to render what you want on an AMD graphics card. This is one of my final renders with the HD 7950 from Club 3D (RoyalKing). Follow the steps described by Kylen to install SmallLuxGPU, and enjoy your powerful GCN GPU. I am very happy today. Thanks to AMD. If you don't believe us, try LuxMark 2.0.


                                                                                                                          • Re: What's wrong with this file?

                                                                                                                            Thanks, sharlybg, for sharing your experiences. I hope SmallLuxGPU will help a lot of people accelerate their rendering. Work to enable Cycles is also in progress. Hopefully Cycles will also work some time in the near future.

                                                                                                                              • Re: What's wrong with this file?

                                                                                                                                Thank you for looking into accelerating cycles. Luxrender is not sufficiently integrated into blender to make GPU acceleration an added-value experience (no live preview, which is the real gamechanger in an editing workflow).


                                                                                                                                I had to trade my AMD 7970s for NVIDIA 580s because of this issue. I lost a ton of benchmark score and AMD lost a high margin customer, it was a sad day all around. I hope this issue gets fixed before I go shopping again: the new macpro looks awesome, but it has AMD cards...

                                                                                                                                • Re: What's wrong with this file?

                                                                                                                                  Thank you for taking this issue seriously! Still, is there any possibility that one of the responsible developers could give us a status report and some technical insight into the problem (and whether they think or know it's a software or a hardware limitation)? That would really help to bridge the time.

                                                                                                                            • Re: What's wrong with this file?

                                                                                                                              It is great to see that the OpenCL issue with Blender is being looked into.  For interest, I have a Firepro V7900 which I will be testing once we have a solution, it will be interesting to see how the workstation and consumer cards match up.  Many times I have considered ditching this card and going for a GTX 580 or a mid range Quadro but have held out because of the benchmarks I've seen with this card.  A solution can't come quick enough!


                                                                                                                              Thank you Himanshu and team for your work.

                                                                                                                              • Re: What's wrong with this file?

                                                                                                                                No news again? The wait is becoming too long without a bit of news. Yes, I've got SmallLuxGPU working for me, but that's only me, and this render engine is feature-limited compared to V-Ray, Octane, Cycles and all the software where artists want acceleration with AMD cards. Some of us are getting completely discouraged; look at this ( Amd/ati Opencl+ Blenderheads+blender Cycles=harmony | Facebook ). Is it so hard to solve this issue?

                                                                                                                                Now AMD is talking about HSA for APUs and the next HD 9000 Volcanic Islands; does it mean that the Radeon 7XXX series will never get Cycles, V-Ray and Octane working?


                                                                                                                                Why, AMD, why? We need a true answer.

                                                                                                                                • Re: What's wrong with this file?

                                                                                                                                  No updates yet?
                                                                                                                                  I am beginning to doubt these developers :S Are they pretending to be working on this problem!? Why don't we see any updates? I have spent over $3000 on your company's products, recommended you to friends and never had any trouble using your products. A happy customer is losing faith here! Does anyone care? AMD? No, really not?


                                                                                                                                  Thanks for caring.

                                                                                                                                  • Re: What's wrong with this file?

                                                                                                                                    Thanks, I had begun to doubt that the developers were working on this, but now I realize that they are indeed working on it, and it seems it's going to run soon.


                                                                                                                                    better late than never, thank you amd.