cancel
Showing results for 
Search instead for 
Did you mean: 

Archives Discussions

npm1
Adept II

What's wrong with this file?

This code is from a program called Blender Cycles. It works, but it doesn't work with the AMD OpenCL compiler. In the Blender community some believe that this code is written using CUDA syntax; others believe that it is generic enough for both OpenCL and CUDA. Please make your judgement!

#ifndef __KERNEL_TYPES_H__

#define __KERNEL_TYPES_H__

#include "kernel_math.h"

#include "svm/svm_types.h"

CCL_NAMESPACE_BEGIN

/* constants */

/* Fixed sizes used by the kernel.
 * NOTE(review): the units (elements vs. bytes) are not visible in this
 * header; confirm against the code that indexes these arrays/tables. */
#define OBJECT_SIZE 16
#define LIGHT_SIZE 4
#define FILTER_TABLE_SIZE 256
#define RAMP_TABLE_SIZE 256

/* device capabilities */

/* Basic shading is turned on for every known device type (CPU, CUDA, OpenCL). */
#if defined(__KERNEL_CPU__) || defined(__KERNEL_CUDA__) || defined(__KERNEL_OPENCL__)
#define __KERNEL_SHADING__
#endif

/* Advanced shading is turned on for CPU and OpenCL unconditionally, and for
 * CUDA only when compiling for architecture 200 or newer. */
#if defined(__KERNEL_CPU__) || defined(__KERNEL_OPENCL__) || \
    (defined(__KERNEL_CUDA__) && __CUDA_ARCH__ >= 200)
#define __KERNEL_ADV_SHADING__
#endif

/* kernel features */

/* Features enabled for every device. */
#define __SOBOL__
#define __INSTANCING__
#define __DPDU__
#define __UV__
#define __BACKGROUND__
#define __CAUSTICS_TRICKS__
#define __VISIBILITY_FLAG__
#define __RAY_DIFFERENTIALS__
#define __CAMERA_CLIPPING__
#define __INTERSECTION_REFINE__
#define __CLAMP_SAMPLE__

/* Features that depend on the device supporting shading. */
#ifdef __KERNEL_SHADING__
#define __SVM__
#define __EMISSION__
#define __TEXTURES__
#define __HOLDOUT__
#endif

/* Features that depend on the device supporting advanced shading. */
#ifdef __KERNEL_ADV_SHADING__
#define __MULTI_CLOSURE__
#define __TRANSPARENT_SHADOWS__
#define __PASSES__
#define __BACKGROUND_MIS__
#define __AO__
#endif

#define __MULTI_LIGHT__

/* __OSL__ replaces the data0/data1 floats in ShaderClosure with a
 * `void *prim` and adds a `void *osl_ctx` to ShaderData. Those are host
 * pointers, so the feature is only enabled for the CPU kernel; CUDA and
 * OpenCL builds keep the plain float closure data.
 * NOTE(review): confirm that no GPU code path relies on __OSL__. */
#ifdef __KERNEL_CPU__
#define __OSL__
#endif

/* NOTE(review): the toggles below are still defined for every device;
 * confirm each one is supported by the CUDA and OpenCL kernels. */
#define __SOBOL_FULL_SCREEN__
#define __MODIFY_TP__
#define __QBVH__

/* Shader Evaluation */

/* Kinds of shader evaluation; the enumerator values are spelled out. */
enum ShaderEvalType {
	SHADER_EVAL_DISPLACE = 0,
	SHADER_EVAL_BACKGROUND = 1
};

/* Path Tracing */

/* Random number dimensions, in two groups that each start at 0 and end with
 * a count enumerator (PRNG_BASE_NUM, PRNG_BOUNCE_NUM). */
enum PathTraceDimension {
	/* first group: 0..3, count = 4 */
	PRNG_FILTER_U = 0,
	PRNG_FILTER_V,
	PRNG_LENS_U,
	PRNG_LENS_V,
	PRNG_BASE_NUM,

	/* second group: restarts at 0, 0..7, count = 8 */
	PRNG_BSDF_U = 0,
	PRNG_BSDF_V,
	PRNG_BSDF,
	PRNG_LIGHT,
	PRNG_LIGHT_U,
	PRNG_LIGHT_V,
	PRNG_LIGHT_F,
	PRNG_TERMINATE,
	PRNG_BOUNCE_NUM
};

/* these flag values correspond exactly to OSL defaults, so be careful not to

* change this, or if you do, set the "raytypes" shading system attribute with

* your own new ray types and bitflag values.

*

* for ray visibility tests in BVH traversal, the upper 20 bits are used for

* layer visibility tests. */

/* Ray type bit flags: ten single-bit flags in bits 0..9, plus combined
 * masks and the shift used for the layer bits. */
enum PathRayFlag {
	PATH_RAY_CAMERA = (1 << 0),
	PATH_RAY_REFLECT = (1 << 1),
	PATH_RAY_TRANSMIT = (1 << 2),
	PATH_RAY_DIFFUSE = (1 << 3),
	PATH_RAY_GLOSSY = (1 << 4),
	PATH_RAY_SINGULAR = (1 << 5),
	PATH_RAY_TRANSPARENT = (1 << 6),

	PATH_RAY_SHADOW_OPAQUE = (1 << 7),
	PATH_RAY_SHADOW_TRANSPARENT = (1 << 8),
	/* either kind of shadow ray */
	PATH_RAY_SHADOW = (PATH_RAY_SHADOW_OPAQUE | PATH_RAY_SHADOW_TRANSPARENT),

	PATH_RAY_MIS_SKIP = (1 << 9),

	/* all ten flag bits set (0x3FF) */
	PATH_RAY_ALL = ((1 << 10) - 1),

	/* 32-bit word minus the 20 upper bits */
	PATH_RAY_LAYER_SHIFT = (32 - 20)
};

/* Closure Label */

/* Closure label bit flags: LABEL_NONE is zero, every other label is a
 * distinct single bit (bits 0..11). */
typedef enum ClosureLabel {
	LABEL_NONE = 0,
	LABEL_CAMERA = (1 << 0),
	LABEL_LIGHT = (1 << 1),
	LABEL_BACKGROUND = (1 << 2),
	LABEL_TRANSMIT = (1 << 3),
	LABEL_REFLECT = (1 << 4),
	LABEL_VOLUME = (1 << 5),
	LABEL_OBJECT = (1 << 6),
	LABEL_DIFFUSE = (1 << 7),
	LABEL_GLOSSY = (1 << 8),
	LABEL_SINGULAR = (1 << 9),
	LABEL_TRANSPARENT = (1 << 10),
	LABEL_STOP = (1 << 11)
} ClosureLabel;

/* Render Passes */

/* Render pass bit flags. Each pass is one bit; note that bit 2 (value 4)
 * is not assigned to any pass. */
typedef enum PassType {
	PASS_NONE = 0,
	PASS_COMBINED = (1 << 0),
	PASS_DEPTH = (1 << 1),
	/* bit 2 is skipped */
	PASS_NORMAL = (1 << 3),
	PASS_UV = (1 << 4),
	PASS_OBJECT_ID = (1 << 5),
	PASS_MATERIAL_ID = (1 << 6),
	PASS_DIFFUSE_COLOR = (1 << 7),
	PASS_GLOSSY_COLOR = (1 << 8),
	PASS_TRANSMISSION_COLOR = (1 << 9),
	PASS_DIFFUSE_INDIRECT = (1 << 10),
	PASS_GLOSSY_INDIRECT = (1 << 11),
	PASS_TRANSMISSION_INDIRECT = (1 << 12),
	PASS_DIFFUSE_DIRECT = (1 << 13),
	PASS_GLOSSY_DIRECT = (1 << 14),
	PASS_TRANSMISSION_DIRECT = (1 << 15),
	PASS_EMISSION = (1 << 16),
	PASS_BACKGROUND = (1 << 17),
	PASS_AO = (1 << 18),
	PASS_SHADOW = (1 << 19)
} PassType;

/* Mask with every bit set. */
#define PASS_ALL (~0)

/* PathThroughput is a float3 whether or not passes are enabled. */
typedef float3 PathThroughput;

#ifdef __PASSES__

/* With passes enabled, radiance is kept split into separate components. */
typedef struct PathRadiance {
	int use_light_pass;

	float3 emission;
	float3 background;
	float3 ao;

	float3 indirect;
	float3 direct_throughput;
	float3 direct_emission;

	float3 color_diffuse;
	float3 color_glossy;
	float3 color_transmission;

	float3 direct_diffuse;
	float3 direct_glossy;
	float3 direct_transmission;

	float3 indirect_diffuse;
	float3 indirect_glossy;
	float3 indirect_transmission;

	float4 shadow;
} PathRadiance;

/* With passes enabled, a BSDF evaluation is kept split per component. */
typedef struct BsdfEval {
	int use_light_pass;

	float3 diffuse;
	float3 glossy;
	float3 transmission;
	float3 transparent;
} BsdfEval;

#else

/* Without passes, both collapse to a single float3. */
typedef float3 PathRadiance;
typedef float3 BsdfEval;

#endif

/* Shader Flag */

/* Shader flags occupy the three highest bits (31, 30, 29) of a 32-bit int;
 * SHADER_MASK selects the remaining low 29 bits.
 *
 * The sign-bit flag is built from an unsigned shift and converted back to
 * int: `1 << 31` on a signed int shifts into the sign bit, which is
 * undefined behavior in C. The resulting bit pattern is unchanged. */
typedef enum ShaderFlag {
	SHADER_SMOOTH_NORMAL = (int)(1u << 31),
	SHADER_CAST_SHADOW = (1 << 30),
	SHADER_AREA_LIGHT = (1 << 29),
	SHADER_MASK = ~(SHADER_SMOOTH_NORMAL|SHADER_CAST_SHADOW|SHADER_AREA_LIGHT)
} ShaderFlag;

/* Light Type */

/* Light kinds, numbered consecutively from 0. */
typedef enum LightType {
	LIGHT_POINT = 0,
	LIGHT_DISTANT = 1,
	LIGHT_BACKGROUND = 2,
	LIGHT_AREA = 3,
	LIGHT_AO = 4
} LightType;

/* Camera Type */

/* Camera projection kinds, numbered consecutively from 0. */
enum CameraType {
	CAMERA_PERSPECTIVE = 0,
	CAMERA_ORTHOGRAPHIC = 1,
	CAMERA_ENVIRONMENT = 2
};

/* Differential */

/* A pair of float3 differentials, named dx and dy. */
typedef struct differential3 {
	float3 dx, dy;
} differential3;

/* A pair of scalar differentials, named dx and dy. */
typedef struct differential {
	float dx, dy;
} differential;

/* Ray */

/* A ray: P and D vectors plus a scalar t, and optionally the differentials
 * of P and D.
 * NOTE(review): P/D/t look like origin, direction and length; confirm
 * against the code that fills in the ray. */
typedef struct Ray {
	float3 P;
	float3 D;
	float t;

#if defined(__RAY_DIFFERENTIALS__)
	/* only present when ray differentials are compiled in */
	differential3 dP;
	differential3 dD;
#endif
} Ray;

/* Intersection */

/* Result of a ray intersection: three floats (t, u, v) followed by the
 * primitive and object ids. */
typedef struct Intersection {
	float t;
	float u;
	float v;

	int prim;
	int object;
} Intersection;

/* Attributes */

/* What an attribute is attached to, numbered consecutively from 0. */
typedef enum AttributeElement {
	ATTR_ELEMENT_FACE = 0,
	ATTR_ELEMENT_VERTEX = 1,
	ATTR_ELEMENT_CORNER = 2,
	ATTR_ELEMENT_VALUE = 3,
	ATTR_ELEMENT_NONE = 4
} AttributeElement;

/* Closure data */

/* Capacity of the fixed closure array in ShaderData. */
#define MAX_CLOSURE 8

/* One closure: its type and weight, an optional sample weight, and a
 * payload that is either two floats or, with __OSL__, an opaque pointer. */
typedef struct ShaderClosure {
	ClosureType type;
	float3 weight;

#if defined(__MULTI_CLOSURE__)
	float sample_weight;
#endif

#ifndef __OSL__
	float data0;
	float data1;
#else
	void *prim;
#endif
} ShaderClosure;

/* Shader Data

*

* Main shader state at a point on the surface or in a volume. All coordinates

* are in world space. */

/* Bit flags stored in ShaderData.flag. */
enum ShaderDataFlag {
	/* runtime flags */
	SD_BACKFACING = (1 << 0),     /* backside of surface? */
	SD_EMISSION = (1 << 1),       /* have emissive closure? */
	SD_BSDF = (1 << 2),           /* have bsdf closure? */
	SD_BSDF_HAS_EVAL = (1 << 3),  /* have non-singular bsdf closure? */
	SD_BSDF_GLOSSY = (1 << 4),    /* have glossy bsdf */
	SD_HOLDOUT = (1 << 5),        /* have holdout closure? */
	SD_VOLUME = (1 << 6),         /* have volume closure? */

	/* shader flags */
	SD_SAMPLE_AS_LIGHT = (1 << 7),          /* direct light sample */
	SD_HAS_SURFACE_TRANSPARENT = (1 << 8),  /* has surface transparency */
	SD_HAS_VOLUME = (1 << 9),               /* has volume shader */
	SD_HOMOGENEOUS_VOLUME = (1 << 10)       /* has homogeneous volume */
};

typedef struct ShaderData {

          /* position */

          float3 P;

          /* smooth normal for shading */

          float3 N;

          /* true geometric normal */

          float3 Ng;

          /* view/incoming direction */

          float3 I;

          /* shader id */

          int shader;

          /* booleans describing shader, see ShaderDataFlag */

          int flag;

          /* primitive id if there is one, ~0 otherwise */

          int prim;

          /* parametric coordinates

           * - barycentric weights for triangles */

          float u, v;

          /* object id if there is one, ~0 otherwise */

          int object;

#ifdef __RAY_DIFFERENTIALS__

          /* differential of P. these are orthogonal to Ng, not N */

          differential3 dP;

          /* differential of I */

          differential3 dI;

          /* differential of u, v */

          differential du;

          differential dv;

#endif

#ifdef __DPDU__

          /* differential of P w.r.t. parametric coordinates. note that dPdu is

           * not readily suitable as a tangent for shading on triangles. */

          float3 dPdu, dPdv;

#endif

#ifdef __MULTI_CLOSURE__

          /* Closure data, we store a fixed array of closures */

          ShaderClosure closure[MAX_CLOSURE];

          int num_closure;

          float randb_closure;

#else

          /* Closure data, with a single sampled closure for low memory usage */

          ShaderClosure closure;

#endif

#ifdef __OSL__

          /* OSL context */

          void *osl_ctx;

#endif

} ShaderData;

/* Constant Kernel Data

*

* These structs are passed from CPU to various devices, and the struct layout

* must match exactly. Structs are padded to ensure 16 byte alignment, and we

* do not use float3 because its size may not be the same on all devices. */

/* Camera parameters. Member order and types are kept exactly as declared
 * originally, so the struct layout is unchanged. */
typedef struct KernelCamera {
	/* type, followed by three ints of padding */
	int type;
	int pad1;
	int pad2;
	int pad3;

	/* matrices */
	Transform cameratoworld;
	Transform rastertocamera;

	/* differentials */
	float4 dx;
	float4 dy;

	/* depth of field */
	float aperturesize;
	float blades;
	float bladesrotation;
	float focaldistance;

	/* motion blur */
	float shutteropen;
	float shutterclose;

	/* clipping */
	float nearclip;
	float cliplength;

	/* more matrices */
	Transform screentoworld;
	Transform rastertoworld;
	Transform ndctoworld;
	Transform worldtoscreen;
	Transform worldtoraster;
	Transform worldtondc;
	Transform worldtocamera;
} KernelCamera;

/* Film settings: one float followed by 23 ints, i.e. 24 four-byte members.
 * NOTE(review): the pass_* members presumably hold per-pass positions in the
 * render buffer; confirm against the code that fills this struct. */
typedef struct KernelFilm {
	float exposure;
	int pass_flag;
	int pass_stride;
	int use_light_pass;

	int pass_combined;
	int pass_depth;
	int pass_normal;
	int pass_pad;

	int pass_uv;
	int pass_object_id;
	int pass_material_id;
	int pass_diffuse_color;

	int pass_glossy_color;
	int pass_transmission_color;
	int pass_diffuse_indirect;
	int pass_glossy_indirect;

	int pass_transmission_indirect;
	int pass_diffuse_direct;
	int pass_glossy_direct;
	int pass_transmission_direct;

	int pass_emission;
	int pass_background;
	int pass_ao;
	int pass_shadow;
} KernelFilm;

/* Background settings: two ints followed by two floats. */
typedef struct KernelBackground {
	int shader;        /* only shader index */
	int transparent;

	/* ambient occlusion */
	float ao_factor;
	float ao_distance;
} KernelBackground;

/* Sun/sky model parameters: 24 floats in total, padding members included. */
typedef struct KernelSunSky {
	/* sun direction in spherical and cartesian */
	float theta;
	float phi;
	float pad3;
	float pad4;

	/* perez function parameters */
	float zenith_Y;
	float zenith_x;
	float zenith_y;
	float pad2;

	float perez_Y[5];
	float perez_x[5];
	float perez_y[5];

	float pad5;
} KernelSunSky;

/* Integrator settings: 20 four-byte members, ending with one int of padding. */
typedef struct KernelIntegrator {
	/* emission */
	int use_direct_light;
	int use_ambient_occlusion;
	int num_distribution;
	int num_all_lights;
	float pdf_triangles;
	float pdf_lights;
	int pdf_background_res;

	/* bounces */
	int min_bounce;
	int max_bounce;
	int max_diffuse_bounce;
	int max_glossy_bounce;
	int max_transmission_bounce;

	/* transparent */
	int transparent_min_bounce;
	int transparent_max_bounce;
	int transparent_shadows;

	int no_caustics;     /* caustics */
	int seed;            /* seed */
	int layer_flag;      /* render layer */
	float sample_clamp;  /* clamp */
	int pad;             /* padding */
} KernelIntegrator;

/* BVH settings: two ints of data followed by two ints of padding. */
typedef struct KernelBVH {
	int root;  /* root node */
	int attributes_map_stride;

	int pad1;
	int pad2;
} KernelBVH;

/* Aggregate of all the kernel settings structs, one member per subsystem.
 * Member order is kept exactly as declared originally. */
typedef struct KernelData {
	KernelCamera cam;
	KernelFilm film;
	KernelBackground background;
	KernelSunSky sunsky;
	KernelIntegrator integrator;
	KernelBVH bvh;
} KernelData;

CCL_NAMESPACE_END

#endif /*  __KERNEL_TYPES_H__ */

96 Replies
binying
Challenger

where can we download blender cycles? Maybe it's easier to answer your question after we know how this file causes trouble.

0 Likes

Thanks for your response, this is the folder with the kernel file(s)...found in the folder called kernel

0 Likes

Hi Nazim,

I am able to compile the "kernel.cl" and create a kernel object (clCreateKernel) for "kernel_ocl_path_trace".

I see that one of the kernels is commented in "kernel.cl" (kernel_ocl_shader). Hope this is intended.

I am on a windows 7 64-bit system with AMD Cayman GPU.

But the application is 32-bit only.

AMD APP 2.8 (32-bit) is installed on my machine

The driver version I see in device manager is 9.2.0.0

The driver packaging version I see in AMD control center is 9.002-120928m-xxxxxxxx

Catalyst version 12.10

While compiling kernel.cl, the maximum memory usage of the application was ~525MB.

How much memory do you have on your machines?

I will post the exact code snippets that I used in the next post.

Best Regards,

Work Item 6

0 Likes

Here is a copy of the host code that I used to compile in Windows using Visual Studio 2010. Note that OCL is a helper C++ class that I use for internal use. It just avoids the hassle of complex initialization.

#include <Windows.h>
#include <WinBase.h>
#include <ocl.hpp>
#include <iostream>

int main()
{
     ocl::platform_t platformType = ocl::platform_t::amd;
     ocl::device_t deviceType = ocl::device_t::gpu;

     ocl O(platformType, deviceType);

     HANDLE fileHandle = CreateFile("kernel.cl", GENERIC_READ, FILE_SHARE_READ, NULL,
                                             OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
     HANDLE mapHandle = CreateFileMapping(fileHandle, NULL, PAGE_READONLY, 0, 0, NULL);
     LPVOID fileContent = MapViewOfFile(mapHandle, FILE_MAP_READ, 0, 0, 0);
     DWORD size = GetFileSize(fileHandle, NULL);

      cl_int err = O.buildKernel((const char*)fileContent, "kernel_ocl_path_trace", "-I." );
      std::cout << "Build Kernel Returned " << err << std::endl;
      return 0;
}

The buildKernel in OCL wrapper does this:

{
    int err;
    char* buildLog = NULL;
    size_t buildLogSize = 0;

    program = clCreateProgramWithSource(context, 1, &kernelString, NULL, &err);
    if(err != CL_SUCCESS)
    {
        #ifndef PRINT_MUTE
            std::cerr << " ocl: couldn't create program from the provided kernel string " << err << std::endl;
        #endif
        return err;
    }

    err = clBuildProgram(program, 1, &device_id, buildOptions, NULL, NULL);
    if(err != CL_SUCCESS)
    {
        clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &buildLogSize);
        buildLog = (char*)(::malloc)(buildLogSize+1);

        #ifndef PRINT_MUTE
            std::cerr << " ocl: Failed to build program executable Build log Size:" << buildLogSize << "\tBuild Log:\n";
        #endif
        clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, NULL);
        #ifndef PRINT_MUTE
            std::cerr << buildLog << std::endl;
        #endif
        ::free(buildLog);;
        return err;
    }

    kernel = clCreateKernel(program, kernelFuncName, &err);
    if ( (!kernel) || (err != CL_SUCCESS) )
    {
        #ifndef PRINT_MUTE
            std::cerr << "ocl: kernel could not be created " << err << std::endl;
        #endif
        return err;
    }
    return err;
}
0 Likes

Hi Nazim,

Since the peak memory consumption is 512MB, this can be a problem if your application has already loaded a lot of memory (in the form of images or frames or any others).

As a workaround, consider compiling the OpenCL program at startup, before loading any images, frames or other data. Once the compilation is done, the memory consumption goes back to normal, so you will have enough memory for your other operations.

Best Regards,

WorkItem 6

Hia,

Thanks for your time, hopefully this works...

How much memory do you have on your machines?

I have 16gb of ram.

This code is an add-on that belongs to a free, open-source software package called Blender, which itself is written in Python, C, and C++.

A developer named Brecht Van Lommel et al, created a path tracing renderer in blender, with the aim of realistic lighting and shading for 3D animation(artist). As you may have gathered this addon is aimed at working on opencl, cuda, and cpu platforms(maybe more(despite its good feature set it is still in beta)). Sadly, for AMD GPU users the opencl functionality does not work as it should i.e. memory peaks beyond 16gb ram when trying to compile the opencl kernel, it also produces a kernel path trace error.

Here's one of the main forums dedicated to the Blender software community



0 Likes

Hi Nazim,

I have far less RAM on my machine... Probably just 4G.

I guess your app (or Python script) is loading other objects that are eating up a lot of RAM.

Can you pre-compile the OpenCL code before loading the objects?

That can help. Please let me know on this.

Also, I think you missed posting the forum link of blender...

Can you please re-post it?

Thanks,

Best Regards,

Workitem 6

0 Likes

developer: download Blender 2.63 http://download.blender.org/release/Blender2.63/. Enable OpenCL device in file->user preference->system. Download this scene http://dl.dropbox.com/u/1742071/1m/BMW1M-MikePan.blend open it. Then click render button. It should begin compiling kernels. It takes GB of memory.

0 Likes

Hello Nou,

THanks for the detailed steps.

I am using a Win64 box with AMD 6950 GPU (Cayman) here.

Nazim was talking about an add-on. Does the link given by you contain that add-on?

In other words, can I reproduce what Nazim is describing with your steps?

OR Are you just suggesting the high memory usage?

In any case, I will try this out this week and let you all know.

Thanks,

Best Regards,

Workitem 6

0 Likes

Yes, Nazim wants a working Blender Cycles renderer that utilizes the GPU through OpenCL. You can also try 2.64 http://download.blender.org/release/Blender2.64/ but not 2.65, as that version has OpenCL tracing disabled. With this it clogs my 8GB of RAM. Then I killed it manually.

0 Likes

http://dl.dropbox.com/u/1742071/1m/BMW1M-MikePan.blend - I am unable to access this file.

If there is a spelling mistake in the URL, can you please correct it and post a new one? Thank you.

0 Likes

Nou,

As far as Nazim's steps to repro, the initial test on the fix-driver has been successful - at least no crash.
I will need some more time before I can fully commit on this.

At the same time, for the 30GB memory gulp issue - I would like to test this with the blend file you had posted.

However, I am unable to download the mikepan blend file.

Can you post it here? It will be useful to test further.

Also, if you could just post the steps to reproduce with the blend file, it will be useful.

Thanks for your time on this,

0 Likes

here you go.

0 Likes

Kind Regards,

Nazim

0 Likes
npm1
Adept II

Hia, the developer of blender cycles, has responded to these changes...

If you compile it like this, most features will be disabled, just

giving you an ambient occlusion style display without any materials.

This is known to work on AMD cards, no surprises there. But if you

enable more features (with e.g. __KERNEL_OPENCL_AMD__), that's when

you run into problems.

0 Likes

Hello Nazim,

Will the steps provided by Nou sufficient to repro this problem?

Please help me reproduce the problem. I will definitely work with you to get this resolved.

Is the behaviour observed in Linux or windows? Or both?

32 or 64bit? or both?

By __KERNEL_OPENCL_AMD__, Do you mean a "Compiler" option that I can enable in my previous repro attempt?

Thanks,

Best Regards,

Workitem 6

0 Likes

Hia,

Thanks...

Heres the link to the full application....

http://download.blender.org/release/Blender2.64/

please download the blender 2.64a version...for either linux or windows

On either platform this problem re-occurs...

once downloaded unzip open "blender.exe"

on the bar at the top change blender internal(drop down menu) to cycles

press spacebar on keyboard type "user preferences"

go to the "system"steps.png

change the device from "CPU" to "OPENCL"

Then:

RENDERTAB.png

ONCE YOU CLICK RENDER WAIT...

THIS IS HOW YOU REPRODUCE THE SAME ERRORS AS I HAVE (RUNNING OUT OF MEMORY AND OTHER ERRORS - THE COMPILING KERNEL STAGE TAKES TOO LONG, RAM USAGE EXCEEDS 15GB)

(HOPE YOUR USING WINDOWS VISTA AND ABOVE)

0 Likes

Thanks for the detailed steps. I will first confirm the repro and then will forward the issue to the correct people. Thanks!

Hi,

I want to confirm issues with OpenCL on Ubuntu x64 with HD7950 Tahiti card.

My specs:

FX-8350 CPU

Sapphire HD7950 OC boost

32GB RAM

Ubuntu 12.10 Linux 64bit

Latest SVN version of blender

Compilation of the kernel also takes a long time and a lot of memory here. An additional issue is the startup time of the finished OpenCL kernel and its memory usage (my memory usage jumps to 30GB of RAM before the render starts) until it actually starts doing something. After that, rendering is fast enough, but together with the loading/startup time it takes longer than a CPU render.

Very simple scene takes 1 minute to render on CPU and 4 minutes on GPU !!!!

When you read blogs and forums about the Blender + OpenCL issue, the conclusion of the Blender development team is that OpenCL is unusable for our purposes. This way AMD is losing a considerable number of customers to NVidia. Blender is very popular software that is also used in professional 3D production, and all of those people buy NVidia cards to get GPU acceleration.

I do not know if kernel is poorly written or it is problem of OpenCL compiler or AMD driver, but "official" statement from Blender devs is AMD compiler doesn't work for bigger kernels + some other minor reasons. Very disappointing for all of us AMD guys out there.

If you need some help from me (additional tests or more detailed report) I am willing to invest some time to make this work. Just don't have experience with OpenCL.

0 Likes

Hi,

I forgot to mention that if you use latest version of blender you have to manually enable OpenCL support. It was disabled by default by Blender devs after they decided OpenCL is too problematic (and putting further development on hold until better SW is provided)

Official statement about OpenCL from Blender devs (stating problems they encountered during attempt to make it work):

http://wiki.blender.org/index.php/Dev:2.6/Source/Render/Cycles/OpenCL

On that page you can also read how to enable testing opencl support in latest release:

"The environment variable CYCLES_OPENCL_TEST can be defined to show it, which can be useful for developers that want to test it."

After that use steps in picture from Nazim Mer.

0 Likes

We are looking into this issue. After repro, we will forward to the right team.

Thanks for your patience. We can get this working together.

0 Likes

In that version of Blender, go to the bar at the top and click Window; from the drop-down list select Terminal. In the terminal you'll see the errors that may come up while compiling Blender Cycles' kernel.

0 Likes

Hi Nazim,

I could reproduce the problem on my Windows 64 setup.

I clicked on "view more details" in the windows error message and got crash details.

See the excerpt below. The fault module indicates the OpenCL runtime.

Will forward this to an appropriate team for a detailed analysis. btw, when blender crashed, it was taking around 770MB of RAM.

My system has 4GB RAM in it.... and there were still free pages left

"

Problem signature:

 

Problem Event Name: APPCRASH

Application Name: blender.exe

Application Version: 2.6.4.0

Application Timestamp: 5074808c

Fault Module Name: amdocl64.dll

Fault Module Version: 10.0.1016.4

Fault Module Timestamp: 5065fc35

Exception Code: 40000015

Exception Offset: 0000000000f53679

OS Version: 6.1.7600.2.0.0.256.48

"

0 Likes

Hi at my place the OpenCL kernel is working. Problem is it takes long time to compile and takes 31 GB of RAM out of 32. I need to compile without any other software on (as I do not have swap on my system). But after compilation it seems to be working just fine.

0 Likes

Underhood,

I tried this on a machine with 4GB of RAM... I followed the repro case given by Nazim (the one with screenshots).

I was not using any external blend file. Just the default one that Blender starts with (in line with Nazim's steps for repro)

The application consumed some 800MB of RAM before crashing and there was still some free memory left.

I am using 12.10 Cat Driver on Windows 7, 64-bit machine.

AMD's engineering team is already aware of this issue and are tracking the same. FYI.
At the moment, there is no known workaround.

However, if the Blender team is open to optimizing their code for AMD HW, the engg team may be able to suggest some changes. 

For example, the VRay App that suffers from a similar issue got some massive improvements by moving to native functions. 

Hope this helps,

0 Likes

Blender is opensource. AMD can write patches to make it work

0 Likes
npm1
Adept II

Hello ladies and gentlemen,

thank you for your interest in this issue.

@developer the amount ram used by blender cycles exceeds the 512mb you mention

@underhood can you take a couple of screen shots: task manager, user preferences and other things that may justify blender cycles working

@nou if AMD are able to create a patch for this software can they do this please...

that would be awsome...

PS I've recently bought a GeForce card (I didn't want to, but had to due to some work). Would it be possible to use NVIDIA's OpenCL implementation/compiler with AMD GPUs, i.e. so that I could utilize the power of all my OpenCL GPUs?

PPS With the latest drivers (13.1 CCC) a new error comes up: "insufficient private resources". With this new error the AMD compiler aborts compilation halfway.

PSSS can opencl use windows 7 swap partitions...

and finaly i will be posting a video which will further clarify the steps to reproduce the issue

THANK YOU

0 Likes

PS I've recently bought a GeForce card (I didn't want to, but had to due to some work). Would it be possible to use NVIDIA's OpenCL implementation/compiler with AMD GPUs, i.e. so that I could utilize the power of all my OpenCL GPUs?

NVIDIA and AMD GPUs cannot be accessed using just NVIDIA OpenCL SDK. They will only show up in their own platforms, if an OpenCL implementation is found for them.

0 Likes

Nazim,

I think the issue that Underhood is talking about is different from what you are mentioning.

The issue that I reproduced is by following your steps listed in one of your prev replies (with screenshots)

Also, I dont quite understand what you meant by "OpenCL using swap partitions". If you are just talking about the memory usage - The Swap is managed by OS and is not visible to applications and libraries. So, openCL will not directly deal with Swap

0 Likes

Hi i know swap is system thing. I just don't use swap on my system as I have 32GIGs of RAM. When Linux finds out it is out of memory then OOM killer kicks in and kills process that is using most of RAM (in this case AMD opencl compiler). As on my system it was using 31 GIGs of RAM so i had to close all other apps so I have enough memory.

Anyway, since updating to the latest stable Linux driver from the AMD website it crashes, so basically with the new driver it is not working at all.

I will investigate more when I am back on my main workstation pc as i travel a lot

0 Likes

Underhood,

I understand. I might get a fix for testing soon. I will update this post, once it is done.

Thanks,

0 Likes
gtype
Journeyman III

Bumping in hopes of a resolution for this inexcusable situation. Cycles has been out for over a year and somehow amd still hasn't been able to update a silly driver to support some subset of the API. Just plain sad. If this issue is not resolved by the time nvidia 700 series come out, I'll have no choice but to change. This is not a threat nor a tantrum, I am just expressing how I feel here, I would very much like to continue with ATI, I just wont have a choice.

Anyway, BUMP for great justice

0 Likes

Hi gtype,

I have not seen any post prior to this from you on this thread. So I have no idea what problem you are facing with AMD cards. The problem reported by nazim, was reproduced at our end, as is being looked into by AMD engineers.

In case you have some different problem to report, please start a new thread out of it.

0 Likes

Hi Gtype,

I may just be in a position to test a fix. I will keep this thread posted if something interesting turns up...

Meanwhile, Thanks a lot for the support and patience. We understand this is a bit frustrating....

Hopefully, a fix is just round the corner...

0 Likes

Hey there, great news!

Thanks for the hard work, and for keeping us in the loop.

Feeling somewhat hopeful now, good luck.

0 Likes
mj1020
Journeyman III

This is working here without any problem.

0 Likes

can you provide some more information...screenshots and driver version

0 Likes
mano-wii
Adept I

Hello,

I believe the problem is due to limitations in the current AMD driver, because Blender Cycles renders using NVIDIA's OpenCL almost as well as it does with CUDA.

Regards

Germano Cavalcante

0 Likes

Hi,

I have just got access to a test-driver. Will be posting an update soon. Thanks for your patience.

0 Likes