cancel
Showing results for 
Search instead for 
Did you mean: 

Archives Discussions

npm1
Adept II

What's wrong with this file?

THIS CODE IS FROM A PROGRAM CALLED BLENDER CYCLES...IT WORKS...BUT DOESN'T WORK WITH THE AMD OPENCL COMPILER,...IN THE BLENDER COMMUNITY SOME BELIEVE THAT THIS CODE IS PROGRAMMED USING CUDA SYNTAX, OTHERS BELIEVE THAT THIS CODE IS GENERIC ENOUGH FOR BOTH OPENCL AND CUDA,....PLEASE MAKE YOUR JUDGEMENT!!!

#ifndef __KERNEL_TYPES_H__

#define __KERNEL_TYPES_H__

#include "kernel_math.h"

#include "svm/svm_types.h"

CCL_NAMESPACE_BEGIN

/* constants */

/* Fixed sizes referenced by other kernel code.
 * NOTE(review): what each size counts (elements, float4 slots, ...) is not
 * visible in this header -- confirm against the code that uses them. */
#define OBJECT_SIZE        16
#define LIGHT_SIZE         4
#define FILTER_TABLE_SIZE  256
#define RAMP_TABLE_SIZE    256

/* device capabilities */

/* CPU and OpenCL builds enable both the basic and the advanced shading
 * feature sets. */
#if defined(__KERNEL_CPU__) || defined(__KERNEL_OPENCL__)
#define __KERNEL_SHADING__
#define __KERNEL_ADV_SHADING__
#endif

/* CUDA builds always enable basic shading; advanced shading is enabled only
 * when __CUDA_ARCH__ is at least 200. */
#if defined(__KERNEL_CUDA__)
#define __KERNEL_SHADING__
#if __CUDA_ARCH__ >= 200
#define __KERNEL_ADV_SHADING__
#endif
#endif

/* kernel features */

/* Features enabled for every build, regardless of device. */
#define __SOBOL__
#define __INSTANCING__
#define __DPDU__
#define __UV__
#define __BACKGROUND__
#define __CAUSTICS_TRICKS__
#define __VISIBILITY_FLAG__
#define __RAY_DIFFERENTIALS__
#define __CAMERA_CLIPPING__
#define __INTERSECTION_REFINE__
#define __CLAMP_SAMPLE__

/* Features enabled only when the device has basic shading support. */
#if defined(__KERNEL_SHADING__)
#define __SVM__
#define __EMISSION__
#define __TEXTURES__
#define __HOLDOUT__
#endif

/* Features enabled only when the device has advanced shading support. */
#if defined(__KERNEL_ADV_SHADING__)
#define __MULTI_CLOSURE__
#define __TRANSPARENT_SHADOWS__
#define __PASSES__
#define __BACKGROUND_MIS__
#define __AO__
#endif

/* NOTE(review): the five features below are enabled unconditionally, i.e.
 * for CPU, CUDA and OpenCL builds alike. In particular __OSL__ makes
 * ShaderClosure use `void *prim` instead of data0/data1 and adds
 * `void *osl_ctx` to ShaderData. Confirm this is intended for GPU builds. */
#define __MULTI_LIGHT__
#define __OSL__
#define __SOBOL_FULL_SCREEN__
#define __MODIFY_TP__
#define __QBVH__

/* Shader Evaluation */

/* Selects what a shader evaluation request computes: displacement or
 * background. */
enum ShaderEvalType {
    SHADER_EVAL_DISPLACE   = 0,
    SHADER_EVAL_BACKGROUND = 1
};

/* Path Tracing */

/* Indices of random number dimensions. The enumerators form two groups that
 * each start counting at 0, so values are intentionally reused. */
enum PathTraceDimension {
    /* first group: values 0..3, PRNG_BASE_NUM is the number of entries */
    PRNG_FILTER_U = 0,
    PRNG_FILTER_V = 1,
    PRNG_LENS_U   = 2,
    PRNG_LENS_V   = 3,
    PRNG_BASE_NUM = 4,

    /* second group: values 0..7, PRNG_BOUNCE_NUM is the number of entries */
    PRNG_BSDF_U     = 0,
    PRNG_BSDF_V     = 1,
    PRNG_BSDF       = 2,
    PRNG_LIGHT      = 3,
    PRNG_LIGHT_U    = 4,
    PRNG_LIGHT_V    = 5,
    PRNG_LIGHT_F    = 6,
    PRNG_TERMINATE  = 7,
    PRNG_BOUNCE_NUM = 8
};

/* these flag values correspond exactly to OSL defaults, so be careful not to

* change this, or if you do, set the "raytypes" shading system attribute with

* your own new ray types and bitflag values.

*

* for ray visibility tests in BVH traversal, the upper 20 bits are used for

* layer visibility tests. */

/* Ray type bit flags. Each type occupies one of the low ten bits;
 * PATH_RAY_SHADOW and PATH_RAY_ALL are combinations of them. */
enum PathRayFlag {
    PATH_RAY_CAMERA             = (1 << 0),
    PATH_RAY_REFLECT            = (1 << 1),
    PATH_RAY_TRANSMIT           = (1 << 2),
    PATH_RAY_DIFFUSE            = (1 << 3),
    PATH_RAY_GLOSSY             = (1 << 4),
    PATH_RAY_SINGULAR           = (1 << 5),
    PATH_RAY_TRANSPARENT        = (1 << 6),
    PATH_RAY_SHADOW_OPAQUE      = (1 << 7),
    PATH_RAY_SHADOW_TRANSPARENT = (1 << 8),

    /* either kind of shadow ray */
    PATH_RAY_SHADOW = (PATH_RAY_SHADOW_OPAQUE | PATH_RAY_SHADOW_TRANSPARENT),

    PATH_RAY_MIS_SKIP = (1 << 9),

    /* all ten flag bits set (1023) */
    PATH_RAY_ALL = ((1 << 10) - 1),

    /* 32 - 20: bit position where the upper 20 bits begin */
    PATH_RAY_LAYER_SHIFT = 12
};

/* Closure Label */

/* Closure label bit flags; LABEL_NONE is the empty set and every other
 * label is a distinct single bit. */
typedef enum ClosureLabel {
    LABEL_NONE        = 0,
    LABEL_CAMERA      = (1 << 0),
    LABEL_LIGHT       = (1 << 1),
    LABEL_BACKGROUND  = (1 << 2),
    LABEL_TRANSMIT    = (1 << 3),
    LABEL_REFLECT     = (1 << 4),
    LABEL_VOLUME      = (1 << 5),
    LABEL_OBJECT      = (1 << 6),
    LABEL_DIFFUSE     = (1 << 7),
    LABEL_GLOSSY      = (1 << 8),
    LABEL_SINGULAR    = (1 << 9),
    LABEL_TRANSPARENT = (1 << 10),
    LABEL_STOP        = (1 << 11)
} ClosureLabel;

/* Render Passes */

/* Render pass bit flags. Every pass is a single bit; note that the value 4
 * (bit 2) is not assigned to any pass. */
typedef enum PassType {
    PASS_NONE                  = 0,
    PASS_COMBINED              = (1 << 0),
    PASS_DEPTH                 = (1 << 1),
    /* bit 2 is unused */
    PASS_NORMAL                = (1 << 3),
    PASS_UV                    = (1 << 4),
    PASS_OBJECT_ID             = (1 << 5),
    PASS_MATERIAL_ID           = (1 << 6),
    PASS_DIFFUSE_COLOR         = (1 << 7),
    PASS_GLOSSY_COLOR          = (1 << 8),
    PASS_TRANSMISSION_COLOR    = (1 << 9),
    PASS_DIFFUSE_INDIRECT      = (1 << 10),
    PASS_GLOSSY_INDIRECT       = (1 << 11),
    PASS_TRANSMISSION_INDIRECT = (1 << 12),
    PASS_DIFFUSE_DIRECT        = (1 << 13),
    PASS_GLOSSY_DIRECT         = (1 << 14),
    PASS_TRANSMISSION_DIRECT   = (1 << 15),
    PASS_EMISSION              = (1 << 16),
    PASS_BACKGROUND            = (1 << 17),
    PASS_AO                    = (1 << 18),
    PASS_SHADOW                = (1 << 19)
} PassType;

/* Mask with every bit set. */
#define PASS_ALL (~0)

/* PathThroughput is a float3 whether or not __PASSES__ is defined. */
typedef float3 PathThroughput;

#if defined(__PASSES__)

/* With __PASSES__ defined, PathRadiance is a struct holding one float3 per
 * component listed below (plus a float4 `shadow`); otherwise it is a single
 * float3. */
typedef struct PathRadiance {
    int use_light_pass;

    float3 emission;
    float3 background;
    float3 ao;

    float3 indirect;
    float3 direct_throughput;
    float3 direct_emission;

    float3 color_diffuse;
    float3 color_glossy;
    float3 color_transmission;

    float3 direct_diffuse;
    float3 direct_glossy;
    float3 direct_transmission;

    float3 indirect_diffuse;
    float3 indirect_glossy;
    float3 indirect_transmission;

    float4 shadow;
} PathRadiance;

/* With __PASSES__ defined, BsdfEval keeps one float3 per closure category;
 * otherwise it is a single float3. */
typedef struct BsdfEval {
    int use_light_pass;

    float3 diffuse;
    float3 glossy;
    float3 transmission;
    float3 transparent;
} BsdfEval;

#else

typedef float3 PathRadiance;
typedef float3 BsdfEval;

#endif

/* Shader Flag */

/* Flag bits occupying bits 29..31. SHADER_MASK is the complement of the
 * three flags, i.e. the low 29 bits (0x1FFFFFFF). */
typedef enum ShaderFlag {
    /* `1 << 31` shifts into the sign bit of a signed int, which is undefined
     * behaviour in C. Build the bit in unsigned arithmetic and convert back
     * so the enumerator keeps the same int value as before. */
    SHADER_SMOOTH_NORMAL = (int)(1u << 31),
    SHADER_CAST_SHADOW = (1 << 30),
    SHADER_AREA_LIGHT = (1 << 29),
    SHADER_MASK = ~(SHADER_SMOOTH_NORMAL|SHADER_CAST_SHADOW|SHADER_AREA_LIGHT)
} ShaderFlag;

/* Light Type */

/* Kinds of light, numbered consecutively from 0. */
typedef enum LightType {
    LIGHT_POINT      = 0,
    LIGHT_DISTANT    = 1,
    LIGHT_BACKGROUND = 2,
    LIGHT_AREA       = 3,
    LIGHT_AO         = 4
} LightType;

/* Camera Type */

/* Camera projection kinds, numbered consecutively from 0. */
enum CameraType {
    CAMERA_PERSPECTIVE  = 0,
    CAMERA_ORTHOGRAPHIC = 1,
    CAMERA_ENVIRONMENT  = 2
};

/* Differential */

/* Pair of float3 values named dx and dy, in that order. */
typedef struct differential3 {
    float3 dx, dy;
} differential3;

/* Pair of scalar floats named dx and dy, in that order. */
typedef struct differential {
    float dx, dy;
} differential;

/* Ray */

/* Ray state.
 * NOTE(review): P, D and t are presumably origin, direction and distance
 * along the ray -- the header does not say, confirm with the callers. */
typedef struct Ray {
    float3 P, D;
    float t;

#if defined(__RAY_DIFFERENTIALS__)
    /* differentials matching P and D; present only with ray differentials */
    differential3 dP, dD;
#endif
} Ray;

/* Intersection */

/* Result record of an intersection: three floats followed by two ints. */
typedef struct Intersection {
    float t;
    float u;
    float v;

    int prim, object;
} Intersection;

/* Attributes */

/* Element kind an attribute is attached to, numbered consecutively from 0;
 * ATTR_ELEMENT_NONE is the last value. */
typedef enum AttributeElement {
    ATTR_ELEMENT_FACE   = 0,
    ATTR_ELEMENT_VERTEX = 1,
    ATTR_ELEMENT_CORNER = 2,
    ATTR_ELEMENT_VALUE  = 3,
    ATTR_ELEMENT_NONE   = 4
} AttributeElement;

/* Closure data */

/* Length of the fixed closure array in ShaderData when __MULTI_CLOSURE__ is
 * defined. */
#define MAX_CLOSURE 8

/* One closure: its type and weight, an optional sample weight, and either
 * two float parameters or (with __OSL__) an opaque pointer. */
typedef struct ShaderClosure {
    ClosureType type;
    float3 weight;

#if defined(__MULTI_CLOSURE__)
    float sample_weight;
#endif

#if !defined(__OSL__)
    float data0;
    float data1;
#else
    void *prim;
#endif
} ShaderClosure;

/* Shader Data

*

* Main shader state at a point on the surface or in a volume. All coordinates

* are in world space. */

/* Bit flags stored in ShaderData::flag. */
enum ShaderDataFlag {
    /* runtime flags */
    SD_BACKFACING    = (1 << 0),  /* backside of surface? */
    SD_EMISSION      = (1 << 1),  /* have emissive closure? */
    SD_BSDF          = (1 << 2),  /* have bsdf closure? */
    SD_BSDF_HAS_EVAL = (1 << 3),  /* have non-singular bsdf closure? */
    SD_BSDF_GLOSSY   = (1 << 4),  /* have glossy bsdf */
    SD_HOLDOUT       = (1 << 5),  /* have holdout closure? */
    SD_VOLUME        = (1 << 6),  /* have volume closure? */

    /* shader flags */
    SD_SAMPLE_AS_LIGHT         = (1 << 7),  /* direct light sample */
    SD_HAS_SURFACE_TRANSPARENT = (1 << 8),  /* has surface transparency */
    SD_HAS_VOLUME              = (1 << 9),  /* has volume shader */
    SD_HOMOGENEOUS_VOLUME      = (1 << 10)  /* has homogeneous volume */
};

typedef struct ShaderData {

          /* position */

          float3 P;

          /* smooth normal for shading */

          float3 N;

          /* true geometric normal */

          float3 Ng;

          /* view/incoming direction */

          float3 I;

          /* shader id */

          int shader;

          /* booleans describing shader, see ShaderDataFlag */

          int flag;

          /* primitive id if there is one, ~0 otherwise */

          int prim;

          /* parametric coordinates

           * - barycentric weights for triangles */

          float u, v;

          /* object id if there is one, ~0 otherwise */

          int object;

#ifdef __RAY_DIFFERENTIALS__

          /* differential of P. these are orthogonal to Ng, not N */

          differential3 dP;

          /* differential of I */

          differential3 dI;

          /* differential of u, v */

          differential du;

          differential dv;

#endif

#ifdef __DPDU__

          /* differential of P w.r.t. parametric coordinates. note that dPdu is

           * not readily suitable as a tangent for shading on triangles. */

          float3 dPdu, dPdv;

#endif

#ifdef __MULTI_CLOSURE__

          /* Closure data, we store a fixed array of closures */

          ShaderClosure closure[MAX_CLOSURE];

          int num_closure;

          float randb_closure;

#else

          /* Closure data, with a single sampled closure for low memory usage */

          ShaderClosure closure;

#endif

#ifdef __OSL__

          /* OSL context */

          void *osl_ctx;

#endif

} ShaderData;

/* Constant Kernel Data

*

* These structs are passed from CPU to various devices, and the struct layout

* must match exactly. Structs are padded to ensure 16 byte alignment, and we

* do not use float3 because its size may not be the same on all devices. */

/* Camera parameters passed to the device. Member order and count are part
 * of the host/device layout contract and must not change. */
typedef struct KernelCamera {
    /* type, followed by three padding ints */
    int type;
    int pad1;
    int pad2;
    int pad3;

    /* matrices */
    Transform cameratoworld;
    Transform rastertocamera;

    /* differentials */
    float4 dx, dy;

    /* depth of field */
    float aperturesize, blades, bladesrotation, focaldistance;

    /* motion blur */
    float shutteropen, shutterclose;

    /* clipping */
    float nearclip, cliplength;

    /* more matrices */
    Transform screentoworld;
    Transform rastertoworld;
    Transform ndctoworld;
    Transform worldtoscreen;
    Transform worldtoraster;
    Transform worldtondc;
    Transform worldtocamera;
} KernelCamera;

/* Film and render-pass settings passed to the device. Members are written
 * four per row (24 four-byte members in total); `pass_pad` is a filler
 * slot. Member order must not change. */
typedef struct KernelFilm {
    float exposure;
    int pass_flag, pass_stride, use_light_pass;

    int pass_combined, pass_depth, pass_normal, pass_pad;

    int pass_uv, pass_object_id, pass_material_id, pass_diffuse_color;

    int pass_glossy_color, pass_transmission_color;
    int pass_diffuse_indirect, pass_glossy_indirect;

    int pass_transmission_indirect, pass_diffuse_direct;
    int pass_glossy_direct, pass_transmission_direct;

    int pass_emission, pass_background, pass_ao, pass_shadow;
} KernelFilm;

/* Background settings passed to the device: two ints followed by two
 * floats. */
typedef struct KernelBackground {
    /* only shader index */
    int shader, transparent;

    /* ambient occlusion */
    float ao_factor, ao_distance;
} KernelBackground;

/* Sun/sky model parameters passed to the device. The pad members fill the
 * struct out to 24 floats; member order must not change. */
typedef struct KernelSunSky {
    /* sun direction in spherical and cartesian */
    float theta;
    float phi;
    float pad3;
    float pad4;

    /* perez function parameters */
    float zenith_Y;
    float zenith_x;
    float zenith_y;
    float pad2;

    float perez_Y[5];
    float perez_x[5];
    float perez_y[5];
    float pad5;
} KernelSunSky;

/* Integrator settings passed to the device: 20 four-byte members, the last
 * being padding. Member order must not change. */
typedef struct KernelIntegrator {
    /* emission */
    int use_direct_light, use_ambient_occlusion;
    int num_distribution, num_all_lights;
    float pdf_triangles, pdf_lights;
    int pdf_background_res;

    /* bounces */
    int min_bounce, max_bounce;
    int max_diffuse_bounce, max_glossy_bounce, max_transmission_bounce;

    /* transparent */
    int transparent_min_bounce, transparent_max_bounce, transparent_shadows;

    /* caustics */
    int no_caustics;

    /* seed */
    int seed;

    /* render layer */
    int layer_flag;

    /* clamp */
    float sample_clamp;

    /* padding */
    int pad;
} KernelIntegrator;

/* BVH settings passed to the device: two values followed by two padding
 * ints. */
typedef struct KernelBVH {
    /* root node */
    int root;
    int attributes_map_stride;

    int pad1;
    int pad2;
} KernelBVH;

/* Aggregate of all constant kernel data: one member per Kernel* struct
 * declared in this header, in the order below. */
typedef struct KernelData {

          /* camera parameters */
          KernelCamera cam;

          /* film and render-pass settings */
          KernelFilm film;

          /* background shader and ambient occlusion settings */
          KernelBackground background;

          /* sun/sky model parameters */
          KernelSunSky sunsky;

          /* integrator settings */
          KernelIntegrator integrator;

          /* BVH settings */
          KernelBVH bvh;

} KernelData;

CCL_NAMESPACE_END

#endif /*  __KERNEL_TYPES_H__ */

96 Replies

AMD really need fix this as developer of LuxRender found similar issue when he implemented recursive procedural materials into LuxRender.

0 Likes

Since we are on the subject, I think I may help you debug this issue once and for all:

The bug is about an OpenCL implementation on a Render Engine (Cycles) in Blender

Versions of Blender that have this bug:

Blender 2.60

Blender 2.61

Blender 2.62

Blender 2.63

Blender 2.64

Download of Blender: Official Blender Releases

In version 2.65 and probably on the next release too, OpenCL is / will be deactivated because of this bug.

Operating systems known to have this bug:

Windows 8: x86, x86_64

Windows 7: x86, x86_64

Windows XP:  x86, x86_64

Ubuntu 12.04 LTS:  x86, x86_64

Ubuntu 12.10:  x86, x86_64

Mac OSX

Thread on Blender forums about this bug:

A good news for AMD/ATI Graphic Cards Owners

Cycles improvements on AMD with Public Driver Release

File to test the bug:
Mike Pan BMW Scene
But any file should reproduce the bug. Even the startup cube when set to render in Cycles

Hardware that the bug shows:

AMD / ATI GPU cards OpenCL

Notice that this bug DOES NOT APPEAR on NVidia GPU OpenCL implementation neither on Intel/AMD CPU OpenCL implementations.

Latest driver tested that show the bug:

Catalyst Software Suite 13.1 for any OS

Catalyst Software Suite 13.2 Beta 6 for any OS

Steps to reproduce Bug #1:

Open the desired blender scene;

On the top middle, change from Blender Render to Cycles Render;

On the top left, go to File > User preferences > System;

Choose OpenCL; (now you can either close this windows or save)

On the left middle, change the Feature Set from Supported to Experimental (now you are compiling with OpenCL);

Right below, change CPU to GPU Compute;

Click on Render, a little above;

Description of Bug #1 (May be more than only one bug, don't know):

Long time to compile the OpenCL kernel.

RAM usage going to the maximum, even crashing the program.

Scene not rendering (Black frame);

Sometimes, a scene may render (Depends on the situation - Read the forums mentioned above) but the colors are wrong.

OpenCL kernel compiling for each frame (If its an animation).

OpenCL kernel compiling each time you click on render.

Long wait between the OpenCL kernel finishing and the start of the render (I have no idea what this might be, but it only happens on AMD GPUs).

Bug #2:

Textures do not work with AMD GPUs with OpenCL.

This bug is more difficult to reproduce, but nevertheless it's there. Textures do not work with AMD/ATI but work with other OpenCL implementations.

I hope this info helps you guys solve these issues. Blender is a very powerful open-source engine and it would be nice to see some support from your end to make it work properly on your hardware. Any questions, I'm here to answer.

0 Likes

From the preliminary test of the bootleg driver, I hear from the testers that mikepan is working without crashing.

I will confirm this again tomorrow (its a local holiday here today)

Textures stuff is new. we need a repro case for that...

0 Likes

Hi himanshu!

I've made a little research. Textures problems were related to older versions (2.62 and before), and since 2.63 and above are not working with AMD/ATI OpenCL compilers at all (Some trick can be made here, but it didn't have the corrections of 2.63), I can't say for sure that textures aren't working. Maybe if you could test with your driver.

Simple image texture on the default cube, rendered with Cycles on OpenCL for AMD. It's quite simple and there's a lot of tutorials out there. Remember to test on versions 2.63 or up.

We all appreciate your efforts very much!

Regards,

Marcelo

0 Likes

Hi mo92,

I guess we are still stuck with the large memory requirements of the blender software. Although, the software is no longer crashing(with internal drivers), it is giving Errors while OpenCL compilation.

I ran the blender software without BMW file. I checked the commandline window showing the log. It can be opened from the window menu in the blender. It gives "OpenCL compilation failed : Insufficient Private Resources!"

I ran the Blender with Mike’s BMW file, and the program gives the same error again.

I have not been able to see the rendered BMW using OpenCL. Blender always shows black screen, which looks reasonable as kernel compilation itself is failing. We are still working on solving this issue. Appreciate your patience on this.

0 Likes

This also happens with the AMD drivers available to the public. The software does not crash if you wait until the end; it gives "OpenCL compilation failed : Insufficient Private Resources!" too.

0 Likes

Thank you and the Engineers behind, looking to fix these issues.

My guess would be memory leaking, from the OpenCL compiler. Perhaps it would be easier to debug the compiler by looking into Blender code, compiling it and deactivate piece by piece. Someone within the Blender developers should be able to provide assistance, if required.

If you, or any of the Engineers could get in touch with the main Cycles developer, Brecht van Lommel, it might be easier to debug also.

I can't state my appreciation for what you are doing for us. Thank you!

Regards,

Marcelo

0 Likes

Hi Marcelo,

You are most welcome. We can't thank you all enough for extending support and, more importantly, for your patience.

I will venture into blender forums and see if I can connect the developer with the engineering team.

May be, they could solve this easily.

I have one last question:

1. I know Blender is written for CUDA as well. But, Have you ever compiled this for NVIDIA's OpenCL platform?

    Can you share your results?

Thanks,

0 Likes

Hi Himanshu,


The threads below shows the status and compares the rendering speed between CUDA and Nvidia's OpenCL on various platforms.

It may be useful for research.

http://blenderartists.org/forum/showthread.php?239480-2-61-Cycles-render-benchmark

http://blenderartists.org/forum/showthread.php?281227-Cycles-GPU-rendering-with-CUDA-Feedback-needed

Regards,

Germano

0 Likes

Thank you Germano.  Will pass it on.

As I can see, mikepan blend (bmw) works on nvidia Opencl platform.

Do you have any data on how much memory their OpenCL compilation take (and how long do they take?)

I will see if I can get a local setup here to measure it.... But, if you have some ready made data - it will be useful.

Thanks,

0 Likes

Do you have any data on how much memory their OpenCL compilation take (and how long do they take?)

I don't have information about that.

In http://blenderartists.org/forum/showthread.php?249705-Cycles-improvements-on-AMD-with-public-driver-...

Zalamander  wrote:

"It took 19 minutes and at its height consumed more than 6.5GB of RAM.

Here's my system configuration:

AMD Phenom x6@2.8Ghz

12GB RAM

Radeon 5850

Windows 7x64

using Blender 2.62 64-bit official release"

The video below shows how the card "AMD Radeon HD 6800 Series" rendering with GPU (computer device Barts)

https://www.youtube.com/watch?v=fL6H5mw4zrk

This site compares the time of the CUDA and OpenCL and teaches how to reproduce the tests.

http://www.austeregrim.com/2012/08/cycles-cuda-vs-opencl.html

0 Likes

On 26 February 2013: Brecht Van Lommel wrote:

"If you enable all kernel features, at least 8 GB before it crashes here. When disabling most features and using an ambient occlusion only render, it's about 1 GB.


The way I understand it is basically that AMD graphics cards do not support true function calls, everything must be inlined. This means the amount of code becomes so big that the compiler runs out of memory or some sort of GPU limitation is crossed. This means we either have to wait for AMD to find a solution to that problem and support non-inlined functions, or that we rewrite our kernel such that it's split into small pieces which would be quite complicated to do."

0 Likes

Sorry, I no longer have nVidia GPU cards.

Have you successfully contacted Bretch? Thanks!

0 Likes

Yet to contact Bretch. Will do..Thanks.

0 Likes

Have passed on Brecht's contact details to the engineering team looking into this issue.

I think the team understands the reason why the compiler is running out of resources.

They will touch base with Brecht, if needed.

Thanks,

0 Likes

(Forgive me i'm a native french speaker)

Hi Himanshu thanks a lot for efforts mad to make cycles work on AMD GPU. Maybe AMD engineer think 3D rendering isn't a good or big buisness and blender is opensource. But there is many reason to try make things work well for this software.

1/ many guy that work with 3D software are gamers that pay lot of dollars for high gpu perf (I'am a gamers and many guy i know in computer graphics are gamer too)

2/ For consumers AMD is an opencl prophet with it APU and HSA promotion. it is sad to see AMD work bad with opencl when Nvidia and intel work.

3/ AMD GCN arch are very good device for computing but hardware optimisation are not enougth driver support are needed.

4/ Many little studio are waiting for AMD opencl support in blender cycles to build new configs.My studio Too.

PLEASE PLEASE delivere US !

thank a lot Himanshu

0 Likes

Hi Sharly,

As I understand, AMD engineers too share the same technical view that some1 shared above (lot of inlining...)

There is an internal problem report and people are working on it.

So, the issue is being looked at now and we are not going to leave you guys in the cold.

The URL of this thread is under track and I will get back and post an update --whenever it is available.

Things will turn around.

Meantime, we truly appreciate your support and patience on this,

0 Likes

Thanks a lot Himanshu.

Today I've make some research to see in deeply way why AMD card don't work with many complex renderer:

VRay

Octane

INDIGO

Cycles

Centileo

ARION

the reason seems to bee more than a simple opencl driver bug (optimisation).The Hadware too, need more improvement .and opencl is more difficult to set than Cuda.

look at the last answer down of the post from "grimm"

http://render.otoy.com/forum/viewtopic.php?f=25&t=28207

Brecht said that they need more hardware optimisation to:

http://wiki.blender.org/index.php/Dev:2.6/Source/Render/Cycles/OpenCL

<< We will need major driver or hardware improvements to get full cycles support on AMD hardware>>

if one days all theses renderer are full supported By AMD I will celebrate with lot of CG one my PC.

Wait and see maybe all will be fix soon.

0 Likes

the reason seems to bee more than a simple opencl driver bug (optimisation).The Hadware too, need more improvement .and opencl is more difficult to set than Cuda.

Is there a reason why you say "hardware improvement" is needed?

I don't see NVIDIA hardware radically different from AMD hardware.

Except for the dynamic parallelism, possibly aimed at imbalanced workload - I dont see any major differences.

In fact AMD hardware deliver much more GFLOPs and memory bandwidth than NVIDIA.

Please share with us any hardware improvements - that you think is necessary from AMD's side.

The big kernel and function inlining are the chief problems that result in the "insufficient private resources" message.

People are working to fix this. If this is fixed, i have very little doubts about AMD OpenCL running these renders as good as anybody else, if not better.

0 Likes

I also believe no hardware improvements is needed.

AMD's approach is very good at it's hardware level. On a personal note: For me, it is always more stable.

About it's processor power on GPGPU, aplications that OpenCL does work (simpler ones), results are as good as nVidia and Intel, if not better, as said. This could be verified easily, with LuxMark and other apps. And that is keeping good performance also at graphical levels (gaming).

AMD's issue is the way their OpenCL compiler works, from my point of view.

I don't know if Himanshu is not telling us , but I believe is that for AMD driver to be able to handle complex OpenCL codes, like Cycles and other render engines, it's going to need MAJOR changes, things that *might* take months .

From my personal experience, AMD has better hardware, drivers and support than any other company out there. I think that the issue with Cycles is now being repassed to proper Support and Engineers, as Himanshu tell us. That's why it could have gone so long without being fixed.

Please keep us updated about the status on this matter Himanshu. I can guarantee you there are tons of people out there wanting this to happen

Regards, Marcelo.

0 Likes

Hello guy ! I'm fine!

But I see that it make month and there no news about our matter.So I want to suggest something.

To help us and all the CG community wait we must get some little info about how days after days people are working around to find solution.little info like comment on a work in progress.

infos between AMD devs and BF (Blender foundation) collaboration.some news about prototype test or more little fings like Roadmap.

pleae don't worry about this I only want to trust in AMD future.

Sharly,

I can guarantee you that active work is happening here. Fixing cycles involves work at both OpenCL compiler level and also in layers beneath it.

The work is pretty involved and will take a fair amount of time.

I have not got any timelines from AMD engineers. But it looks like this is going to take a while.

Please bear with us.

0 Likes

As I understand problem is that all function calls are inlined. But there exist CALL instruction http://developer.amd.com/wordpress/media/2012/10/R600-R700-Evergreen_Assembly_Language_Format.pdf so it just compiler problem.

0 Likes

I'M Very Happy if it's only opencl code matter very happy ! Thanks you Guy for clarification.

I'm newbie in hardware Arch way and my first opinion about AMD device since GCN is that they have solid hardware but some little matter of driver support that can change radically how people see them on the market.however in my previous post I've simply report BRECHT VAN LOMMEL words and the link from wiki.

Thank for help us.

0 Likes

Hey there,
I read this thread and im very happy that something is going on at this problem. I'm planning to buy a graphics card in the near future and I would love seing the AMD drivers working with Cycles. I'm curiosly observing this thread every day. If the compiler will work with Cycles I'm definitely going for an AMD Card
Just wanted to let you know that there are other people out there craving for a solution! Thanks for the efforts!

Hi All,

AMD Engineers are working hard on making blender (and many other similar softwares) to work on AMD GPUs. I will post here  if there is any progress.

I don't know if helps but I want to tell my experience with the Latest Beta Driver 13.3

The card AMD Radeon HD 7570 no longer detects OpenCL

The Intel (R) HD Graphics 4000 is now detecting OpenCL (instead of AMD)

Smalllux GPU (another renderer that works with OpenCL) is still working perfectly.

Attached a picture with the strangeness.

OpenCL Error 2,1.png

0 Likes

Hi Germano,

Thanks for reporting. I have forwarded the issue to the driver team. Hopefully, it will get fixed.

0 Likes

If this gets fixed, will Blender Cycles work on present AMD cards or will it anyway only be fixed in the next generation?

0 Likes

As I understand from AMD engineers, this is basically software-stack issue. So, most likely that after the fix, things should work.

But I cannot guarantee this.

Could you please give us an update on progress made so far, if any? It's been almost a month since the last update now, and I would like to know more.

Hi kylen,

I would request you to wait for the next driver release. As of now I cannot give any further information.

0 Likes

great news all friends! after many try and retry session and moving  from one render engine to another I finally find the holy grail of lightspeed rendering under AMD GCN GPU.of course i try indigo render RT / Luxrender / and finally SLG. but who's the winner.

I run the same scène under each render engine and for the same quality (less noisy image) i've got :

Luxrender : 8 hours ( in hybridpath cpu+gpu) with sharp fireflies

INDIGO RT: 6 hours (hybridpath too)

SmallluxGPU 3.0:  5 min ( pathocl mod  Full Gpu)

I7 2600/HD 7950 royalking/16gb ddr3.

all the test are mad with only one of the GPU because i must change my motherboard and my power supply before setting the second HD 7950 in the beast.

I will post rendered image and tutorial about SLG later. but i think every AMD user must try it.very awesome.

for example the HD 7950 is 6X faster than my leadtek GTX 560 non ti.and 15X faster than my i7 2600.

0 Likes

I love you man.

Go here: http://www.luxrender.net/en_GB/standalone

Download the OpenCL archive for your operating system. Extract that somewhere you can easily find it, you'll need to find it again in Blender.

Now open up Blender. "File" -> "User Preferences" (Control + Alt + U) -> "Addons" tab -> "Install from file" (at the bottom).

Browse to the folder you just extracted, select the zip file.

My version is called "LuxBlend26_1.2.1_64bit_OpenCL.zip"

Now make sure it is enabled by making sure that it is ticked. If it does not show up automagically, click on the "Render" button at the left. "LuxRender" will show up in the list.

When that is done, do the same, but with "render_smallluxgpu.py". Make sure that is ticked as well.

Next, in Blender, at the top, next to the "Scene" textbox, change the render engine to "SmallLuxGPU"

At the right, in the "Render" tab, under "Full path to SmallLuxGPU's executable", enter the the path to "LuxRender_64_OpenCL\slg3.exe".

Next, under "Full path where the scene is exported", enter the path for the exported scenes to be stored. Entering "//" here will store the exported files in the directory of your Blender .blend.

If the "Rendering Type" is not yet set to "PathOCL", make sure it is. You could also try other "Rendering Types".

Now render to your heart's content!

Should you get any error messages, make sure you have installed the AMD APP SDK, which you can find here: http://developer.amd.com/tools-and-sdks/heterogeneous-computing/amd-accelerated-parallel-processing-...

Any other error messages, you can Google around.

Edit: you have to add a sun lamp though: Shift+A -> Lamp -> Sun (S). In the "Object Data" on the right (sun icon), make sure "Sky" is enabled.

0 Likes

thank Kylen! Now we must spread the News.. I think that AMD must use Smalluxgpu as an opencl promoting software as they do with luxmark Blender(compositing).

0 Likes
ridgerunner
Journeyman III

I really hope this issue gets resolved soon. I'm getting ready to replace my amd radeon hd 4870 (It's getting long in the tooth). One of the things that will effect my next purchase for a video card will be Cycles support (I might be forced to go with nvidia, I don't like that idea). Anyway, thanks for all the effort so far guys!

oh my god if only this get fixed soon imagine what event it will be on CG community. for it will be the time to build a beast powered by AMD. what great deal i dream every days about it.

1 W8000    for    display  large wide heavy 3D scene from blender an 3DSMAX

2 HD8990   for    very very fast rendering  on cycles octane indigo arion LUXRENDER oh my god

1 cpu            the best of AMD

my employer are waiting for this to change every config in our studio.

0 Likes

finally i've got two HD 7950 from club 3D for luxrender and try to give a chance to AMD device. I hope that I will never regret it. already i 'm in a testing stage. will send feedback later. if this experimentation outperform my old blender + cycles pippeline it will be great.

blender+luxrender under : I7 2600 /16 gb ddr3/2X HD 7950

Hi, I'm new here but I've been following this thread since summer.

To sharlybg, I've noticed that you keep mentioning Octane Render, and it seems to me you are waiting for AMD to fix their problems with OpenCL so you can use Octane. Is my assumption correct?

But I'm sorry but AMD cannot do anything for you right now in that matter.  Octane Render is a CUDA software, and it requires an nvidia card.

The are three ways you can get Octane to work on your AMD GPUs. One is if AMD decided to support CUDA, which I don't think will happen.  And the second is if somebody makes a working Windows build of gpuocelot, but it looks like they are having problems building for Windows.  Anybody here know about it?

And the last way is if Octane devs at Otoy decide to support OpenCL. 

0 Likes

you dn't have to wait too long to render what you want under AMD graphique card. this is one of my final render with the HD 7950 from club 3d royal king.follow the steps described by Kylen to install Smallluxgpu.And enjoy your powerfull GCN GPU.I'AM Very happy today.Thanks to AMD. if you dn't believe us try Luxmark 2.0.

http://sdz-upload.s3.amazonaws.com/prod/upload/00093%20%288%29.jpg

0 Likes