cancel
Showing results for 
Search instead for 
Did you mean: 

Archives Discussions

npm1
Adept II

What's wrong with this file?

This code is from a program called Blender Cycles. It works, but it does not work with the AMD OpenCL compiler. In the Blender community, some believe that this code is written using CUDA syntax; others believe that it is generic enough for both OpenCL and CUDA. Please make your own judgement!

#ifndef __KERNEL_TYPES_H__

#define __KERNEL_TYPES_H__

#include "kernel_math.h"

#include "svm/svm_types.h"

CCL_NAMESPACE_BEGIN

/* constants: fixed sizes used by the kernel (units are not stated here) */
#define OBJECT_SIZE        16
#define LIGHT_SIZE         4
#define FILTER_TABLE_SIZE  256
#define RAMP_TABLE_SIZE    256

/* device capabilities
 *
 * Each device section turns on basic shading. Advanced shading is turned
 * on unconditionally for CPU and OpenCL, and for CUDA only when
 * __CUDA_ARCH__ is at least 200. */

#if defined(__KERNEL_CPU__)
#define __KERNEL_SHADING__
#define __KERNEL_ADV_SHADING__
#endif

#if defined(__KERNEL_CUDA__)
#define __KERNEL_SHADING__
#if __CUDA_ARCH__ >= 200
#define __KERNEL_ADV_SHADING__
#endif
#endif

#if defined(__KERNEL_OPENCL__)
#define __KERNEL_SHADING__
#define __KERNEL_ADV_SHADING__
#endif

/* kernel features */

/* enabled for every device */
#define __SOBOL__
#define __INSTANCING__
#define __DPDU__
#define __UV__
#define __BACKGROUND__
#define __CAUSTICS_TRICKS__
#define __VISIBILITY_FLAG__
#define __RAY_DIFFERENTIALS__
#define __CAMERA_CLIPPING__
#define __INTERSECTION_REFINE__
#define __CLAMP_SAMPLE__

/* enabled only when the device has basic shading */
#if defined(__KERNEL_SHADING__)
#define __SVM__
#define __EMISSION__
#define __TEXTURES__
#define __HOLDOUT__
#endif

/* enabled only when the device has advanced shading */
#if defined(__KERNEL_ADV_SHADING__)
#define __MULTI_CLOSURE__
#define __TRANSPARENT_SHADOWS__
#define __PASSES__
#define __BACKGROUND_MIS__
#define __AO__
#endif

/* NOTE(review): the following are defined unconditionally, for every
 * device. In particular __OSL__ selects the pointer members (void *prim,
 * void *osl_ctx) in ShaderClosure and ShaderData -- confirm that this is
 * intended for GPU builds. */
#define __MULTI_LIGHT__
#define __OSL__
#define __SOBOL_FULL_SCREEN__
#define __MODIFY_TP__
#define __QBVH__

/* Shader Evaluation: selects what a shader evaluation request computes. */
enum ShaderEvalType {
    SHADER_EVAL_DISPLACE   = 0,
    SHADER_EVAL_BACKGROUND = 1
};

/* Path Tracing
 *
 * Random number dimensions. The numbering restarts at 0 after
 * PRNG_BASE_NUM, so the two groups share values; presumably the second
 * group is offset per bounce by the sampler -- confirm against the caller. */
enum PathTraceDimension {
    /* first group, PRNG_BASE_NUM entries */
    PRNG_FILTER_U   = 0,
    PRNG_FILTER_V   = 1,
    PRNG_LENS_U     = 2,
    PRNG_LENS_V     = 3,
    PRNG_BASE_NUM   = 4,

    /* second group, PRNG_BOUNCE_NUM entries */
    PRNG_BSDF_U     = 0,
    PRNG_BSDF_V     = 1,
    PRNG_BSDF       = 2,
    PRNG_LIGHT      = 3,
    PRNG_LIGHT_U    = 4,
    PRNG_LIGHT_V    = 5,
    PRNG_LIGHT_F    = 6,
    PRNG_TERMINATE  = 7,
    PRNG_BOUNCE_NUM = 8
};

/* Ray type bit flags.
 *
 * The values correspond exactly to the OSL defaults, so do not change them;
 * if you must, set the "raytypes" shading system attribute with your own
 * ray types and bitflag values.
 *
 * For ray visibility tests in BVH traversal, the upper 20 bits are used for
 * layer visibility tests (hence PATH_RAY_LAYER_SHIFT = 32 - 20). */
enum PathRayFlag {
    PATH_RAY_CAMERA             = (1 << 0),
    PATH_RAY_REFLECT            = (1 << 1),
    PATH_RAY_TRANSMIT           = (1 << 2),
    PATH_RAY_DIFFUSE            = (1 << 3),
    PATH_RAY_GLOSSY             = (1 << 4),
    PATH_RAY_SINGULAR           = (1 << 5),
    PATH_RAY_TRANSPARENT        = (1 << 6),

    PATH_RAY_SHADOW_OPAQUE      = (1 << 7),
    PATH_RAY_SHADOW_TRANSPARENT = (1 << 8),
    /* either kind of shadow ray */
    PATH_RAY_SHADOW             = (PATH_RAY_SHADOW_OPAQUE | PATH_RAY_SHADOW_TRANSPARENT),

    PATH_RAY_MIS_SKIP           = (1 << 9),

    /* union of all ten single-bit flags above */
    PATH_RAY_ALL                = (PATH_RAY_CAMERA | PATH_RAY_REFLECT | PATH_RAY_TRANSMIT |
                                   PATH_RAY_DIFFUSE | PATH_RAY_GLOSSY | PATH_RAY_SINGULAR |
                                   PATH_RAY_TRANSPARENT | PATH_RAY_SHADOW_OPAQUE |
                                   PATH_RAY_SHADOW_TRANSPARENT | PATH_RAY_MIS_SKIP),

    PATH_RAY_LAYER_SHIFT        = (32 - 20)
};

/* Closure Label: bit flags, one bit per label (LABEL_NONE is no bits). */
typedef enum ClosureLabel {
    LABEL_NONE        = 0,
    LABEL_CAMERA      = (1 << 0),
    LABEL_LIGHT       = (1 << 1),
    LABEL_BACKGROUND  = (1 << 2),
    LABEL_TRANSMIT    = (1 << 3),
    LABEL_REFLECT     = (1 << 4),
    LABEL_VOLUME      = (1 << 5),
    LABEL_OBJECT      = (1 << 6),
    LABEL_DIFFUSE     = (1 << 7),
    LABEL_GLOSSY      = (1 << 8),
    LABEL_SINGULAR    = (1 << 9),
    LABEL_TRANSPARENT = (1 << 10),
    LABEL_STOP        = (1 << 11)
} ClosureLabel;

/* Render Passes: bit flags, one bit per pass.
 *
 * Note that bit 2 (value 4) is not assigned to any pass: PASS_DEPTH is
 * followed directly by PASS_NORMAL = 8. */
typedef enum PassType {
    PASS_NONE                  = 0,
    PASS_COMBINED              = (1 << 0),
    PASS_DEPTH                 = (1 << 1),
    PASS_NORMAL                = (1 << 3),
    PASS_UV                    = (1 << 4),
    PASS_OBJECT_ID             = (1 << 5),
    PASS_MATERIAL_ID           = (1 << 6),
    PASS_DIFFUSE_COLOR         = (1 << 7),
    PASS_GLOSSY_COLOR          = (1 << 8),
    PASS_TRANSMISSION_COLOR    = (1 << 9),
    PASS_DIFFUSE_INDIRECT      = (1 << 10),
    PASS_GLOSSY_INDIRECT       = (1 << 11),
    PASS_TRANSMISSION_INDIRECT = (1 << 12),
    PASS_DIFFUSE_DIRECT        = (1 << 13),
    PASS_GLOSSY_DIRECT         = (1 << 14),
    PASS_TRANSMISSION_DIRECT   = (1 << 15),
    PASS_EMISSION              = (1 << 16),
    PASS_BACKGROUND            = (1 << 17),
    PASS_AO                    = (1 << 18),
    PASS_SHADOW                = (1 << 19)
} PassType;

/* every bit set */
#define PASS_ALL (~0)

/* Path throughput is a plain float3 whether or not passes are enabled. */
typedef float3 PathThroughput;

#ifdef __PASSES__

/* With passes enabled, radiance is split into separate components. */
typedef struct PathRadiance {
    int use_light_pass;

    float3 emission, background, ao;
    float3 indirect;
    float3 direct_throughput, direct_emission;

    /* per-component colors */
    float3 color_diffuse, color_glossy, color_transmission;

    /* per-component direct lighting */
    float3 direct_diffuse, direct_glossy, direct_transmission;

    /* per-component indirect lighting */
    float3 indirect_diffuse, indirect_glossy, indirect_transmission;

    float4 shadow;
} PathRadiance;

/* With passes enabled, a BSDF evaluation is split per component. */
typedef struct BsdfEval {
    int use_light_pass;

    float3 diffuse, glossy, transmission, transparent;
} BsdfEval;

#else

/* Without passes, radiance and BSDF evaluation collapse to one float3. */
typedef float3 PathRadiance;
typedef float3 BsdfEval;

#endif

/* Shader Flag
 *
 * The top three bits of a 32-bit shader value carry flags; SHADER_MASK
 * clears those three bits and keeps the remaining low 29 bits.
 *
 * The bits are built from unsigned literals and then converted to int:
 * shifting a signed 1 into the sign bit (1 << 31) is undefined behavior
 * in C, whereas the unsigned shift is well defined. The resulting bit
 * patterns are the same as before on two's complement targets. */
typedef enum ShaderFlag {
    SHADER_SMOOTH_NORMAL = (int)(1u << 31),
    SHADER_CAST_SHADOW   = (int)(1u << 30),
    SHADER_AREA_LIGHT    = (int)(1u << 29),
    SHADER_MASK          = ~(SHADER_SMOOTH_NORMAL|SHADER_CAST_SHADOW|SHADER_AREA_LIGHT)
} ShaderFlag;

/* Light Type: consecutive values starting at 0. */
typedef enum LightType {
    LIGHT_POINT      = 0,
    LIGHT_DISTANT    = 1,
    LIGHT_BACKGROUND = 2,
    LIGHT_AREA       = 3,
    LIGHT_AO         = 4
} LightType;

/* Camera Type: consecutive values starting at 0. */
enum CameraType {
    CAMERA_PERSPECTIVE  = 0,
    CAMERA_ORTHOGRAPHIC = 1,
    CAMERA_ENVIRONMENT  = 2
};

/* Differential of a float3 quantity: one float3 for x and one for y. */
typedef struct differential3 {
    float3 dx, dy;
} differential3;

/* Differential of a scalar quantity: one float for x and one for y. */
typedef struct differential {
    float dx, dy;
} differential;

/* Ray: P, D and t, plus differentials of P and D when
 * __RAY_DIFFERENTIALS__ is enabled. */
typedef struct Ray {
    float3 P, D;
    float t;

#if defined(__RAY_DIFFERENTIALS__)
    differential3 dP, dD;
#endif
} Ray;

/* Intersection: three float fields (t, u, v) followed by the prim and
 * object integer fields. */
typedef struct Intersection {
    float t;
    float u;
    float v;
    int prim, object;
} Intersection;

/* Attributes: kind of element an attribute belongs to; consecutive
 * values starting at 0. */
typedef enum AttributeElement {
    ATTR_ELEMENT_FACE   = 0,
    ATTR_ELEMENT_VERTEX = 1,
    ATTR_ELEMENT_CORNER = 2,
    ATTR_ELEMENT_VALUE  = 3,
    ATTR_ELEMENT_NONE   = 4
} AttributeElement;

/* Closure data */

/* size of the fixed closure array in ShaderData */
#define MAX_CLOSURE 8

/* A single closure: its type and weight, an optional sampling weight, and
 * either an opaque pointer (with __OSL__) or two float parameters. */
typedef struct ShaderClosure {
    ClosureType type;
    float3 weight;

#if defined(__MULTI_CLOSURE__)
    float sample_weight;
#endif

#if !defined(__OSL__)
    float data0;
    float data1;
#else
    void *prim;
#endif
} ShaderClosure;

/* Shader Data

*

* Main shader state at a point on the surface or in a volume. All coordinates

* are in world space. */

/* Bit flags stored in ShaderData.flag. */
enum ShaderDataFlag {
    /* runtime flags */
    SD_BACKFACING              = (1 << 0),  /* backside of surface? */
    SD_EMISSION                = (1 << 1),  /* have emissive closure? */
    SD_BSDF                    = (1 << 2),  /* have bsdf closure? */
    SD_BSDF_HAS_EVAL           = (1 << 3),  /* have non-singular bsdf closure? */
    SD_BSDF_GLOSSY             = (1 << 4),  /* have glossy bsdf */
    SD_HOLDOUT                 = (1 << 5),  /* have holdout closure? */
    SD_VOLUME                  = (1 << 6),  /* have volume closure? */

    /* shader flags */
    SD_SAMPLE_AS_LIGHT         = (1 << 7),  /* direct light sample */
    SD_HAS_SURFACE_TRANSPARENT = (1 << 8),  /* has surface transparency */
    SD_HAS_VOLUME              = (1 << 9),  /* has volume shader */
    SD_HOMOGENEOUS_VOLUME      = (1 << 10)  /* has homogeneous volume */
};

typedef struct ShaderData {

          /* position */

          float3 P;

          /* smooth normal for shading */

          float3 N;

          /* true geometric normal */

          float3 Ng;

          /* view/incoming direction */

          float3 I;

          /* shader id */

          int shader;

          /* booleans describing shader, see ShaderDataFlag */

          int flag;

          /* primitive id if there is one, ~0 otherwise */

          int prim;

          /* parametric coordinates

           * - barycentric weights for triangles */

          float u, v;

          /* object id if there is one, ~0 otherwise */

          int object;

#ifdef __RAY_DIFFERENTIALS__

          /* differential of P. these are orthogonal to Ng, not N */

          differential3 dP;

          /* differential of I */

          differential3 dI;

          /* differential of u, v */

          differential du;

          differential dv;

#endif

#ifdef __DPDU__

          /* differential of P w.r.t. parametric coordinates. note that dPdu is

           * not readily suitable as a tangent for shading on triangles. */

          float3 dPdu, dPdv;

#endif

#ifdef __MULTI_CLOSURE__

          /* Closure data, we store a fixed array of closures */

          ShaderClosure closure[MAX_CLOSURE];

          int num_closure;

          float randb_closure;

#else

          /* Closure data, with a single sampled closure for low memory usage */

          ShaderClosure closure;

#endif

#ifdef __OSL__

          /* OSL context */

          void *osl_ctx;

#endif

} ShaderData;

/* Constant Kernel Data
 *
 * These structs are passed from the CPU to the various devices, so the
 * struct layout must match exactly on both sides. Structs are padded to
 * ensure 16 byte alignment, and float3 is not used because its size may
 * not be the same on all devices. */

typedef struct KernelCamera {
    /* type, padded out to four ints */
    int type;
    int pad1;
    int pad2;
    int pad3;

    /* matrices */
    Transform cameratoworld, rastertocamera;

    /* differentials */
    float4 dx, dy;

    /* depth of field */
    float aperturesize, blades, bladesrotation, focaldistance;

    /* motion blur */
    float shutteropen, shutterclose;

    /* clipping */
    float nearclip, cliplength;

    /* more matrices */
    Transform screentoworld, rastertoworld, ndctoworld;
    Transform worldtoscreen, worldtoraster, worldtondc, worldtocamera;
} KernelCamera;

/* Film settings: exposure, pass flags/stride, and one int per pass.
 * Members are written four per line; each is 4 bytes wide on the usual
 * targets, so every line is one 16-byte row. */
typedef struct KernelFilm {
    float exposure;
    int pass_flag, pass_stride, use_light_pass;

    int pass_combined, pass_depth, pass_normal, pass_pad;

    int pass_uv, pass_object_id, pass_material_id, pass_diffuse_color;

    int pass_glossy_color, pass_transmission_color,
        pass_diffuse_indirect, pass_glossy_indirect;

    int pass_transmission_indirect, pass_diffuse_direct,
        pass_glossy_direct, pass_transmission_direct;

    int pass_emission, pass_background, pass_ao, pass_shadow;
} KernelFilm;

/* Background settings: shader index, transparency flag, and the ambient
 * occlusion factor and distance. */
typedef struct KernelBackground {
    /* only shader index */
    int shader, transparent;

    /* ambient occlusion */
    float ao_factor, ao_distance;
} KernelBackground;

/* Sun/sky model parameters, all floats, with explicit padding members. */
typedef struct KernelSunSky {
    /* sun direction (theta, phi), padded to four floats */
    float theta;
    float phi;
    float pad3;
    float pad4;

    /* perez function parameters: zenith values, padded to four floats */
    float zenith_Y;
    float zenith_x;
    float zenith_y;
    float pad2;

    /* perez function parameters: five coefficients each for Y, x and y */
    float perez_Y[5];
    float perez_x[5];
    float perez_y[5];

    float pad5;
} KernelSunSky;

/* Integrator settings. Twenty 4-byte members in total, the last one
 * being explicit padding. */
typedef struct KernelIntegrator {
    /* emission */
    int use_direct_light, use_ambient_occlusion;
    int num_distribution, num_all_lights;
    float pdf_triangles, pdf_lights;
    int pdf_background_res;

    /* bounces */
    int min_bounce, max_bounce;
    int max_diffuse_bounce, max_glossy_bounce, max_transmission_bounce;

    /* transparent */
    int transparent_min_bounce, transparent_max_bounce;
    int transparent_shadows;

    int no_caustics;     /* caustics */
    int seed;            /* seed */
    int layer_flag;      /* render layer */
    float sample_clamp;  /* clamp */
    int pad;             /* padding */
} KernelIntegrator;

/* BVH settings: root node, attribute map stride, and two padding ints. */
typedef struct KernelBVH {
    int root;                   /* root node */
    int attributes_map_stride;
    int pad1;
    int pad2;
} KernelBVH;

/* Aggregate of all the constant kernel structs, one member per struct. */
typedef struct KernelData {
    KernelCamera     cam;         /* camera */
    KernelFilm       film;        /* film and passes */
    KernelBackground background;  /* background and ambient occlusion */
    KernelSunSky     sunsky;      /* sun/sky model */
    KernelIntegrator integrator;  /* integrator settings */
    KernelBVH        bvh;         /* BVH settings */
} KernelData;

CCL_NAMESPACE_END

#endif /*  __KERNEL_TYPES_H__ */

96 Replies

Thanks sharlybg, for sharing your experiences. I hope smalluxrender will be helping a lot of people to accelerate their rendering. Also work to enable cycles is also under progress. Hopefully cycles will also work some time in near future.

0 Likes

Thank you for looking into accelerating cycles. Luxrender is not sufficiently integrated into blender to make GPU acceleration an added-value experience (no live preview, which is the real gamechanger in an editing workflow).

I had to trade my AMD 7970s for NVIDIA 580s because of this issue. I lost a ton of benchmark score and AMD lost a high margin customer, it was a sad day all around. I hope this issue gets fixed before I go shopping again: the new macpro looks awesome, but it has AMD cards...

Thank you for taking this issue seriously! Still, is there any possibility that one of the responsible developers could give us a status report and some technical insight into the problem (and say whether they think or know it is a software or a hardware limitation)? That would really help to bridge the time.

0 Likes

AFAIK even the Evergreen GPU family (5xxx) has support for function calls, so it is a limitation of the compiler.

0 Likes
sachsproject
Journeyman III

It is great to see that the OpenCL issue with Blender is being looked into.  For interest, I have a Firepro V7900 which I will be testing once we have a solution, it will be interesting to see how the workstation and consumer cards match up.  Many times I have considered ditching this card and going for a GTX 580 or a mid range Quadro but have held out because of the benchmarks I've seen with this card.  A solution can't come quick enough!

Thank you Himanshu and team for your work.

sharlybg
Adept II

No news again? The wait is becoming too long without any bit of news. Yes, I got smallux to work for me, but that is only me, and this render engine is feature-limited compared to Vray, Octane, Cycles and all the software where artists want acceleration with AMD cards. Some of us are getting completely discouraged; look at this ( Amd/ati Opencl+ Blenderheads+blender Cycles=harmony | Facebook ). Is it so hard to solve this issue?

Now AMD is talking about HSA for APUs and the next HD 9000 Volcanic Islands. Does that mean the Radeon 7XXX series will never get Cycles, Vray or Octane to work?

Why, AMD, why? We need a true answer.

tommie
Journeyman III

No updates yet?
I am beginning to doubt these developers. Are they only pretending to be working on this problem? Why don't we see any updates? I have spent over $3000 on your company's products, recommended you to friends and never had any trouble using your products. A happy customer is losing faith here! Does anyone care? AMD? No, really not?

Thanks for caring.

0 Likes
sdar
Journeyman III

Thanks. I had begun to doubt that the developers were working on this, but now I realize that they are indeed working on it, and it seems it's going to run soon.

better late than never, thank you amd.

0 Likes

Well, it's semi-working with Windows 8.1 OpenGL 4.3 Beta preview driver. It compiles, but the final render has some serious artifacts. The latest OpenGL 4.3 13.8 Beta driver (released on 1st August), is a step back again: the kernel fails at compiling ("Error 103, insufficient resources").

0 Likes

0 Likes

I am expecting that next SDK version big feature will be compilation of big kernels.

0 Likes

Well, we keep waiting for a miracle at this point!
There have been plenty of Catalyst 13.8 releases, but the solutions are still pending. AMD could at least publish a report or something on where they stand with Blender Cycles. As far as is known, Brecht van Lommel is trying to create a compiler for AMD OpenCL that works with smaller blocks; this will also help NVIDIA graphics cards with very large render files.
I leave you a render engine to look at.
wn

youtube
Siyana Renderer v0.5
http://www.youtube.com/watch?v=S3y8lY9vSMU SaveFrom.net SaveFrom.net
Siyana Renderer v0.4
http://www.youtube.com/watch?v=pDrg3tHDQHA SaveFrom.net SaveFrom.net

Web
http://jvillella.com/wp/?p=546#more-546

Download
https://code.google.com/p/siyana-render ... akechanges

Info
http://graphics.stanford.edu/papers/i3d ... kd-i3d.pdf

Seeing that the LuxRender developers are having serious problems and are discouraged with AMD, I think AMD should seek new solutions urgently. There is also an online petition asking AMD to take a decision, and urgently!

https://www.change.org/petitions/advanced-micro-devices-fix-bugs-in-opencl-compiler-drivers-and-even...

____________

Español

Bueno a seguir esperando algún milagro a estas alturas!.
Mucho catalyst 13.8 pero las soluciones siguen en espera. Amd puede pasar algun informe o algo por lo menos como estan con blender cycles. Por lo que se sabe Brecht van lommel esta tratando de crear un compilador para amd opencl que funciona con bloques mas pequeños esto tambien ayudara a las graficas nvidia en archivos de render muy grandes.
Les dejo un motor de render para que vean.

0 Likes

Thanks for sharing your work here

As of now I can just assure that AMD is taking blender issue very seriously. We hope to make blender work ASAP.

No news so far?

0 Likes

Any progress?

0 Likes

Ton Roosendaal (Blender Foundation Chairman and Developer) just tweeted: Twitter / tonroosendaal: Mail of day: Apple is getting ... Now, Apple is on the boat aswell. This can only speed things up. Guys, I think we can expect Cycles Support VERY soon! New MacPro's are rumored to come out somewhere between September and November. Finally, this looks very promising!

0 Likes

oooOh god if this happen .........?

I will cancel my GTX 680 order and buy the next Volcanic Islands GPU.

0 Likes