Archives Discussions

Andreas999 · ‎10-02-2008

Hi,

I wrote a Mandelbrot shader that uses fixed-point arithmetic for higher precision. Here is my code:

// Interpolated per-fragment coordinate; main() shifts it by +1.0 and converts
// it to the fixed-point format before scaling it.
varying vec2 position;
//uniform sampler2D myColorTable;

// Upper bound used by the iteration loop in main() and to normalize the color.
uniform float maxIter;
// Every fixed-point number is passed as two vec4 halves: the "...1" uniform is
// handed to the first-half argument (a1/b1) and the "...2" uniform to the
// second-half argument (a2/b2) of add()/mult().
// Per-axis factors that main() multiplies with the shifted position.
uniform vec4 deltaVectorX1;
uniform vec4 deltaVectorX2;
uniform vec4 deltaVectorY1;
uniform vec4 deltaVectorY2;
// Per-axis offsets that main() adds to the scaled position
// (position = lowerLeftCorner + (pos + 1) * deltaVector).
uniform vec4 lowerLeftCornerX1;
uniform vec4 lowerLeftCornerX2;
uniform vec4 lowerLeftCornerY1;
uniform vec4 lowerLeftCornerY2;

//1/1024.0=0.0009765625
// the float mantissa has only 23 bits => number->number1 should be enough
// Splits a float into the fixed-point limb format used by add()/mult().
//   number  - value to convert
//   number1 - receives the integer part in .r and three successive base-1024
//             fractional digits in .g, .b, .a; every component carries the
//             sign of the input
//   number2 - always set to zero (a float mantissa does not reach these limbs)
void convertToBigSize(in float number,out vec4 number1, out vec4 number2){
    // Work on the magnitude; the sign is re-applied to all limbs at the end.
    float magnitude=abs(number);

    // Integer part, then shift the remainder up by one base-1024 digit.
    float intPart=floor(magnitude);
    float rest=(magnitude-intPart)*1024.0;

    float digit1=floor(rest);
    rest=(rest-digit1)*1024.0;

    float digit2=floor(rest);
    rest=(rest-digit2)*1024.0;

    float digit3=floor(rest);

    number1=vec4(intPart,digit1,digit2,digit3)*sign(number);
    number2=vec4(0.0);
}

//41 Ops
// Fixed-point addition: a = a + b, result written back into a1/a2.
// A number is 8 limbs (a1.rgba followed by a2.rgba), each limb after a1.r
// being one base-1024 digit. After the call all limbs except a1.r lie in
// [0, 1024); a1.r absorbs the final carry and may be negative.
void add(inout vec4 a1, inout vec4 a2, in vec4 b1, in vec4 b2){
    const float BASE=1024.0;
    const float INV_BASE=0.0009765625; // 1/1024

    // Limb-wise sum plus a bias that makes every fractional limb non-negative
    // before the carries are taken with floor(). The bias cancels out overall:
    // -2 in the integer limb equals 2046 in limbs 1..6 plus 2048 in limb 7.
    a1=(a1+b1)+vec4(-2.0,2046.0,2046.0,2046.0);
    a2=(a2+b2)+vec4(2046.0,2046.0,2046.0,2048.0);

    // Ripple the carries from the least significant limb (a2.a) up to a1.r.
    float carry=floor(a2.a*INV_BASE);
    a2.a-=carry*BASE;
    a2.b+=carry;

    carry=floor(a2.b*INV_BASE);
    a2.b-=carry*BASE;
    a2.g+=carry;

    carry=floor(a2.g*INV_BASE);
    a2.g-=carry*BASE;
    a2.r+=carry;

    carry=floor(a2.r*INV_BASE);
    a2.r-=carry*BASE;
    a1.a+=carry;

    carry=floor(a1.a*INV_BASE);
    a1.a-=carry*BASE;
    a1.b+=carry;

    carry=floor(a1.b*INV_BASE);
    a1.b-=carry*BASE;
    a1.g+=carry;

    carry=floor(a1.g*INV_BASE);
    a1.g-=carry*BASE;
    a1.r+=carry;
}
//44+30+63=137ops
// Fixed-point multiplication: a = a * b, result written back into a1/a2.
// A number is 8 limbs (a1.rgba followed by a2.rgba); the carry steps below use
// base 1024 (0.0009765625 = 1/1024), so limb k has weight 1024^-k.
// The product is built schoolbook-style: the partial product of limb i of a
// and limb j of b is accumulated into result limb i+j. Limbs 0-7 end up in
// a1/a2; limbs 8-11 (temp1) and 12-14 (temp2) are only used to feed carries
// into limb 7 and are then dropped.
void mult(inout vec4 a1, inout vec4 a2, in vec4 b1, in vec4 b2){
    // zN1/zN2: first/second half of a, scaled by the N-th limb of b
    // (N = 1..4 -> b1.rgba, N = 5..8 -> b2.rgba).
    vec4 z11=a1*b1.r;
    vec4 z12=a2*b1.r;
    vec4 z21=a1*b1.g;
    vec4 z22=a2*b1.g;
    vec4 z31=a1*b1.b;
    vec4 z32=a2*b1.b;
    vec4 z41=a1*b1.a;
    vec4 z42=a2*b1.a;
    vec4 z51=a1*b2.r;
    vec4 z52=a2*b2.r;
    vec4 z61=a1*b2.g;
    vec4 z62=a2*b2.g;
    vec4 z71=a1*b2.b;
    vec4 z72=a2*b2.b;
    vec4 z81=a1*b2.a;
    vec4 z82=a2*b2.a;
    // Result limbs 12..14 (temp2.rgb). temp2.b is not read again below.
    vec3 temp2=z82.gba;
    temp2.rg+=z72.ba;
    temp2.r+=z62.a;
    // Result limbs 8..11 (temp1.rgba).
    vec4 temp1=z52;
    temp1.a+=z82.r;
    temp1.rgb+=z81.gba;
    temp1.ba+=z72.rg;
    temp1.rg+=z71.ba;
    temp1.gba+=z62.rgb;
    temp1.r+=z61.a;
    temp1.rgb+=z42.gba;
    temp1.rg+=z32.ba;
    temp1.r+=z22.a;
    // Result limbs 4..7 (a2.rgba). a2 is overwritten here; the z* products
    // above were already computed from its old value.
    a2=z51;
    a2.a+=z81.r;
    a2.ba+=z71.rg;
    a2.gba+=z61.rgb;
    a2.a+=z42.r;
    a2.rgb+=z41.gba;
    a2.ba+=z32.rg;
    a2.rg+=z31.ba;
    a2.gba+=z22.rgb;
    a2.r+=z21.a;
    a2+=z12;
    // Result limbs 0..3 (a1.rgba).
    a1=z11;
    a1.gba+=z21.rgb;
    a1.ba+=z31.rg;
    a1.a+=z41.r;

    // Carry chain through the limbs that will be dropped (13 -> 12 -> 11 ...
    // -> 8 -> 7). The carry is truncated toward zero (sign * floor(abs)) so
    // negative limbs are handled; the source limb is not reduced because it
    // is not used afterwards.
    float tempSign=sign(temp2.g);
    float temp=floor(abs(temp2.g)*0.0009765625);
    temp2.r+=temp*tempSign;
    tempSign=sign(temp2.r);
    temp=floor(abs(temp2.r)*0.0009765625);
    temp1.a+=temp*tempSign;
    tempSign=sign(temp1.a);
    temp=floor(abs(temp1.a)*0.0009765625);
    temp1.b+=temp*tempSign;
    tempSign=sign(temp1.b);
    temp=floor(abs(temp1.b)*0.0009765625);
    temp1.g+=temp*tempSign;
    tempSign=sign(temp1.g);
    temp=floor(abs(temp1.g)*0.0009765625);
    temp1.r+=temp*tempSign;
    tempSign=sign(temp1.r);
    temp=floor(abs(temp1.r)*0.0009765625);
    a2.a+=temp*tempSign;

    // Carry chain through the kept limbs (7 -> 0). Each limb passes its whole
    // multiples of 1024 to the next more significant limb and keeps the
    // remainder, which has the same sign as the limb had before.
    tempSign=sign(a2.a);
    temp=floor(abs(a2.a)*0.0009765625);
    a2.b+=temp*tempSign;
    a2.a-=temp*1024.0*tempSign;

    tempSign=sign(a2.b);
    temp=floor(abs(a2.b)*0.0009765625);
    a2.g+=temp*tempSign;
    a2.b-=temp*1024.0*tempSign;

    tempSign=sign(a2.g);
    temp=floor(abs(a2.g)*0.0009765625);
    a2.r+=temp*tempSign;
    a2.g-=temp*1024.0*tempSign;

    tempSign=sign(a2.r);
    temp=floor(abs(a2.r)*0.0009765625);
    a1.a+=temp*tempSign;
    a2.r-=temp*1024.0*tempSign;

    tempSign=sign(a1.a);
    temp=floor(abs(a1.a)*0.0009765625);
    a1.b+=temp*tempSign;
    a1.a-=temp*1024.0*tempSign;

    tempSign=sign(a1.b);
    temp=floor(abs(a1.b)*0.0009765625);
    a1.g+=temp*tempSign;
    a1.b-=temp*1024.0*tempSign;

    // The integer limb a1.r only receives the last carry; it is not reduced.
    tempSign=sign(a1.g);
    temp=floor(abs(a1.g)*0.0009765625);
    a1.r+=temp*tempSign;
    a1.g-=temp*1024.0*tempSign;

}

/*//only for test purposes
float convertToFloat(in vec4 X1, in vec4 X2){
    float number=X1.r;
    float divi=1024.0;
    number+=X1.g/divi;
    divi*=1024.0;
    number+=X1.b/divi;
    divi*=1024.0;
    number+=X1.a/divi;
    divi*=1024.0;
    number+=X2.r/divi;
    divi*=1024.0;
    number+=X2.g/divi;
    divi*=1024.0;
    number+=X2.b/divi;
    divi*=1024.0;
    number+=X2.a/divi;
    return number;
}*/

//a=a+b-4, 41 Ops
// Fixed-point "add and compare against 4": a = a + b - 4, written back into
// a1/a2. The carries are taken with ceil(), so after the call every limb
// except a1.r lies in (-1024, 0]; the caller tests the result with
// "all limbs <= 0".
void add2(inout vec4 a1, inout vec4 a2, in vec4 b1, in vec4 b2){
    const float BASE=1024.0;
    const float INV_BASE=0.0009765625; // 1/1024

    // Limb-wise sum minus a bias that makes every fractional limb
    // non-positive. The fractional biases are worth -2 in total, so together
    // with the -2 on the integer limb the net effect is the intended -4.
    a1=(a1+b1)-vec4(2.0,2046.0,2046.0,2046.0);
    a2=(a2+b2)-vec4(2046.0,2046.0,2046.0,2048.0);

    // Ripple the carries from the least significant limb (a2.a) up to a1.r.
    float carry=ceil(a2.a*INV_BASE);
    a2.a-=carry*BASE;
    a2.b+=carry;

    carry=ceil(a2.b*INV_BASE);
    a2.b-=carry*BASE;
    a2.g+=carry;

    carry=ceil(a2.g*INV_BASE);
    a2.g-=carry*BASE;
    a2.r+=carry;

    carry=ceil(a2.r*INV_BASE);
    a2.r-=carry*BASE;
    a1.a+=carry;

    carry=ceil(a1.a*INV_BASE);
    a1.a-=carry*BASE;
    a1.b+=carry;

    carry=ceil(a1.b*INV_BASE);
    a1.b-=carry*BASE;
    a1.g+=carry;

    carry=ceil(a1.g*INV_BASE);
    a1.g-=carry*BASE;
    a1.r+=carry;
}

// Escape-time Mandelbrot iteration carried out entirely in the two-vec4
// fixed-point format. Fragments whose orbit leaves |z|^2 <= 4 within the
// iteration budget are shaded by the iteration count; all others stay white.
void main ()
{
    //position=lowerLeftCorner+(gl_Position.xy+vec2(1.0,1.0))*deltaVector;
    vec4 tempPositionX1=vec4(0.0);
    vec4 tempPositionX2=vec4(0.0);
    vec4 tempPositionY1=vec4(0.0);
    vec4 tempPositionY2=vec4(0.0);

    // Shift the interpolated coordinate by +1 and convert each axis to fixed point.
    vec2 tmpPosition=position+1.0;
    convertToBigSize(tmpPosition.r,tempPositionX1, tempPositionX2);
    convertToBigSize(tmpPosition.g,tempPositionY1, tempPositionY2);

    // Scale by the per-axis delta, then add the lower-left corner:
    // positionX/Y is the constant c of the iteration z = z^2 + c.
    mult(tempPositionX1,tempPositionX2, deltaVectorX1, deltaVectorX2);
    mult(tempPositionY1,tempPositionY2, deltaVectorY1, deltaVectorY2);
    vec4 positionX1=lowerLeftCornerX1;
    vec4 positionX2=lowerLeftCornerX2;
    vec4 positionY1=lowerLeftCornerY1;
    vec4 positionY2=lowerLeftCornerY2;
    add(positionX1, positionX2, tempPositionX1, tempPositionX2);
    add(positionY1, positionY2, tempPositionY1, tempPositionY2);
    //z=position
    vec4 zX1=positionX1;
    vec4 zX2=positionX2;
    vec4 zY1=positionY1;
    vec4 zY2=positionY2;
    // Default color; only overwritten below if the loop ends with i <= maxIter.
    gl_FragColor = vec4(1.0,1.0,1.0,1.0);


    //vec2 tempZ=z*z;

    // tempZX = z.x^2 and tempZY = z.y^2 (each value multiplied by itself).
    vec4 tempZX1=zX1;
    vec4 tempZX2=zX2;
    vec4 tempZY1=zY1;
    vec4 tempZY2=zY2;
    mult(tempZX1, tempZX2, zX1, zX2);
    mult(tempZY1, tempZY2, zY1, zY2);
    vec4 temp1=tempZX1;
    vec4 temp2=tempZX2;

    //tempZ.x+tempZ.y
    // temp = z.x^2 + z.y^2 - 4 (add2 subtracts the 4).
    add2(temp1,temp2,tempZY1, tempZY2);

    float i=float(0.0);
    //(tempZ.x+tempZ.y)<=4 equivalent to tempZ.x+tempZ.y-4<=0.0
    // The condition allows up to maxIter+1 passes; a fragment that never
    // escapes leaves the loop with i = maxIter+1 and keeps the default color.
    while (i<=maxIter && all(lessThanEqual(temp1,vec4(0.0))) && all(lessThanEqual(temp2,vec4(0.0))))
    {
        //z = vec2(tempZ.x - tempZ.y, 2.0*z.x*z.y) + position;

        // tempZX = z.x^2 - z.y^2
        add(tempZX1, tempZX2, -tempZY1, -tempZY2);

        // zY = z.x*z.y, then doubled by adding it to itself.
        mult(zY1, zY2, zX1, zX2);
        add(zY1, zY2, zY1, zY2);
        // zX may only be replaced after the product above has used the old z.x.
        zX1=tempZX1;
        zX2=tempZX2;
        // z += c
        add(zX1, zX2, positionX1, positionX2);
        add(zY1, zY2, positionY1, positionY2);

        //tempZ=z*z;

        tempZX1=zX1;
        tempZX2=zX2;
        tempZY1=zY1;
        tempZY2=zY2;
        mult(tempZX1, tempZX2, zX1, zX2);
        mult(tempZY1, tempZY2, zY1, zY2);

        //tempZ.x+tempZ.y

        // Recompute the escape test value z.x^2 + z.y^2 - 4 for the next pass.
        temp1=tempZX1;
        temp2=tempZX2;
        add2(temp1,temp2, tempZY1, tempZY2);

        i+=1.0;

    }
    // Escaped within the budget: red and green encode i/maxIter, blue is full.
    if (i <= maxIter)
        {
            float color=i/maxIter;
            gl_FragColor=vec4(color,color,1.0,1.0);

        }

}

On my HD 2600 it works fine. But according to GPU ShaderAnalyzer, this program does not compile for X1x00 cards. They support Shader Model 3.0, and I don't understand why it is not working.

Would be nice, if you could help.

Best,
Andreas

bpurnomo · ‎10-03-2008

It seems that the problem is with the loop condition

(i<=maxIter && all(lessThanEqual(temp1,vec4(0.0))) && all(lessThanEqual(temp2,vec4(0.0))))

If you change the function add2(), which modifies temp1 and temp2, so that it returns constants, then the shader will compile for the older hardware. That is not a fix, though, since the function will then no longer do what you intend. It could be a driver bug for the old hardware, or something else.

Andreas999 · ‎10-03-2008

Hi,

thx for your answer. Yes, I got that ,too. But using a simpler version of my program like this:

// Interpolated per-fragment coordinate; main() uses it both as the start
// value of z and as the constant added in every iteration.
varying vec2 position;
//uniform sampler2D myColorTable;
// Upper bound used by the iteration loop and to normalize the color.
uniform float maxIter;
// Plain single-precision escape-time Mandelbrot iteration.
// Fragments whose orbit leaves |z|^2 <= 4 while the counter is still
// <= maxIter are shaded by counter/maxIter; all others are white.
void main ()
{
    vec2 point = position;
    // Component-wise squares of the current point: (x^2, y^2).
    vec2 squares = point*point;
    float count = 0.0;

    while (count<=maxIter && (squares.x+squares.y)<=4.0)
    {
        // z = z^2 + c, with c = position
        point = vec2(squares.x - squares.y, 2.0*point.x*point.y) + position;
        squares = point*point;
        count += 1.0;
    }

    // White unless the orbit escaped within the iteration budget.
    vec4 result = vec4(1.0,1.0,1.0,1.0);
    if (count <= maxIter)
    {
        float shade = count/maxIter;
        result = vec4(shade,shade,1.0,1.0);
    }
    gl_FragColor = result;
}

everything is fine. Then I commented out some parts of my loop and it was accepted by the shader analyzer, even though temp1 and temp2 were still changed by the multiplication. I believe it's a software (= driver) problem and not a hardware problem. What do you think? I will send this to AMD and hope they will update the driver.

Best,
Andreas

Andreas999 · ‎10-03-2008

Hi,

at my first shader, if I used while(i<=maxIter) only and in this loop

bool result=false;

if(all(lessThanEqual(temp1,vec4(0.0))) && all(lessThanEqual(temp2,vec4(0.0))))result=true;

it compiled. But if I used this if clause for a break or for changing i, I again got the message that this shader is not supported by the hardware. I don't understand it. I also simplified the if clause to if(temp1.r<=0.0), without success.

Waiting for the amd answer... Do you know further tricks?

Best,
Andreas

bpurnomo · ‎10-12-2008

It does look like a bug. I played with several variations of the shader. Removing some mult() operations from the body of your loop will also make the shader compile.

Is there a specific reason that you want to run this in older hardware?

Andreas999 · ‎10-15-2008

Hi,

a x1600 at a laptop is not so old :-). I only want to make sure, the shader model 3.0 is supported and that a lot of people can execute it. Here is the program: http://www.lichtundliebe.info/projects/2DFractal/2DFractal.jnlp and here the video http://www.youtube.com/watch?v=1x8_gRtnB_s.

Best,
Andreas

splitline · ‎11-18-2008

I tried to run your Java program on my Mac os x 10.4 with intel graphics 945. Did not work. Only works with Ati radeon cards? I have another notebook with amd processor and ATI 1250 express chipset. Will this work? Does your shader work with this ati chipset, yet? Interested in seeing your neat program.

Also what does your shader exactly do to enhance your program. Thanks i'm kind of new to all this. Sorry can't read German.

Thanks

Rick

Andreas999 · ‎11-18-2008

Hi
it should work with all cards with the shader model 3.0. Your intel card doesn't seem to have this feature. With your ati x1250, it should work. But, the deeper zoom (5-6x click at the image) still does not work. I tested that with a friend's computer and an ati x1600. The problem appears if I write to multiple 128 bits float textures. But, with my card, all is fine. For the next time, I have no access to the friend's computer for further experiments.

If you have no deep zoom, Mandelbrot and Julia fractals can be calculated in real time. With deeper zoom, the program uses fixed-point arithmetic to increase the precision. That's the first posted shader. There, a number consists of 80 bits. These 80 bits are split into 8 float values. That means each float value uses 10 bits (2^10=1024) to represent a part of the number. Each float value has a mantissa of 23 bits, of which 10 bits are used for the number. The rest of the mantissa I need for addition and multiplication overflow. A vec4 is a 4D vector consisting of 4 float values. That means that, for one number, I need 2 vec4 values.

Best,
Andreas

Andreas999 · ‎11-18-2008

If you are interested, here is the code: http://www.lichtundliebe.info/projects/2DFractal.zip. It's written in GLS and Java using the jMonkeyEngine (http://www.jmonkeyengine.com).

splitline · ‎11-18-2008

Thanks for your reply. Hey I was wandering if you can direct me to some good tutorials on shaders for ATI and/or GForce.(Newbie here.lol) I have both. I've been looking at 2d fractals (made by pc) since my time at high school. Very interesting. Now I can bedazzle my friends unaware of fractals with your nice software. I have a Gforce with shader model 3.0. I will let you what happens, since I see you want it to run on as many different computers as possible. Also, why did you write the software in java? More portable? Have you ever thought about writing software for cellular automata. These cool little mathematical formulas produce some real nice eye candy as well.

Thanks.

Andreas999 · ‎11-18-2008

Hi,
yes, I wanted execute my program at the most popular OS(windows, mac os, linux). So, I keep it portable. With java, you can directly execute my application with java webstart. Here is also a nice fractal program: http://www.lichtundliebe.info/projects/3DFractal/3DFractal.jnlp written with the help of Keenan Crane: http://www.devmaster.net/forums/showthread.php?t=4448. Here is how to use it:

The hot keys work only, if the fractal image is focused with the mouse.

left mouse click: rotation of the fractal
ctrl+left mouse click: zoom
alt+left mouse click: movement of the fractal

Shift+left mouse click: rotation of the light relative to the eye's position
Shift+Ctrl+left mouse click: move the light from or to the rotation center
Shift+Alt+left mouse click: movement of the light source

l/L: cx
i/I: cy
j/J: cz
k/K: cw

"space": toggle animation on/off

+/-: iterations
e/E: Epsilon
s: shadow
m/M: maximum step size

The colors can be changed by Fractal->color settings

If you decrease epsilon or maximum step size too much, the shader could exceed the limit of 65536 instructions => crash. So, this program has to be modified. That's why I still did not commented my code as I did with the 2D fractal: http://www.lichtundliebe.info/projects/3DFractal.zip. With a shader model 4.0 card, that's not so critical.

It's my aim to program graphical applications for the web. The most PCs still don't have cell processors at the mainboard.

Use google with "glsl tutorial" or "gpgpu" and you will find some tutorials. I used a German tutorial 🙂 and learned from different sources. What do you want? If you want portability, use opengl and glsl. If windows only is ok for you, then directx with hlsl could be your way: http://http.developer.nvidia.com/GPUGems/gpugems_part01.html.
If you understood the basics, you also can port the code from hlsl to glsl.

Best,
Andreas

splitline · ‎11-19-2008

I tried both programs on my ati radeon 1250 neither worked correctly. I got a few java errors heres one com.jme.system.JmeException: Error in opengl:Invalid operation at ......... .checkCardError. This error happened on both systems mac and amd ati radeon express 1250. WIth the Julia Set 3d program I only saw a colored image that could change color with the mouse. This was with the ATI 1250. I have a gforce with pixel 3 capabilities. Would this help? What ATI graphics card do you have?

Andreas999 · ‎11-19-2008

Hi,

According to my research, your ATI Express 1250 is based on the X700. At first, I confused the Express 1250 with the X1250. The X700 does not support Shader Model 3.0. OpenGL includes a GLSL compiler which compiles to assembler for the GPU. If you compile a Shader Model 3.0 program for a Shader Model 2.0 GPU, you will probably get an error. If something does work, it is because it's emulated by the CPU. It's better not to use onboard chips.
With a shader model 3.0 card, which errors did you get? Could you post the full message, please?

I have a hd 2600 xt. If you move the sliders in the julia3d program or click at the image and hit space, the image will change. At the program start, you will see a colored sphere. Because the normal vector is used for colorization, the colors will change, if you rotate it with the mouse.

Best,
Andreas

splitline · ‎11-20-2008

So X700 supports shader 2.0 ? Thanks for all the great links. When i'm finished with soccer season and school I'll be sure to go through the tutorials you sent me and ask you some more questions. lOl.. Do you know of any good game engines? I want to give my chess program more eye candy while keeping the processor working on AI. Do you know of any optimization?

Thanks.

Andreas999 · ‎11-20-2008

Yes, x700 supports the shader model 2.0, only. I only cared about game engines for java and decided for the jmonkeyengine(http://www.jmonkeyengine.com). Outside java, I shortly read about the irrlicht engine (http://irrlicht.sourceforge.net/). But, I have no deeper knowledge about it. I recommend, you start your own research to answer a lot of your questions by yourself. It's very important to become self-reliant.

Best,
Andreas

Andreas999 · ‎12-03-2008

Hi,
the reason why my program did not work with X1000 cards was that my shader had too many ALU instructions. So I now write the values computed before the loop to two textures and read them in a new shader that contains this loop.

Best,
Andreas

bpurnomo · ‎12-03-2008

Nice job! I'm glad you didn't give up on the problem.

Archives Discussions

Mandelbrot fragment shader at gpu shaderanalyzer