16 Replies Latest reply on Dec 3, 2008 12:04 PM by bpurnomo

    Mandelbrot fragment shader at gpu shaderanalyzer

    Andreas999

      Hi,

      I wrote a Mandelbrot shader that uses fixed-point arithmetic for higher precision. Here is my code:

      // Interpolated per-fragment coordinate. main() shifts it by +1.0, scales it by the
      // deltaVector uniforms and adds the lowerLeftCorner uniforms to obtain the start point.
      varying vec2 position;
      //uniform sampler2D myColorTable;

      // Iteration limit of the escape loop in main(); also the divisor of the colour ramp.
      uniform float maxIter;
      // Each quantity below is one fixed-point number spread over two vec4s:
      // suffix 1 holds the first four base-1024 digits, suffix 2 the last four.
      // deltaVectorX/Y: per-axis scale factor multiplied with (position + 1.0).
      uniform vec4 deltaVectorX1;
      uniform vec4 deltaVectorX2;
      uniform vec4 deltaVectorY1;
      uniform vec4 deltaVectorY2;
      // lowerLeftCornerX/Y: per-axis offset added to the scaled position.
      uniform vec4 lowerLeftCornerX1;
      uniform vec4 lowerLeftCornerX2;
      uniform vec4 lowerLeftCornerY1;
      uniform vec4 lowerLeftCornerY2;

      //1/1024.0=0.0009765625
      // the float mantissa has only 23 bits => number->number1 should be enough
      // Splits a float into the fixed-point representation used by add()/mult().
      //   number  : value to convert
      //   number1 : receives the integer part (.r) followed by the first three
      //             base-1024 fractional digits (.g, .b, .a), each carrying the sign of number
      //   number2 : receives all zeros (no further digits are extracted from the float)
      void convertToBigSize(in float number,out vec4 number1, out vec4 number2){
          float signFactor=sign(number);
          float rest=abs(number);
          vec4 digits;

          // Integer part first.
          digits.r=floor(rest);

          // Peel off one base-1024 digit at a time: drop the part already stored,
          // shift the remainder up by 1024 and keep its integer part.
          rest=(rest-digits.r)*1024.0;
          digits.g=floor(rest);

          rest=(rest-digits.g)*1024.0;
          digits.b=floor(rest);

          rest=(rest-digits.b)*1024.0;
          digits.a=floor(rest);

          // The digits were taken from the magnitude; re-apply the sign to all of them.
          number1=digits*signFactor;
          number2=vec4(0.0);
      }

      //41 Ops
      // a = a + b for two fixed-point numbers (a1/a2 and b1/b2 hold the eight base-1024
      // digits, a1.r being the integer part). The result is written back to a1/a2.
      // After the carry pass the fractional digits are meant to be non-negative.
      void add(inout vec4 a1, inout vec4 a2, in vec4 b1, in vec4 b2){
          // Digit-wise sum plus a bias: +2046 on the six middle digits, +2048 on the
          // last digit and -2 on the integer part. The bias lifts the fractional
          // digits before the floor-based carries below; its overall value is zero.
          a1=(a1+b1)+vec4(-2.0,2046.0,2046.0,2046.0);
          a2=(a2+b2)+vec4(2046.0,2046.0,2046.0,2048.0);

          // Carry propagation from the least significant digit (a2.a) up to the
          // integer part (a1.r). 0.0009765625 = 1/1024.
          float carry;

          carry=floor(a2.a*0.0009765625);
          a2.a-=carry*1024.0;
          a2.b+=carry;

          carry=floor(a2.b*0.0009765625);
          a2.b-=carry*1024.0;
          a2.g+=carry;

          carry=floor(a2.g*0.0009765625);
          a2.g-=carry*1024.0;
          a2.r+=carry;

          carry=floor(a2.r*0.0009765625);
          a2.r-=carry*1024.0;
          a1.a+=carry;

          carry=floor(a1.a*0.0009765625);
          a1.a-=carry*1024.0;
          a1.b+=carry;

          carry=floor(a1.b*0.0009765625);
          a1.b-=carry*1024.0;
          a1.g+=carry;

          // The integer part absorbs the final carry and is not reduced itself.
          carry=floor(a1.g*0.0009765625);
          a1.g-=carry*1024.0;
          a1.r+=carry;
      }
      //44+30+63=137ops
      // a = a * b for two fixed-point numbers; the product replaces a1/a2.
      // Digits are numbered 0..7 in the order a1.r, a1.g, a1.b, a1.a, a2.r, a2.g, a2.b, a2.a
      // (same for b1/b2); digit k of the product is the sum of all a_i*b_j with i+j==k.
      // Product digits 0..7 are kept, digits 8..14 are only used as a source of carries.
      void mult(inout vec4 a1, inout vec4 a2, in vec4 b1, in vec4 b2){
          // Partial products: z<n>1 = a1 * (digit n-1 of b), z<n>2 = a2 * (digit n-1 of b).
          vec4 z11=a1*b1.r;
          vec4 z12=a2*b1.r;
          vec4 z21=a1*b1.g;
          vec4 z22=a2*b1.g;
          vec4 z31=a1*b1.b;
          vec4 z32=a2*b1.b;
          vec4 z41=a1*b1.a;
          vec4 z42=a2*b1.a;
          vec4 z51=a1*b2.r;
          vec4 z52=a2*b2.r;
          vec4 z61=a1*b2.g;
          vec4 z62=a2*b2.g;
          vec4 z71=a1*b2.b;
          vec4 z72=a2*b2.b;
          vec4 z81=a1*b2.a;
          vec4 z82=a2*b2.a;
          // temp2.rgb collects product digits 12, 13, 14.
          vec3 temp2=z82.gba;
          temp2.rg+=z72.ba;
          temp2.r+=z62.a;
          // temp1.rgba collects product digits 8..11.
          vec4 temp1=z52;
          temp1.a+=z82.r;
          temp1.rgb+=z81.gba;
          temp1.ba+=z72.rg;
          temp1.rg+=z71.ba;
          temp1.gba+=z62.rgb;
          temp1.r+=z61.a;
          temp1.rgb+=z42.gba;
          temp1.rg+=z32.ba;
          temp1.r+=z22.a;
          // a2.rgba collects product digits 4..7 (overwrites the input a2, which is
          // no longer needed because all partial products were taken above).
          a2=z51;
          a2.a+=z81.r;
          a2.ba+=z71.rg;
          a2.gba+=z61.rgb;
          a2.a+=z42.r;
          a2.rgb+=z41.gba;
          a2.ba+=z32.rg;
          a2.rg+=z31.ba;
          a2.gba+=z22.rgb;
          a2.r+=z21.a;
          a2+=z12;
          // a1.rgba collects product digits 0..3.
          a1=z11;
          a1.gba+=z21.rgb;
          a1.ba+=z31.rg;
          a1.a+=z41.r;
         
          // Carry pass over the discarded digits 13..8. Each carry is
          // sign(x)*floor(abs(x)/1024), i.e. rounded toward zero so that negative digits
          // yield negative carries. These digits are not reduced themselves because they
          // are dropped afterwards; only their carry is pushed upward.
          // NOTE(review): temp2.b (digit 14) is never carried into temp2.g; presumably an
          // accepted truncation below the last kept digit -- confirm.
          float tempSign=sign(temp2.g);   
          float temp=floor(abs(temp2.g)*0.0009765625);
          temp2.r+=temp*tempSign;
          tempSign=sign(temp2.r);
          temp=floor(abs(temp2.r)*0.0009765625);
          temp1.a+=temp*tempSign;
          tempSign=sign(temp1.a);
          temp=floor(abs(temp1.a)*0.0009765625);
          temp1.b+=temp*tempSign;
          tempSign=sign(temp1.b);
          temp=floor(abs(temp1.b)*0.0009765625);
          temp1.g+=temp*tempSign;
          tempSign=sign(temp1.g);
          temp=floor(abs(temp1.g)*0.0009765625);
          temp1.r+=temp*tempSign;
          tempSign=sign(temp1.r);
          temp=floor(abs(temp1.r)*0.0009765625);
          a2.a+=temp*tempSign;
         
          // Carry pass over the kept digits 7..1: push the carry into the next higher
          // digit and remove carry*1024 from the current one.
          tempSign=sign(a2.a);
          temp=floor(abs(a2.a)*0.0009765625);
          a2.b+=temp*tempSign;
          a2.a-=temp*1024.0*tempSign;
             
          tempSign=sign(a2.b);   
          temp=floor(abs(a2.b)*0.0009765625);
          a2.g+=temp*tempSign;
          a2.b-=temp*1024.0*tempSign;
         
          tempSign=sign(a2.g);   
          temp=floor(abs(a2.g)*0.0009765625);
          a2.r+=temp*tempSign;
          a2.g-=temp*1024.0*tempSign;
         
          tempSign=sign(a2.r);   
          temp=floor(abs(a2.r)*0.0009765625);
          a1.a+=temp*tempSign;
          a2.r-=temp*1024.0*tempSign;
         
          tempSign=sign(a1.a);   
          temp=floor(abs(a1.a)*0.0009765625);
          a1.b+=temp*tempSign;
          a1.a-=temp*1024.0*tempSign;
         
          tempSign=sign(a1.b);   
          temp=floor(abs(a1.b)*0.0009765625);
          a1.g+=temp*tempSign;
          a1.b-=temp*1024.0*tempSign;
         
          // Last step: digit 0 (a1.r) receives the carry and is left unreduced.
          tempSign=sign(a1.g);   
          temp=floor(abs(a1.g)*0.0009765625);
          a1.r+=temp*tempSign;
          a1.g-=temp*1024.0*tempSign;
             
      }

      /*//only for test purposes
      float convertToFloat(in vec4 X1, in vec4 X2){
          float number=X1.r;
          float divi=1024.0;
          number+=X1.g/divi;
          divi*=1024.0;
          number+=X1.b/divi;
          divi*=1024.0;
          number+=X1.a/divi;
          divi*=1024.0;
          number+=X2.r/divi;
          divi*=1024.0;
          number+=X2.g/divi;
          divi*=1024.0;
          number+=X2.b/divi;
          divi*=1024.0;
          number+=X2.a/divi;
          return number;
      }*/

      //a=a+b-4, 41 Ops
      // a = a + b - 4 for two fixed-point numbers; the result replaces a1/a2.
      // Counterpart of add(): here the digits are biased downward and the carries use
      // ceil(), so the fractional digits are meant to end up non-positive. main() relies
      // on this to test "a + b - 4 <= 0" component-wise.
      void add2(inout vec4 a1, inout vec4 a2, in vec4 b1, in vec4 b2){
          // Digit-wise sum minus a bias: -2046 on the six middle digits, -2048 on the
          // last digit and -2 on the integer part. Together the bias amounts to -4.
          a1=(a1+b1)-vec4(2.0,2046.0,2046.0,2046.0);
          a2=(a2+b2)-vec4(2046.0,2046.0,2046.0,2048.0);

          // Carry propagation from the least significant digit (a2.a) up to the
          // integer part (a1.r), rounding each carry upward. 0.0009765625 = 1/1024.
          float carry;

          carry=ceil(a2.a*0.0009765625);
          a2.a-=carry*1024.0;
          a2.b+=carry;

          carry=ceil(a2.b*0.0009765625);
          a2.b-=carry*1024.0;
          a2.g+=carry;

          carry=ceil(a2.g*0.0009765625);
          a2.g-=carry*1024.0;
          a2.r+=carry;

          carry=ceil(a2.r*0.0009765625);
          a2.r-=carry*1024.0;
          a1.a+=carry;

          carry=ceil(a1.a*0.0009765625);
          a1.a-=carry*1024.0;
          a1.b+=carry;

          carry=ceil(a1.b*0.0009765625);
          a1.b-=carry*1024.0;
          a1.g+=carry;

          // The integer part absorbs the final carry and is not reduced itself.
          carry=ceil(a1.g*0.0009765625);
          a1.g-=carry*1024.0;
          a1.r+=carry;
      }





      // Fragment entry point: runs the Mandelbrot iteration z = z*z + c in the fixed-point
      // format of add()/add2()/mult() and colours the fragment by the iteration count at
      // which the loop stopped. Fragments that use up all iterations stay white.
      void main ()
      {
          //position=lowerLeftCorner+(gl_Position.xy+vec2(1.0,1.0))*deltaVector;
          vec4 tempPositionX1=vec4(0.0);
          vec4 tempPositionX2=vec4(0.0);
          vec4 tempPositionY1=vec4(0.0);
          vec4 tempPositionY2=vec4(0.0);
         
          // Shift the interpolated coordinate by +1.0 and convert both axes to fixed point.
          vec2 tmpPosition=position+1.0;
          convertToBigSize(tmpPosition.r,tempPositionX1, tempPositionX2);
          convertToBigSize(tmpPosition.g,tempPositionY1, tempPositionY2);
         
          // Scale by deltaVector, then add to lowerLeftCorner: the result is the start point.
          mult(tempPositionX1,tempPositionX2, deltaVectorX1, deltaVectorX2);
          mult(tempPositionY1,tempPositionY2, deltaVectorY1, deltaVectorY2);
          vec4 positionX1=lowerLeftCornerX1;
          vec4 positionX2=lowerLeftCornerX2;
          vec4 positionY1=lowerLeftCornerY1;
          vec4 positionY2=lowerLeftCornerY2;
          add(positionX1, positionX2, tempPositionX1, tempPositionX2);
          add(positionY1, positionY2, tempPositionY1, tempPositionY2);
          //z=position
          vec4 zX1=positionX1;
          vec4 zX2=positionX2;
          vec4 zY1=positionY1;
          vec4 zY2=positionY2;
          // Default colour (white); only overwritten below when the loop exits early.
          gl_FragColor = vec4(1.0,1.0,1.0,1.0);
         
         
          //vec2 tempZ=z*z;
         
          // tempZX = zX*zX and tempZY = zY*zY (mult() overwrites its first argument pair).
          vec4 tempZX1=zX1;
          vec4 tempZX2=zX2;
          vec4 tempZY1=zY1;
          vec4 tempZY2=zY2;
          mult(tempZX1, tempZX2, zX1, zX2);
          mult(tempZY1, tempZY2, zY1, zY2);
          vec4 temp1=tempZX1;
          vec4 temp2=tempZX2;
         
          //tempZ.x+tempZ.y
          // add2() also subtracts 4, so temp1/temp2 hold tempZ.x+tempZ.y-4.
          add2(temp1,temp2,tempZY1, tempZY2);
         
          float i=float(0.0);
          //(tempZ.x+tempZ.y)<=4 equivalent to tempZ.x+tempZ.y-4<=0.0
          // The comparison is done per digit: every component of temp1 and temp2 must be <= 0.
          while (i<=maxIter && all(lessThanEqual(temp1,vec4(0.0))) && all(lessThanEqual(temp2,vec4(0.0))))
          {
              //z = vec2(tempZ.x - tempZ.y, 2.0*z.x*z.y) + position;
             
              // tempZX = zX*zX - zY*zY
              add(tempZX1, tempZX2, -tempZY1, -tempZY2);
             
              // zY = zX*zY, using the old zX (zX is only overwritten afterwards) ...
              mult(zY1, zY2, zX1, zX2);
              // ... then doubled by adding zY to itself.
              add(zY1, zY2, zY1, zY2);
              zX1=tempZX1;
              zX2=tempZX2;
              // Add the start point to both components.
              add(zX1, zX2, positionX1, positionX2);
              add(zY1, zY2, positionY1, positionY2);
         
              //tempZ=z*z;
             
              tempZX1=zX1;
              tempZX2=zX2;
              tempZY1=zY1;
              tempZY2=zY2;
              mult(tempZX1, tempZX2, zX1, zX2);
              mult(tempZY1, tempZY2, zY1, zY2);
             
              //tempZ.x+tempZ.y
             
              temp1=tempZX1;
              temp2=tempZX2;
              add2(temp1,temp2, tempZY1, tempZY2);
             
              i+=1.0;   
             
          }
          // i <= maxIter means the loop ended because the magnitude test failed:
          // shade red/green by the fraction of iterations used.
          // NOTE(review): i/maxIter divides by zero when maxIter is 0 -- confirm the
          // application always supplies a positive limit.
          if (i <= maxIter)
              {
                  float color=i/maxIter;
                  gl_FragColor=vec4(color,color,1.0,1.0);   
                     
              }
             
      }


      On my HD 2600, it works fine. But according to GPU ShaderAnalyzer, this program does not compile for X1x00 cards. They support Shader Model 3.0, and I don't understand why it does not work.

      Would be nice, if you could help.

      Best,
      Andreas

        • Mandelbrot fragment shader at gpu shaderanalyzer
          bpurnomo

          It seems that the problem is with the loop condition

          (i<=maxIter && all(lessThanEqual(temp1,vec4(0.0))) && all(lessThanEqual(temp2,vec4(0.0))))

          If you change the function add2(), which modifies temp1 and temp2, so that it returns a constant, then the shader will compile for the older hardware.  That is not a fix, though, since the function will then no longer do what you intend.   It could be a driver bug for the old hardware, or something else.

           

           

           

            • Mandelbrot fragment shader at gpu shaderanalyzer
              Andreas999

              Hi,

              Thanks for your answer. Yes, I noticed that, too. But with a simpler version of my program, like this:

              // Interpolated per-fragment coordinate; used as both the start value of z and the
              // constant added in every iteration of main().
              varying vec2 position;
              //uniform sampler2D myColorTable;
              // Iteration limit of the loop in main(); also the divisor of the colour ramp.
              uniform float maxIter;
              // Plain single-precision Mandelbrot: iterates z = z*z + position until the
              // squared magnitude exceeds 4.0 or the iteration budget is used up, then
              // colours the fragment by the fraction of the budget consumed.
              void main ()
              {
                  // White is kept for fragments that never leave the loop early.
                  gl_FragColor = vec4(1.0,1.0,1.0,1.0);

                  vec2 z = position;
                  // Component-wise squares (x*x, y*y), reused by the test and the update.
                  vec2 zSquared = z*z;

                  float i = 0.0;
                  for (; i<=maxIter && (zSquared.x+zSquared.y)<=4.0; i+=1.0)
                  {
                      // (x + iy)^2 = (x*x - y*y) + i(2*x*y), plus the constant term.
                      z = vec2(zSquared.x - zSquared.y, 2.0*z.x*z.y) + position;
                      zSquared = z*z;
                  }

                  // Loop left because of the magnitude test: shade by iteration count.
                  if (i <= maxIter)
                  {
                      float shade = i/maxIter;
                      gl_FragColor = vec4(shade,shade,1.0,1.0);
                  }
              }

              all is fine. Then I commented out some parts of my loop, and it was accepted by the shader analyzer, even though temp1 and temp2 were still changed by the multiplication. I believe it's a software (= driver) problem and not a hardware problem. What do you think? I will send this to AMD and hope they will update the driver.

              Best,
              Andreas

                • Mandelbrot fragment shader at gpu shaderanalyzer
                  Andreas999

                  Hi,

                  in my first shader, if I used only while(i<=maxIter) and, inside this loop,

                  bool result=false;

                  if(all(lessThanEqual(temp1,vec4(0.0))) && all(lessThanEqual(temp2,vec4(0.0))))result=true;

                  it compiled. But if I used this if clause for a break or for changing i, I again got the message that this shader is not supported by the HW. I don't understand it. I also simplified the if clause to if(temp1.r<=0.0), without success.

                  Waiting for the amd answer... Do you know further tricks?

                  Best,
                  Andreas

                    • Mandelbrot fragment shader at gpu shaderanalyzer
                      bpurnomo

                      It does look like a bug.  I played with several variations of the shader.  Removing some mult() operations in the body of your control flow will also make the shader compile.

                      Is there a specific reason that you want to run this on older hardware?

                       

                        • Mandelbrot fragment shader at gpu shaderanalyzer
                          Andreas999

                          Hi,

                          an X1600 in a laptop is not so old :-). I only want to make sure that Shader Model 3.0 is supported and that a lot of people can run it. Here is the program: http://www.lichtundliebe.info/projects/2DFractal/2DFractal.jnlp and here is the video: http://www.youtube.com/watch?v=1x8_gRtnB_s.

                          Best,
                          Andreas

                            • Mandelbrot fragment shader at gpu shaderanalyzer
                              splitline

                              I tried to run your Java program on my Mac OS X 10.4 machine with Intel 945 graphics. It did not work. Does it only work with ATI Radeon cards?  I have another notebook with an AMD processor and an ATI 1250 Express chipset. Will it work there?  Does your shader work with this ATI chipset yet?  I'm interested in seeing your neat program.  

                              Also, what exactly does your shader do to enhance your program?  Thanks, I'm kind of new to all this.  Sorry, I can't read German.  

                              Thanks

                              Rick

                               

                                • Mandelbrot fragment shader at gpu shaderanalyzer
                                  Andreas999

                                  Hi
                                  it should work with all cards that support Shader Model 3.0. Your Intel card doesn't seem to have this feature. With your ATI X1250, it should work. However, the deeper zoom (clicking 5-6 times on the image) still does not work. I tested that on a friend's computer with an ATI X1600. The problem appears when I write to multiple 128-bit float textures. With my card, however, everything is fine. For the time being, I have no access to my friend's computer for further experiments.

                                  Without deep zoom, Mandelbrot and Julia fractals can be calculated in real time. For deeper zoom, the program uses fixed-point arithmetic to increase the precision. That's the first shader I posted. There, a number consists of 80 bits. These 80 bits are split across 8 float values, which means each float value uses 10 bits (2^10=1024) to hold one part of the number. Each float value has a mantissa of 23 bits, of which 10 bits are used for the number. I need the rest of the mantissa for overflow during addition and multiplication. A vec4 is a 4D vector consisting of 4 float values, so one number needs 2 vec4 values.

                                  Best,
                                  Andreas

                                    • Mandelbrot fragment shader at gpu shaderanalyzer
                                      Andreas999

                                      If you are interested, here is the code: http://www.lichtundliebe.info/projects/2DFractal.zip. It's written in GLSL and Java using the jMonkeyEngine (http://www.jmonkeyengine.com).

                                        • Mandelbrot fragment shader at gpu shaderanalyzer
                                          splitline

                                          Thanks for your reply.  Hey, I was wondering if you can direct me to some good tutorials on shaders for ATI and/or GeForce. (Newbie here, lol.) I have both. I've been looking at 2D fractals (made by PC) since my time at high school.  Very interesting.  Now I can bedazzle my friends who are unaware of fractals with your nice software.  I have a GeForce with Shader Model 3.0.  I will let you know what happens, since I see you want it to run on as many different computers as possible.  Also, why did you write the software in Java?  More portable?   Have you ever thought about writing software for cellular automata? These cool little mathematical formulas produce some really nice eye candy as well.  

                                          Thanks. 

                                            • Mandelbrot fragment shader at gpu shaderanalyzer
                                              Andreas999

                                              Hi,
                                              yes, I wanted to run my program on the most popular OSes (Windows, Mac OS, Linux), so I keep it portable. With Java, you can run my application directly with Java Web Start. Here is another nice fractal program: http://www.lichtundliebe.info/projects/3DFractal/3DFractal.jnlp written with the help of Keenan Crane: http://www.devmaster.net/forums/showthread.php?t=4448. Here is how to use it:

                                              The hot keys work only, if the fractal image is focused with the mouse.

                                              left mouse click: rotation of the fractal
                                              ctrl+left mouse click: zoom
                                              alt+left mouse click: movement of the fractal

                                              Shift+left mouse click: rotation of the light relative to the eye's position
                                              Shift+Ctrl+left mouse click: move the light from or to the rotation center
                                              Shift+Alt+left mouse click: movement of the light source

                                              l/L: cx
                                              i/I: cy
                                              j/J: cz
                                              k/K: cw

                                              "space": toggle animation on/off

                                              +/-: iterations
                                              e/E: Epsilon
                                              s: shadow
                                              m/M: maximum step size

                                              The colors can be changed by Fractal->color settings

                                              If you decrease epsilon or the maximum step size too much, the shader could exceed the limit of 65536 instructions => crash. So this program still has to be modified. That's why I have not yet commented my code as I did for the 2D fractal: http://www.lichtundliebe.info/projects/3DFractal.zip. With a Shader Model 4.0 card, that's not so critical.

                                              My aim is to program graphical applications for the web. Most PCs still don't have Cell processors on the mainboard.

                                              Search Google for "glsl tutorial" or "gpgpu" and you will find some tutorials. I used a German tutorial :-) and learned from different sources. What do you want? If you want portability, use OpenGL and GLSL. If Windows only is OK for you, then DirectX with HLSL could be your way: http://http.developer.nvidia.com/GPUGems/gpugems_part01.html.
                                              Once you understand the basics, you can also port code from HLSL to GLSL.

                                              Best,
                                              Andreas

                                               

                                                • Mandelbrot fragment shader at gpu shaderanalyzer
                                                  splitline

                                                  I tried both programs on my ATI Radeon 1250; neither worked correctly.  I got a few Java errors; here's one: com.jme.system.JmeException: Error in opengl:Invalid operation at ......... .checkCardError.  This error happened on both systems, the Mac and the AMD machine with the ATI Radeon Express 1250.  With the Julia Set 3D program I only saw a colored image that could change color with the mouse.  This was with the ATI 1250.  I have a GeForce with pixel shader 3.0 capabilities. Would this help? What ATI graphics card do you have?