wtorek, 9 maja 2017

quicker version of fourier transform radix-4 without using inside object like complex numbers only tables

  //source:
    //https://www.google.ch/patents/US6957241
    //http://www.google.ch/patents/US20020083107
    //https://www.beechwood.eu/fft-implementation-r2-dit-r4-dif-r8-dif/
    //http://www.cmlab.csie.ntu.edu.tw/cml/dsp/training/coding/transform/fft.html
    //http://dsp.stackexchange.com/questions/3481/radix-4-fft-implementation

    //https://community.arm.com/graphics/b/blog/posts/speeding-up-fast-fourier-transform-mixed-radix-on-mobile-arm-mali-gpu-by-means-of-opencl---part-1
    //book: "Cyfrowe przetwarzanie sygnalow" - Tomasz Zielinski it has quick version of radix-2 because it calculates less sin() and cos()




    //author marcin matysek (r)ewertyn.PL
    //quicker version of fourier transform radix-4 without using inside object like complex numbers only tables
    //inverse fast fourier transform iFFT radix-4 combinations witch shift for N= points algorithm c++ source code implementation
    //szybka odwrotna transformacja fouriera iFFT radix-4 z przesunieciem dla N= punktów algorytm c++ kod zródlowy implementacja

   void fun_fourier_transform_FFT_radix_4(int N,std::complex<double> tab[],std::complex<double> w10,std::complex<double> w11,std::complex<double> w20,std::complex<double> w21)
{
    //double *(tab)[1]={};
    //tab[0]=&tab2[0].real();
    const double pi=3.141592653589793238462;
    //std::complex<double>  w111[1]={{1,0}};
    //std::complex<double>  w2[1]={{1,0}};
    //std::complex<double>  w3[1]={{1,0}};
    //std::complex<double>  w4[1]={{1,0}};
    //std::complex<double>  w5[1]={{1,0}};

    double w1[4][2]={};
    double tab1[4][2]={};
    double tmp1[4][2]={};
    double tmp2[4][2]={};
    double tmp3[4][2]={};


    //std::complex<double> tmp01,tmp02,tmp03,tmp04,tmp6,tmp7,tmp8,tmp9;
    //std::complex<double> tmp10,tmp20,tmp30,tmp40;
    int nr1=0,nr2=0,nr3=0,nr4=0,nr5=0;
    int nr10=0,nr20=0,nr31=0,nr32=0,nr33=0;
    int nb_stages=0,rx4=0;
    double tmp5,tmp100,tmp200;
    int increment=0;
    int nn=0;

    //w5[0].real()=0;
    //w5[0].imag()=1;//???
    tab1[0][0]=w10.real();
    tab1[0][1]=w10.imag();
    tab1[1][0]=w11.real();
    tab1[1][1]=w11.imag();
    tab1[2][0]=w20.real();
    tab1[2][1]=w20.imag();
    tab1[3][0]=w21.real();
    tab1[3][1]=w21.imag();


   // cout<<tab1[2][0]<<" "<<tab1[2][1]<<endl;
  //  cout<<tab1[3][0]<<" "<<tab1[3][1]<<endl;system("pause");
  /*
    int M=2;
    int **tab9 = new int *[N];//alokacja pamieci
    for ( int i = 0; i < N; ++i )
    {
        tab9[i] = new int [M]; //alokacja pamieci

    }
*/
    double fi2=0;//always 0

    tmp5=2*pi/(N/1);
    rx4=4;//radix-4
    nn=N;
    for(int i=0;i<100;i++)
    {

        nn=(float)nn/(float)rx4;
         if(nn>=1.0)
         {
         nb_stages++;
         }
    }
//stage 1
          w1[0][0]=cos(0*tmp5+fi2);
          w1[0][1]=sin(0*tmp5+fi2);
          w1[1][0]=cos(0*tmp5+fi2);
          w1[1][1]=sin(0*tmp5+fi2);
          w1[2][0]=cos(0*tmp5+fi2);
          w1[2][1]=sin(0*tmp5+fi2);
          w1[3][0]=cos(0*tmp5+fi2);
          w1[3][1]=sin(0*tmp5+fi2);
        for(int i=0;i<(N/rx4);i++)
        {
            tmp1[0][0]=w1[0][0]*tab[i].real()-w1[0][1]*tab[i].imag();
            tmp1[0][1]=w1[0][0]*tab[i].imag()+w1[0][1]*tab[i].real();
            tmp1[1][0]=w1[1][0]*tab[i+N/4].real()-w1[1][1]*tab[i+N/4].imag();
            tmp1[1][1]=w1[1][0]*tab[i+N/4].imag()+w1[1][1]*tab[i+N/4].real();
            tmp1[2][0]=w1[2][0]*tab[i+N/2].real()-w1[2][1]*tab[i+N/2].imag();
            tmp1[2][1]=w1[2][0]*tab[i+N/2].imag()+w1[2][1]*tab[i+N/2].real();
            tmp1[3][0]=w1[3][0]*tab[i+3*N/4].real()-w1[3][1]*tab[i+3*N/4].imag();
            tmp1[3][1]=w1[3][0]*tab[i+3*N/4].imag()+w1[3][1]*tab[i+3*N/4].real();

            tmp2[0][0]=tab1[0][0]*(tmp1[0][0]-tmp1[2][0])-tab1[0][1]*(tmp1[0][1]-tmp1[2][1]);
            tmp2[0][1]=tab1[0][0]*(tmp1[0][1]-tmp1[2][1])+tab1[0][1]*(tmp1[0][0]-tmp1[2][0]);
            tmp2[1][0]=tab1[1][0]*(tmp1[1][0]-tmp1[3][0])-tab1[1][1]*(tmp1[1][1]-tmp1[3][1]);
            tmp2[1][1]=tab1[1][0]*(tmp1[1][1]-tmp1[3][1])+tab1[1][1]*(tmp1[1][0]-tmp1[3][0]);
            tmp2[2][0]=tmp1[0][0]+tmp1[2][0];
            tmp2[2][1]=tmp1[0][1]+tmp1[2][1];
            tmp2[3][0]=tmp1[1][0]+tmp1[3][0];
            tmp2[3][1]=tmp1[1][1]+tmp1[3][1];

            //radix-4
            tmp3[0][0]=tab1[0][0]*(tmp2[2][0]+tmp2[3][0])-tab1[0][1]*(tmp2[2][1]+tmp2[3][1]);
            tmp3[0][1]=tab1[0][0]*(tmp2[2][1]+tmp2[3][1])+tab1[0][1]*(tmp2[2][0]+tmp2[3][0]);
            tmp3[1][0]=tmp2[0][0]+tmp2[1][0];
            tmp3[1][1]=tmp2[0][1]+tmp2[1][1];
            tmp3[2][0]=tab1[0][0]*(tmp2[2][0]-tmp2[3][0])-tab1[0][1]*(tmp2[2][1]-tmp2[3][1]);
            tmp3[2][1]=tab1[0][0]*(tmp2[2][1]-tmp2[3][1])+tab1[0][1]*(tmp2[2][0]-tmp2[3][0]);
            tmp3[3][0]=tmp2[0][0]-tmp2[1][0];
            tmp3[3][1]=tmp2[0][1]-tmp2[1][1];

            tab[i].real()=tmp3[0][0];
            tab[i].imag()=tmp3[0][1];
            tab[i+N/4].real()=tmp3[1][0];
            tab[i+N/4].imag()=tmp3[1][1];
            tab[i+N/2].real()=tmp3[2][0];
            tab[i+N/2].imag()=tmp3[2][1];
            tab[i+3*N/4].real()=tmp3[3][0];
            tab[i+3*N/4].imag()=tmp3[3][1];

            /*
          tmp01=w111[0]*tab[i+0];
          tmp02=w2[0]*tab[i+N/4];
          tmp03=w3[0]*tab[i+N/2];
          tmp04=w4[0]*tab[i+3*N/4];
            tmp6=w10*(tmp01-tmp03);
            tmp7=w11*(tmp02-tmp04);
            tmp8=tmp01+tmp03;
            tmp9=tmp02+tmp04;
            //radix-4
           tmp10=(w10*(tmp8+tmp9));
           tmp20=(tmp6+tmp7);
           tmp30=(w10*(tmp8-tmp9));
           tmp40=(tmp6-tmp7);

          tab[i]       =tmp10;
          tab[i+N/4]   =tmp20;
          tab[i+N/2]   =tmp30;
          tab[i+3*N/4] =tmp40;
          */
        }
///////////////////////////////////////////


    increment=0;
    int flag2=0;
    for(int stg=1;stg<nb_stages;stg++)
    {
          //  cout<<"..if(flag2%2==0).."<<endl;
          //  system("pause");
         nr1=N/pow(rx4,0+increment);
        nr2=N/pow(rx4,1+increment);
        nr3=N/pow(rx4,2+increment);
        nr4=pow(rx4,(nb_stages-2-increment));
        nr5=pow(rx4,0+increment);
        nr31=1*nr3;
        nr32=2*nr3;
        nr33=3*nr3;
        for(int j=0;j<nr4;j++)
        {
            for(int i=0;i<rx4;i++)
            {
                nr20=nr2*i;
                tmp100=nr5*i*(j+nr4);
                tmp200=nr5*i*j;

                w1[0][0]=cos(tmp200*tmp5+fi2);
                w1[0][1]=-sin(tmp200*tmp5+fi2);
                w1[1][0]=cos(tmp100*tmp5+fi2);
                w1[1][1]=-sin(tmp100*tmp5+fi2);
                w1[2][0]=cos((2*tmp100-  tmp200)*tmp5+fi2);
                w1[2][1]=-sin((2*tmp100-  tmp200)*tmp5+fi2);
                w1[3][0]=cos((3*tmp100-2*tmp200)*tmp5+fi2);
                w1[3][1]=-sin((3*tmp100-2*tmp200)*tmp5+fi2);
                for(int m=0;m<nr5;m++)
                {
                    nr10=nr1*m;

                    tmp1[0][0]=w1[0][0]*tab[     nr20+nr10+j].real()-w1[0][1]*tab[     nr20+nr10+j].imag();
                    tmp1[0][1]=w1[0][0]*tab[     nr20+nr10+j].imag()+w1[0][1]*tab[     nr20+nr10+j].real();
                    tmp1[1][0]=w1[1][0]*tab[nr31+nr20+nr10+j].real()-w1[1][1]*tab[nr31+nr20+nr10+j].imag();
                    tmp1[1][1]=w1[1][0]*tab[nr31+nr20+nr10+j].imag()+w1[1][1]*tab[nr31+nr20+nr10+j].real();
                    tmp1[2][0]=w1[2][0]*tab[nr32+nr20+nr10+j].real()-w1[2][1]*tab[nr32+nr20+nr10+j].imag();
                    tmp1[2][1]=w1[2][0]*tab[nr32+nr20+nr10+j].imag()+w1[2][1]*tab[nr32+nr20+nr10+j].real();
                    tmp1[3][0]=w1[3][0]*tab[nr33+nr20+nr10+j].real()-w1[3][1]*tab[nr33+nr20+nr10+j].imag();
                    tmp1[3][1]=w1[3][0]*tab[nr33+nr20+nr10+j].imag()+w1[3][1]*tab[nr33+nr20+nr10+j].real();

                    //tab1[2][0]=1
                    //tab1[2][1]=0
                    //tmp2[0][0]=tab1[2][0]*(tmp1[0][0]-tmp1[2][0])-tab1[2][1]*(tmp1[0][1]-tmp1[2][1]);
                    //tmp2[0][1]=tab1[2][0]*(tmp1[0][1]-tmp1[2][1])+tab1[2][1]*(tmp1[0][0]-tmp1[2][0]);
                    tmp2[0][0]=tmp1[0][0]-tmp1[2][0];
                    tmp2[0][1]=tmp1[0][1]-tmp1[2][1];
                    tmp2[1][0]=tab1[3][0]*(tmp1[1][0]-tmp1[3][0])-tab1[3][1]*(tmp1[1][1]-tmp1[3][1]);
                    tmp2[1][1]=tab1[3][0]*(tmp1[1][1]-tmp1[3][1])+tab1[3][1]*(tmp1[1][0]-tmp1[3][0]);
                    tmp2[2][0]=tmp1[0][0]+tmp1[2][0];
                    tmp2[2][1]=tmp1[0][1]+tmp1[2][1];
                    tmp2[3][0]=tmp1[1][0]+tmp1[3][0];
                    tmp2[3][1]=tmp1[1][1]+tmp1[3][1];

                    //radix-4
                    //tab1[2][0]=1
                    //tab1[2][1]=0
                    //tmp3[0][0]=tab1[2][0]*(tmp2[2][0]+tmp2[3][0])-tab1[2][1]*(tmp2[2][1]+tmp2[3][1]);
                    //tmp3[0][1]=tab1[2][0]*(tmp2[2][1]+tmp2[3][1])+tab1[2][1]*(tmp2[2][0]+tmp2[3][0]);
                    tmp3[0][0]=tmp2[2][0]+tmp2[3][0];
                    tmp3[0][1]=tmp2[2][1]+tmp2[3][1];
                    tmp3[1][0]=tmp2[0][0]+tmp2[1][0];
                    tmp3[1][1]=tmp2[0][1]+tmp2[1][1];
                    //tmp3[2][0]=tab1[2][0]*(tmp2[2][0]-tmp2[3][0])-tab1[2][1]*(tmp2[2][1]-tmp2[3][1]);
                    //tmp3[2][1]=tab1[2][0]*(tmp2[2][1]-tmp2[3][1])+tab1[2][1]*(tmp2[2][0]-tmp2[3][0]);
                    tmp3[2][0]=tmp2[2][0]-tmp2[3][0];
                    tmp3[2][1]=tmp2[2][1]-tmp2[3][1];
                    tmp3[3][0]=tmp2[0][0]-tmp2[1][0];
                    tmp3[3][1]=tmp2[0][1]-tmp2[1][1];

                    tab[     nr20+nr10+j].real()=tmp3[0][0];
                    tab[     nr20+nr10+j].imag()=tmp3[0][1];
                    tab[nr31+nr20+nr10+j].real()=tmp3[1][0];
                    tab[nr31+nr20+nr10+j].imag()=tmp3[1][1];
                    tab[nr32+nr20+nr10+j].real()=tmp3[2][0];
                    tab[nr32+nr20+nr10+j].imag()=tmp3[2][1];
                    tab[nr33+nr20+nr10+j].real()=tmp3[3][0];
                    tab[nr33+nr20+nr10+j].imag()=tmp3[3][1];


                    //complex numbers version:
                   /*
                    tmp01=w111[0]*tab[     nr20+nr10+j];
                    tmp02=w2[0]*tab[nr31+nr20+nr10+j];
                    tmp03=w3[0]*tab[nr32+nr20+nr10+j];
                    tmp04=w4[0]*tab[nr33+nr20+nr10+j];

                    tmp6=w20*(tmp01-tmp03);
                    tmp7=w21*(tmp02-tmp04);
                    tmp8=tmp01+tmp03;
                    tmp9=tmp02+tmp04;
                   //radix-4
                   tmp10=(w20*(tmp8+tmp9));
                   tmp20=(tmp6+tmp7);
                   tmp30=(w20*(tmp8-tmp9));
                   tmp40=(tmp6-tmp7);

                    tab[     nr20+nr10+j]=tmp10;
                    tab[nr31+nr20+nr10+j]=tmp20;
                    tab[nr32+nr20+nr10+j]=tmp30;
                    tab[nr33+nr20+nr10+j]=tmp40;
                    */
                }
            }
        }
        increment++;
    }
    /*
    //zniszcz tablice 2D
    for ( int i(0); i < N; ++i )
    {
        delete [] tab9[i]; //uwolnienie pamieci
    }
    delete [] tab9; //uwolnienie pamieci
    tab9 = NULL;
    */
    ////////////////////////////////////////////////////
    for(int j=0;j<N;j++)
    {
      tab[j].real() =tab[j].real()*2/N;
      tab[j].imag() =tab[j].imag()*2/N;
    }

}
////////////////////////////////

void fun_radix_4_basis_for_stage1(std::complex<double> &w0,std::complex<double> &w1)
{
    const double pi=3.141592653589793238462;
    //std::complex<double>    w0,w1,w2,w3,w4,w6,w9;
   
    double tmp5=2*pi/4;
   
   
    double fi2=fi;
   
   
    w0.real()=round(cos(0*tmp5+fi2)*10000000)/10000000;
    w0.imag()=round(-sin(0*tmp5+fi2)*10000000)/10000000;
    w1.real()=round(cos(1*tmp5+fi2)*10000000)/10000000;
    w1.imag()=round(-sin(1*tmp5+fi2)*10000000)/10000000;
    w0=w0;
    w1=w1;
    w0.real()=w0.real();
    w0.imag()=w0.imag();
    w1.real()=w1.real();
    w1.imag()=w1.imag();

    //w2.real()=round(cos(2*tmp5)*1000)/1000;//==-w0
    //w2.imag()=round(-sin(2*tmp5)*1000)/1000;//==-w0
    //w3.real()=round(cos(3*tmp5)*1000)/1000;//==-w1
    //w3.imag()=round(-sin(3*tmp5)*1000)/1000;//==-w1
    //w4.real()=round(cos(4*tmp5)*1000)/1000;//==+w0
    //w4.imag()=round(-sin(4*tmp5)*1000)/1000;//==+w0
    //w6.real()=round(cos(6*tmp5)*1000)/1000;//==-w0
    //w6.imag()=round(-sin(6*tmp5)*1000)/1000;//==-w0
    //w9.real()=round(cos(9*tmp5)*1000)/1000;//==+w1
    //w9.imag()=round(-sin(9*tmp5)*1000)/1000;//==+w1

   //cout<<" "<<w0<<" "<<w0<<" "<<w0<<" "<<w0<<endl;
   //cout<<" "<<w0<<" "<<w1<<" "<<w2<<" "<<w3<<endl;
   //cout<<" "<<w0<<" "<<w2<<" "<<w4<<" "<<w6<<endl;
   //cout<<" "<<w0<<" "<<w3<<" "<<w6<<" "<<w9<<endl;
   //system("pause");
}
//////////////////
void fun_radix_4_basis_for_stage_rest(std::complex<double> &w0,std::complex<double> &w1)
{
    const double pi=3.141592653589793238462;
    //std::complex<double>    w0,w1,w2,w3,w4,w6,w9;
   
    double tmp5=2*pi/4;
   
    double fi2=0;//always 0
   
    w0.real()=round(cos(0*tmp5+fi2)*10000000)/10000000;
    w0.imag()=round(-sin(0*tmp5+fi2)*10000000)/10000000;
    w1.real()=round(cos(1*tmp5+fi2)*10000000)/10000000;
    w1.imag()=round(-sin(1*tmp5+fi2)*10000000)/10000000;
    w0=w0;
    w1=w1;
    w0.real()=w0.real();
    w0.imag()=w0.imag();
    w1.real()=w1.real();
    w1.imag()=w1.imag();

    //w2.real()=round(cos(2*tmp5)*1000)/1000;//==-w0
    //w2.imag()=round(-sin(2*tmp5)*1000)/1000;//==-w0
    //w3.real()=round(cos(3*tmp5)*1000)/1000;//==-w1
    //w3.imag()=round(-sin(3*tmp5)*1000)/1000;//==-w1
    //w4.real()=round(cos(4*tmp5)*1000)/1000;//==+w0
    //w4.imag()=round(-sin(4*tmp5)*1000)/1000;//==+w0
    //w6.real()=round(cos(6*tmp5)*1000)/1000;//==-w0
    //w6.imag()=round(-sin(6*tmp5)*1000)/1000;//==-w0
    //w9.real()=round(cos(9*tmp5)*1000)/1000;//==+w1
    //w9.imag()=round(-sin(9*tmp5)*1000)/1000;//==+w1

   //cout<<" "<<w0<<" "<<w0<<" "<<w0<<" "<<w0<<endl;
   //cout<<" "<<w0<<" "<<w1<<" "<<w2<<" "<<w3<<endl;
   //cout<<" "<<w0<<" "<<w2<<" "<<w4<<" "<<w6<<endl;
   //cout<<" "<<w0<<" "<<w3<<" "<<w6<<" "<<w9<<endl;
   //system("pause");
}












Brak komentarzy:

Prześlij komentarz