#include <stdio.h>
#include <time.h>
#include <math.h>
#define N 270000
int nv[]={128,512,16384,32768,131072,262144};
main()
{
   float *d1,*e1,*f1,*b1,*x1,*m,*u;
   float *d2,*e2,*f2,*b2,*x2;
   float seed,t1,t2,big,err;
   float xx[2],ggl();
   void fcr(int,float*,float*,float*,float*,float*);
   void forsmol(int,float*,float*,float*,float*,float*,float*,float*);
   int i,k,n;
/*
   tests tridiagonal solvers
*/
   printf(" **********************************************\n");
   printf(" Error in cyclic reduction vs. Forsythe-Moler:\n");
   printf(" for n = %d %d %d %d %d %d %d %d %d %d\n",
        nv[0],nv[1],nv[2],nv[3],nv[4],nv[5]);
   printf(" ----------------------------------------------\n");
/* allocate arrays for diagonals and x,b vectors */
   d1 = (float *) malloc(N*sizeof(float));
   e1 = (float *) malloc(N*sizeof(float));
   f1 = (float *) malloc(N*sizeof(float));
   b1 = (float *) malloc(N*sizeof(float));
   x1 = (float *) malloc(N*sizeof(float));
   m  = (float *) malloc(N*sizeof(float));
   u  = (float *) malloc(N*sizeof(float));
   d2 = (float *) malloc(2*N*sizeof(float));
   e2 = (float *) malloc(2*N*sizeof(float));
   f2 = (float *) malloc(2*N*sizeof(float));
   b2 = (float *) malloc(2*N*sizeof(float));
   x2 = (float *) malloc(2*N*sizeof(float));

   seed = 331.0; 
   for(k=0;k<6;k++){
       n = nv[k];
       for(i=0;i<n;i++){
           e1[i]=0.25 + 0.1*ggl(&seed); 
           d1[i]=1.0  + 0.1*ggl(&seed); 
           f1[i]=0.25 + 0.1*ggl(&seed);
           d2[i]=d1[i]; e2[i]=e1[i];
           f2[i]=f1[i]; b1[i]=ggl(&seed); 
           b2[i]=b1[i]; m[i]=0.0; u[i]=0.0;
       }

       t1 = ((double)clock())/((double) CLOCKS_PER_SEC);
       forsmol(n,d1,e1,f1,x1,b1,m,u);
       t1 = ((double)clock())/((double) CLOCKS_PER_SEC) - t1;
       printf(" For n=%d, forsmol time = %e\n",n,t1);

       t2 = ((double)clock())/((double) CLOCKS_PER_SEC);
       fcr(n,d2,e2,f2,x2,b2);
       t2 = ((double)clock())/((double) CLOCKS_PER_SEC) - t2;
       printf(" For n=%d,     fcr time = %e\n",n,t2);

       err = 0.0;
       for(i=0;i<n;i++){
           err += (x2[i]-x1[i])*(x2[i]-x1[i]);
       }
       err = sqrt(err/((float) n));
       printf("    for n = %d, err = %e\n",n,err);
       printf(" ----------------------------------------------\n");
/* end of n loop */
   }
}
void forsmol(int n,float *d,float *e,float *f,
          float *x,float *b,float *m,float *u)
{
/*
    solves tridiagonal system according to 
    G.E. Forsythe and C.B. Moler " Computer 
    solutions to linear algebraic systems,"
    Prentice-Hall publ., 1967
*/
   int i;

   u[0]    = d[0];
   for(i=1;i<n;i++){
      m[i] = e[i]/u[i-1];
      u[i] = d[i] - m[i]*f[i-1];
   }
   x[0]     = b[0];
   for(i=1;i<n;i++){
       x[i] = b[i] - m[i]*x[i-1];
   }
   x[n-1]   = x[n-1]/u[n-1];
   for(i=n-2;i>=0;i--){
       x[i] = (x[i] - f[i]*x[i+1])/u[i];
   }

   return;
}
#include <math.h>
float ggl(float *ds)
{

/* generate u(0,1) distributed random numbers. 
   Seed ds must be saved between calls. ggl is 
   essentially the same as the IMSL routine RNUM. 

   W. Petersen and M. Troyer, 24 Oct. 2002, ETHZ: 
   a modification of a fortran version from 
   I. Vattulainen, Tampere Univ. of Technology, 
   Finland, 1992 */

   double t,d2=0.2147483647e10;
   t   = (float) *ds;
   t   = fmod(0.16807e5*t,d2);
   *ds = (float) t;
   return((float) ((t-1.0e0)/(d2-1.0e0)));
}
void fcr(int n, float *d, float *e, float *f, float *x, float *y){
/*
 ****************************************************************
 *  Solution of tridiagonal system of equations Tx = y
 *  by cyclic reduction
 *
 *  Input:
 *
 *    n:  order of the tridiagonal system matrix T
 *    d[0]...d[n-1]   diagonal entries of T,
 *    e[1]...e[n-1]   lower off-diagonal entries of T, 
 *    f[0]...f[n-2]   upper off-diagonal entries of T, 
 *    y[0]...y[n-1]   right hand side.
 *
 *  Output:
 *
 *    x[0]...x[n-1]   solution,
 *
 *  Attention: The 'real' size of all the above vectors has to be at 
 *             least 2*n. The additional elements are used for 
 *             intermediate results.
 *
 *  Remark: The lower triangular matrix factor is destroyed
 *          for saving memory space.
 *
 *  Nov 16, 2002 PA
 ****************************************************************
*/

    int b, bb, i, j, jj, m, nn[21];

/* Initializations */

    m = n; nn[0] = n; i = 0; bb = 0;
    e[0] = 0.0; f[n-1] = 0.0;

/*  Gaussian elimination / updating of right hand side */

    while (m > 1){
	b = bb;	bb = b + m; nn[++i] = m - m/2; m = m/2;

	e[bb] = e[b+1]/d[b];
#pragma ivdep
	for (j=0; j < m-1; j++){
            jj = 2*j + b + 2;
	    f[bb+j] = f[jj-1]/d[jj];
	    e[bb+j+1] = e[jj+1]/d[jj];
	}
	if (m != nn[i]) f[bb+m-1] = f[bb-2]/d[bb-1];

#pragma ivdep
	for (j=0; j < m; j++){
            jj = b + 2*j + 1;
            y[bb+j] = y[jj] - y[jj-1]*e[bb+j] - y[jj+1]*f[bb+j];
            d[bb+j] = d[jj] - f[jj-1]*e[bb+j] - e[jj+1]*f[bb+j];
            f[bb+j] = -f[jj+1]*f[bb+j];
            e[bb+j] = -e[jj-1]*e[bb+j];
	}
    }

/* Backward substitution */

    x[bb] = y[bb]/d[bb];
    while (i > 0){
#pragma ivdep
	for (j=0; j < m; j++){
            jj = b + 2*j; 
            x[jj] = (y[jj] - e[jj]*x[bb+j-1] - f[jj]*x[bb+j])/d[jj];
            x[jj+1] = x[bb+j];
	}
	if (m != nn[i]) x[bb-1] = (y[bb-1] - e[bb-1]*x[bb+m-1])/d[bb-1];

	m = m + nn[i]; bb = b; b = b - nn[--i] - m;
    }
    return;
}