#include #include #include main() { /* vectorized polynomial evaluation example Section 3.5.1 in Petersen and Arbenz, "Intro. to Parallel Computing," Oxford Univ. Press, 2003. This is via recursive doubling and uses BLAS-1 routine ddot (sdot for Cray version) simplest procedure for linking BLAS-1 ddot is gcc -O3 -c rpoly.c g77 rpoly.o -lblas alternative procedure is gcc -O3 rpoly.c -lm -lblas -lg2c which will bring in the Fortran I/O routines used in the BLAS-1 error messages (if any). wpp, ETH Zurich, 3 June 1996 */ double a[10000], v[10000]; double poly(); double aa,bb; double err, x, p; int i,j,m,n,nk; int nv[15] = {1,2,4,8,16,32,64,128,192,256,512,1024,2048,4096,8192}; for(nk=0;nk<=14;nk++){ n = nv[nk]-1; x = 1.0; bb = 1/x; #pragma ivdep for(i=0;i<=n;i++){ aa = i; a[i] = pow(bb,aa); } p = poly(n,a,v,x); err = (double)(n+1) - p; printf("For n = %4d, error = %e \n",n,err); } } double poly(n,a,v,x) int n; double a[]; double v[]; double x; { int i,one,id,nd,itd; /* Cray version: fortran double SDOT(); */ /* generic Linux version */ double ddot_(int*,double*,int*,double*,int*); double xi,p; v[0] = 1.; v[1] = x; v[2] = x*x; id = 2; one = 1; nd = id; # while(nd < n){ if(id <= n-nd) itd = id; else itd = n-nd; # xi = v[id]; #pragma ivdep for (i=1;i<=itd;i++){ v[id+i] = xi*v[i]; } id = id + id; nd = nd + itd; } nd = n+1; /* Cray version p = SDOT(&nd,a,&one,v,&one); end Cray version */ /* generic Unix version */ p = ddot_(&nd,a,&one,v,&one); /* end generic Unix version */ return(p); }