#include #include #include #define N 1027 main() { /* Mac G-4 unit step isamax for arbitrary N code This is an Altivec version of isamax0 from Section 3.5.7 in Arbenz and Petersen, "Intro. to Parallel Computing" Oxford Univ. Press, 2004 wpp 5/8/2002 */ float x[N]; float xb; int err,flag,i,im,k,ki,kl,ib,n0,n; int isamax(int,float *); static float seed = 331.0; float ggl(float*); flag = 0; // error flag kl = 3; n0 = 1; for(k=0;k<5;k++){ for(ki=0;ki xb){ xb = fabs(x[i]); ib = i; } } im = isamax(n,x); err = ib - im; if(err != 0){ printf(" err in isamax: n = %d, ib = %d, im = %d\n",n,ib,im); flag = 1; } } n0 *= 4; // increase n kl = 4; // for n > 1, 3 steps of increase in n } if(flag==0) printf(" All n tests pass\n"); } #define NS 12 int isamax(int n, float *x) { float rbig,*xp; int i,ii,nres,nsegs,ibig,irbig; vector float V0,V1,V6; vector bool int V3; vector float V2 = (vector float) (0.0,1.0,2.0,3.0); vector float V7 = (vector float) (0.0,1.0,2.0,3.0); const vector float incr_4 = (vector float) (4.0,4.0,4.0,4.0); const vector float minus0 = (vector float) (-0.0,-0.0,-0.0,-0.0); float big,xbig[4],indx[4]; // n < NS done in scalar mode if(n < NS){ ibig = 0; rbig = 0.0; for(i=0;i rbig){ rbig = fabs(x[i]); ibig = i; } } return(ibig); } // n >= NS case done with altivec nsegs = (n >> 2) - 1; nres = n - ((nsegs+1) << 2); // nres = n mod 4 V2 = vec_add(V2,incr_4); // increment next index xp = x; V0 = vec_ld(0,xp); xp += 4; // first four V1 = vec_ld(0,xp); xp += 4; // next four V0 = vec_abs(V0); // absolute value of first four for(i=0;i> 2) << 2); big = 0.0; ibig = 0.0; for(i=0;i big){ big = fabs(x[ii]); ibig = ii; } ii++; } for(i=0;i<4;i++){ if(xbig[i] > big){ big = xbig[i]; ibig = (int) indx[i]; } } return(ibig); } #undef NS #include float ggl(float *ds) { /* generate u(0,1) distributed random numbers. Seed ds must be saved between calls. ggl is essentially the same as the IMSL routine RNUM. W. Petersen and M. Troyer, 24 Oct. 2002, ETHZ: a modification of a fortran version from I. Vattulainen, Tampere Univ. of Technology, Finland, 1992 */ double t,d2=0.2147483647e10; t = (float) *ds; t = fmod(0.16807e5*t,d2); *ds = (float) t; return((float) ((t-1.0e0)/(d2-1.0e0))); }