/*********************************************************** * mv_pblas.c -- parallel matrix - vector multiplication * * * * Compute y = A * x by means of the * * Parallel Basic Linear Algebra Subroutines (PBLAS) * * 3.11.2002 PA * ***********************************************************/ #include #include #include #include "mpi.h" #define max(a,b) ((a) > (b) ? (a) : (b)) #define abs(a) max((a),-(a)) /*********************************************************** * Globals * ***********************************************************/ int M=50, N=100, ZERO=0, ONE=1; /*********************************************************** * Main * ***********************************************************/ main(int argc, char* argv[]) { int myid; /* process rank */ int p; /* number of processes */ int ctxt, pr, pc, myrow, mycol; /* BLACS stuff*/ int descA[9], descx[9], descy[9], info; double *A, *x, *y, alpha, beta; int m, mb, n, nb, i, i0, j, j0; /* Start the MPI engine */ MPI_Init(&argc, &argv); /* Find out number of processes */ MPI_Comm_size(MPI_COMM_WORLD, &p); /* Find out process rank */ MPI_Comm_rank(MPI_COMM_WORLD, &myid); /* Get a BLACS context */ Cblacs_get(0, 0, &ctxt); /* Determine pr and pc for the pr x pc process grid */ for (pc=p/2; p%pc; pc--) ; pr = p/pc; if (pr > pc){ pc = pr; pr = p/pc; } /* Initialize the pr x pc process grid */ Cblacs_gridinit(&ctxt, "Row-major", pr, pc); Cblacs_pcoord(ctxt, myid, &myrow, &mycol); /* Determine block sizes */ mb = (M - 1)/pr + 1; /* mb = ceil(M/pr) */ nb = (N - 1)/pc + 1; /* nb = ceil(N/pc) */ /* Allocate memory space */ x = (double*) malloc(nb*sizeof(double)); y = (double*) malloc(mb*sizeof(double)); A = (double*) malloc(mb*nb*sizeof(double)); /* Determine true local sizes and base indices */ i0 = myrow*mb; j0 = mycol*nb; m = mb; n = nb; if (myrow == pr-1) m = M - i0; if (mycol == pc-1) n = N - j0; descinit_(descA,&M, &N, &mb, &nb, &ZERO,&ZERO,&ctxt,&mb, &info); descinit_(descx,&ONE,&N, &ONE,&nb, &ZERO,&ZERO,&ctxt,&ONE,&info); descinit_(descy,&M, &ONE,&mb, &ONE,&ZERO,&ZERO,&ctxt,&mb, &info); /* Initialize matrix A with A[i,j] = |i-j| */ for (j = 0; j < n; j++) for (i = 0; i < m; i++) A[i+j*mb] = abs(j0+j-i0-i); /* Initialize vector x = [1,2,..,N] */ for (j = 0; j < n; j++) x[j] = (double)(j+j0); /* Multiply y = alpha*A*x + beta*y (alpha=1, beta=0) */ alpha = 1.0; beta = 0.0; pdgemv_("No Transpose",&M,&N,&alpha,A,&ONE,&ONE,descA, x,&ONE,&ONE,descx,&ONE,&beta,y,&ONE,&ONE,descy,&ONE); /* Print result */ if (mycol == 0) for (i = 0; i < m; i++) printf("y[%d] = %12.2f\n", i+i0, y[i]); /* Free memory */ free(A); free(x); free(y); /* Release process grid */ Cblacs_gridexit(ctxt); /* Shut down MPI */ MPI_Finalize(); } /* main */