Programming Resources
For Fun and Learning
Charles Cusack
Computer Science
Hope College
main

Python
C++
JAVA
PHP
SQL
Alice

MatMul


matmul_par.c

/*
**  PROGRAM: Parallel Matrix Multiply (using OpenMP)
**
**  PURPOSE: This is a simple matrix multiply program. 
**           It will compute the product
**
**                C  = A * B
**
**           A and B are set to constant matrices so we
**           can make a quick test of the multiplication.
**
**  USAGE:   Right now, I hardwire the martix dimensions. 
**           later, I'll take them from the command line.
**  
**  HISTORY: Written by Tim Mattson, Nov 1999.
**  Modified: Loop for 1-24 threads added, a few other tweaks made.
**     Chuck Cusack, February 2019.
*/
#ifdef APPLE
#include <stdlib.h>
#else
#include <malloc.h>
#endif
#include <stdio.h>
#include <omp.h>

#define ORDER 1000
#define AVAL 3.0
#define BVAL 5.0
#define TOL  0.001

int main(int argc, char *argv[])
{
	int Ndim, Pdim, Mdim;   /* A[N][P], B[P][M], C[N][M] */
	int i,j,k,l;
	
	double *A, *B, *C, cval, tmp, err, errsq;
      double dN, mflops;
	double start_time, run_time;


	Ndim = ORDER;
	Pdim = ORDER;
	Mdim = ORDER;

	A = (double *)malloc(Ndim*Pdim*sizeof(double));
  B = (double *)malloc(Pdim*Mdim*sizeof(double));
  C = (double *)malloc(Ndim*Mdim*sizeof(double));

printf("threads   n     seconds    mflops\n");
for(l=1;l<=24;l++) {
  omp_set_num_threads(l);
  
	/* Initialize matrices */
	for (i=0; i<Ndim; i++)
		for (j=0; j<Pdim; j++)
			*(A+(i*Ndim+j)) = AVAL;

	for (i=0; i<Pdim; i++)
		for (j=0; j<Mdim; j++)
			*(B+(i*Pdim+j)) = BVAL;

	for (i=0; i<Ndim; i++)
		for (j=0; j<Mdim; j++)
			*(C+(i*Ndim+j)) = 0.0;
	
		start_time = omp_get_wtime();

	/* Do the matrix product */

#pragma omp parallel for
	for (i=0; i<Ndim; i++){
		for (j=0; j<Mdim; j++){
      tmp = 0.0;
			for(k=0;k<Pdim;k++){
				/* C(i,j) = sum(over k) A(i,k) * B(k,j) */
				tmp += *(A+(i*Ndim+k)) *  *(B+(k*Pdim+j));
			}
			*(C+(i*Ndim+j)) = tmp;
		}
	}
 
 /* print out the stats */
	run_time = omp_get_wtime() - start_time;
  dN = (double)ORDER;
  mflops = 2.0 * dN * dN * dN/(1000000.0* run_time);
  printf("%6d %4d %8.8f %12.6f\n", omp_get_max_threads(), ORDER, run_time,mflops);

	/* Check the answer */
	cval = Pdim * AVAL * BVAL;
	errsq = 0.0;
	for (i=0; i<Ndim; i++){
		for (j=0; j<Mdim; j++){
			err = *(C+i*Ndim+j) - cval;
		    errsq += err * err;
		}
	}

	if (errsq > TOL) {
		printf("\n Errors in multiplication: %f\n",errsq);
  }
  
} // main for loop
}