# Makefile for the matrix multiply contest
#
# To make the driver,
#     make matmul
# To time a dgemm routine,
#     make timing
# To plot the timing results,
#     make timing.ps
#
# The driver (matmul.c) is written in C, but you can write your code
# in C or in Fortran.  The file f2c_dgemm.c provides an interface
# between the C driver and a Fortran matrix multiply routine, if you
# would prefer to work with Fortran.  It should work on Intel/Linux
# systems and Sun systems, and it may or may not work on other systems.
# Mixed-language programming is a pain that way.
#
# I have also provided commented-out settings to link a version against
# an optimized version of the Basic Linear Algebra Subprograms (BLAS).
# I use the ATLAS BLAS, available from
#
#     www.netlib.org/atlas
#
# You might find it interesting to compare the speed of your matrix
# multiply to the speed of the ATLAS routines.  Then compare how long it
# took you to install and figure out ATLAS (or another optimized library)
# with how long you spent trying to optimize your own routine.  Using
# existing libraries can save you a lot of time and grief, and let
# you concentrate on wheels that haven't already been invented.

# ---
# Add -DFASTTEST to the DRIVER_CFLAGS variable (the flags used to compile
# the driver, matmul.c) in order to use a shorter list of test cases.
# Add -DNOVALIDATE to turn off the routine that checks your answers.
# You should probably only use -DNOVALIDATE when you're really sure that
# everything is working.

# For Solaris, optimized for UltraSparcs only when compiled on an Ultra...
#CC = cc
#CFLAGS = -xO5 -xtarget=native -xarch=v8plusa

# For Linux on a PPro or better:
#  ... and a recent (2.95+) version of gcc.
#  Previous versions may encounter Internal Compiler Errors with P6 insns.
#CC = gcc
#CFLAGS = -march=pentiumpro -mcpu=pentiumpro -O4 \
#	-funroll-all-loops -fmove-all-movables -freduce-all-givs \
#	-falign-loops -falign-loops -falign-functions \
#	-fschedule-insns

# For the T3E:
#CC = cc
#CFLAGS = -O3

# Generic:
#CC = cc
#CFLAGS = -O

# gprof testing
#LDFLAGS = -pg
#CFLAGS = -O -pg
#DRIVER_CFLAGS = -DFASTTEST -DNOVALIDATE

# ---
# Exactly one LIBS/OBJS pair should be active at a time.
# Compile a C version (using basic_dgemm.c, in this case):

LIBS = -lm
OBJS = matmul.o basic_dgemm.o

# Compile a Fortran version (basic_fdgemm.f, in this case):

#LIBS = -lg2c -lm
#OBJS = matmul.o f2c_dgemm.o basic_fdgemm.o

# Compile a version using a vendor BLAS (ATLAS, in this case):

#LIBS = /home/eecs/dbindel/work/ATLAS/lib/Linux_PII/libf77blas.a \
#	/home/eecs/dbindel/work/ATLAS/lib/Linux_PII/libatlas.a \
#	-lg2c -lm
#OBJS = matmul.o wrap_dgemm.o

# ---
# matmul is the first ordinary target in this file, so a bare "make"
# builds it.  Objects other than matmul.o (e.g. basic_dgemm.o) have no
# explicit rule here; they are compiled by make's built-in rules, which
# use CC and CFLAGS.

matmul: $(OBJS)
	$(CC) $(OBJS) $(LIBS) $(LDFLAGS) -o $@

# The driver is compiled with DRIVER_CFLAGS only (CFLAGS is not passed),
# so driver options such as -DFASTTEST or -DNOVALIDATE belong in
# DRIVER_CFLAGS.

matmul.o: matmul.c
	$(CC) $(DRIVER_CFLAGS) -c matmul.c

# ---
# This is just a suggestion on how to generate timing plots...  Feel
# free to improve on these, so long as you show MFlop/s v. matrix size.

# GNU make: delete a target whose recipe failed, so that an aborted run
# does not leave behind a truncated "timing" (or plot) file that looks
# up to date.  Other make implementations see this as a rule with no
# recipe.
.DELETE_ON_ERROR:

timing: matmul
	./matmul > $@

# Both plots are driven by timing.gnuplot, so they are regenerated when
# either the timing data or the plot script changes.

timing.ps: timing timing.gnuplot
	echo "set term postscript; set output 'timing.ps';" \
		| gnuplot - timing.gnuplot

timing.ppm: timing timing.gnuplot
	echo "set term ppm color; set output 'timing.ppm';" \
		| gnuplot - timing.gnuplot

# ---
# clean, tar and web are commands, not files; declaring them phony keeps
# a stray file with one of these names from silently disabling the target.
.PHONY: clean tar web

clean:
	rm -f matmul *.o

# ---
# This is just to help me maintain the web page...

MAINFILES = Makefile basic_dgemm.c blocked_dgemm.c \
            basic_fdgemm.f f2c_dgemm.c wrap_dgemm.c \
            matmul.c timing.gnuplot

# The archive is a real file target: it is rebuilt only when one of the
# distributed files changes, and a missing source file is reported by
# make before tar runs.
tuning-matmul.tar: $(MAINFILES)
	tar -cf $@ $(MAINFILES)

tar: tuning-matmul.tar

# WEB267 must name the web tree (set it in the environment or on the
# command line).  Refuse to run when it is empty; otherwise the paths
# below would expand to /matmul/files.
web: tar
	@test -n "$(WEB267)" || { echo "web: WEB267 is not set" >&2; exit 1; }
	mkdir -p "$(WEB267)/matmul/files"
	cp tuning-matmul.tar $(MAINFILES) "$(WEB267)/matmul/files"