Section 8.2: Time Matrix Multiply#
Adapted from: “Guide to Fortran 2008 Programming” by Walter S. Brainerd (Springer 2015)
Program to demonstrate the performance improvement of using matmul over do loops.#
program time_matrix_multiply
! Compare times of the matmul intrinsic cs DO loops
implicit none
integer, parameter :: n = 10
real, dimension (n, n) :: a, b, c1, c2
character(len=8) :: date
real :: start_time_1, stop_time_1, start_time_2, stop_time_2
real :: total_time_1, total_time_2
integer :: i, j, k
character(len=*), parameter :: form = "(t2, a, f0.10, a)"
! Get date to print on report
call date_and_time(date = date)
print *, "Timing report dated: " // date(1:4) &
// "-" // date(5:6) // "-" // date(7:8)
call random_seed()
call random_number(a)
call random_number(b)
call cpu_time(start_time_1)
! Lines below added for effect
! The matrices of random values will be printed to the screen
write (*, "(a)") "Matrix A"
write (*, "(10f10.3)") (a(i,:), i=1,n)
print *
write (*, "(a)") "Matrix B"
write (*, "(10f10.3)") (b(i,:), i=1,n)
c1 = 0
do k = 1, n
do j = 1, n
do i = 1, n
c1(i, j) = c1(i, j) + a(i, k) * b(k, j)
end do
end do
end do
call cpu_time(stop_time_1)
total_time_1 = stop_time_1 - start_time_1
print *
write (*, "(a)") "Matrix C1 is AxB using loops."
write (*, "(10f10.3)") (c1(i,:), i=1,n)
call cpu_time(start_time_2)
c2 = matmul(a, b)
call cpu_time(stop_time_2)
total_time_2 = stop_time_2 - start_time_2
print *
write (*, "(a)") "Matrix C2 is AxB using matmul."
write (*, "(10f10.3)") (c1(i,:), i=1,n)
print *
write (*, form) "Time of Do loop version is: ", total_time_1, " seconds."
write (*, form) "Time of matmul version is: ", total_time_2, " seconds."
print *
if (any(abs(c1-c2) > 1.0e-4)) then
write (*, "(a)") "There are significantly different values between the matrices."
else
write (*, "(a)") "The results are approximately the same."
end if
print *
write (*, "(a, f10.3, a)") "The speedup ratio is: ", total_time_1/total_time_2, "x"
end program time_matrix_multiply
The above program is compiled and run using Fortran Package Manager (fpm):
import os
root_dir = os.getcwd()
code_dir = root_dir + "/" + "Fortran_Code/Section_8_2_Time_Matrix_Multiply"
os.chdir(code_dir)
build_status = os.system("fpm build 2>/dev/null")
exec_status = os.system("fpm run 2>/dev/null")
Timing report dated: 2024-06-02
Matrix A
0.235 0.112 0.197 0.145 0.587 0.706 0.106 0.577 0.815 0.028
0.999 0.716 0.038 0.728 0.143 0.166 0.193 0.672 0.331 0.247
0.137 0.575 0.044 0.382 0.090 0.915 0.415 0.735 0.785 0.789
0.758 0.498 0.551 0.407 0.887 0.983 0.674 0.271 0.043 0.135
0.796 0.495 0.448 0.271 0.096 0.163 0.905 0.714 0.641 0.506
0.887 0.141 0.652 0.760 0.056 0.588 0.896 0.961 0.225 0.765
0.139 0.665 0.887 0.921 0.675 0.691 0.739 0.894 0.493 0.654
0.110 0.586 0.111 0.642 0.553 0.318 0.412 0.202 0.756 0.279
0.163 0.460 0.343 0.522 0.287 0.373 0.153 0.367 0.435 0.322
0.389 0.939 0.667 0.322 0.989 0.323 0.111 0.332 0.070 0.963
Matrix B
0.599 0.532 0.334 0.581 0.217 0.429 0.470 0.928 0.664 0.723
0.094 0.311 0.146 0.673 0.047 0.322 0.925 0.157 0.756 0.903
0.614 0.778 0.168 0.920 0.548 0.314 0.397 0.519 0.946 0.855
0.750 0.229 0.365 0.587 0.519 0.375 0.072 0.469 0.632 0.118
0.753 0.354 0.192 0.278 0.938 0.608 0.171 0.908 0.182 0.191
0.457 0.394 0.230 0.744 0.121 0.802 0.063 0.968 0.731 0.690
0.740 0.112 0.829 0.710 0.054 0.640 0.524 0.858 0.483 0.602
0.454 0.241 0.152 0.915 0.968 0.956 0.554 0.582 0.180 0.518
0.162 0.337 0.229 0.384 0.330 0.475 0.810 0.297 0.843 0.552
0.752 0.886 0.272 0.005 0.678 0.868 0.409 0.709 0.418 0.631
Matrix C1 is AxB using loops.
1.639 1.283 0.826 2.084 1.728 2.207 1.494 2.310 1.995 1.885
2.106 1.580 1.181 2.569 1.740 2.301 2.076 2.521 2.444 2.432
2.296 1.953 1.355 2.710 2.002 3.215 2.297 2.963 2.818 2.911
2.992 2.056 1.609 3.244 2.058 2.967 1.871 3.859 3.004 3.019
2.627 2.024 1.712 3.060 1.986 2.938 2.652 3.161 2.952 3.156
3.537 2.534 1.998 3.715 2.646 3.684 2.404 4.096 3.347 3.476
3.729 2.735 1.956 4.122 3.253 3.990 2.806 4.170 3.850 3.765
1.961 1.391 1.176 2.096 1.659 2.160 1.852 2.255 2.349 2.049
1.722 1.402 0.881 1.997 1.555 1.929 1.515 1.970 2.076 1.925
2.832 2.537 1.181 2.590 2.631 2.916 2.220 3.207 2.793 3.033
Matrix C2 is AxB using matmul.
1.639 1.283 0.826 2.084 1.728 2.207 1.494 2.310 1.995 1.885
2.106 1.580 1.181 2.569 1.740 2.301 2.076 2.521 2.444 2.432
2.296 1.953 1.355 2.710 2.002 3.215 2.297 2.963 2.818 2.911
2.992 2.056 1.609 3.244 2.058 2.967 1.871 3.859 3.004 3.019
2.627 2.024 1.712 3.060 1.986 2.938 2.652 3.161 2.952 3.156
3.537 2.534 1.998 3.715 2.646 3.684 2.404 4.096 3.347 3.476
3.729 2.735 1.956 4.122 3.253 3.990 2.806 4.170 3.850 3.765
1.961 1.391 1.176 2.096 1.659 2.160 1.852 2.255 2.349 2.049
1.722 1.402 0.881 1.997 1.555 1.929 1.515 1.970 2.076 1.925
2.832 2.537 1.181 2.590 2.631 2.916 2.220 3.207 2.793 3.033
Time of Do loop version is: .0000880000 seconds.
Time of matmul version is: .0001129999 seconds.
The results are approximately the same.
The speedup ratio is: 0.779x