Section 8.2: Time Matrix Multiply#

Adapted from: “Guide to Fortran 2008 Programming” by Walter S. Brainerd (Springer 2015)

Program to demonstrate the performance improvement of using matmul over do loops.#

program time_matrix_multiply

    ! Compare times of the matmul intrinsic cs DO loops

    implicit none
    integer, parameter :: n = 10
    real, dimension (n, n) :: a, b, c1, c2
    character(len=8) :: date
    real :: start_time_1, stop_time_1, start_time_2, stop_time_2
    real :: total_time_1, total_time_2
    integer :: i, j, k
    character(len=*), parameter :: form = "(t2, a, f0.10, a)"

    ! Get date to print on report

    call date_and_time(date = date)

    print *, "Timing report dated: " // date(1:4) &
        // "-" // date(5:6) // "-" // date(7:8)

    call random_seed()
    call random_number(a)
    call random_number(b)
    call cpu_time(start_time_1)
   
    ! Lines below added for effect
    ! The matrices of random values will be printed to the screen
    write (*, "(a)") "Matrix A"
    write (*, "(10f10.3)") (a(i,:), i=1,n)

    print *
    write (*, "(a)") "Matrix B"
    write (*, "(10f10.3)") (b(i,:), i=1,n)

    c1 = 0

    do k = 1, n
        do j = 1, n
            do i = 1, n
                c1(i, j) = c1(i, j) + a(i, k) * b(k, j)
            end do
        end do
    end do

    call cpu_time(stop_time_1)

    total_time_1 = stop_time_1 - start_time_1

    print *

    write (*, "(a)") "Matrix C1 is AxB using loops."
    write (*, "(10f10.3)") (c1(i,:), i=1,n)

    call cpu_time(start_time_2)
    c2 = matmul(a, b)
    call cpu_time(stop_time_2)

    total_time_2 = stop_time_2 - start_time_2

    print *

    write (*, "(a)") "Matrix C2 is AxB using matmul."
    write (*, "(10f10.3)") (c1(i,:), i=1,n)

    print *

    write (*, form) "Time of Do loop version is: ", total_time_1, " seconds."
    write (*, form) "Time of matmul version is: ", total_time_2, " seconds."

    print *

    if (any(abs(c1-c2) > 1.0e-4)) then
        write (*,  "(a)") "There are significantly different values between the matrices."
    else
        write (*,  "(a)") "The results are approximately the same."
    end if

    print *
    write (*, "(a, f10.3, a)") "The speedup ratio is: ", total_time_1/total_time_2, "x"

end program time_matrix_multiply

The above program is compiled and run using Fortran Package Manager (fpm):

import os
root_dir = os.getcwd()
code_dir = root_dir + "/" + "Fortran_Code/Section_8_2_Time_Matrix_Multiply"
os.chdir(code_dir)
build_status = os.system("fpm build 2>/dev/null")
exec_status = os.system("fpm run 2>/dev/null")
 Timing report dated: 2024-06-02
Matrix A
     0.235     0.112     0.197     0.145     0.587     0.706     0.106     0.577     0.815     0.028
     0.999     0.716     0.038     0.728     0.143     0.166     0.193     0.672     0.331     0.247
     0.137     0.575     0.044     0.382     0.090     0.915     0.415     0.735     0.785     0.789
     0.758     0.498     0.551     0.407     0.887     0.983     0.674     0.271     0.043     0.135
     0.796     0.495     0.448     0.271     0.096     0.163     0.905     0.714     0.641     0.506
     0.887     0.141     0.652     0.760     0.056     0.588     0.896     0.961     0.225     0.765
     0.139     0.665     0.887     0.921     0.675     0.691     0.739     0.894     0.493     0.654
     0.110     0.586     0.111     0.642     0.553     0.318     0.412     0.202     0.756     0.279
     0.163     0.460     0.343     0.522     0.287     0.373     0.153     0.367     0.435     0.322
     0.389     0.939     0.667     0.322     0.989     0.323     0.111     0.332     0.070     0.963

Matrix B
     0.599     0.532     0.334     0.581     0.217     0.429     0.470     0.928     0.664     0.723
     0.094     0.311     0.146     0.673     0.047     0.322     0.925     0.157     0.756     0.903
     0.614     0.778     0.168     0.920     0.548     0.314     0.397     0.519     0.946     0.855
     0.750     0.229     0.365     0.587     0.519     0.375     0.072     0.469     0.632     0.118
     0.753     0.354     0.192     0.278     0.938     0.608     0.171     0.908     0.182     0.191
     0.457     0.394     0.230     0.744     0.121     0.802     0.063     0.968     0.731     0.690
     0.740     0.112     0.829     0.710     0.054     0.640     0.524     0.858     0.483     0.602
     0.454     0.241     0.152     0.915     0.968     0.956     0.554     0.582     0.180     0.518
     0.162     0.337     0.229     0.384     0.330     0.475     0.810     0.297     0.843     0.552
     0.752     0.886     0.272     0.005     0.678     0.868     0.409     0.709     0.418     0.631

Matrix C1 is AxB using loops.
     1.639     1.283     0.826     2.084     1.728     2.207     1.494     2.310     1.995     1.885
     2.106     1.580     1.181     2.569     1.740     2.301     2.076     2.521     2.444     2.432
     2.296     1.953     1.355     2.710     2.002     3.215     2.297     2.963     2.818     2.911
     2.992     2.056     1.609     3.244     2.058     2.967     1.871     3.859     3.004     3.019
     2.627     2.024     1.712     3.060     1.986     2.938     2.652     3.161     2.952     3.156
     3.537     2.534     1.998     3.715     2.646     3.684     2.404     4.096     3.347     3.476
     3.729     2.735     1.956     4.122     3.253     3.990     2.806     4.170     3.850     3.765
     1.961     1.391     1.176     2.096     1.659     2.160     1.852     2.255     2.349     2.049
     1.722     1.402     0.881     1.997     1.555     1.929     1.515     1.970     2.076     1.925
     2.832     2.537     1.181     2.590     2.631     2.916     2.220     3.207     2.793     3.033

Matrix C2 is AxB using matmul.
     1.639     1.283     0.826     2.084     1.728     2.207     1.494     2.310     1.995     1.885
     2.106     1.580     1.181     2.569     1.740     2.301     2.076     2.521     2.444     2.432
     2.296     1.953     1.355     2.710     2.002     3.215     2.297     2.963     2.818     2.911
     2.992     2.056     1.609     3.244     2.058     2.967     1.871     3.859     3.004     3.019
     2.627     2.024     1.712     3.060     1.986     2.938     2.652     3.161     2.952     3.156
     3.537     2.534     1.998     3.715     2.646     3.684     2.404     4.096     3.347     3.476
     3.729     2.735     1.956     4.122     3.253     3.990     2.806     4.170     3.850     3.765
     1.961     1.391     1.176     2.096     1.659     2.160     1.852     2.255     2.349     2.049
     1.722     1.402     0.881     1.997     1.555     1.929     1.515     1.970     2.076     1.925
     2.832     2.537     1.181     2.590     2.631     2.916     2.220     3.207     2.793     3.033

 Time of Do loop version is: .0000880000 seconds.
 Time of matmul version is: .0001129999 seconds.

The results are approximately the same.

The speedup ratio is:      0.779x