首页 > 解决方案 > 使用高斯消元法求解对称线性方程 ax=b

问题描述

我是并行编程的新手,目前正在优化适用于电磁计算的代码。分析程序是如何工作的,我意识到花在执行上的时间中有 85% 是用于求解线性方程。我研究了一点openmp,但我不知道如何并行化这样的嵌套循环。任何想法?先感谢您。按照下面的代码

    Subroutine GaussEqSolver_Sym(n,ma,a,b,ep,kwji)
!------------------------------------------------------------------
!  Solve sysmmetric linear equation ax=b by using Gauss elimination.
!  If kwji=1, no solution;if kwji=0,has solution
!  Input--n,ma,a(ma,n),b(n),ep,
!  Output--b,kwji
!------------------------------------------------------------------
   

        implicit real*8 (a-h,o-z)
           dimension a(ma,n),b(n),m(n+1)
           do 10 i=1,n
    10     m(i)=i
           do 120 k=1,n
              p=0.0
              do 20 i=k,n
                 do 20 j=k,n
                    if(dabs(a(i,j)).gt.dabs(p)) then
                       p=a(i,j)
                       io=i
                       jo=j
                    endif
    20        continue
              if(dabs(p)-ep) 30,30,35
    30        kwji=1
              return
    35        continue
              if(jo.eq.k) go to 45
              do 40 i=1,n
                 t=a(i,jo)
                 a(i,jo)=a(i,k)
                 a(i,k)=t
    40        continue
              j=m(k)
              m(k)=m(jo)
              m(jo)=j
    45        if(io.eq.k) go to 55
              do 50 j=k,n
                 t=a(io,j)
                 a(io,j)=a(k,j)
                 a(k,j)=t
    50        continue
              t=b(io)
              b(io)=b(k)
              b(k)=t
    55        p=1./p
              in=n-1
              if(k.eq.n) go to 65
              do 60 j=k,in
    60        a(k,j+1)=a(k,j+1)*p
    65        b(k)=b(k)*p
              if(k.eq.n) go to 120
              do 80 i=k,in
                 do 70 j=k,in
    70              a(i+1,j+1)=a(i+1,j+1)-a(i+1,k)*a(k,j+1)
    80              b(i+1)=b(i+1)-a(i+1,k)*b(k)
    120       continue
              do 130 i1=2,n
                 i=n+1-i1
                 do 130 j=i,in
    130       b(i)=b(i)-a(i,j+1)*b(j+1)
              do 140 k=1,n
                 i=m(k)
    140       a(1,i)=b(k)
              do 150 k=1,n
    150       b(k)=a(1,k)
              kwji=0
        return
        end

标签: parallel-processingfortranopenmp

解决方案


如果您对性能感兴趣,您应该使用 LAPACK。为了说明这一点,我编写了一个简单的驱动程序,它比较了调用 DSYSV 时提供的代码的速度,DSYSV 是 LAPACK 例程,用于求解对称“双精度”矩阵的一组线性方程。代码和结果如下,但总而言之,LAPACK 比 Fortran 快 3.3 倍到 725 倍不等。请注意,这可能不是一个优化的 LAPACK 库,它是我在笔记本电脑上安装的 Mint Linux 附带的。无论如何,下面的详细信息

ian@eris:~/work/stack$ cat solve.f90
    Subroutine GaussEqSolver_Sym(n,ma,a,b,ep,kwji)
!------------------------------------------------------------------
!  Solve sysmmetric linear equation ax=b by using Gauss elimination.
!  If kwji=1, no solution;if kwji=0,has solution
!  Input--n,ma,a(ma,n),b(n),ep,
!  Output--b,kwji
!------------------------------------------------------------------
   

        implicit real*8 (a-h,o-z)
           dimension a(ma,n),b(n),m(n+1)
           do 10 i=1,n
    10     m(i)=i
           do 120 k=1,n
              p=0.0
              do 20 i=k,n
                 do 20 j=k,n
                    if(dabs(a(i,j)).gt.dabs(p)) then
                       p=a(i,j)
                       io=i
                       jo=j
                    endif
    20        continue
              if(dabs(p)-ep) 30,30,35
    30        kwji=1
              return
    35        continue
              if(jo.eq.k) go to 45
              do 40 i=1,n
                 t=a(i,jo)
                 a(i,jo)=a(i,k)
                 a(i,k)=t
    40        continue
              j=m(k)
              m(k)=m(jo)
              m(jo)=j
    45        if(io.eq.k) go to 55
              do 50 j=k,n
                 t=a(io,j)
                 a(io,j)=a(k,j)
                 a(k,j)=t
    50        continue
              t=b(io)
              b(io)=b(k)
              b(k)=t
    55        p=1./p
              in=n-1
              if(k.eq.n) go to 65
              do 60 j=k,in
    60        a(k,j+1)=a(k,j+1)*p
    65        b(k)=b(k)*p
              if(k.eq.n) go to 120
              do 80 i=k,in
                 do 70 j=k,in
    70              a(i+1,j+1)=a(i+1,j+1)-a(i+1,k)*a(k,j+1)
    80              b(i+1)=b(i+1)-a(i+1,k)*b(k)
    120       continue
              do 130 i1=2,n
                 i=n+1-i1
                 do 130 j=i,in
    130       b(i)=b(i)-a(i,j+1)*b(j+1)
              do 140 k=1,n
                 i=m(k)
    140       a(1,i)=b(k)
              do 150 k=1,n
    150       b(k)=a(1,k)
              kwji=0
        return
     end 

Program solve_eqns

  Use, Intrinsic :: iso_fortran_env, Only :  wp => real64, li => int64

  Implicit None

  Real( wp ), Dimension( :, : ), Allocatable :: a, a_copy

  Real( wp ), Dimension( : ), Allocatable :: b
  Real( wp ), Dimension( : ), Allocatable :: x_lap, x_for
  Real( wp ), Dimension( : ), Allocatable :: work

  Real( wp ) :: time_lap, time_for
  
  Integer, Dimension( : ), Allocatable :: pivots

  Integer :: i
  Integer :: n, nb = 64 ! hack value for nb - should use ilaenv
  Integer :: error

  Integer( li ) :: start, finish, rate

  Write( *, * ) 'n ?'
  Read ( *, * )  n

  Allocate( a( 1:n, 1:n ) )
  Allocate( b( 1:n ) )
  Allocate( pivots( 1:n ) )

  ! Set up matrix
  Call Random_number( a )
  a = a - 0.5_wp
  ! Make A symmetric
  a = 0.5_wp * ( a + Transpose( a ) )
  ! Add n to diagonal of A to avoid any nasty condition numbers
  Do i = 1, n
     a( i, i ) = a( i, i ) + n
  End Do
  ! And keep a back up of A
  a_copy = a

  ! RHS
  Call Random_number( b )

  ! Solve with LAPACK
  x_lap = b
  Allocate( work( 1:n * nb ) )
  Call system_clock( start, rate )
  Call dsysv( 'U', n, 1, a, Size( a, dim = 1 ), pivots, &
       x_lap, Size( x_lap, Dim = 1 ), work, Size( work ), error )
  Call system_clock( finish, rate )
  time_lap = Real( finish - start, Kind( time_lap ) ) / rate
  ! Restore A
  a = a_copy
  Write( *, * ) 'Errors for LAPACK', error, Maxval( Abs( Matmul( a, x_lap ) - b ) )
  Write( *, * ) 'Time for LAPACK', time_lap

  ! Solve with Fortran  
  x_for = b
  Call system_clock( start, rate )
  Call GaussEqSolver_Sym( Size( a, Dim = 2 ), Size( a, Dim = 1 ), a, x_for, Epsilon( a ), error )
  Call system_clock( finish, rate )
  time_for = Real( finish - start, Kind( time_for ) ) / rate
  ! Restore A
  a = a_copy
  Write( *, * ) 'Errors for Fortran', error, Maxval( Abs( Matmul( a, x_for ) - b ) )
  Write( *, * ) 'Time_For for Fortran', time_for

  Write( *, * ) 'Max difference in solutions', Maxval( Abs( x_lap - x_for ) )

  Write( *, * ) 'LAPACK is ', time_for / time_lap, ' times quicker than the Fortran'
  
       
End Program solve_eqns
ian@eris:~/work/stack$ gfortran --version
GNU Fortran (Ubuntu 7.4.0-1ubuntu1~18.04.1) 7.4.0
Copyright (C) 2017 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

ian@eris:~/work/stack$ gfortran -O3 solve.f90  -llapack
ian@eris:~/work/stack$ ./a.out
 n ?
100
 Errors for LAPACK           0   4.4408920985006262E-016
 Time for LAPACK   1.5952670000000000E-003
 Errors for Fortran           0   9.9920072216264089E-016
 Time_For for Fortran   5.3095140000000004E-003
 Max difference in solutions   8.6736173798840355E-018
 LAPACK is    3.3282917530419676       times quicker than the Fortran
ian@eris:~/work/stack$ ./a.out
 n ?
1000
 Errors for LAPACK           0   1.3322676295501878E-015
 Time for LAPACK   3.9014976000000000E-002
 Errors for Fortran           0   4.9960036108132044E-015
 Time_For for Fortran   1.9314730620000000     
 Max difference in solutions   4.7704895589362195E-018
 LAPACK is    49.505940026722044       times quicker than the Fortran
ian@eris:~/work/stack$ ./a.out
 n ?
5000 
 Errors for LAPACK           0   4.3298697960381105E-015
 Time for LAPACK   1.2611959250000000     
 Errors for Fortran           0   1.3322676295501878E-014
 Time_For for Fortran   913.76959534100001     
 Max difference in solutions   2.7647155398380363E-018
 LAPACK is    724.52628273517462       times quicker than the Fortran

推荐阅读