首页 > 解决方案 > 如何使用 openMP 提高 do 循环的性能?

问题描述

如下所示,此代码片段旨在计算两个数组,即data_realdata_imag​​ 。它们的形状都是 1024*10000。我想通过使用 OpenMP 来加速 DO 循环的计算。但我绝对是 openMP 的初学者。我不太清楚如何将循环与依赖迭代并行化,例如 temp2 = dx * temp1(2*i), temp3 = dx * temp1(2*i+1)下面的代码片段的语句。我的意思是,如果此代码段中存在竞争条件。

有没有办法加快 do 循环,如下所示?

Four1是用于执行 FFT 的子程序,sinc2是 sinc 函数的平方。

!Declare variables
Real, Allocatable, Dimension(:,:) :: data_complex, data_real, data_imag  
Real, Dimension(0:2*1024-1) :: temp1
Real :: temp2, temp3

!Allocate
Allocate( data_real(0:1024-1,0:10000-1),                    & 
         data_imag(0:1024-1,0:10000-1),                     &            
         data_complex(0:2*1024-1,0:10000-1), STAT=istat1 )

!Initialise 
data_real = 0.0
data_imag = 0.0
data_complex = 0.0
dk = 2*3.14159 / 75.0

!$OMP Parallel num_threads(24) private(i,j,k,temp1,temp2,temp3) shared( dk)
!$OMP Do schedule(dynamic,1) 
  Do j = 0, 10000-1 
    temp1(:) = data_complex(:,j)
    Call Four1(temp1, 1024, 1)                          ! Calling the subroutine 'Four1' to 
                                                        ! perform Fast Fourier Transform
    Do i = 0, 1023
      k = dk * Real(i)
      temp2 = dx * temp1(2*i)          
      temp3 = dx * temp1(2*i+1)        
      data_real(i,j) = temp2 / sinc2( dx * k / 2 )      ! sinc2(x) = sin(x) / x
      data_imag(i,j) = temp3 / sinc2( dx * k / 2 )          
    End Do 
  End Do
!$OMP End Do nowait
!$OMP End Parallel
! --------------------------------------------------------------- !
! ----------------------------------------------------------------!
Subroutine Four1(data_complex, nn, isign)

    Integer, Intent(in) :: nn
    Integer, Intent(in) :: isign
    Real, Intent(inout), Dimension(2*nn) :: data_complex
    Integer :: i, istep, j, m, mmax, n
    Real :: tempi, tempr
    Real(8) :: theta, wi, wpi, wpr, wr, wtemp
    ! ---------------------------------------------------------
    n=2*nn
    j=1
    Do i=1,n,2
      If(j>i) then
        tempr=data_complex(j)
        tempi=data_complex(j+1)
        data_complex(j)=data_complex(i)
        data_complex(j+1)=data_complex(i+1)
        data_complex(i)=tempr
        data_complex(i+1)=tempi
      endif
      m=n/2

      Do while ( (m>=2).and.(j>m) )
        j=j-m
        m=m/2
      End do

      j=j+m
    EndDo
    
    mmax=2
    Do while ( n > mmax )
       istep=2*mmax
       theta=(2*pi)/(isign*mmax)
       wpr=-2.0d0*sin(0.5d0*theta)**2
       wpi=sin(theta)
       wr=1.0d0
       wi=0.0d0
       Do m=1,mmax,2
         Do i=m,n,istep
           j=i+mmax
           tempr=Real(wr)*data_complex(j)-Real(wi)*data_complex(j+1)
           tempi=Real(wr)*data_complex(j+1)+Real(wi)*data_complex(j)
           data_complex(j)=data_complex(i)-tempr
           data_complex(j+1)=data_complex(i+1)-tempi
           data_complex(i)=data_complex(i)+tempr
           data_complex(i+1)=data_complex(i+1)+tempi
         End Do
         wtemp=wr
         wr=wr*wpr-wi*wpi+wr
         wi=wi*wpr+wtemp*wpi+wi
       End Do
       mmax=istep
     End Do

  End Subroutine Four1
  ! ------------------------------------------------------------ !
  ! ------------------------------------------------------------ !

  Real Function sinc2 ( x )
    !
    ! Define the square of sinc function
    !
    Real, Intent(in) :: x
    
    If ( abs(x) < 1.e-16 ) then
    ! be careful with comparison to real numbers because of rounding errors
    ! better: if (abs(x).lt.1.e-16) thensinc=1.
      sinc2 = 1.0
    Else 
      sinc2 = ( sin(x)/x )**2
    End If
  
  End Function sinc2

标签: performanceloopsfortranopenmpdo-loops

解决方案


推荐阅读