首页 > 解决方案 > 当循环的顺序很重要时,如何优化 Fortran 中的嵌套 for 循环?

问题描述

我有一个相对简单的子例程,它在数组中存储数字 2 的出现次数。我有两个二维数组 geno_pro(2000, 50000) 和 geno_par(100000, 50000)。我需要将 geno_pro 中的每一行与 geno_par 中的每一行进行比较,并计算它有多少个数字 2。

  subroutine ptoh(oh_ped, oh_cnt, m, npro, npar)
    use omp_lib
    implicit none
    integer :: par,pro,npar,npro,m,i,i_M,i_F,total_snp,effective
    integer :: geno_par(npar,m),geno_pro(npar,m),oh_po(npar),sex(npar),sex_par(npar)
    integer :: progeny_snp(m),parent_snp(m),oh_cnt(npro,4)
    character*20 :: parent(npar),progeny(npro),bird_par(npar),oh_ped(npro,3)
    character(len=m) :: snp
    ! Read parents' id, gender and genotypes
    open(unit=100, file="parents.txt", status='old', action='read')
    !$omp parallel do private(i,snp) shared(geno_par,parent,sex,npar,m)
    do i=1,npar
      read(100,*) parent(i), sex(i), snp
      read(snp,'(*(i1))') geno_par(i,1:m)
    end do
    !$omp end parallel do
    close(unit=100)
    ! Read progeny's id and genotypes 
    open(unit=200, file="progeny.txt", status='old', action='read')
    !$omp parallel do private(i,snp) shared(geno_pro,progeny,npro,m)
    do i=1,npro
      read(200,*) progeny(i), snp
      read(snp,'(*(i1))') geno_pro(i,1:m)
    end do
    !$omp end parallel do
    close(unit=200)
    ! Count OH
    !$omp parallel do private(pro,par,progeny_snp,parent_snp,i_M,i_F) &
    !$omp private(bird_par,sex_par,oh_po,total_snp,effective) &
    !$omp shared(m,npro,npar,geno_pro,geno_par,parent,sex,oh_ped,oh_cnt)
    do pro=1,npro
      progeny_snp = geno_pro(pro,1:m)
      total_snp = count(progeny_snp .ge. 0)
      effective = count(progeny_snp .le. 2)
      do par=1,npar
        parent_snp = geno_par(par,1:m)
        bird_par(par) = parent(par)
        sex_par(par) = sex(par)
        oh_po(par) = count(abs(progeny_snp - parent_snp) .eq. 2)
      end do
      i_M = minloc(oh_po, dim=1, mask=(sex_par .eq. 1))
      i_F = minloc(oh_po, dim=1, mask=(sex_par .eq. 2))
      oh_ped(pro,1:3) = (/progeny(pro),bird_par(i_M),bird_par(i_F)/)
      oh_cnt(pro,1:4) = (/total_snp,effective,oh_po(i_M),oh_po(i_F)/)
    end do
    !$omp end parallel do
  end subroutine ptoh

这是我正在努力优化的部分。

    do pro=1,npro
      progeny_snp = geno_pro(pro,1:m)
      total_snp = count(progeny_snp .ge. 0)
      effective = count(progeny_snp .le. 2)
      do par=1,npar
        parent_snp = geno_par(par,1:m)
        bird_par(par) = parent(par)
        sex_par(par) = sex(par)
        oh_po(par) = count(abs(progeny_snp - parent_snp) .eq. 2)
      end do
      i_M = minloc(oh_po, dim=1, mask=(sex_par .eq. 1))
      i_F = minloc(oh_po, dim=1, mask=(sex_par .eq. 2))
      oh_ped(pro,1:3) = (/progeny(pro),bird_par(i_M),bird_par(i_F)/)
      oh_cnt(pro,1:4) = (/total_snp,effective,oh_po(i_M),oh_po(i_F)/)
    end do

即使使用 OpenMP,它仍然很慢。我想知道是否有更好的方法来加速它。有谁知道如何改进它?谢谢你。

标签: fortranopenmpgfortran

解决方案


推荐阅读