Segmentation fault when passing data with MPI windows (MPI_Put)

Problem description

I am trying to figure out how to work with matrices using MPI.

I have a 3x6 matrix filled with zeros, and I run the code with 3 processes. Rank 0 is the master; rank 1 writes ones into columns 1-3 of the first row, and rank 2 writes twos into columns 4-6 of the second row.

I pass these filled parts to the master process (rank 0) and get the correct result, but after that a memory error is printed to the console.

I cannot figure out what I am doing wrong. Can you tell me what my mistake is?

program test

    Use mpi

    Implicit None

    integer :: process_Rank, size_Of_Cluster, ierror = 0, win, size_s, n = 6
    integer :: i, j
    integer :: start, target_count = 9
    integer :: mtx(3,6)
    integer(kind = MPI_ADDRESS_KIND) :: nbytes = 4

    !input matrix
    do i = 1,3
        do j =1,6
            mtx(i,j) = 0
        end do
    end do

    Call mpi_sizeof( mtx, size_s, ierror ) !Get the size of a matrix element
    call MPI_INIT(ierror)
    call MPI_COMM_SIZE(MPI_COMM_WORLD, size_Of_Cluster, ierror)
    call MPI_COMM_RANK(MPI_COMM_WORLD, process_Rank, ierror)

    !create windows
    if(process_Rank == 0) then
        call MPI_WIN_CREATE(mtx, size_s * 6 * 3 * nbytes, 1, MPI_INFO_NULL, MPI_COMM_WORLD, win, ierror)
    else
        call MPI_WIN_CREATE(mtx, size_s * 6 * 3 * nbytes, 1, MPI_INFO_NULL, MPI_COMM_WORLD, win, ierror)
    end if

    CALL MPI_Win_fence(0, win, ierror)

    if(process_Rank == 1) then
        !fill 3 columns of the first row with ones
        start = 0
        do i = 0,3
            mtx(process_Rank,i+start) = process_Rank
        end do

        CALL MPI_PUT(mtx, size_s * 3 * 6, MPI_INTEGER, 0, start * nbytes, target_count, MPI_INTEGER, win, ierror)

        !print mtx
        print *, process_Rank, ' put = '
        do i = 1,3
            print *, ''
            do j = 1,3
                write(*,fmt='(g0)', advance = 'no') mtx(i,j)
                write(*,fmt='(g0)', advance = 'no') '  '
            end do
        end do
    end if

    CALL MPI_Win_fence(0, win, ierror)

    if(process_Rank == 2) then
        !fill the last 3 columns of the second row with twos
        start = 3
        do i = 1,3
            mtx(process_Rank,i+start) = process_Rank
        end do

        CALL MPI_PUT(mtx(1:3,4:6), size_s * 3 * 6, MPI_INTEGER, 0, 3 * 3 * nbytes, target_count, MPI_INTEGER, win, ierror)

        !print mtx
        print *, process_Rank, ' put = '
        do i = 1,3
            print *, ''
            do j = 4,6 
                write(*,fmt='(g0)', advance = 'no') mtx(i,j)
                write(*,fmt='(g0)', advance = 'no') '  '
            end do
        end do
    end if

    CALL MPI_Win_fence(0, win, ierror)

    ! print result
    if(process_Rank == 0) then
        print *, 'result = '
        do i = 1,3
            print *, ''
            do j = 1,6 
                write(*,fmt='(g0)', advance = 'no') mtx(i,j)
                write(*,fmt='(g0)', advance = 'no') '  '
            end do
        end do
    end if

    CALL MPI_Win_fence(0, win, ierror)
    CALL MPI_WIN_FREE(win, ierror)
    call MPI_FINALIZE(ierror)

end program test

Console output:

1 put =
1 1 1
0 0 0
0 0 0 

2 put =
0 0 0
2 2 2
0 0 0 

result =
1 1 1 0 0 0
0 0 0 2 2 2
0 0 0 0 0 0

Program received signal SIGSEGV: Segmentation fault - invalid memory reference.

Backtrace for this error:
#0 0x7fd4447bcd01 in ???
#1 0x7fd4447bbed5 in ???
#2 0x7fd4445f020f in ???
--------------------------------------------------------------------------
Primary job terminated normally, but 1 process returned
a non-zero exit code. Per user-direction, the job has been aborted.
--------------------------------------------------------------------------
--------------------------------------------------------------------------
mpirun noticed that process rank 0 with PID 0 on node alm-VirtualBox exited on signal 11 (Segmentation fault).
--------------------------------------------------------------------------

Tags: parallel-processing, fortran, mpi

Solution


If you compile with -fcheck=all, as Ian Bush suggested in the first comment under your question, you get the cause of the error immediately, without waiting hours for feedback on the internet. I get:

At line 38 of file mpi_wins.f90 Fortran runtime error:
Index '0' of dimension 2 of array 'mtx' below lower bound of 1 


Error termination. Backtrace:
#0  0x7f7ed3e75640 in ???
#1  0x7f7ed3e76185 in ???
#2  0x7f7ed3e7652a in ???
#3  0x4010e4 in test
        at /home/lada/f/testy/stackoverflow/mpi_wins.f90:38
#4  0x401e78 in main
        at /home/lada/f/testy/stackoverflow/mpi_wins.f90:3

You are indexing the array mtx with the process rank, but the array is declared with a lower bound of 1:

integer :: mtx(3,6)

However, MPI ranks start from 0, not from 1. With start = 0, rank 1's loop do i = 0,3 writes to mtx(1,0), whose column index is below the lower bound, which is exactly the out-of-bounds access the runtime check reports.
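As a minimal sketch of a fix (keeping the rank-to-row mapping described in the question, which is an assumption about the intended layout), start the column indices at 1 so that mtx(1,0) is never referenced; rank 2's offsets were already in bounds:

if (process_Rank == 1) then
    ! row 1, columns 1..3 -- loop starts at 1, not 0
    do i = 1, 3
        mtx(1, i) = 1
    end do
else if (process_Rank == 2) then
    ! row 2, columns 4..6
    do i = 4, 6
        mtx(2, i) = 2
    end do
end if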

Also note that, thanks to the -g compiler option, the backtrace now contains much more useful source locations.
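For reference, a typical build-and-run invocation with a gfortran-based MPI installation (the wrapper name mpifort is an assumption; some installations call it mpif90):

mpifort -g -fcheck=all mpi_wins.f90 -o mpi_wins
mpirun -np 3 ./mpi_wins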

