MPI Isend/Recv deadlock problem

Problem description

I am parallelizing an all-pairs shortest-paths algorithm with MPI in a pipelined fashion. I send messages with MPI_Isend, receive them with MPI_Recv, and synchronize the communication with MPI_Wait, but I am running into a deadlock (I assume it is a deadlock because the program stops responding). Here is the code; can anyone help me find the problem?

#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>

#define ROW 0
#define COL 1
#define INF 99

#define FILE_NOT_FOUND 404

inline int min(int a, int b) {
    return a < b ? a : b;
}

int floyd_all_pairs_sp_pipelined_2d(int n, int nlocal, int sqrt_p, int *a, MPI_Comm comm_2d, MPI_Comm comm_row, MPI_Comm comm_col) {
    int i, j, k;
    int my2drank, mycoords[2];
    int mycolrank, myrowrank;
    MPI_Status status;
    MPI_Request row_forward, col_forward;
    MPI_Request row_left, row_right, col_up, col_down;

    MPI_Comm_rank(comm_2d, &my2drank);
    MPI_Cart_coords(comm_2d, my2drank, 2, mycoords);

    MPI_Comm_rank(comm_col, &mycolrank);
    MPI_Comm_rank(comm_row, &myrowrank);
    
    int *krow = (int *)malloc(nlocal * sizeof(int));
    int *kcol = (int *)malloc(nlocal * sizeof(int));

    for (k = 0; k < n; k++) {
        if (k >= mycoords[ROW] * nlocal && k <= (mycoords[ROW] + 1) * nlocal) {
            for (i = 0; i < nlocal; i++) {
                krow[i] = a[(k % nlocal) * nlocal + i];
            }
            if (mycolrank > 0) {
                MPI_Isend(&krow[0], nlocal, MPI_INT, mycolrank - 1, k, comm_col, &col_up);
            }
            if (mycolrank < sqrt_p - 1) {
                MPI_Isend(&krow[0], nlocal, MPI_INT, mycolrank + 1, k, comm_col, &col_down);
            }
        }
        else {
            MPI_Recv(krow, nlocal, MPI_INT, MPI_ANY_SOURCE, k, comm_col, &status);
            if (status.MPI_SOURCE > mycolrank && mycolrank > 0) {
                MPI_Isend(krow, nlocal, MPI_INT, mycolrank - 1, k, comm_col, &row_forward);
            }
            else if (mycolrank < sqrt_p - 1) {
                MPI_Isend(krow, nlocal, MPI_INT, mycolrank + 1, k, comm_col, &row_forward);
            }
        }
        if (k >= mycoords[COL] * nlocal && k <= (mycoords[COL] + 1) * nlocal) {
            for (i = 0; i < nlocal; i++) {
                kcol[i] = a[i * nlocal + (k % nlocal)];
            }
            if (myrowrank > 0) {
                MPI_Isend(&kcol[0], nlocal, MPI_INT, myrowrank - 1, k, comm_row, &row_left);
            }
            if (myrowrank < sqrt_p - 1) {
                MPI_Isend(&kcol[0], nlocal, MPI_INT, myrowrank + 1, k, comm_row, &row_right);
            }
        }
        else {
            MPI_Recv(kcol, nlocal, MPI_INT, MPI_ANY_SOURCE, k, comm_row, &status);
            if (status.MPI_SOURCE > myrowrank && myrowrank > 0) {
                MPI_Isend(kcol, nlocal, MPI_INT, myrowrank - 1, k, comm_col, &col_forward);
            }
            else if (myrowrank < sqrt_p - 1) {
                MPI_Isend(kcol, nlocal, MPI_INT, myrowrank + 1, k, comm_col, &col_forward);
            }
        }

        for (i = 0; i < nlocal; i++) {
            for (j = 0; j < nlocal; j++) {
                a[i * nlocal + j] = min(a[i * nlocal + j], kcol[i] + krow[j]);
            }
        }
        if (k >= mycoords[ROW] * nlocal && k <= (mycoords[ROW] + 1) * nlocal) {
            if (mycolrank > 0) {
                MPI_Wait(&col_up, MPI_STATUSES_IGNORE);
            }
            if (mycolrank < sqrt_p - 1) {
                MPI_Wait(&col_down, MPI_STATUSES_IGNORE);
            }
        }
        else {
            if (mycolrank > 0 && mycolrank < sqrt_p - 1) {
                MPI_Wait(&row_forward, MPI_STATUSES_IGNORE);
            }
        }
        if (k >= mycoords[COL] * nlocal && k <= (mycoords[COL] + 1) * nlocal) {
            if (myrowrank > 0) {
                MPI_Wait(&row_left, MPI_STATUSES_IGNORE);
            }
            if (myrowrank < sqrt_p - 1) {
                MPI_Wait(&row_right, MPI_STATUSES_IGNORE);
            }
        }
        else {
            if (myrowrank > 0 && myrowrank < sqrt_p - 1) {
                MPI_Wait(&col_forward, MPI_STATUSES_IGNORE);
            }
        }
        //      printf("(%d, %d) finish iteration %d\n",  mycoords[0], mycoords[1], k);
    }
    free(krow);
    free(kcol);
    return 0;
}

int main(int argc, char *argv[]) {
    int a[16] = {
        0, 1, 99, 8,
        4, 0, 3, 1,
        8, 7, 0, 99,
        5, 4, 3, 0
    };

    int i, n, nlocal;
    int numprocs, dims[2], periods[2], keep_dims[2];
    int myrank, my2drank, mycoords[2];
    MPI_Comm comm_2d, comm_row, comm_col;
    MPI_Status status;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    MPI_Comm_rank(MPI_COMM_WORLD, &myrank);

    dims[ROW] = dims[COL] = sqrt(numprocs);

    periods[ROW] = periods[COL] = 1;
    MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 1, &comm_2d);

    MPI_Comm_rank(comm_2d, &my2drank);
    MPI_Cart_coords(comm_2d, my2drank, 2, mycoords);

    keep_dims[ROW] = 0;
    keep_dims[COL] = 1;
    MPI_Cart_sub(comm_2d, keep_dims, &comm_row);

    keep_dims[ROW] = 1;
    keep_dims[COL] = 0;
    MPI_Cart_sub(comm_2d, keep_dims, &comm_col);

    n = 16;
    nlocal = n / dims[ROW];

    if (my2drank == 3) {
        printf("before pipled calculation:\n");
        for (i = 0; i < nlocal; i++) {
            for (int j = 0; j < nlocal; j++) {
                printf("%d ", a[i * nlocal + j]);
            }
            printf("\n");
        }
    }
    double start = MPI_Wtime();
    floyd_all_pairs_sp_pipelined_2d(n, nlocal, dims[ROW], a, comm_2d, comm_row, comm_col);
    double stop = MPI_Wtime();
    //printf("[%d] Completed in %1.3f seconds\n", my2drank, stop - start);

    MPI_Comm_free(&comm_2d);
    MPI_Finalize();
    return 0;
}

Tags: c, mpi, deadlock

Solution
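
The hang is not a timing accident; there is a hard communicator mismatch in the kcol exchange. The receive is posted on comm_row:

    MPI_Recv(kcol, nlocal, MPI_INT, MPI_ANY_SOURCE, k, comm_row, &status);

but both forwarding sends in the same branch pass comm_col:

    MPI_Isend(kcol, nlocal, MPI_INT, myrowrank - 1, k, comm_col, &col_forward);

A message sent on one communicator can never match a receive posted on another, so any process that depends on a forwarded copy of kcol (i.e., is two or more hops away from the process column that owns global column k) blocks in MPI_Recv forever. The krow forwarding uses comm_col consistently, which is why only the kcol stage hangs; with 16 processes (sqrt_p = 4) this already happens at k = 0.

Two more bugs will surface once that is fixed:

1. The ownership tests are off by one: k <= (mycoords[ROW] + 1) * nlocal must use a strict k < (mycoords[ROW] + 1) * nlocal (and likewise for the COL test). As written, whenever k is a multiple of nlocal, two adjacent process rows both claim global row k; the spurious owner broadcasts the wrong local row (k % nlocal wraps to 0) and never posts the MPI_Recv that its neighbour's forwarded message needs to match.

2. The forwarding logic conflates the direction choice with the boundary check. When status.MPI_SOURCE > mycolrank but mycolrank == 0, control falls through to the else-if and sends the row straight back to the process it came from, which never posts a matching receive. Choose the direction first, then test whether a neighbour exists on that side.

A minor point: MPI_Wait takes MPI_STATUS_IGNORE; MPI_STATUSES_IGNORE is the plural form meant for MPI_Waitall.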


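Here is a sketch of the corrected column-broadcast step for krow, untested and keeping your variable names; the kcol step is symmetric, except that its two forwards must pass comm_row instead of comm_col:

/* Does this process row own global row k?  Note the strict '<'. */
if (k >= mycoords[ROW] * nlocal && k < (mycoords[ROW] + 1) * nlocal) {
    for (i = 0; i < nlocal; i++) {
        krow[i] = a[(k % nlocal) * nlocal + i];
    }
    /* Owner: start the pipeline in both directions along the column. */
    if (mycolrank > 0) {
        MPI_Isend(krow, nlocal, MPI_INT, mycolrank - 1, k, comm_col, &col_up);
    }
    if (mycolrank < sqrt_p - 1) {
        MPI_Isend(krow, nlocal, MPI_INT, mycolrank + 1, k, comm_col, &col_down);
    }
}
else {
    MPI_Recv(krow, nlocal, MPI_INT, MPI_ANY_SOURCE, k, comm_col, &status);
    if (status.MPI_SOURCE > mycolrank) {
        /* Came from below: pass it on upwards, if anyone is there. */
        if (mycolrank > 0) {
            MPI_Isend(krow, nlocal, MPI_INT, mycolrank - 1, k, comm_col, &row_forward);
        }
    }
    else {
        /* Came from above: pass it on downwards, if anyone is there. */
        if (mycolrank < sqrt_p - 1) {
            MPI_Isend(krow, nlocal, MPI_INT, mycolrank + 1, k, comm_col, &row_forward);
        }
    }
}

With the direction test split this way, the two edge ranks never issue a forward, which is exactly what your existing wait condition (mycolrank > 0 && mycolrank < sqrt_p - 1) assumes, so the MPI_Wait block can stay as it is.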