c - MPI Isend Recv 死锁问题
问题描述
我正在使用 MPI 以流水线(pipeline)方式并行化全源最短路径(all-pairs shortest path)算法。我用 MPI_Isend 发送消息、用 MPI_Recv 接收消息,并用 MPI_Wait 同步通信,但程序出现了死锁(程序一直没有响应,所以我推测是死锁)。代码如下,谁能帮我找出问题所在?
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#define ROW 0
#define COL 1
#define INF 99
#define FILE_NOT_FOUND 404
/*
 * Return the smaller of two ints.
 *
 * Declared `static inline` rather than plain `inline`: in C99/C11 a plain
 * `inline` definition provides no external definition, so any call the
 * compiler chooses not to inline (e.g. at -O0) fails at link time.
 */
static inline int min(int a, int b) {
    return a < b ? a : b;
}
int floyd_all_pairs_sp_pipelined_2d(int n, int nlocal, int sqrt_p, int *a, MPI_Comm comm_2d, MPI_Comm comm_row, MPI_Comm comm_col) {
int i, j, k;
int my2drank, mycoords[2];
int mycolrank, myrowrank;
MPI_Status status;
MPI_Request row_forward, col_forward;
MPI_Request row_left, row_right, col_up, col_down;
MPI_Comm_rank(comm_2d, &my2drank);
MPI_Cart_coords(comm_2d, my2drank, 2, mycoords);
MPI_Comm_rank(comm_col, &mycolrank);
MPI_Comm_rank(comm_row, &myrowrank);
int *krow = (int *)malloc(nlocal * sizeof(int));
int *kcol = (int *)malloc(nlocal * sizeof(int));
for (k = 0; k < n; k++) {
if (k >= mycoords[ROW] * nlocal && k <= (mycoords[ROW] + 1) * nlocal) {
for (i = 0; i < nlocal; i++) {
krow[i] = a[(k % nlocal) * nlocal + i];
}
if (mycolrank > 0) {
MPI_Isend(&krow[0], nlocal, MPI_INT, mycolrank - 1, k, comm_col, &col_up);
}
if (mycolrank < sqrt_p - 1) {
MPI_Isend(&krow[0], nlocal, MPI_INT, mycolrank + 1, k, comm_col, &col_down);
}
}
else {
MPI_Recv(krow, nlocal, MPI_INT, MPI_ANY_SOURCE, k, comm_col, &status);
if (status.MPI_SOURCE > mycolrank && mycolrank > 0) {
MPI_Isend(krow, nlocal, MPI_INT, mycolrank - 1, k, comm_col, &row_forward);
}
else if (mycolrank < sqrt_p - 1) {
MPI_Isend(krow, nlocal, MPI_INT, mycolrank + 1, k, comm_col, &row_forward);
}
}
if (k >= mycoords[COL] * nlocal && k <= (mycoords[COL] + 1) * nlocal) {
for (i = 0; i < nlocal; i++) {
kcol[i] = a[i * nlocal + (k % nlocal)];
}
if (myrowrank > 0) {
MPI_Isend(&kcol[0], nlocal, MPI_INT, myrowrank - 1, k, comm_row, &row_left);
}
if (myrowrank < sqrt_p - 1) {
MPI_Isend(&kcol[0], nlocal, MPI_INT, myrowrank + 1, k, comm_row, &row_right);
}
}
else {
MPI_Recv(kcol, nlocal, MPI_INT, MPI_ANY_SOURCE, k, comm_row, &status);
if (status.MPI_SOURCE > myrowrank && myrowrank > 0) {
MPI_Isend(kcol, nlocal, MPI_INT, myrowrank - 1, k, comm_col, &col_forward);
}
else if (myrowrank < sqrt_p - 1) {
MPI_Isend(kcol, nlocal, MPI_INT, myrowrank + 1, k, comm_col, &col_forward);
}
}
for (i = 0; i < nlocal; i++) {
for (j = 0; j < nlocal; j++) {
a[i * nlocal + j] = min(a[i * nlocal + j], kcol[i] + krow[j]);
}
}
if (k >= mycoords[ROW] * nlocal && k <= (mycoords[ROW] + 1) * nlocal) {
if (mycolrank > 0) {
MPI_Wait(&col_up, MPI_STATUSES_IGNORE);
}
if (mycolrank < sqrt_p - 1) {
MPI_Wait(&col_down, MPI_STATUSES_IGNORE);
}
}
else {
if (mycolrank > 0 && mycolrank < sqrt_p - 1) {
MPI_Wait(&row_forward, MPI_STATUSES_IGNORE);
}
}
if (k >= mycoords[COL] * nlocal && k <= (mycoords[COL] + 1) * nlocal) {
if (myrowrank > 0) {
MPI_Wait(&row_left, MPI_STATUSES_IGNORE);
}
if (myrowrank < sqrt_p - 1) {
MPI_Wait(&row_right, MPI_STATUSES_IGNORE);
}
}
else {
if (myrowrank > 0 && myrowrank < sqrt_p - 1) {
MPI_Wait(&col_forward, MPI_STATUSES_IGNORE);
}
}
// printf("(%d, %d) finish iteration %d\n", mycoords[0], mycoords[1], k);
}
free(krow);
free(kcol);
return 0;
}
/*
 * Driver: builds a square process grid, hands each rank its own block of a
 * fixed 4x4 distance matrix and runs the pipelined Floyd kernel on it.
 *
 * The process count must be a perfect square whose side divides the matrix
 * dimension (1 or 4 processes for the 4x4 matrix below); otherwise the job
 * is aborted with a message on stderr.
 */
int main(int argc, char *argv[]) {
    /* Global distance matrix, row-major, 4 x 4. */
    int a[16] = {
        0, 1, 99, 8,
        4, 0, 3, 1,
        8, 7, 0, 99,
        5, 4, 3, 0
    };
    /* Matrix dimension, not the element count: a[] holds n * n = 16 ints. */
    const int n = 4;
    int numprocs, dims[2], periods[2], keep_dims[2];
    int my2drank, mycoords[2];
    MPI_Comm comm_2d, comm_row, comm_col;

    MPI_Init(&argc, &argv);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);

    /* Round before truncating so an inexact sqrt() cannot lose a unit. */
    const int side = (int)(sqrt((double)numprocs) + 0.5);
    if (side <= 0 || side * side != numprocs || n % side != 0) {
        fprintf(stderr,
                "need a square number of processes whose side divides %d (got %d)\n",
                n, numprocs);
        MPI_Abort(MPI_COMM_WORLD, 1);
        return 1;
    }

    dims[ROW] = dims[COL] = side;
    periods[ROW] = periods[COL] = 1;
    MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 1, &comm_2d);
    MPI_Comm_rank(comm_2d, &my2drank);
    MPI_Cart_coords(comm_2d, my2drank, 2, mycoords);

    /* comm_row keeps the COL dimension: the processes of one grid row. */
    keep_dims[ROW] = 0;
    keep_dims[COL] = 1;
    MPI_Cart_sub(comm_2d, keep_dims, &comm_row);

    /* comm_col keeps the ROW dimension: the processes of one grid column. */
    keep_dims[ROW] = 1;
    keep_dims[COL] = 0;
    MPI_Cart_sub(comm_2d, keep_dims, &comm_col);

    const int nlocal = n / side;

    /* Copy this rank's nlocal x nlocal block out of the global matrix. */
    int *local = malloc((size_t)nlocal * (size_t)nlocal * sizeof *local);
    if (local == NULL) {
        fprintf(stderr, "out of memory\n");
        MPI_Abort(MPI_COMM_WORLD, 1);
        return 1;
    }
    for (int i = 0; i < nlocal; i++) {
        for (int j = 0; j < nlocal; j++) {
            local[i * nlocal + j] =
                a[(mycoords[ROW] * nlocal + i) * n + (mycoords[COL] * nlocal + j)];
        }
    }

    if (my2drank == 3) {
        printf("before pipled calculation:\n");
        for (int i = 0; i < nlocal; i++) {
            for (int j = 0; j < nlocal; j++) {
                printf("%d ", local[i * nlocal + j]);
            }
            printf("\n");
        }
    }

    double start = MPI_Wtime();
    int rc = floyd_all_pairs_sp_pipelined_2d(n, nlocal, side, local, comm_2d, comm_row, comm_col);
    double stop = MPI_Wtime();
    /* Timing is collected but not reported; print (stop - start) here if needed. */
    (void)start;
    (void)stop;

    if (rc != 0) {
        fprintf(stderr, "[%d] floyd_all_pairs_sp_pipelined_2d failed\n", my2drank);
        MPI_Abort(MPI_COMM_WORLD, 1);
    }

    free(local);
    MPI_Comm_free(&comm_row);
    MPI_Comm_free(&comm_col);
    MPI_Comm_free(&comm_2d);
    MPI_Finalize();
    return 0;
}
解决方案
推荐阅读
- vb.net - 如何通过添加参数来搜索确切的控件类型而不是仅搜索文本框来更新此功能?
- c# - 调度程序不更新 ListBox 控件
- python - Python Pandas - 处理具有嵌套字典(json)值的列
- python - 检查时间控制并在 listrCtrl 中更改颜色
- django - 部署到亚马逊弹性豆茎后如何运行 collectstatic 脚本?
- message-queue - 消息总线 vs. 服务总线 vs. 事件中心 vs. 事件网格
- python - 在“从模块名导入变量”的情况下,可变性会改变命名空间的行为吗?
- leaflet - 将 choropleth 图层添加到 Leaflet 地图
- c# - 迭代 JArray 和 JArray.Children() 之间的区别
- html - 为什么这个 div 不使用 css 移动?