c++ - 主作业正常终止,但有 1 个进程返回了非零退出代码。根据用户指示,作业已中止
问题描述
#include <stdio.h>
#include <iostream>
using namespace std;
// Converts a zero-based, row-major linear element index into the (row, column)
// pair of a matrix that has n columns.
//   elem_num : linear index of the element
//   n        : number of columns per row (must be non-zero)
//   i, j     : outputs, receive the row and column index
void findCoords(int elem_num, int n, int& i, int& j){
    // Work with the one-based position so that a zero remainder marks the
    // last column of a row.
    const int position = elem_num + 1;
    const int quotient = position / n;
    const int remainder = position % n;

    // A zero remainder means the position closes the previous row.
    i = (remainder != 0) ? quotient : quotient - 1;
    j = (remainder + n - 1) % n;
}
// Serial simulation of the MPI matrix product C = A * B.
//
// The outer loop plays the role of the MPI ranks: simulated rank r computes the
// entries of C whose row-major linear index is r, r + size, r + 2*size, ...
// Each rank first stores its results (value plus coordinates) in a scratch
// buffer and the results are then copied into C, mirroring what rank 0 does
// with received messages in the real MPI program.
//
// Returns 0 after printing C row by row, values separated by a single space.
int main(int argc, char const *argv[])
{
    const int m=10,n=10,o=10,p=10;
    double A[m][n]={
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10}
    };
    double B[o][p]={
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10},
        {1,2,3,4,5,6,7,8,9,10}
    };
    double C[m][p];

    // One computed entry of C. The coordinates stay integers instead of being
    // squeezed into a double triple and converted back.
    struct Entry {
        double value;
        int row;
        int col;
    };

    const int size=10;      // number of simulated ranks
    const int total=m*p;    // number of entries in C

    // A rank can never own more than `total` entries, so a compile-time bound
    // is enough; this avoids a runtime-sized array, which is not standard C++.
    Entry entries[total];

    for (int rank=0;rank<size;rank++){
        // Number of linear indices rank, rank+size, ... that are below total.
        const int elements_number=((total-1)-rank)/size+1;

        int oneD_idx=rank;
        for (int a=0;a<elements_number;a++){
            int i, j;
            findCoords(oneD_idx, p, i, j);
            oneD_idx+=size;

            // Dot product of row i of A with column j of B.
            double s=0;
            for (int k=0;k<n;k++){
                s+=A[i][k]*B[k][j];
            }
            entries[a].value=s;
            entries[a].row=i;
            entries[a].col=j;
        }

        // "Gather" step: place this rank's results into the output matrix.
        for (int x=0;x<elements_number;x++){
            C[entries[x].row][entries[x].col]=entries[x].value;
        }
    }

    for (int i=0;i<m;i++){
        for (int j=0;j<p;j++){
            std::cout << C[i][j] << " ";
        }
        std::cout << '\n';
    }
    return 0;
}
以上代码通过循环模拟 MPI,用于矩阵乘法。思路是每个进程(rank)负责计算一组固定的坐标;进程数在 2 到 m*p 之间时都适用,其中 m 和 p 是输出矩阵的维度。这段代码工作正常。
但是,当我将下面的代码与 MPI 一起使用时,我不断收到第二个代码之后显示的错误。
#include <stdio.h>
#include <mpi.h>
#include <iostream>
using namespace std;
// Maps the row-major linear index oneD_idx of a matrix with n columns to its
// row index i and column index j (both written through the reference
// parameters). n must be non-zero.
void findCoords(int oneD_idx, int n, int& i, int& j){
    const int one_based = oneD_idx + 1;
    const int rem = one_based % n;

    // Start from the row before the quotient and step forward unless the
    // one-based position is an exact multiple of n (last column of a row).
    i = one_based / n - 1;
    if (rem != 0) {
        i += 1;
    }

    // Shift back to zero-based columns; adding n keeps the operand of %
    // non-negative when rem is 0.
    j = (rem - 1 + n) % n;
}
int main( int argc, char *argv[])
{
int rank, size;
/*const int m=10,n=10,o=10,p=10;
double A[m][n]={
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10}
};
double B[o][p]={
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10}
};*/
const int m=3,n=3,o=3,p=3;
double A[m][n]={
{1,2,3},
{1,2,3},
{1,2,3}
};
double B[o][p]={
{1,2,3},
{1,2,3},
{1,2,3}
};
if (n!=o)
{
printf("Can not multiply because of the wrong shape!");
return 0;
}
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
int oneD_idx=rank;
int i, j;
int elements_number=((m*p-1)-rank)/size+1;
double values_coords[elements_number][3];
for (int a=0;a<elements_number;a++){
findCoords(oneD_idx, p, i, j);
oneD_idx+=size;
double s=0;
for (int k=0;k<n;k++){
s+=A[i][k]*B[k][j];
}
values_coords[a][0]=s;
values_coords[a][1]=i;
values_coords[a][2]=j;
}
MPI_Status status1, status2;
if (rank==0)
{
double C[m][p];
for (int x=0;x<elements_number;x++){
i=values_coords[x][1];
j=values_coords[x][2];
double value=values_coords[x][0];
C[i][j]=value;
}
for (int r=1;r<size;r++){
int recv_elements_number;
MPI_Recv( &recv_elements_number , 1 , MPI_INT , r , 403 , MPI_COMM_WORLD , &status1);
int recv_values_coords[recv_elements_number][3];
MPI_Recv( &recv_values_coords , recv_elements_number*3 , MPI_DOUBLE , r , 404 , MPI_COMM_WORLD , &status2);
for (int x=0;x<recv_elements_number;x++){
i=recv_values_coords[x][1];
j=recv_values_coords[x][2];
double value=recv_values_coords[x][0];
C[i][j]=value;
}
}
for (int i=0;i<m;i++){
for (int j=0;j<p;j++){
cout << C[i][j]<<" ";
}
cout<<endl;
}
}
else
{
MPI_Send( &elements_number , 1 , MPI_INT , 0 , 403 , MPI_COMM_WORLD);
MPI_Send( &values_coords , elements_number*3 , MPI_DOUBLE , 0 , 404 , MPI_COMM_WORLD);
}
MPI_Finalize();
return 0;
}
Primary job terminated normally, but 1 process returned
a non-zero exit code. Per user-direction, the job has been aborted.
我使用 MPI_Send 和 MPI_Recv 来完成这项任务。通信部分似乎有问题,我很困惑。
解决方案
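正如 gilles-gouaillardet 在评论中所说,问题出在 int 与 double 的混用上:接收缓冲区 recv_values_coords 被声明为 int 数组,却按 MPI_DOUBLE 接收数据,写入的字节数超出了缓冲区大小。我应该使用 struct 来分别保存 double 类型的值和 int 类型的坐标。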
推荐阅读
- javascript - 如何使用 JS 下载带有 Textcontent 的文件
- r - 是否可以将非平稳 GEV 拟合到 R 中的一系列数据,以固定其中一个分布参数?
- sas - 使用子集数据集在 SAS 中生成自定义表
- python - 具有多重共线性问题的 statsmodel 中的聚类标准误差
- xml - 在 xslt 2.0 中动态创建 xpath
- c++ - 顶部带有搜索框的组合框
- c++ - 如何从内存转储中解码 Lua 5.3 调用堆栈?
- android - 设置root模拟器android
- java - 尝试在 RSelenium 中下载缓存的图片
- deep-learning - 深度 Q 学习代理找到解决方案然后再次发散