c - 关于pthread和多线程的问题
问题描述
所以我创建了一个程序,它顺序计算矩阵乘法,然后记录时间,然后使用在命令行中输入的任意数量的 pthread 计算矩阵乘法numberOfThreads
。但是,无论我输入多少线程,它仍然每次都给我相同的时间。我目前使用的是 i7 Macbook,所以我不确定这是否是添加更多线程不能优化计算的原因,或者我只是没有正确的程序。
继承人的代码:
/*Program to generate two square 2D arrays of random doubles and
time their multiplication.
Program utlizies pthreads to efficiently perform matrix Multiplication
Compile by: gcc -o mmult -O3 mmultHW6.c -lpthread
Run by: ./mmult 1000 2
*/
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <pthread.h>
#define TRUE 1
#define FALSE 0
#define BOOL int
typedef struct {
int threadId;
int start_row;
int end_row;
int start_col;
int end_col;
} BLOCK;
// function prototypes
double ** allocate2DArray(int rows, int columns);
void print2DArray(int rows, int columns, double ** array2D);
void generateRandom2DArray(int rows, int columns,
double min, double max, double ** random2DArray);
BOOL equal2DArrays(int rows, int columns, double ** array1, double ** array2,
double tolerance);
void matrixMultiplication(int rows1, int columns1, double ** array1,
int rows2, int columns2, double ** array2,
double ** product);
void matrixMultiplicationAlt(int rows1, int columns1, double ** array1,
int rows2, int columns2, double ** array2,
double ** product);
void * threadMMult(void * rank);
int numberOfThreads;
double ** A;
double ** B;
double ** C;
double ** C_alt;
int rows, columns;
int main(int argc, char ** argv) {
long i, startTime, endTime,seqTime, paralellTime;
BLOCK * blocksOfWork;
int errorCode;
double tolerence;
pthread_t * threadHandles;
if (argc !=3) {
printf("Usage: %s <# of rows><# of Threads>\n", argv[0]);
exit(-1);
} // end if
sscanf(argv[1], "%d", &rows);
sscanf(argv[1], "%d", &numberOfThreads);
columns = rows;
// seed the random number generator
srand( time(NULL) );
A = allocate2DArray(rows, columns);
B = allocate2DArray(rows, columns);
C = allocate2DArray(rows, columns);
C_alt = allocate2DArray(rows, columns);
generateRandom2DArray(rows, columns, -1.0, +1.0, A);
generateRandom2DArray(rows, columns, -1.0, +1.0, B);
printf("after initializing matrices\n");
time(&startTime);
matrixMultiplicationAlt(rows, columns, A, rows, columns, B, C_alt);
time(&endTime);
seqTime = endTime-startTime;
printf("Matrix Multiplication Alt. time = %ld\n",seqTime);
time(&startTime);
threadHandles = (pthread_t *) malloc(numberOfThreads*sizeof(pthread_t));
blocksOfWork = (BLOCK *) malloc(numberOfThreads*sizeof(BLOCK));
for(i=0; i < numberOfThreads; i++){
blocksOfWork[i].threadId = i;
blocksOfWork[i].start_row = i * rows/numberOfThreads;
if (i == numberOfThreads -1){
blocksOfWork[i].end_row = rows - 1;
}
else{
blocksOfWork[i].end_row = (i+1)*rows/numberOfThreads -1;
}
}
for (i=0; i < numberOfThreads; i++) {
if (errorCode = pthread_create(&threadHandles[i], NULL, threadMMult,
&blocksOfWork[i]) != 0) {
printf("pthread %d failed to be created with error code %d\n", i, errorCode);
} // end if
} // end for
for (i=0; i < numberOfThreads; i++) {
if (errorCode = pthread_join(threadHandles[i], (void **) NULL) != 0) {
printf("pthread %d failed to be joined with error code %d\n", i, errorCode);
} // end if
} // end for
time(&endTime);
paralellTime = endTime-startTime;
printf("Parallel Matrix Multiplication time = %ld\n",paralellTime);
if (equal2DArrays(rows, columns, C, C_alt, 0.000001)) {
printf("Arrays match with tolerance of %.000001f\n", 0.000001);
} else {
printf("Arrays DON'T match with tolerance of %.000001f\n", 0.000001);
} // end if
return 0;
} // end main
void * threadMMult(void * arg){
BLOCK * block = (BLOCK *) arg;
int threadId = block->threadId;
int startRow = block->start_row;
int endRow = block->end_row;
int i, j, k, sum;
for(i=startRow; i<=endRow;i++){
for(j = 0; j<rows;j++){
C[i][j] = 0;
for(k=0; k<rows ; k++){
C[i][j] += A[i][k]*B[k][j];
//printf("%lu - C[%d][%d] += A[%d][%d] * B[%d][%d]\n",
//pthread_self(), i,j,i,k,k,j);
}
}
return 0;
}
}
//C[i][j] += A[i][k] * B_transpose[j][k];
/*******************************************************************
* Function matrixMultiplicationAlt passed two matrices and returns
* their product.
********************************************************************/
void matrixMultiplicationAlt(int rows1, int columns1, double ** array1,
int rows2, int columns2, double ** array2,
double ** product) {
int i, j, k;
double ** array2_transpose;
if (columns1 != rows2) {
printf("Matrices cannot be multiplied -- incompatible dimensions!\n");
exit(-1);
} // end if
// Transposes array2
array2_transpose = allocate2DArray(columns2, rows2);
for (i=0; i < rows2; i++) {
for (j=0; j < columns2; j++) {
array2_transpose[j][i] = array2[i][j];
} /* end for (j */
} /* end for (i */
// Matrix Multiplication uses array1 and array2_transpose
for (i=0; i < rows1; i++) {
for (j=0; j < columns2; j++) {
C_alt[i][j] = 0.0;
for (k=0; k < columns1; k++) {
C_alt[i][j] += array1[i][k]*array2_transpose[j][k];
} /* end for (k */
} /* end for (j */
} /* end for (i */
} // end matrixMultiplicationAlt
/*******************************************************************
* Function allocate2DArray dynamically allocates a 2D array of
* size rows x columns, and returns it.
********************************************************************/
double ** allocate2DArray(int rows, int columns) {
double ** local2DArray;
int r;
local2DArray = (double **) malloc(sizeof(double *)*rows);
for (r=0; r < rows; r++) {
local2DArray[r] = (double *) malloc(sizeof(double)*columns);
} // end for
return local2DArray;
} // end allocate2DArray
/*******************************************************************
* Function generateRandom2DArray is passed the # rows, the # columns,
* min. value, max. value, and returns random2DArray containing
* randomly generated doubles.
********************************************************************/
void generateRandom2DArray(int rows, int columns,
double min, double max, double ** random2DArray) {
int r, c;
double range, div;
for (r = 0; r < rows; r++) {
for (c = 0; c < columns; c++) {
range = max - min;
div = RAND_MAX / range;
random2DArray[r][c] = min + (rand() / div);
} // end for (c...
} // end for (r...
} // end generateRandom2DArray
/*******************************************************************
* Function print2DArray is passed the # rows, # columns, and the
* array2D. It prints the 2D array to the screen.
********************************************************************/
void print2DArray(int rows, int columns, double ** array2D) {
int r, c;
for(r = 0; r < rows; r++) {
for (c = 0; c < columns; c++) {
printf("%10.5lf", array2D[r][c]);
} // end for (c...
printf("\n");
} // end for(r...
} // end print2DArray
/*******************************************************************
* Function equal2DArrays is passed the # rows, # columns, two
* array2Ds, and tolerance. It returns TRUE if corresponding array
* elements are equal within the specified tolerance; otherwise it
* returns FALSE.
********************************************************************/
BOOL equal2DArrays(int rows, int columns, double ** array1, double ** array2,
double tolerance) {
int r, c;
for(r = 0; r < rows; r++) {
for (c = 0; c < columns; c++) {
if (fabs(array1[r][c] - array2[r][c]) > tolerance) {
return FALSE;
} // end if
} // end for (c...
} // end for(r...
return TRUE;
} // end equal2DArray
解决方案
这有点可疑:
sscanf(argv[1], "%d", &rows);
sscanf(argv[1], "%d", &numberOfThreads);
我不确定您在学习 C 方面的进展情况,但是“man 3 getopt”应该显示出一种将运行时参数传递到程序中的更好方法,而不是意外地重新使用 argv[1]....
您创建的第二个问题是使用两种不同的机制来执行矩阵乘法;一种用于顺序版本,另一种用于并行版本。从逻辑上讲,您应该能够拥有一个完全参数化的函数来执行操作,然后 M 个并行线程使用不同的数据调用它的事实对函数本身是透明的。既然你没有,你留下了一个问号:是否因为你的矩阵乘法函数之一不能正常工作而缺少比例?
查看您的代码,我不太相信它会这样做;你已经利用了一系列机制来实现这一点。核心是:
Mult(double *A, int Ar, int Ac, double *B, int Br, int Bc, double C, int Cr, int Cc) { /乘 C = A * B */
}
无论是 N 个线程汇聚在一个解决方案上,还是一个线程在解决方案中跋涉,它们都应该能够执行相同的代码。
推荐阅读
- spring - Spring Integration:手动通道处理
- c++ - 删除数组中的多个元素
- c# - JsonConvert.DeserializeObject 中的“无法对空引用执行运行时绑定”
( 回复 ) - javascript - 使用 JavaScript 或 jQuery 从父/父动态获取第一个标题标签
- python - 如何在 Postman 中上传文件?
- boost-geometry - 需要一个带有地理坐标和最近查询的 boost-geometry rtree 示例
- vue.js - 带有 Axios 的 Vue.js 使用来自其他方法的数据
- python - 蟒蛇逃生室计时器
- ionic-framework - 在离子可选上显示分隔符
- python-3.x - 安装成功后无法导入 Bert_Text