c++ - 如何通过带有多 for 循环的 openmp 加速我的代码
问题描述
现在,我正在尝试加速计算不同 ROI 区域的质心。她是我的原始代码:
#define _CRT_SECURE_NO_WARNINGS
#include <iostream>
#include <opencv2/core/core.hpp>
#include <opencv/cv.hpp>
#include <opencv2/highgui/highgui.hpp>
#include "omp.h"
#include <time.h>
#include <ctime>
#define File_SubAperture "SubAperture.txt"
#define Row_Subapaerture 750
Mat src;
Mat src_gray;
using namespace cv;
using namespace std;
int thresh = 0;
int max_thresh = 255;
Mat ROI;
int nn = 0;
float subApX[751] = { 0 };
float subApY[751] = { 0 };
float x = 0.0;
float y = 0.0;
float k = 0.0;
float f = 0.0;
int main()
{
clock_t startTime, endTime;
int SubAperture[Row_Subapaerture][Col_Subaperture];
double data1, data2, data3, data4;
int i;
Rect rect_subaperture[Row_Subapaerture];
FILE * fp_SubAperture;
FILE* px;
px = fopen("C:\\Users\\DELL\\Desktop\\AO acceleration\\center-coordinates-x.txt", "w+");
FILE* py;
py=fopen("C:\\Users\\DELL\\Desktop\\AO acceleration\\center-coordinates-y.txt", "w+");
//---read file Sub-aperture.txt---*
fp_SubAperture = fopen(File_SubAperture, "r");
if (fp_SubAperture == NULL)
{
perror("Couldn't open the file " File_SubAperture);
exit(1);
}
for (i = 0; fscanf(fp_SubAperture, "%lf%lf%lf%lf", &data1, &data2, &data3, &data4) != EOF; ++i)
{
SubAperture[i][0] = (int)data1;
SubAperture[i][1] = (int)data2;
SubAperture[i][2] = (int)data3;
SubAperture[i][3] = (int)data4;
}
fclose(fp_SubAperture);
//read image
float sumval = 0.0;
MatIterator_<uchar> it, end;
src = imread("WFS_29x29-circle.png", CV_LOAD_IMAGE_COLOR);
cvtColor(src, src_gray, CV_BGR2GRAY);
//imshow("gray image", src_gray);
//calculate the ROI area in advance
for (i = 0; i < 749; i++)
{
rect_subaperture[i].x = SubAperture[i][0];
rect_subaperture[i].y = SubAperture[i][1];
//rect_subaperture[i].width = SubAperture[i][2] - SubAperture[i][0];
//rect_subaperture[i].height = SubAperture[i][3] - SubAperture[i][1];
rect_subaperture[i].width = 4;
rect_subaperture[i].height = 4;
}
startTime = clock();// time start
omp_set_num_threads(2);
#pragma omp parallel private(ROI,it,i,k,f ) firstprivate(sumval,x,y) shared(src_gray,subApX,subApY,rect_subaperture)
#pragma omp for nowait schedule(guided) collapse(2)
for(i=0; i<749;i++)
{
ROI = src_gray(rect_subaperture[i]);
for (it = ROI.begin<uchar>(), end = ROI.end<uchar>(); it != end; it++)
{
((*it) > thresh) ? sumval += (*it) : NULL;
// printf("sum = %f\n", sumval);
}
for (int k = 0; k < ROI.cols; k++)
{
for (int f = 0; f < ROI.rows; f++)
{
float S = ROI.at<uchar>(f, k);
if (S < thresh)
S = 0;
x += (k * S) / sumval;
y += (f * S) / sumval;
}
}
subApX[i]= x + SubAperture[i][0];
subApY[i]= y + SubAperture[i][1];
fprintf(px, "\n%f", subApX[i]);
fprintf(py, "\n%f", subApY[i]);
}
endTime = clock();
printf("time = %f\n", (double)(endTime - startTime) / CLOCKS_PER_SEC);
return 0;
}
如您所见,我必须使用多重 for 循环来完成计算。但是,整个代码会卡住,没有任何错误,只完成了50%的计算,其余的无法计算。有谁知道我遇到了什么问题?以及如何加快我的代码。此代码的目标是计算一个图像的不同 ROI 区域中的质心。坐标将保存为 .TXT 文件,代码将计算所需时间。
解决方案
推荐阅读
- javascript - Javascript 变量在 obejct 中不起作用。但是上console.log
- python - matplotlib 中带有欧元符号的刻度
- c# - 如何使用扩展 WPF 工具包创建简单的饼图
- java - 总是/只添加一次到 Intent.ACTION_VIEW
- algorithm - 旅行推销员中的贪婪方法 VS 动态规划
- sass - 有没有根据窗口大小改变元素高度的好方法?
- json - Angular i18n json 文件中的自动比较检查
- regex - 如果它存在于正则表达式中,如何排除以“(”开头的子字符串?
- maven - Maven 发布插件:在项目的某些模块中运行特定的 prepareGoals 和 completionGoals
- reactjs - 我正在使用 react useState 钩子,但它不起作用