首页 > 解决方案 > 如何使用 OpenMP 加速包含多重 for 循环的代码

问题描述

现在,我正在尝试加速不同 ROI 区域质心的计算。这是我的原始代码:

#define _CRT_SECURE_NO_WARNINGS
#include <iostream>
#include <opencv2/core/core.hpp>
#include <opencv/cv.hpp>
#include <opencv2/highgui/highgui.hpp>
#include "omp.h"
#include <time.h>
#include <ctime>

// Text file holding the sub-aperture table: four numeric fields per row.
#define File_SubAperture        "SubAperture.txt"
// Maximum number of sub-aperture rows that the tables below can hold.
#define Row_Subapaerture             750
// Number of fields stored per sub-aperture row. main() sizes its table with
// this macro, which was previously never defined; the guard keeps any
// definition supplied elsewhere (e.g. on the compiler command line).
#ifndef Col_Subaperture
#define Col_Subaperture              4
#endif

// The using-directives must precede the first unqualified use of cv::Mat,
// otherwise the declarations below do not compile.
using namespace cv;
using namespace std;

// Input image as loaded from disk, and its single-channel grayscale version.
Mat src;
Mat src_gray;
// Intensity threshold applied to pixels when accumulating centroids.
int thresh = 0;
int max_thresh = 255;
// File-scope scratch matrix for a region of interest.
Mat ROI;
int nn = 0;
// Centroid coordinates (image space), one slot per sub-aperture plus one spare.
float subApX[751] = { 0 };
float subApY[751] = { 0 };
// File-scope scratch floats.
float x = 0.0;
float y = 0.0;
float k = 0.0;
float f = 0.0;
int main() 
{   
    clock_t startTime, endTime;
    int     SubAperture[Row_Subapaerture][Col_Subaperture];
    

    double data1, data2, data3, data4;
    int i;
    
    Rect rect_subaperture[Row_Subapaerture];
    FILE * fp_SubAperture;
    FILE* px;
    px = fopen("C:\\Users\\DELL\\Desktop\\AO acceleration\\center-coordinates-x.txt", "w+");
    FILE* py;
    py=fopen("C:\\Users\\DELL\\Desktop\\AO acceleration\\center-coordinates-y.txt", "w+");




    //---read file Sub-aperture.txt---*
    fp_SubAperture = fopen(File_SubAperture, "r");
    if (fp_SubAperture == NULL)
    {
        perror("Couldn't open the file " File_SubAperture);
        exit(1);
    }
    for (i = 0; fscanf(fp_SubAperture, "%lf%lf%lf%lf", &data1, &data2, &data3, &data4) != EOF; ++i)
    {
        SubAperture[i][0] = (int)data1;
        SubAperture[i][1] = (int)data2;
        SubAperture[i][2] = (int)data3;
        SubAperture[i][3] = (int)data4;
    }

    fclose(fp_SubAperture);

    //read image
    float sumval = 0.0;
    MatIterator_<uchar> it, end;
    src = imread("WFS_29x29-circle.png", CV_LOAD_IMAGE_COLOR);
    cvtColor(src, src_gray, CV_BGR2GRAY);
    //imshow("gray image", src_gray);




    //calculate the ROI area in advance
    for (i = 0; i < 749; i++) 
    {
        rect_subaperture[i].x = SubAperture[i][0];
        rect_subaperture[i].y = SubAperture[i][1];
        //rect_subaperture[i].width = SubAperture[i][2] - SubAperture[i][0];
        //rect_subaperture[i].height = SubAperture[i][3] - SubAperture[i][1];
        rect_subaperture[i].width = 4;
        rect_subaperture[i].height = 4;
    }
    
    
    startTime = clock();// time start
    omp_set_num_threads(2);
#pragma omp parallel private(ROI,it,i,k,f ) firstprivate(sumval,x,y) shared(src_gray,subApX,subApY,rect_subaperture)
#pragma omp for nowait schedule(guided) collapse(2)
 for(i=0; i<749;i++)
 {
     ROI = src_gray(rect_subaperture[i]);

     for (it = ROI.begin<uchar>(), end = ROI.end<uchar>(); it != end; it++)
     {
         ((*it) > thresh) ? sumval += (*it) : NULL;
        // printf("sum = %f\n", sumval);
     }
 
     for (int k = 0; k < ROI.cols; k++)
     {
         for (int f = 0; f < ROI.rows; f++)
         {
             float S = ROI.at<uchar>(f, k);
             if (S < thresh)
                 S = 0;
            
                 x += (k * S) / sumval;
                 y += (f * S) / sumval;
         }
     }
           subApX[i]= x + SubAperture[i][0];
         subApY[i]= y + SubAperture[i][1];
        
        
         fprintf(px, "\n%f", subApX[i]);
         fprintf(py, "\n%f", subApY[i]); 
}
     endTime = clock();
     printf("time = %f\n", (double)(endTime - startTime) / CLOCKS_PER_SEC);
     return 0;

}

如您所见,我必须使用多重 for 循环来完成计算。但是,程序会在没有任何报错的情况下卡住:只完成了 50% 的计算,其余部分始终无法完成。有谁知道问题出在哪里,以及该如何加快我的代码?这段代码的目标是计算一幅图像中不同 ROI 区域的质心,将坐标保存为 .txt 文件,并统计计算所需的时间。

标签: c++, multithreading, openmp

解决方案


推荐阅读