c++ - 由 opencl 处理返回的 C++ 中的表面模糊算法 (0xC0000005)
问题描述
当我通过 opencl 编写 Surface Blur 算法时,我遇到了这个问题。
我想我没有超过长度(我的图片是512*512)
我尝试测试 BlurredProcessDown 函数是可以的,但是当我想添加到 BD ,GD,RD 时。
它返回“进程返回 -1073741819 (0xC0000005)”
我不知道为什么
这是代码:(Face.cl)
int checkrange(int value){
if (value > 255){
return 255;
}else if (value < 0){
return 0;
}else{
return value;
}
}
double BlurredProcessDown(int xi, int x, int T){
double w = 1 - (abs_diff(xi,x) / (2.5 * T));
if (w < 0){
return 0;
}else{
return w;
}
}
__kernel void Face(__global uchar* sR, __global uchar* sG, __global uchar* sB,__global uchar* tR, __global uchar* tG, __global uchar* tB)
{
int id = get_global_id(0);
int offset=7;
double RD = 0, GD = 0, BD = 0;
double RU = 0, GU = 0, BU = 0;
double BPD=0;
int index;
if(id/512<offset||id%512<offset||id/512>511-offset||id%512>511-offset)
//I think this code is already filter id which cause index exceed
{
tR[id]=sR[id];
tG[id]=sG[id];
tB[id]=sB[id];
}else{
for(int i=-offset;i!=offset;++i){
for(int j=-offset;j!=offset;++j){
index=id+i*512+j;
BPD = BlurredProcessDown((int)sR[index], (int)sR[id], 15);
RD =RD+ BPD;
RU =RU+ (BPD * (double)sR[index]);
BPD = BlurredProcessDown((int)sG[index], (int)sG[id], 15);
GD =GD+ BPD;
GU =GU+ (BPD * (double)sG[index]);
BPD = BlurredProcessDown((int)sB[index], (int)sB[id], 15);
BD =BD+ BPD;
BU =BU+ (BPD * (double)sB[index]);
}
}
tR[id] = (unsigned)checkrange((int)(RU / RD));
tG[id] = (unsigned)checkrange((int)(GU / GD));
tB[id] = (unsigned)checkrange((int)(BU / BD));
}
}
主机代码:
typedef struct IMG_obj {
unsigned char* InputSrc, * InputData; // Input Image: 2048x2048
unsigned char* sR, * sG, * sB,*sY; // Input data
unsigned short IMGW, IMGH; // Input Size
} IMGObj, * pImgObj;
int OCL_Blurred_Surface(IMGObj *IObj){
int width = IMG_W, height = IMG_H;//512*512
int x, y, index=0, off=0;
// read RGB Data
for (y = 0; y != height; y++)
{
for (x = 0; x != width; x++)
{
IObj->sR[off] = IObj->InputSrc[index + 2];
IObj->sG[off] = IObj->InputSrc[index + 1];
IObj->sB[off] = IObj->InputSrc[index + 0];
off++;
index+=3;
}
}
//
//===========OpenCL============pre process
cl_uint status;
cl_platform_id platform;
status=clGetPlatformIDs(1,&platform,NULL);
cl_device_id device;
clGetDeviceIDs(platform,CL_DEVICE_TYPE_GPU,1,&device,NULL);
cl_context context=clCreateContext(NULL,1,&device,NULL,NULL,NULL);
cl_command_queue queue =clCreateCommandQueue(context,device,CL_QUEUE_PROFILING_ENABLE,NULL);
//===========OpenCL============pre process
//read only rgb data
cl_mem clbufR=clCreateBuffer(context,
CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,
IMG_Size*sizeof(cl_uchar),IObj->sR,NULL);
cl_mem clbufG=clCreateBuffer(context,
CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,
IMG_Size*sizeof(cl_uchar),IObj->sG,NULL);
cl_mem clbufB=clCreateBuffer(context,
CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,
IMG_Size*sizeof(cl_uchar),IObj->sB,NULL);
//read only rgb data
//write only rgb data
cl_mem clbufWOR=clCreateBuffer(context,
CL_MEM_WRITE_ONLY,
IMG_Size*sizeof(cl_uchar),NULL,NULL);
cl_mem clbufWOG=clCreateBuffer(context,
CL_MEM_WRITE_ONLY,
IMG_Size*sizeof(cl_uchar),NULL,NULL);
cl_mem clbufWOB=clCreateBuffer(context,
CL_MEM_WRITE_ONLY,
IMG_Size*sizeof(cl_uchar),NULL,NULL);
//write only rgb data
const char * filename ="Face.cl";
std::string sourceStr;
status =convertToString(filename,sourceStr);
const char * source=sourceStr.c_str();
size_t sourceSize[]={strlen(source)};
//創建程序對象
cl_program program=clCreateProgramWithSource(context,
1,
&source,
sourceSize,
NULL);
//編譯程序對象
status =clBuildProgram(program,1,&device,NULL,NULL,NULL);
if(status!=0){
printf("clBuild failed:%d\n",status);
char tbuf[0x10000];
clGetProgramBuildInfo(program,device,CL_PROGRAM_BUILD_LOG,0x10000,tbuf,NULL);
printf("%s\n",tbuf);
return -1;
}else{
// printf("clBuild sucess\n");
}
//創建kernel對象
cl_kernel kernel =clCreateKernel(program,"Face",NULL);
//set arg
cl_int clnum=IMG_Size;
//source rgb
clSetKernelArg(kernel,0,sizeof(cl_mem),(void*)&clbufR);
clSetKernelArg(kernel,1,sizeof(cl_mem),(void*)&clbufG);
clSetKernelArg(kernel,2,sizeof(cl_mem),(void*)&clbufB);
//target rgb
clSetKernelArg(kernel,3,sizeof(cl_mem),(void*)&clbufWOR);
clSetKernelArg(kernel,4,sizeof(cl_mem),(void*)&clbufWOG);
clSetKernelArg(kernel,5,sizeof(cl_mem),(void*)&clbufWOB);
//printf("set arg sucess\n");
//run
double Start_Time, End_Time, All_Time;
Start_Time = clock();
cl_event ev;
size_t global_work_size=IMG_Size;
clEnqueueNDRangeKernel(queue,
kernel,
1,
NULL,
&global_work_size,
NULL,0,NULL,&ev);
clFinish(queue);
End_Time= clock();
All_Time = (End_Time-Start_Time) / CLOCKS_PER_SEC;
printf("GPU Time:%10.5fsec\n", All_Time);
//Copy to host
cl_uchar *tR,*tG,*tB;
tR=(cl_uchar*)clEnqueueMapBuffer(queue,
clbufWOR,CL_TRUE,CL_MAP_READ,0,
IMG_Size*sizeof(cl_uchar),
0,NULL,NULL,NULL);
tG=(cl_uchar*)clEnqueueMapBuffer(queue,
clbufWOG,CL_TRUE,CL_MAP_READ,0,
IMG_Size*sizeof(cl_uchar),
0,NULL,NULL,NULL);
tB=(cl_uchar*)clEnqueueMapBuffer(queue,
clbufWOB,CL_TRUE,CL_MAP_READ,0,
IMG_Size*sizeof(cl_uchar),
0,NULL,NULL,NULL);
//save
SSaveIMGOCL(tR,tG,tB,"OCL.bmp");
return 0;
}
感谢阅读QAQ
解决方案
推荐阅读
- javascript - 替换斜线之间的单词,但跳过第一次出现的单词
- python - 边界框的语义分割
- java - java中的价格提取
- chromium - 在 linux 上使用 update-alternatives 将 Chromium 设置为默认 Web 浏览器
- android - 如何处理不同大小的 RecyclerView 项目
- node.js - 用 socket io 连接两个 docker 容器
- apache - Htaccess 规则被覆盖
- spring-boot - spring-cloud-stream kafka 如何使用循环策略生成消息?
- npm - 重命名一个 npm 模块,对名称进行细微更改 => “包名太相似”
- python - Numpy 通过套接字串接导致解码错误