c++ - 使用 cpp 编译 cuda 代码时出现 "multiple definition of"(多重定义)错误
问题描述
您好,我正在尝试构建一个自定义 Python 包,其中包括 CUDA 文件以及相应的 cpp 文件和头文件。我的目录结构看起来像这样
抱歉,下面的问题有点长。我不太确定错误的确切位置,因此贴出了大部分相关内容。
头文件如下 -
utils.h
#pragma once
#include <ATen/cuda/CUDAContext.h>
#include <torch/extension.h>
// Tensor precondition checks.
//
// Each macro expands to a single `do { ... } while (0)` statement so that it
// can be followed by a semicolon anywhere a statement is allowed. The macro
// argument is parenthesized so that an arbitrary expression (not only a plain
// identifier) can be passed; `#x` still prints the argument as written.

// Asserts that tensor `x` lives on a CUDA device.
#define CHECK_CUDA(x)                                          \
    do                                                         \
    {                                                          \
        AT_ASSERT((x).is_cuda(), #x " must be a CUDA tensor"); \
    } while (0)

// Asserts that tensor `x` is contiguous in memory.
#define CHECK_CONTIGUOUS(x)                                                \
    do                                                                     \
    {                                                                      \
        AT_ASSERT((x).is_contiguous(), #x " must be a contiguous tensor"); \
    } while (0)

// Asserts that tensor `x` has 32-bit float elements.
#define CHECK_IS_FLOAT(x)                                     \
    do                                                        \
    {                                                         \
        AT_ASSERT((x).scalar_type() == at::ScalarType::Float, \
                  #x " must be a float tensor");              \
    } while (0)

// Asserts that tensor `x` has 32-bit int elements.
#define CHECK_IS_INT(x)                                     \
    do                                                      \
    {                                                       \
        AT_ASSERT((x).scalar_type() == at::ScalarType::Int, \
                  #x " must be an int tensor");             \
    } while (0)
cuda_utils.h
#ifndef _CUDA_UTILS_H
#define _CUDA_UTILS_H
#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>
#include <cmath>
#include <cuda.h>
#include <cuda_runtime.h>
#include <vector>
// Upper bound on the block size returned by opt_n_threads().
#define TOTAL_THREADS 512

/**
 * Chooses a thread-block size for `work_size` work items.
 *
 * Returns the largest power of two that does not exceed `work_size`, clamped
 * to the range [1, TOTAL_THREADS].
 *
 * The computation uses integer arithmetic only. This avoids the rounding
 * error of log(work_size) / log(2), the undefined behaviour of that
 * expression (and of the subsequent shift) when `work_size <= 0`, and the
 * reliance on the unqualified `min` / `max` overloads that are only available
 * when the header is compiled by nvcc.
 *
 * @param work_size number of work items; values <= 1 yield 1.
 * @return block size in [1, TOTAL_THREADS].
 */
inline int opt_n_threads(int work_size)
{
    int threads = 1;
    // Double while the next power of two still fits in work_size. Comparing
    // against work_size / 2 (instead of threads * 2) cannot overflow, and the
    // condition is false from the start for work_size <= 1.
    while (threads < TOTAL_THREADS && threads <= work_size / 2)
    {
        threads *= 2;
    }
    // Final clamp keeps the result valid even if TOTAL_THREADS is redefined
    // to a value that is not a power of two.
    return threads < TOTAL_THREADS ? threads : TOTAL_THREADS;
}
// Queries the most recent CUDA runtime error via cudaGetLastError(). If it is
// anything other than cudaSuccess, prints the error string together with the
// enclosing function, line and file to stderr and terminates the process with
// exit(-1). Intended to be placed right after a kernel launch.
// Expands to a single statement; use as `CUDA_CHECK_ERRORS();`.
#define CUDA_CHECK_ERRORS()                                                \
    do                                                                     \
    {                                                                      \
        cudaError_t last_status = cudaGetLastError();                      \
        if (last_status != cudaSuccess)                                    \
        {                                                                  \
            fprintf(stderr, "CUDA kernel failed : %s\n%s at L:%d in %s\n", \
                    cudaGetErrorString(last_status), __PRETTY_FUNCTION__,  \
                    __LINE__, __FILE__);                                   \
            exit(-1);                                                      \
        }                                                                  \
    } while (0)
#endif
cuda_test.h
#pragma once
#include <torch/extension.h>
// Host-side entry point of the extension (defined in cuda_test.cpp).
//
// Parameters:
//   verts_list                    input tensor; the definition reads it as
//                                 float data and uses size(0) as the batch
//                                 count passed to the kernel.
//   verts_list_length             input tensor; the definition reads it as
//                                 int data.
//   radius, nsample,
//   current_nearest_vertex_index  scalars forwarded unchanged to the kernel
//                                 launcher; `nsample` is also the second
//                                 dimension of the result tensor.
//
// Returns: the definition allocates an int tensor of shape
// {verts_list.size(0), nsample} on verts_list's device for the result.
// NOTE(review): exact meaning of the inputs/outputs depends on the elided
// kernel body; confirm against the implementation.
at::Tensor cuda_test(
at::Tensor verts_list,
at::Tensor verts_list_length,
const float radius,
const int nsample,
const int current_nearest_vertex_index);
CUDA 文件
cuda_test.cu
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include "cuda_utils.h"
/**
 * Query kernel.
 *
 * Launch layout (see query_wrapper): a 1-D grid of `b` blocks, so
 * `blockIdx.x` is used as the batch index. No dynamic shared memory is
 * requested by the launcher.
 *
 * The three pointer parameters are declared `__restrict__`, so the buffers
 * they point to must not alias each other.
 *
 * @param b                            number of batches (equals the grid size).
 * @param radius                       scalar forwarded from the host.
 * @param nsample                      scalar forwarded from the host.
 * @param current_nearest_vertex_index scalar forwarded from the host.
 * @param verts_list                   read-only float input buffer.
 * @param verts_list_length            read-only int input buffer.
 * @param query_idx                    int output buffer.
 *
 * NOTE(review): the body is elided in this listing; buffer shapes and the
 * meaning of the scalars must be confirmed against the real implementation.
 */
__global__ void query(int b, float radius,
                      int nsample, int current_nearest_vertex_index,
                      const float *__restrict__ verts_list,
                      const int *__restrict__ verts_list_length,
                      int *__restrict__ query_idx)
{
    int batch_idx = blockIdx.x;
    <some implementation>
}
/**
 * Host-side launcher for the `query` kernel.
 *
 * Launches one block per batch entry (`b` blocks) with opt_n_threads(1)
 * threads per block on the current ATen CUDA stream, then checks for a launch
 * error with CUDA_CHECK_ERRORS(). The launch is asynchronous with respect to
 * the host; no synchronization is performed here.
 *
 * @param b                            number of batches; nothing is launched
 *                                     when it is <= 0.
 * @param radius                       forwarded to the kernel.
 * @param nsample                      forwarded to the kernel.
 * @param current_nearest_vertex_index forwarded to the kernel.
 * @param verts_list                   float input buffer, passed straight to
 *                                     the kernel (must be device-accessible).
 * @param verts_list_length            int input buffer, passed straight to
 *                                     the kernel (must be device-accessible).
 * @param query_idx                    int output buffer, passed straight to
 *                                     the kernel (must be device-accessible).
 */
void query_wrapper(int b, float radius,
                   int nsample, int current_nearest_vertex_index,
                   const float *verts_list,
                   const int *verts_list_length,
                   int *query_idx)
{
    // A grid with zero blocks is an invalid launch configuration; without
    // this guard an empty batch would make CUDA_CHECK_ERRORS() terminate the
    // whole process. There is no work to do in that case anyway.
    if (b <= 0)
    {
        return;
    }

    cudaStream_t stream = at::cuda::getCurrentCUDAStream();
    query<<<b, opt_n_threads(1), 0, stream>>>(b, radius,
                                              nsample, current_nearest_vertex_index,
                                              verts_list,
                                              verts_list_length,
                                              query_idx);
    // Kernel launches do not return a status; pick up launch errors here.
    CUDA_CHECK_ERRORS();
}
与 cuda 文件链接的 CPP 文件 cuda_test.cpp
#include "cuda_test.h"
#include "utils.h"
// Forward declaration of the kernel launcher that is defined in cuda_test.cu.
// Declaring it here lets this translation unit call into the CUDA code
// without including any CUDA-specific header. The signature must stay in sync
// with the definition in cuda_test.cu.
void query_wrapper(int b, float radius,
int nsample,
int current_nearest_vertex_index,
const float *verts_list,
const int *verts_list_length,
int *query_idx);
/**
 * Validates the inputs, allocates the result tensor and dispatches to the
 * CUDA launcher.
 *
 * @param verts_list                   contiguous float tensor; size(0) is
 *                                     used as the batch count.
 * @param verts_list_length            contiguous int tensor; must be a CUDA
 *                                     tensor when `verts_list` is one.
 * @param radius                       forwarded to query_wrapper.
 * @param nsample                      forwarded to query_wrapper; also the
 *                                     second dimension of the result.
 * @param current_nearest_vertex_index forwarded to query_wrapper.
 * @return zero-initialised int tensor of shape {verts_list.size(0), nsample}
 *         on the device of `verts_list`, filled in by the kernel.
 *
 * Fails an AT_ASSERT if a precondition is violated or if `verts_list` is not
 * a CUDA tensor (there is no CPU implementation).
 */
at::Tensor cuda_test(
    at::Tensor verts_list,
    at::Tensor verts_list_length,
    const float radius,
    const int nsample,
    const int current_nearest_vertex_index)
{
    CHECK_CONTIGUOUS(verts_list);
    CHECK_IS_FLOAT(verts_list);
    CHECK_CONTIGUOUS(verts_list_length);
    CHECK_IS_INT(verts_list_length);

    // Result buffer lives on the same device as the input.
    at::Tensor query_idx =
        torch::zeros({verts_list.size(0), nsample},
                     at::device(verts_list.device()).dtype(at::ScalarType::Int));

    if (verts_list.is_cuda())
    {
        // The raw pointer below is dereferenced on the GPU, so the second
        // input has to be a CUDA tensor as well.
        CHECK_CUDA(verts_list_length);

        // query_wrapper takes the batch count as int; make the narrowing from
        // int64_t explicit.
        query_wrapper(static_cast<int>(verts_list.size(0)), radius,
                      nsample,
                      current_nearest_vertex_index,
                      verts_list.data_ptr<float>(),
                      verts_list_length.data_ptr<int>(),
                      query_idx.data_ptr<int>());
    }
    else
    {
        AT_ASSERT(false, "CPU not supported");
    }

    // Return the tensor allocated above (the original returned the undeclared
    // name `idx`).
    return query_idx;
}
bindings.cpp
#include "cuda_test.h"
// Defines the Python extension module. TORCH_EXTENSION_NAME is not defined in
// this file; presumably it is supplied by the build system (TODO confirm).
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)
{
// Expose the C++ function cuda_test to Python under the name "cuda_test".
m.def("cuda_test", &cuda_test);
}
我尝试构建和安装它,但出现以下错误
build/temp.linux-x86_64-3.7/test_ops/_ext-src/src/cuda_test.o: In function `__device_stub__Z5queryifiiPKfPKiPi(int, float, int, int, float const*, int const*, int*)':
tmpxft_000001a4_00000000-6_cuda_test.compute_75.cudafe1.cpp:(.text+0x10): multiple definition of `__device_stub__Z5queryifiiPKfPKiPi(int, float, int, int, float const*, int const*, int*)'
build/temp.linux-x86_64-3.7/test_ops/_ext-src/src/cuda_test.o:tmpxft_000001a4_00000000-6_cuda_test.compute_75.cudafe1.cpp:(.text+0x10): first defined here
build/temp.linux-x86_64-3.7/test_ops/_ext-src/src/cuda_test.o: In function `query(int, float, int, int, float const*, int const*, int*)':
tmpxft_000001a4_00000000-6_cuda_test.compute_75.cudafe1.cpp:(.text+0x180): multiple definition of `query(int, float, int, int, float const*, int const*, int*)'
build/temp.linux-x86_64-3.7/test_ops/_ext-src/src/cuda_test.o:tmpxft_000001a4_00000000-6_cuda_test.compute_75.cudafe1.cpp:(.text+0x180): first defined here
build/temp.linux-x86_64-3.7/test_ops/_ext-src/src/cuda_test.o: In function `query_wrapper(int, float, int, int, float const*, int const*, int*)':
tmpxft_000001a4_00000000-6_cuda_test.compute_75.cudafe1.cpp:(.text+0x190): multiple definition of `query_wrapper(int, float, int, int, float const*, int const*, int*)'
build/temp.linux-x86_64-3.7/test_ops/_ext-src/src/cuda_test.o:tmpxft_000001a4_00000000-6_cuda_test.compute_75.cudafe1.cpp:(.text+0x190): first defined here
collect2: error: ld returned 1 exit status
我完全遵循了这个链接中的结构,而那个项目可以正常工作。我想我已经尽我所能把一切都做对了。
我在构建和安装中是否有任何错误?先感谢您!!
解决方案
推荐阅读
- node.js - 错误:组件中未定义渲染函数或模板:匿名
- android - Firebase电话号码身份验证码为空
- java - 我应该编写自定义验证还是正确配置弹簧?
- acceleo - 3.7.x 版的 Acceleo 编译器(.mtl 到 .emtl)
- excel - 是否可以将数学文本放入 Excel 中?
- latex - 如何在 Sphinx 生成的 Latex 文档中添加版权声明?
- symfony - Symfony 4 - 如何为存在路由的 Web api REST 修复“404 Not Found”?
- python - 从带有其他数字的地址字符串中获取印度邮政编码
- python - 无法更新 Django 内置用户的 last_login 字段
- python - 如何使python中的函数超时