首页 > 解决方案 > cuda c++奇偶排序实现

问题描述

这是我用于奇偶排序的代码:此代码可以编译,运行时也没有报错,但似乎并没有对数组进行排序。请帮帮我,我目前在 Visual Studio 2019 上使用 CUDA 11.3。我的想法是分别编写 Even(偶)和 Odd(奇)两个函数,并交替地一个接一个运行;每个函数内部的比较由多个线程并行完成。换句话说,在偶数阶段,如果 arr[2k] > arr[2k+1],就交换 arr[2k] 和 arr[2k+1];奇数阶段则对 arr[2k+1] 和 arr[2k+2] 做同样的比较和交换。

#include <stdio.h>
#include<iostream>
#include<chrono>
#include <cuda_runtime.h>
#include "device_launch_parameters.h"

using namespace std;
using namespace std::chrono;

// Even phase of an odd-even transposition sort.
// Thread t looks at the pair (arr[2t], arr[2t + 1]) and puts it in ascending
// order. The pair position is derived from threadIdx.x alone, so every block
// of a multi-block launch would work on the same pairs.
//   arr - array of n ints, modified in place
//   n   - number of elements in arr
__global__ void Even(int *arr, int n) {
    const int left = static_cast<int>(threadIdx.x) * 2;

    // Threads whose pair would run past the end of the array have nothing to do.
    if (left >= n - 1) {
        return;
    }

    const int right = left + 1;
    const int first = arr[left];
    const int second = arr[right];
    if (first > second) {
        arr[left] = second;
        arr[right] = first;
    }
}

// Odd phase of an odd-even transposition sort.
// Thread t looks at the pair (arr[2t + 1], arr[2t + 2]) and puts it in
// ascending order. The pair position is derived from threadIdx.x alone, so
// every block of a multi-block launch would work on the same pairs.
//   arr - array of n ints, modified in place
//   n   - number of elements in arr
__global__ void Odd(int* arr, int n) {
    const int left = static_cast<int>(threadIdx.x) * 2 + 1;

    // The right-hand neighbour (left + 1) must still be inside the array.
    if (left > n - 2) {
        return;
    }

    const int right = left + 1;
    const int first = arr[left];
    const int second = arr[right];
    if (first > second) {
        arr[left] = second;
        arr[right] = first;
    }
}
        
// Number of elements to sort. This is a macro, so every later identifier
// token `n` in the file is replaced by 10.
#define n 10
// Demo driver as posted in the question: fills an array with random values,
// prints it, runs the Even/Odd kernels, copies the device buffer back, then
// prints the elapsed time and the resulting array.
int main(){
    int *a;      // host buffer (allocated with malloc below)
    int* ptr;    // device buffer (allocated with cudaMalloc below)
    const int Size = sizeof(int) * n;   // size of the array in bytes

    cudaMalloc((void**)&ptr, Size);

    // NOTE(review): Size is already a byte count, so n * Size requests n times
    // more host memory than the Size bytes that are ever copied.
    a = (int*)malloc(n * Size);

    srand(time(NULL));
    
    // Random values in the range [0, n).
    for(int i =0 ;i<n;i++){
        a[i] = rand()%n;
    }


    // Print the unsorted input.
    for (int i = 0; i < n; i++) {
       std:: cout << a[i] << " ";
    }
    std::cout << endl;

    cudaMemcpy(ptr, a, Size, cudaMemcpyHostToDevice);

    auto starttime = high_resolution_clock::now();
 

    // n / 2 rounds, each launching an Even pass followed by an Odd pass in a
    // single block of n threads.
    // NOTE(review): both kernels are handed `a`, the host pointer returned by
    // malloc, not `ptr`, the device buffer that received the data. No CUDA
    // return code is checked anywhere in this function, so a failure here
    // would go unreported.
    for (int i = 0; i < n / 2; i++) {
       Even<<<1,n >>>(a, n);
        Odd<<<1,n >>>(a, n);

    }

    // Copies the contents of the device buffer `ptr` back over the host array.
    cudaMemcpy( a, ptr, Size, cudaMemcpyDeviceToHost);

    auto stoptime = high_resolution_clock::now();
    auto duration = duration_cast<microseconds>(stoptime-starttime);

    // NOTE(review): the value is a microsecond count (see the duration_cast
    // above) although the label printed after it is "ms".
    std::cout<<" time : " <<duration.count()<<"ms"<<endl;

    // Print the array as copied back from the device.
    for (int i = 0; i < n; i++) {
        std::cout << a[i] << " ";
    }
    std::cout << endl;
    free(a);
    cudaFree(ptr);
 
    return 0;

}

标签: c++ sorting cuda

解决方案


我怀疑有两个问题。

首先,每次运行 Odd() 时都会覆盖数组中的第一个值。您应该删除 `arr[0] = 0;` 这一行来解决此问题(上面贴出的问题代码中已经不包含这一行)。

其次,您传给内核的是主机指针 a,而不是设备指针 ptr。您应该传入 ptr。

通过这些(未经测试的)编辑,代码如下所示:

#include <stdio.h>
#include<iostream>
#include<chrono>
#include <cuda_runtime.h>
#include "device_launch_parameters.h"

using namespace std;
using namespace std::chrono;

// Even phase of an odd-even transposition sort.
// Each thread owns one pair (arr[2g], arr[2g + 1]), where g is the thread's
// global index in a 1-D launch, and swaps the pair into ascending order.
//   arr - device array of n ints, modified in place
//   n   - number of elements in arr
// Launch layout: any 1-D grid with at least n / 2 threads in total; surplus
// threads fall out at the bounds check. Pairs handled in one launch are
// disjoint, so no synchronization is needed inside the kernel.
__global__ void Even(int *arr, int n) {
    // Include the block offset so that multi-block launches cover different
    // pairs instead of every block repeating the pairs of block 0. 64-bit
    // arithmetic keeps 2 * g from wrapping on very large grids.
    const long long gid =
        static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;
    const long long index = 2 * gid;

    if (index + 1 < n) {
        if (arr[index] > arr[index + 1]) {
            const int temp = arr[index];
            arr[index] = arr[index + 1];
            arr[index + 1] = temp;
        }
    }
}

// Odd phase of an odd-even transposition sort.
// Each thread owns one pair (arr[2g + 1], arr[2g + 2]), where g is the
// thread's global index in a 1-D launch, and swaps the pair into ascending
// order.
//   arr - device array of n ints, modified in place
//   n   - number of elements in arr
// Launch layout: any 1-D grid with at least (n - 1) / 2 threads in total;
// surplus threads fall out at the bounds check. Pairs handled in one launch
// are disjoint, so no synchronization is needed inside the kernel.
__global__ void Odd(int* arr, int n) {
    // Include the block offset so that multi-block launches cover different
    // pairs instead of every block repeating the pairs of block 0. 64-bit
    // arithmetic keeps 2 * g + 1 from wrapping on very large grids.
    const long long gid =
        static_cast<long long>(blockIdx.x) * blockDim.x + threadIdx.x;
    const long long index = 2 * gid + 1;

    // no longer setting arr[0] = 0 (that write overwrote the first element
    // on every launch)
    if (index + 1 < n) {
        if (arr[index] > arr[index + 1]) {
            const int temp = arr[index];
            arr[index] = arr[index + 1];
            arr[index + 1] = temp;
        }
    }
}
        
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaSucceeded(cudaError_t status, const char* what) {
    if (status == cudaSuccess) {
        return true;
    }
    fprintf(stderr, "CUDA error (%s): %s\n", what, cudaGetErrorString(status));
    return false;
}

// Number of elements to sort. This is a macro, so every later identifier
// token `n` in the file is replaced by 10.
#define n 10

// Demo driver: fills an array with random values, prints it, sorts it on the
// GPU with alternating Even/Odd passes, then prints the elapsed time and the
// sorted array. Returns 0 on success and 1 if an allocation or CUDA call fails.
int main(){
    const int Size = sizeof(int) * n;   // size of the array in bytes

    // Host buffer. Size is already a byte count, so it is used directly
    // (multiplying it by n again would over-allocate n-fold).
    int* a = (int*)malloc(Size);
    if (a == NULL) {
        fprintf(stderr, "host allocation of %d bytes failed\n", Size);
        return 1;
    }

    // Device buffer.
    int* ptr = NULL;
    if (!cudaSucceeded(cudaMalloc((void**)&ptr, Size), "cudaMalloc")) {
        free(a);
        return 1;
    }

    srand((unsigned)time(NULL));

    // Random values in the range [0, n).
    for (int i = 0; i < n; i++) {
        a[i] = rand() % n;
    }

    // Print the unsorted input.
    for (int i = 0; i < n; i++) {
        std::cout << a[i] << " ";
    }
    std::cout << endl;

    bool ok = cudaSucceeded(cudaMemcpy(ptr, a, Size, cudaMemcpyHostToDevice),
                            "host-to-device copy");

    auto starttime = high_resolution_clock::now();

    if (ok) {
        // Odd-even transposition sort needs n phases for n elements. Each
        // round runs two phases, so ceil(n / 2) rounds are required; plain
        // n / 2 would leave an odd-sized array one phase short.
        // A single block is used, so n must not exceed the per-block thread
        // limit; threads without a pair exit at the kernels' bounds checks.
        for (int i = 0; i < (n + 1) / 2; i++) {
            Even<<<1, n>>>(ptr, n);  // ptr (device), not a (host)
            Odd<<<1, n>>>(ptr, n);   // ptr (device), not a (host)
        }
        // Kernel launches do not return a status; launch failures are read
        // back here.
        ok = cudaSucceeded(cudaGetLastError(), "kernel launch");
    }

    if (ok) {
        // The blocking copy on the default stream runs after the queued
        // kernels and also reports errors raised while they executed.
        ok = cudaSucceeded(cudaMemcpy(a, ptr, Size, cudaMemcpyDeviceToHost),
                           "device-to-host copy");
    }

    auto stoptime = high_resolution_clock::now();
    auto duration = duration_cast<microseconds>(stoptime - starttime);

    if (ok) {
        // The count is in microseconds, so label it "us" rather than "ms".
        std::cout << " time : " << duration.count() << "us" << endl;

        // Print the sorted result.
        for (int i = 0; i < n; i++) {
            std::cout << a[i] << " ";
        }
        std::cout << endl;
    }

    free(a);
    cudaFree(ptr);

    return ok ? 0 : 1;
}

推荐阅读