#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"

#define ARRAY_SIZE (1024*100-1)
#define WIDTH 32
#define WIDTH2 1024
#define SEED 1 //(unsigned int)clock()
#define CEIL(x,y) (( x - 1 ) / y + 1)

typedef float format; // int or float

void checkCudaError(cudaError input)
{
    if (input != cudaSuccess)
    {
        printf("\n\tfound a CUDA error!");
        exit(1);
    }
    return;
}

// Naive reduction: each thread accumulates a strided slice of the input
// into shared memory, then thread 0 sums the shared array serially.
__global__ void add_naive(format *in, format *out)
{
    extern __shared__ format sdata[];
    sdata[threadIdx.x] = 0;

    int id = threadIdx.x;
    while (id < ARRAY_SIZE)
    {
        sdata[threadIdx.x] += in[id];
        id += blockDim.x;
    }
    __syncthreads();

    if (threadIdx.x == 0) // thread 0 reduces sdata on its own
    {
        for (id = 1; id < blockDim.x; id++)
            sdata[0] += sdata[id];
        *out = sdata[0];
    }
    return;
}

// Tree reduction: halve the active range each step; the (wall, s) pair
// also handles ranges that are not a power of two.
// Note: the kernel ignores blockIdx.x, so every launched block computes
// the full sum and writes the same value to *out (redundant but correct).
__global__ void add_reduce(format *in, format *out)
{
    extern __shared__ format sdata[];
    sdata[threadIdx.x] = 0;

    int id = threadIdx.x;
    int wall = WIDTH2;
    while (id < ARRAY_SIZE)
    {
        sdata[threadIdx.x] += in[id];
        id += blockDim.x;
    }
    __syncthreads();

    for (int s = (wall + 1) / 2; s > 1 && s < wall; wall = (wall + 1) / 2, s = (s + 1) / 2)
    {
        if (threadIdx.x < s)
            sdata[threadIdx.x] += (threadIdx.x + s >= wall) ? 0 : sdata[threadIdx.x + s];
        __syncthreads(); // barrier must sit inside the loop, between reduction
                         // steps; every thread iterates the loop the same
                         // number of times, so the barrier is uniform
    }

    if (threadIdx.x == 0)
        *out = sdata[0] + sdata[1];
    return;
}

int main()
{
    int i;
    format h_in[ARRAY_SIZE], cpu_sum, h_out_naive, h_out_reduce;
    format *d_in, *d_out_naive, *d_out_reduce;
    cudaEvent_t start, stop;
    float elapsedTime1, elapsedTime2;
    cudaEventCreate(&start);
    cudaEventCreate(&stop);

    checkCudaError(cudaMalloc((void **)&d_in, sizeof(format) * ARRAY_SIZE));
    checkCudaError(cudaMalloc((void **)&d_out_naive, sizeof(format)));
    checkCudaError(cudaMalloc((void **)&d_out_reduce, sizeof(format)));

    srand(SEED);
    for (i = 0, cpu_sum = 0; i < ARRAY_SIZE; i++)
    {
        h_in[i] = rand();
        cpu_sum += h_in[i]; // reference sum on the CPU
        //printf("%f\n", h_in[i]);
    }

    checkCudaError(cudaMemcpy(d_in, h_in, sizeof(format) * ARRAY_SIZE, cudaMemcpyHostToDevice));

    // Time the naive kernel: a single block of WIDTH threads.
    cudaEventRecord(start, 0);
    add_naive <<< 1, WIDTH, sizeof(format) * WIDTH >>> (d_in, d_out_naive);
    checkCudaError(cudaMemcpy(&h_out_naive, d_out_naive, sizeof(format), cudaMemcpyDeviceToHost));
    cudaDeviceSynchronize(); // cudaThreadSynchronize() is deprecated
    cudaEventRecord(stop, 0);
    cudaEventSynchronize(stop);
    cudaEventElapsedTime(&elapsedTime1, start, stop);

    // Time the tree-reduction kernel: WIDTH2 threads per block.
    cudaEventRecord(start, 0);
    add_reduce <<< CEIL(ARRAY_SIZE, WIDTH2), WIDTH2, sizeof(format) * WIDTH2 >>> (d_in, d_out_reduce);
    checkCudaError(cudaMemcpy(&h_out_reduce, d_out_reduce, sizeof(format), cudaMemcpyDeviceToHost));
    cudaDeviceSynchronize();
    cudaEventRecord(stop, 0);
    cudaEventSynchronize(stop);
    cudaEventElapsedTime(&elapsedTime2, start, stop);

    printf("\n\tCPU:\t\t%f\n\tGPU naive:\t%f\tTime:\t%6.2f ms\n\tGPU reduce:\t%f\tTime:\t%6.2f ms\n",
        (float)cpu_sum, (float)h_out_naive, elapsedTime1, (float)h_out_reduce, elapsedTime2);

    cudaFree(d_in);
    cudaFree(d_out_naive);
    cudaFree(d_out_reduce);
    cudaEventDestroy(start);
    cudaEventDestroy(stop);

    getchar();
    return 0;
}
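A minimal way to build and run the program, assuming the CUDA toolkit is installed and nvcc is on the PATH (the file name reduce.cu is just a placeholder):

    nvcc -o reduce reduce.cu
    ./reduce

With format defined as float, expect the CPU and GPU sums to disagree in the low digits: accumulating roughly 100K values of rand() in single precision rounds at every addition, and the three code paths add the terms in different orders.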
Original article: http://www.cnblogs.com/cuancuancuanhao/p/7660621.html