标签:style blog http color io os ar for div
当向量元素个数超过线程总数时的情况
向量元素个数是线程总数的 (33 * 1024)/(128 * 128) ≈ 2.06 倍,因此每个线程需要处理 2 到 3 个元素
1 /*
2 * Copyright 1993-2010 NVIDIA Corporation. All rights reserved.
3 *
4 * NVIDIA Corporation and its licensors retain all intellectual property and
5 * proprietary rights in and to this software and related documentation.
6 * Any use, reproduction, disclosure, or distribution of this software
7 * and related documentation without an express license agreement from
8 * NVIDIA Corporation is strictly prohibited.
9 *
10 * Please refer to the applicable NVIDIA end user license agreement (EULA)
11 * associated with this source code for terms and conditions that govern
12 * your use of this NVIDIA software.
13 *
14 */
15
16
17 #include "../common/book.h"
18 #include "cuda.h"
19 #include "cuda_runtime.h"
20 #include "device_launch_parameters.h"
21
22 #define N (33 * 1024)
23
/*
 * Element-wise integer vector addition: c[i] = a[i] + b[i] for i in [0, N).
 *
 * a, b : input arrays, each holding at least N ints
 * c    : output array, holding at least N ints
 *
 * Only the x dimension of the launch configuration is used. Each thread
 * starts at its flat global index and advances by the total number of
 * launched threads, so the whole range [0, N) is covered even when the
 * launch provides fewer than N threads.
 */
__global__ void add(int *a, int *b, int *c) {
    // Total number of threads in the grid; the distance between two
    // consecutive elements handled by the same thread.
    const int stride = blockDim.x * gridDim.x;

    for (int idx = blockIdx.x * blockDim.x + threadIdx.x; idx < N; idx += stride) {
        c[idx] = a[idx] + b[idx];
    }
}
31
/*
 * Host driver for the vector-add example.
 *
 * Allocates three N-element int arrays on the host and on the device,
 * fills a[i] = i and b[i] = 2 * i, launches add() with 128 blocks of 128
 * threads, copies the result back, and verifies c[i] == a[i] + b[i] on the
 * CPU.
 *
 * Returns 0 after the run completes (verification mismatches are reported
 * on stdout), or 1 if a host allocation fails. CUDA API failures are
 * delegated to HANDLE_ERROR.
 */
int main(void) {
    const size_t bytes = N * sizeof(int);
    int *a, *b, *c;
    int *dev_a, *dev_b, *dev_c;

    // allocate the memory on the CPU
    a = (int*)malloc(bytes);
    b = (int*)malloc(bytes);
    c = (int*)malloc(bytes);
    if (a == NULL || b == NULL || c == NULL) {
        // The fill loop below would dereference a null pointer otherwise.
        fprintf(stderr, "Host memory allocation failed\n");
        free(a);  // free(NULL) is a no-op, so partial failure is fine
        free(b);
        free(c);
        return 1;
    }

    // allocate the memory on the GPU
    HANDLE_ERROR(cudaMalloc((void**)&dev_a, bytes));
    HANDLE_ERROR(cudaMalloc((void**)&dev_b, bytes));
    HANDLE_ERROR(cudaMalloc((void**)&dev_c, bytes));

    // fill the arrays 'a' and 'b' on the CPU
    for (int i = 0; i < N; i++) {
        a[i] = i;
        b[i] = 2 * i;
    }

    // copy the arrays 'a' and 'b' to the GPU
    HANDLE_ERROR(cudaMemcpy(dev_a, a, bytes, cudaMemcpyHostToDevice));
    HANDLE_ERROR(cudaMemcpy(dev_b, b, bytes, cudaMemcpyHostToDevice));

    /*
     * The vector has more elements than there are launched threads:
     * (33 * 1024) elements versus (128 * 128) threads, roughly 2.06x,
     * so every thread processes two or three elements inside the kernel.
     */
    add<<<128, 128>>>(dev_a, dev_b, dev_c);
    // A kernel launch returns no status; launch-configuration errors are
    // only visible through cudaGetLastError().
    HANDLE_ERROR(cudaGetLastError());

    // copy the array 'c' back from the GPU to the CPU
    // (blocking copy: it also waits for the kernel to finish)
    HANDLE_ERROR(cudaMemcpy(c, dev_c, bytes, cudaMemcpyDeviceToHost));

    // verify that the GPU did the work we requested
    bool success = true;
    for (int i = 0; i < N; i++) {
        if ((a[i] + b[i]) != c[i]) {
            printf("Error: %d + %d != %d\n", a[i], b[i], c[i]);
            success = false;
        }
    }
    if (success) printf("We did it!\n");

    // free the memory we allocated on the GPU
    HANDLE_ERROR(cudaFree(dev_a));
    HANDLE_ERROR(cudaFree(dev_b));
    HANDLE_ERROR(cudaFree(dev_c));

    // free the memory we allocated on the CPU
    free(a);
    free(b);
    free(c);

    return 0;
}
标签:style blog http color io os ar for div
原文地址:http://www.cnblogs.com/liangliangdetianxia/p/3985040.html