标签:ice 虚拟内存 oca size 报错信息 src ace 核函数 stat
● 按书上写的管道代码,就算使用支持 OpenCL 2.0 的平台和设备,编译器还是报错称“不支持修饰符 pipe”,暂时不知道是什么问题,先把代码堆上来,以后换了新的设备再说
● 程序主要功能:用主机上的数组 srcHost 创建设备缓冲区 src,调用核函数 pipeProducer 将 src 分装到管道中,再调用核函数 pipeConsumer 将管道中的数据读到设备缓冲区 dst 中,最后拷贝回主机数组 dstHost 中检查结果。
● 代码
//pipe.cl

// Producer kernel: each work-item reads one float from src and pushes it
// into the pipe as a single packet.
//   src     - input buffer, indexed by global id
//   outPipe - pipe receiving one packet per work-item
__kernel void pipeProducer(__global float *src, __write_only pipe float outPipe)
{
    int gid = get_global_id(0);
    float srcPipe = src[gid];   // was src[id]: 'id' is not declared anywhere
    reserve_id_t resID = reserve_write_pipe(outPipe, 1);
    if (is_valid_reserve_id(resID))
    {
        // A non-zero return means the write failed; leave without committing.
        if (write_pipe(outPipe, resID, 0, &srcPipe) != 0)
            return;
        commit_write_pipe(outPipe, resID);
    }
}

// Consumer kernel: each work-item pops one packet from the pipe and stores
// it into dst at its global id.
//   dst    - output buffer, indexed by global id
//   inPipe - pipe supplying one packet per work-item
// dst[gid] is left untouched when no packet could be reserved or read.
__kernel void pipeConsumer(__global float *dst, __read_only pipe float inPipe)
{
    int gid = get_global_id(0);
    float dstPipe;
    reserve_id_t resID = reserve_read_pipe(inPipe, 1);
    if (is_valid_reserve_id(resID))
    {
        if (read_pipe(inPipe, resID, 0, &dstPipe) != 0)
            return;
        commit_read_pipe(inPipe, resID);
        // Store only after a successful read; the original stored dstPipe
        // even when the reservation was invalid, i.e. an uninitialised value.
        dst[gid] = dstPipe;
    }
}
1 //main.c 2 #include <stdio.h> 3 #include <stdlib.h> 4 #include <cl.h> 5 6 const char *sourceCode = "D:/Code/pipe.cl"; 7 8 char* readSource(const char* kernelPath)// 读取文本文件,存储为 char * 9 { 10 FILE *fp; 11 char *source; 12 long int size; 13 //printf("readSource, Program file: %s\n", kernelPath); 14 fopen_s(&fp, kernelPath, "rb"); 15 if (!fp) 16 { 17 printf("Open kernel file failed\n"); 18 exit(-1); 19 } 20 if (fseek(fp, 0, SEEK_END) != 0) 21 { 22 printf("Seek end of file faildd\n"); 23 exit(-1); 24 } 25 if ((size = ftell(fp)) < 0) 26 { 27 printf("Get file position failed\n"); 28 exit(-1); 29 } 30 rewind(fp); 31 if ((source = (char *)malloc(size + 1)) == NULL) 32 { 33 printf("Allocate space failed\n"); 34 exit(-1); 35 } 36 fread(source, 1, size, fp); 37 fclose(fp); 38 source[size] = ‘\0‘; 39 return source; 40 } 41 42 int main() 43 { 44 const int nPacket = 1024, dataSize = nPacket * sizeof(float); 45 char info[1024] = { 0 }; 46 int i; 47 48 // 初始化平台 49 cl_int status; 50 cl_platform_id platform; 51 clGetPlatformIDs(1, &platform, NULL); 52 cl_device_id device; 53 clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL); 54 cl_context_properties contextProp[] = { CL_CONTEXT_PLATFORM,(cl_context_properties)(platform), 0 }; 55 cl_context context = clCreateContext(contextProp, 1, &device, NULL, contextProp, &status); 56 cl_command_queue queue = clCreateCommandQueueWithProperties(context, device, NULL, &status); 57 cl_event eventProducer, eventConsumer; 58 59 const char* source = readSource(sourceCode); 60 cl_program program = clCreateProgramWithSource(context, 1, &source, NULL, &status); 61 status = clBuildProgram(program, 1, &device, "-w -g –cl-std=CL2.0", NULL, NULL); 62 63 clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 1024, info, NULL); 64 printf("Build log:\n%s\n", info); 65 66 cl_kernel kernelProducer = clCreateKernel(program, "pipeProducer", &status); 67 cl_kernel kernelConsumer = clCreateKernel(program, "pipeConsumer", &status); 68 size_t 
globalSize = nPacket, localSize = 128; 69 70 float *srcHost = (float *)malloc(dataSize); 71 float *dstHost = (float *)malloc(dataSize); 72 for (i = 0; i < nPacket; srcHost[i] = i, dstHost[i] = 0.0f, i++); 73 74 cl_mem src, dst; 75 src = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, dataSize, srcHost, &status); 76 dst = clCreateBuffer(context, CL_MEM_WRITE_ONLY, dataSize, NULL, &status); 77 78 cl_mem pipe = clCreatePipe(context, CL_MEM_HOST_NO_ACCESS, sizeof(float), nPacket, NULL, &status); 79 80 clSetKernelArg(kernelProducer, 0, sizeof(cl_mem),src); 81 clSetKernelArg(kernelProducer, 1, sizeof(cl_mem), &pipe); 82 83 clSetKernelArg(kernelProducer, 0, sizeof(cl_mem), dst); 84 clSetKernelArg(kernelProducer, 1, sizeof(cl_mem), &pipe); 85 86 clEnqueueNDRangeKernel(queue, kernelProducer, 1, NULL, &globalSize, &localSize, 0, NULL, &eventProducer); 87 clEnqueueNDRangeKernel(queue, kernelConsumer, 1, NULL, &globalSize, &localSize, 1, &eventProducer, &eventConsumer); 88 clEnqueueReadBuffer(queue, dst, CL_TRUE, dataSize, dataSize, dstHost, 1, &eventConsumer, NULL); 89 clFinish(queue); 90 91 for (i = 0; i < nPacket; i++) 92 { 93 if (dstHost[i] != i) 94 break; 95 } 96 printf("Output is %s.\n", (i == nPacket) ? "correct" : "incorrect"); 97 clEnqueueSVMUnmap(queue, dst, 0, NULL, NULL); 98 99 free(srcHost); 100 free(dstHost); 101 clReleaseContext(context); 102 clReleaseCommandQueue(queue); 103 clReleaseProgram(program); 104 clReleaseKernel(kernelProducer); 105 clReleaseKernel(kernelConsumer); 106 getchar(); 107 return 0; 108 }
● 输出结果
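■ 使用编译参数 "-w -g –cl-std=CL2.0" 时返回 status 为 -43(CL_INVALID_BUILD_OPTIONS),不使用参数 "–cl-std=CL2.0" 的情况下返回 -11(CL_BUILD_PROGRAM_FAILURE),麻烦的是调用函数 clGetProgramBuildInfo 查询编译日志 info 始终都是空的,不知道出了什么问题。(注:上面参数中 cl-std 前面的 “–” 是破折号(U+2013)而不是 ASCII 连字符 “-”,这很可能就是返回 CL_INVALID_BUILD_OPTIONS 的原因,正确写法应为 "-cl-std=CL2.0"。)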
■ 转机,去掉了修饰符 __write_only 和 __read_only(只用于图像类型的缓冲区),返回 status 为 -11,至少报错信息有了:【identifier "pipe" is undefined】和【invalid combination of type specifiers】(指在 float 上)。(注:在 OpenCL C 2.0 中这两个访问限定符同样可用于 pipe 类型;这里的报错更像是编译器没有按 CL2.0 标准编译,因而不认识 pipe 关键字。)
● 后续代码,但是上述代码都编译不了,下面的也暂时没用。(1)使用局部内存来统一工作组的写入
//pipe2.cl

// Producer kernel: the first work-item of each work-group reserves one pipe
// slot per work-item, then every work-item writes its own value into the
// slot matching its local id.
//   src     - input buffer, indexed by global id
//   outPipe - pipe receiving get_local_size(0) packets per work-group
// NOTE(review): this shares a reservation made by reserve_write_pipe across
// work-items; confirm the target implementation allows that, otherwise use
// the work_group_* pipe functions.
__kernel void pipeProducer(__global float *src, __write_only pipe float outPipe)
{
    int gid = get_global_id(0), lid = get_local_id(0);
    __local reserve_id_t resID;
    if (lid == 0)
        resID = reserve_write_pipe(outPipe, get_local_size(0)); // first work-item reserves slots for the whole group
    barrier(CLK_LOCAL_MEM_FENCE);

    if (is_valid_reserve_id(resID))
    {
        float srcPipe = src[gid];   // was src[id]: 'id' is not declared anywhere
        write_pipe(outPipe, resID, lid, &srcPipe); // each work-item fills its own slot
    }

    // No early return above: every work-item must reach this barrier.
    barrier(CLK_GLOBAL_MEM_FENCE);

    // One reservation, one commit. The original committed the same
    // reservation once per work-item.
    if (lid == 0 && is_valid_reserve_id(resID))
        commit_write_pipe(outPipe, resID);
}

// Consumer kernel: the first work-item of each work-group reserves one
// packet per work-item, then every work-item reads the packet matching its
// local id and stores it into dst at its global id.
//   dst    - output buffer, indexed by global id
//   inPipe - pipe supplying get_local_size(0) packets per work-group
// dst[gid] is left untouched when no packet could be reserved or read.
__kernel void pipeConsumer(__global float *dst, __read_only pipe float inPipe)
{
    int gid = get_global_id(0), lid = get_local_id(0);
    __local reserve_id_t resID;
    if (lid == 0)
        resID = reserve_read_pipe(inPipe, get_local_size(0));
    barrier(CLK_LOCAL_MEM_FENCE);

    float dstPipe = 0.0f;
    int gotPacket = 0;
    if (is_valid_reserve_id(resID))
        gotPacket = (read_pipe(inPipe, resID, lid, &dstPipe) == 0);

    // No early return above: every work-item must reach this barrier.
    barrier(CLK_GLOBAL_MEM_FENCE);

    if (lid == 0 && is_valid_reserve_id(resID))
        commit_read_pipe(inPipe, resID);

    // The original stored dstPipe even when nothing had been read.
    if (gotPacket)
        dst[gid] = dstPipe;
}
● (2)使用工作组管道操作简化上述代码(只是干掉了一个 if 和一个同步)
//pipe3.cl

// Producer kernel using the work-group pipe functions: the work-group
// reserves one slot per work-item, every work-item writes its own value
// into the slot matching its local id, and the group commits together.
//   src     - input buffer, indexed by global id
//   outPipe - pipe receiving get_local_size(0) packets per work-group
__kernel void pipeProducer(__global float *src, __write_only pipe float outPipe)
{
    int gid = get_global_id(0), lid = get_local_id(0);
    // Kept in private memory: OpenCL C does not allow a __local variable to
    // be initialised in its declaration, which the original did.
    reserve_id_t resID = work_group_reserve_write_pipe(outPipe, get_local_size(0));

    // The reservation belongs to the whole work-group, so this branch is
    // expected to be taken uniformly by all of its work-items.
    if (is_valid_reserve_id(resID))
    {
        float srcPipe = src[gid];   // was src[id]: 'id' is not declared anywhere
        write_pipe(outPipe, resID, lid, &srcPipe);
        // A work-group reservation is committed with the work-group commit,
        // which every work-item must reach; hence no early return on a
        // failed write.
        work_group_commit_write_pipe(outPipe, resID);
    }
}

// Consumer kernel using the work-group pipe functions: the work-group
// reserves one packet per work-item, every work-item reads the packet
// matching its local id, and the group commits together.
//   dst    - output buffer, indexed by global id
//   inPipe - pipe supplying get_local_size(0) packets per work-group
// dst[gid] is left untouched when no packet could be reserved or read.
__kernel void pipeConsumer(__global float *dst, __read_only pipe float inPipe)
{
    int gid = get_global_id(0), lid = get_local_id(0);
    reserve_id_t resID = work_group_reserve_read_pipe(inPipe, get_local_size(0));

    if (is_valid_reserve_id(resID))
    {
        float dstPipe = 0.0f;
        int gotPacket = (read_pipe(inPipe, resID, lid, &dstPipe) == 0);
        work_group_commit_read_pipe(inPipe, resID);
        // The original stored dstPipe even when nothing had been read.
        if (gotPacket)
            dst[gid] = dstPipe;
    }
}
● 书上原本的主函数的内容(关于数据缓冲区的部分),是用虚拟内存写的,由于办公室的电脑不支持,上面的代码中被我换成了普通缓冲区
1 float *src = (float *)clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, dataSize, 0); 2 float *dst = (float *)clSVMAlloc(context, CL_MEM_READ_WRITE | CL_MEM_SVM_FINE_GRAIN_BUFFER, dataSize, 0); 3 if (src == NULL || dst == NULL) 4 { 5 printf("clSVMAlloc failed!\n"); 6 getchar(); 7 return 0; 8 } 9 10 clEnqueueSVMMap(queue, CL_TRUE, CL_MAP_WRITE, src, dataSize, 0, NULL, NULL); 11 for (i = 0; i < nPacket; i++) 12 src[i] = i, dst[i] = 0.0f; 13 clEnqueueSVMUnmap(queue, src, 0, NULL, NULL); 14 15 cl_mem pipe = clCreatePipe(context, CL_MEM_HOST_NO_ACCESS, sizeof(float), nPacket, NULL, &status); 16 17 clSetKernelArgSVMPointer(kernelProducer, 0, src); 18 clSetKernelArg(kernelProducer, 1, sizeof(cl_mem), &pipe); 19 20 clSetKernelArgSVMPointer(kernelProducer, 0, dst); 21 clSetKernelArg(kernelProducer, 1, sizeof(cl_mem), &pipe); 22 23 clEnqueueNDRangeKernel(queue, kernelProducer, 1, NULL, &globalSize, &localSize, 0, NULL, &eventProducer); 24 clEnqueueNDRangeKernel(queue, kernelConsumer, 1, NULL, &globalSize, &localSize, 1, &eventProducer,NULL); 25 clFinish(queue); 26 27 clEnqueueSVMMap(queue, CL_TRUE, CL_MAP_READ, dst, dataSize, 0, NULL, NULL); 28 for (i = 0; i < nPacket; i++) 29 { 30 if (dst[i] != i) 31 break; 32 } 33 printf("Output is %s.\n", (i == nPacket) ? "correct" : "incorrect"); 34 clEnqueueSVMUnmap(queue, dst, 0, NULL, NULL);
标签:ice 虚拟内存 oca size 报错信息 src ace 核函数 stat
原文地址:https://www.cnblogs.com/cuancuancuanhao/p/9038655.html