#include <stdio.h>

/*
 * Minimal demonstration of a kernel launching another kernel from device
 * code (CUDA dynamic parallelism).
 *
 * Build note: device-side kernel launches need relocatable device code and
 * the device runtime library (e.g. `nvcc -rdc=true file.cu -lcudadevrt`) and
 * a GPU of compute capability 3.5 or newer.
 */

/**
 * Child kernel, launched from device code by `kernel`.
 *
 * Each thread prints a header line with the parent's id and its own global
 * thread index, then the ten integers i .. i+9 (each preceded by a comma),
 * then a newline. Every piece is a separate printf call, so the relative
 * order of output from different threads is not controlled by this code.
 *
 * @param i  id of the parent thread that launched this grid; also the first
 *           value of the printed sequence.
 */
__global__ void childKernel(int i)
{
    int tid = blockIdx.x * blockDim.x + threadIdx.x;

    printf("parent:%d,child:%d\n", i, tid);
    for (int j = i; j < i + 10; j++) {
        printf(",%d", j);
    }
    printf("\n");
}

/**
 * Parent kernel: every thread launches one child grid of 1 block x 2 threads,
 * passing its own global thread index as the child's argument.
 */
__global__ void kernel()
{
    int tid = blockIdx.x * blockDim.x + threadIdx.x;

    childKernel<<<1, 2>>>(tid);

    // A device-side launch can fail (e.g. launch resources exhausted);
    // report it instead of silently producing no child output.
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        printf("childKernel launch failed in parent %d: error %d\n", tid, (int)err);
    }
}

/**
 * Launches the parent kernel with a single thread and waits for it (and the
 * child grids it launched) to finish.
 *
 * @return 0 on success, 1 if the launch or the execution reported a CUDA error.
 */
int main()
{
    kernel<<<1, 1>>>();

    // Kernel launches return no status; configuration errors surface here.
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        fprintf(stderr, "kernel launch failed: %s\n", cudaGetErrorString(err));
        return 1;
    }

    // Errors raised while the kernels execute are reported by the sync call;
    // the sync also lets the device-side printf output reach the host.
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess) {
        fprintf(stderr, "kernel execution failed: %s\n", cudaGetErrorString(err));
        return 1;
    }

    return 0;
}
cuda核函数再调用核函数,多层并行,布布扣,bubuko.com
原文地址:http://blog.csdn.net/linger2012liu/article/details/26258117