标签:style blog http color os 使用 io ar for
1GPU编程,依赖于显卡
2GPU编程依赖于OpenGL和DirectX
3CPU的特点是:频率比较快,GPU的特点是寄存器非常非常的多。
4如果电脑是Windows 7,没法直接调试GPU。Windows 8可以直接调试
5用VS2013新建一个项目,命名:GPU
6调试GPU的方式是VS中的:打断点→运行项目→调试→窗口→GPU线程(通过这种方式实现调试GPU项目)
8.修改项目属性:右击项目→属性→配置属性→常规,修改调试器类型为仅GPU
修改Amp默认加速器(Amp Default Accelerator):可以选择使用C++ AMP运行时默认值(Use C++ AMP runtime default)的方式,也可以使用软件加速器(WARP software accelerator)的方式,截图
9.代码:
#include <iostream>
#include <amp.h> //GPU编程所需的头文件
using namespace concurrency;
// Minimal C++ AMP demo: increments every element of a small integer array
// inside an amp-restricted kernel and prints the result as characters.
// After the increment the values spell "Hello world".
int main()
{
    // Each entry is one less than the character code it should become
    // (31 + 1 == 32, the space character).
    int v[] = { 'G', 'd', 'k', 'k', 'n', 31, 'v', 'n', 'q', 'k', 'c' };
    // Derive the element count from the array instead of repeating 11.
    const int length = sizeof(v) / sizeof(v[0]);

    // array_view wraps the host array so the kernel below can read and
    // write it.
    array_view<int> av(length, v);

    // [=] captures av by value, index<1> idx names the element handled by
    // one invocation, and restrict(amp) marks the lambda as accelerator code.
    parallel_for_each(av.extent, [=](index<1> idx) restrict(amp)
    {
        av[idx] += 1;
    });

    // Read the updated values back through the view and print them.
    for (int i = 0; i < length; i++)
    {
        std::cout << static_cast<char>(av[i]);
    }

    // Keep the console window open until the user presses Enter.
    std::cin.get();
    return 0;
}
10.CPU,GPU单值计算效率测试
案例:
#include <iostream>
#include <amp.h>
#include <WinBase.h>
#define COUNT 100000
float nickName_GPU[COUNT];
float nickName_CPU[COUNT];
// Adds up the integers 0, 1, ..., num - 1 in double precision and returns
// the total (0 when num <= 0).
// restrict(amp) limits this function to accelerator code, so it can only be
// called from other amp-restricted code such as a parallel_for_each kernel.
double rungpu(int num) restrict(amp)
{
    double total = 0;
    int i = 0;
    while (i < num)
    {
        total += i;
        ++i;
    }
    return total;
}
// Adds up the integers 0, 1, ..., num - 1 in double precision and returns
// the total (0 when num <= 0). This is the single-value workload used for
// the CPU side of the comparison.
// restrict(cpu) limits this function to ordinary CPU code.
double runcpu(int num) restrict(cpu)
{
    double sum = 0;
    for (int value = 0; value < num; ++value)
    {
        sum = sum + value;
    }
    return sum;
}
// Adds up the integers 0, 1, ..., num - 1 in double precision and returns
// the total (0 when num <= 0).
// restrict(amp, cpu) allows this function to be called from both
// accelerator code and ordinary CPU code.
double runcpugpu(int num) restrict(amp, cpu)
{
    double acc = 0;
    int k = 0;
    for (; k < num; k++)
    {
        acc += static_cast<double>(k);
    }
    return acc;
}
// Times one single-value computation on the accelerator and then on the
// CPU, printing each result and the elapsed time in milliseconds.
int main()
{
    LARGE_INTEGER freq;
    LARGE_INTEGER strt;
    LARGE_INTEGER ed;
    QueryPerformanceFrequency(&freq);

    // ---- GPU run: a one-element view, so the kernel is invoked once ----
    QueryPerformanceCounter(&strt);
    double dx[1] = { 0.0 };
    concurrency::array_view<double> myview(1, dx);
    concurrency::parallel_for_each(myview.extent,
        [=](concurrency::index<1> idx) restrict(amp)
    {
        myview[idx] += rungpu(1000000);
    });
    // Explicitly wait for the kernel and copy the result back into dx.
    myview.synchronize();
    printf("%f\n", dx[0]);
    QueryPerformanceCounter(&ed);
    // QuadPart is a 64-bit integer, so it must be printed with %lld;
    // passing it to %d is a format/argument mismatch.
    printf("GPU耗时: %lld 毫秒\r\n",
        static_cast<long long>((ed.QuadPart - strt.QuadPart) * 1000 / freq.QuadPart));

    // ---- CPU run of the same workload ----
    QueryPerformanceCounter(&strt);
    printf("%f\n", runcpu(1000000));
    QueryPerformanceCounter(&ed);
    printf("CPU耗时: %lld 毫秒\r\n",
        static_cast<long long>((ed.QuadPart - strt.QuadPart) * 1000 / freq.QuadPart));

    puts("测试结束");
    // Keep the console window open until a key is pressed.
    getchar();
    return 0;
}
运行结果:
案例2:
#include <iostream>
#include <amp.h>
#include <WinBase.h>
#define COUNT 3000
float nickName_GPU[COUNT];
float nickName_CPU[COUNT];
// Returns 0 + 1 + ... + (num - 1) as a double; yields 0 when num <= 0.
// restrict(amp) means the function is compiled for the accelerator and can
// only be called from amp-restricted code.
double rungpu(int num) restrict(amp)
{
    double result = 0;
    int next = 0;
    while (next < num)
    {
        result = result + next;
        next++;
    }
    return result;
}
// Returns 0 + 1 + ... + (num - 1) as a double; yields 0 when num <= 0.
// restrict(cpu) means the function is ordinary CPU code.
double runcpu(int num) restrict(cpu)
{
    // Operates on a single accumulated value.
    double accumulated = 0;
    for (int term = 0; term < num; ++term)
    {
        accumulated += static_cast<double>(term);
    }
    return accumulated;
}
// Returns 0 + 1 + ... + (num - 1) as a double; yields 0 when num <= 0.
// restrict(amp, cpu) makes the function callable from both accelerator
// code and ordinary CPU code.
double runcpugpu(int num) restrict(amp, cpu)
{
    double running = 0;
    int step = 0;
    for (; step < num; ++step)
    {
        running = running + step;
    }
    return running;
}
// Times the same per-element iteration over COUNT floats, first as a
// C++ AMP kernel and then as a plain CPU loop, prints both elapsed times in
// milliseconds, and finally checks that the two result arrays agree.
int main()
{
    LARGE_INTEGER freq;
    LARGE_INTEGER strt;
    LARGE_INTEGER ed;
    QueryPerformanceFrequency(&freq);

    // ---- GPU run: one kernel invocation per element of nickName_GPU ----
    QueryPerformanceCounter(&strt);
    concurrency::array_view<float> myView(COUNT, nickName_GPU);
    concurrency::parallel_for_each(myView.extent, [=](concurrency::index<1> idx) restrict(amp)
    {
        for (int i = 0; i < COUNT / 10; i++)
        {
            myView[idx] = (myView[idx] + 0.1f) / 2.3f;
        }
    });
    // Explicitly wait for the kernel and copy results back to nickName_GPU.
    myView.synchronize();
    QueryPerformanceCounter(&ed);
    // QuadPart is a 64-bit integer, so it must be printed with %lld;
    // passing it to %d is a format/argument mismatch.
    printf("GPU耗时: %lld 毫秒\r\n",
        static_cast<long long>((ed.QuadPart - strt.QuadPart) * 1000 / freq.QuadPart));

    // ---- CPU run of the identical computation on nickName_CPU ----
    QueryPerformanceCounter(&strt);
    for (int idx = 0; idx < COUNT; idx++)
    {
        for (int i = 0; i < COUNT / 10; i++)
        {
            nickName_CPU[idx] = (nickName_CPU[idx] + 0.1f) / 2.3f;
        }
    }
    QueryPerformanceCounter(&ed);
    printf("CPU耗时: %lld 毫秒\r\n",
        static_cast<long long>((ed.QuadPart - strt.QuadPart) * 1000 / freq.QuadPart));

    // Compare with a small absolute tolerance rather than exact equality:
    // accelerator float division is not guaranteed to be bit-identical to
    // the CPU's, so last-bit differences should not count as a mismatch.
    // NOTE(review): 1e-6 is chosen for values near 0.077 - confirm if the
    // workload changes.
    const float tolerance = 1e-6f;
    for (int idx = 0; idx < COUNT; idx++)
    {
        float diff = nickName_CPU[idx] - nickName_GPU[idx];
        if (diff < 0.0f)
        {
            diff = -diff;
        }
        // Written as !(<=) so that a NaN difference is reported as a mismatch.
        if (!(diff <= tolerance))
        {
            puts("CPU和GPU的计算结果不相符!");
            getchar();
            return 0;
        }
    }

    puts("测试结束");
    // Keep the console window open until a key is pressed.
    getchar();
    return 0;
}
运行结果:
标签:style blog http color os 使用 io ar for
原文地址:http://blog.csdn.net/tototuzuoquan/article/details/38964739