标签:
参考自:https://msdn.microsoft.com/en-us/library/hh265136.aspx
#include <amp.h>
#include <amp_math.h>
#include <iostream>
#include <vector>

using namespace concurrency;

// Number of elements used by the first sample (CppAmpMethod).
const int size = 5;

// C++ AMP sample: adds two arrays element-wise on the accelerator and prints
// each resulting element on its own line.
void CppAmpMethod() {
    int aCPP[] = { 1, 2, 3, 4, 5 };
    int bCPP[] = { 6, 7, 8, 9, 10 };
    int sumCPP[size];

    // Create C++ AMP objects.
    array_view<const int, 1> a(size, aCPP);
    array_view<const int, 1> b(size, bCPP);
    array_view<int, 1> sum(size, sumCPP);
    // sumCPP is uninitialized and is fully overwritten by the kernel below,
    // so its current contents are discarded.
    sum.discard_data();

    parallel_for_each(
        // Define the compute domain, which is the set of threads that are created.
        sum.extent,
        // Define the code to run on each thread on the accelerator.
        [=](index<1> idx) restrict(amp) {
            sum[idx] = a[idx] + b[idx];
        }
    );

    // Print the results. The expected output is "7, 9, 11, 13, 15".
    for (int i = 0; i < size; i++) {
        std::cout << sum[i] << "\n";
    }
}

// index usage example 1: addresses one element of a one-dimensional array_view.
void index1() {
    int aCPP[] = { 1, 2, 3, 4, 5 };
    array_view<int, 1> a(5, aCPP);

    index<1> idx(2);

    std::cout << a[idx] << "\n";
    // Output: 3
}

// index usage example 2: addresses one element of a 2 x 3 array_view.
void index2() {
    int aCPP[] = { 1, 2, 3, 4, 5, 6 };
    array_view<int, 2> a(2, 3, aCPP);

    index<2> idx(1, 2);

    std::cout << a[idx] << "\n";
    // Output: 6
}

// index usage example 3: addresses one element of a 2 x 3 x 4 array_view.
void index3() {
    int aCPP[] = {
        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 };
    array_view<int, 3> a(2, 3, 4, aCPP);

    // Specifies the element at depth 0, row 1, column 3.
    index<3> idx(0, 1, 3);

    std::cout << a[idx] << "\n";
    // Output: 8
}

// extent usage example 1: prints the length of each dimension of a
// three-dimensional array_view.
void extent1() {
    int aCPP[] = {
        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12 };
    // There are 3 rows and 4 columns, and the depth is two.
    array_view<int, 3> a(2, 3, 4, aCPP);

    std::cout << "The number of colmns is " << a.extent[2] << "\n";
    std::cout << "The number of rows is " << a.extent[1] << "\n";
    std::cout << "The depth is " << a.extent[0] << "\n";
    std::cout << "Length in most significant dimension is " << a.extent[0] << "\n";
}

// extent usage example 2: builds an array_view from an explicit extent object
// and prints the length of each dimension.
void extent2() {
    int aCPP[] = {
        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
        13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24 };
    extent<3> e(2, 3, 4);
    array_view<int, 3> a(e, aCPP);

    std::cout << "The num of columns is " << a.extent[2] << "\n";
    std::cout << "The num of rows is " << a.extent[1] << "\n";
    std::cout << "The depth is " << a.extent[0] << "\n";
}

// array example: fills a concurrency::array from a vector, multiplies every
// element by 10 in a kernel, copies the result back into the vector and
// prints it.
void array1() {
    std::vector<int> data(5);
    for (int count = 0; count < 5; count++) {
        data[count] = count;
    }

    array<int, 1> a(5, data.begin(), data.end());

    parallel_for_each(
        a.extent,
        // The array object itself is captured by reference (&a).
        [=, &a](index<1> idx) restrict(amp) {
            a[idx] = a[idx] * 10;
        }
    );

    // Assigning the array to the vector copies the results back into data.
    data = a;
    for (int i = 0; i < 5; i++) {
        std::cout << data[i] << "\n";
    }
}

// Sharing memory with the CPU: creates arrays with different CPU access types
// on the default accelerator, or reports that shared memory is unsupported.
void shareMemory1() {
    accelerator acc = accelerator(accelerator::default_accelerator);

    // Early out if the default accelerator doesn't support shared memory.
    if (!acc.supports_cpu_shared_memory) {
        std::cout << "The defult acclerator does not support shared memory " << std::endl;
        return;
    }

    // Override the default CPU access type.
    //acc.default_cpu_access_type = access_type_read_write;

    // Create an accelerator_view from the default accelerator.
    // The accelerator_view inherits its default_cpu_access_type from acc.
    accelerator_view acc_v = acc.default_view;

    // Create an extent object to size the arrays.
    extent<1> ex(10);

    // Input array that can be written on the CPU.
    array<int, 1> arr_w(ex, acc_v, access_type_write);

    // Output array that can be read on the CPU.
    array<int, 1> arr_r(ex, acc_v, access_type_read);

    // Read-write array that can be both written to and read from on the CPU.
    array<int, 1> arr_rm(ex, acc_v, access_type_read_write);
}

// parallel_for_each usage example 1: element-wise addition written directly
// inside the kernel lambda; prints each sum on its own line.
void AddArrays() {
    int aCPP[] = { 1, 2, 3, 4, 5 };
    int bCPP[] = { 6, 7, 8, 9, 10 };
    int sumCPP[] = { 0, 0, 0, 0, 0 };

    array_view<int, 1> a(5, aCPP);
    array_view<int, 1> b(5, bCPP);
    array_view<int, 1> sum(5, sumCPP);

    parallel_for_each(
        sum.extent,
        [=](index<1> idx) restrict(amp) {
            sum[idx] = a[idx] + b[idx];
        }
    );

    for (int i = 0; i < 5; i++) {
        std::cout << sum[i] << "\n";
    }
}

// Kernel helper: stores a[idx] + b[idx] into sum[idx].
// Declared restrict(amp) so that it can be called from an amp-restricted lambda.
void AddElements(index<1> idx,
                 array_view<int, 1> sum,
                 array_view<int, 1> a,
                 array_view<int, 1> b) restrict(amp) {
    sum[idx] = a[idx] + b[idx];
}

// parallel_for_each usage example 2: same addition as AddArrays, but the
// kernel lambda delegates to the restrict(amp) function AddElements.
void AddArraysWitchFunction() {
    int aCPP[] = { 1, 2, 3, 4, 5 };
    int bCPP[] = { 6, 7, 8, 9, 10 };
    int sumCPP[] = { 0, 0, 0, 0, 0 };

    array_view<int, 1> a(5, aCPP);
    array_view<int, 1> b(5, bCPP);
    array_view<int, 1> sum(5, sumCPP);

    parallel_for_each(
        sum.extent,
        [=](index<1> idx) restrict(amp) {
            AddElements(idx, sum, a, b);
        }
    );

    for (int i = 0; i < 5; i++) {
        std::cout << sum[i] << "\n";
    }
}

// Accelerating code with two-dimensional tiling: splits a 4 x 6 matrix into
// 2 x 2 tiles, writes each tile's integer average into every cell of that
// tile, and prints the resulting matrix.
void acceleratingCode() {
    // Sample data:
    int sampledata[] = {
        2, 2, 9, 7, 1, 4,
        4, 4, 8, 8, 3, 4,
        1, 5, 1, 2, 5, 2,
        6, 8, 3, 2, 7, 2 };

    // The tiles:
    // 2 2    9 7    1 4
    // 4 4    8 8    3 4
    //
    // 1 5    1 2    5 2
    // 6 8    3 2    7 2

    // Averages:
    int averagedata[] = {
        0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0,
    };

    array_view<int, 2> sample(4, 6, sampledata);
    array_view<int, 2> average(4, 6, averagedata);

    parallel_for_each(
        // Create threads for sample.extent and divide the extent into 2 x 2 tiles.
        sample.extent.tile<2, 2>(),
        [=](tiled_index<2, 2> idx) restrict(amp) {
            // Create a 2 x 2 array to hold the values in this tile.
            tile_static int nums[2][2];

            // Copy the values for the tile into the 2 x 2 array.
            // The local indices are swapped relative to the tile layout; that
            // is harmless here because all four entries are summed below.
            nums[idx.local[1]][idx.local[0]] = sample[idx.global];

            // When all the threads have executed and the 2 x 2 array is
            // complete, find the average.
            idx.barrier.wait();
            int sum = nums[0][0] + nums[0][1] + nums[1][0] + nums[1][1];

            // Copy the average into the array_view.
            average[idx.global] = sum / 4;
        }
    );

    for (int i = 0; i < 4; i++) {
        for (int j = 0; j < 6; j++) {
            std::cout << average(i, j) << " ";
        }
        std::cout << "\n";
    }

    // Output:
    // 3 3 8 8 3 3
    // 3 3 8 8 3 3
    // 5 5 2 2 4 4
    // 5 5 2 2 4 4
}

// parallel_for_each usage example 3: using the accelerator math library.
// Replaces every element with its base-10 logarithm and prints the results.
void MathExample() {
    double numbers[] = { 1.0, 10.0, 60.0, 100.0, 600.0, 1000.0 };
    array_view<double, 1> logs(6, numbers);

    parallel_for_each(
        logs.extent,
        [=](index<1> idx) restrict(amp) {
            // NOTE(review): fast_math is documented as single-precision, so the
            // double operand is presumably narrowed to float here; consider
            // precise_math::log10 if full double precision is wanted - confirm.
            logs[idx] = concurrency::fast_math::log10(logs[idx]);
        }
    );

    for (int i = 0; i < 6; i++) {
        std::cout << logs[i] << "\n";
    }
}

// Entry point: runs the first sample; the remaining samples can be enabled by
// uncommenting the corresponding call.
int main() {
    CppAmpMethod();
    //index1();
    //index2();
    //index3();
    //extent1();
    //extent2();
    //array1();
    //shareMemory1();
    //AddArrays();
    //AddArraysWitchFunction();
    //acceleratingCode();
    //MathExample();

    // Return 0 so that a successful run is not reported as a failure.
    return 0;
}
标签:
原文地址:http://www.cnblogs.com/WuhanLiukai/p/4545453.html