各种形式的系数矩阵存储方式及其与一个向量的乘法计算

时间：2017-10-18 18:31:04 阅读：211 评论：0 收藏：0 [点我收藏+]
标签：create end seed 需要打印 ntc malloc tcp das
先放上完整代码，以后填坑。
  1 #include <stdio.h>
  2 #include <stdlib.h>
  3 #include <malloc.h>
  4 #include <math.h>
  5 #include <time.h>
  6 #include "cuda_runtime.h"
  7 #include "device_launch_parameters.h"
  8 
  9 #define ROW         (256+79)
 10 #define COL         (256+83)
 11 #define EPSILON     0.1         // 系数矩阵非零元素占比
 12 #define NON_ZEROS   ROW * COL   // 非零元素上限（用于分配MAT转ELL时的临时变量大小）
 13 #define THREAD_SIZE 256
 14 #define SEED        1           // (unsigned int)time(MULL)
 15 #define MAX(x,y)    ((x>y)?x:y)
 16 #define CEIL(x,y)   (int)(( x - 1 ) /  y + 1)
 17 #define INT         false        // 使用的数字格式    
 18 
 19 #if INT
 20 typedef int format;
 21 #else
 22 typedef float format;
 23 #endif
 24 
 25 // 矩阵存储格式
 26 typedef struct          // Matrix 顺序格式
 27 {
 28     int     row;        // 行数
 29     int     col;        // 列数
 30     int     count;      // 元素个数（计算时用不到，而是用于转换）
 31     format  *data;      // 实际元素的值
 32 }
 33 MAT;
 34 
 35 typedef struct          // Compressed Sparse Row 格式
 36 {
 37     int     row;        // 行数
 38     int     col;        // 列数
 39     format  *data;      // 实际元素的值
 40     int     *index;     // 元素的列号
 41     int     *ptr;       // 矩阵的行指针 
 42 }
 43 CSR;
 44 
 45 typedef struct          // ELL 格式
 46 {
 47     int     row;        // 行数
 48     int     col;        // 列数
 49     format  *data;      // 实际元素的值
 50     int     *index;     // 元素的列号
 51 }
 52 ELL;
 53 
 54 typedef struct          // Coordinate 格式
 55 {
 56     int     row;        // 行数
 57     int     col;        // 列数
 58     int     count;      // 元素个数
 59     int     *row_index; // 行向量
 60     int     *col_index; // 列向量
 61     format  *data;      // 实际元素的值
 62 }
 63 COO;
 64 
 65 // 通用函数
 66 void checkNULL(const void *input)
 67 {
 68     if (input == NULL)
 69     {
 70         printf("\n\tfind a NULL!");
 71         exit(1);
 72     }
 73     return;
 74 }
 75 
 76 void checkCudaError(cudaError input)
 77 {
 78     if (input != cudaSuccess)
 79     {
 80         printf("\n\tfind a cudaError!");
 81         exit(1);
 82     }
 83     return;
 84 }
 85 
 86 int checkResult(const format * in1, const format * in2, const int length)
 87 // 注意返回值为0（两向量相等）或者“值不等的元素下标加一”（防止0号元素就不相等），返回后若想使用该下标则需要减1
 88 {
 89     int i;
 90     for (i = 0; i < length; i++)
 91     {
 92         if (in1[i] != in2[i])
 93             return i + 1;
 94     }
 95     return 0;
 96 }
 97 
 98 // 打印矩阵
 99 void printMAT(MAT *in)// 打印MAT格式的矩阵
100 {
101     int i;
102     printf("\n\t");
103     for (i = 0; i < in->row * in->col; i++)
104     {
105 #if INT
106         printf("%d ", in->data[i]);
107 #else
108         printf("%.2f ", in->data[i]);
109 #endif
110         if ((i + 1) % in->col == 0)
111             printf("\n\t");
112     }
113     printf("\n");
114     return;
115 }
116 
117 void printCSR(const CSR *in)// 打印CSR格式的矩阵
118 {
119     int i;
120     printf("\n\t%d,%d", in->row, in->col);
121     printf("\n\t");
122     for (i = 0; i < in->ptr[in->row]; i++)
123 #if INT
124         printf("%d ", in->data[i]);
125 #else
126         printf("%.2f ", in->data[i]);
127 #endif
128     printf("\n\t");
129     for (i = 0; i < in->ptr[in->row]; i++)
130         printf("%d ", in->index[i]);
131     printf("\n\t");
132     for (i = 0; i < in->row + 1; i++)
133         printf("%d ", in->ptr[i]);
134     printf("\n\n");
135     return;
136 }
137 
138 void printELL(const ELL *in)// 打印ELL格式的矩阵
139 {
140     int i;
141     printf("\n\t%d,%d", in->row, in->col);
142     printf("\n\t");
143     for (i = 0; i < in->row * in->col; i++)
144     {
145 #if INT
146         printf("%d ", in->data[i]);
147 #else
148         printf("%.2f ", in->data[i]);
149 #endif
150         if ((i + 1) % in->col == 0)
151             printf("\n\t");
152     }
153     printf("\n\t");
154     for (i = 0; i < in->row * in->col; i++)
155     {
156         printf("%d ", in->index[i]);
157         if ((i + 1) % in->col == 0)
158             printf("\n\t");
159     }
160     printf("\n\n");
161     return;
162 }
163 
164 void printCOO(const COO *in)// 打印ELL格式的矩阵
165 {
166     int i;
167     printf("\n\t%d,%d,%d", in->row, in->col, in->count);
168     printf("\n\t");
169     for (i = 0; i < in->count; i++)
170     {
171 #if INT
172         printf("%d ", in->data[i]);
173 #else
174         printf("%.2f ", in->data[i]);
175 #endif
176     }
177     printf("\n\t");
178     for (i = 0; i < in->count; i++)
179     {
180         printf("%d ", in->row_index[i]);
181         printf("%d ", in->col_index[i]);
182     }
183     printf("\n\n");
184     return;
185 }
186 
187 // 矩阵初始化与清理（有问题）
188 MAT *initializeMAT(const int row, const int col, const bool isCPU)// 初始化MAT
189 {
190     MAT *temp;
191     if (isCPU)
192     {
193         checkNULL(temp = (MAT *)malloc(sizeof(MAT)));
194         temp->row = row;
195         temp->col = col;
196         temp->count = 0;
197         checkNULL(temp->data = (format *)malloc(sizeof(format) * row * col));
198     }
199     else
200     {
201         checkCudaError(cudaMalloc((void **)&temp, sizeof(MAT)));
202         temp->row = row;
203         temp->col = col;
204         temp->count = 0;
205         checkCudaError(cudaMalloc((void **)&temp->data, sizeof(format) * row * col));
206     }
207     return temp;
208 }
209 
210 int countMAT(MAT *in)// 统计MAT形式的矩阵中的非零元素个数
211 {
212     if (in == NULL)
213         return -1;
214 
215     int i, out;
216     for (i = out = 0; i < in->row * in->col; i++)
217         out += (in->data[i] == 0);
218     return out;
219 }
220 
221 int freeMAT(MAT *in, const bool isCPU)// 释放MAT
222 {
223     checkNULL(in);
224     if (isCPU)
225     {
226         free(in->data);
227         free(in);
228     }
229     else
230     {
231         cudaFree(in->data);
232         cudaFree(in);
233     }
234     return 0;
235 }
236 
237 CSR * initializeCSR(const int row, const int col, const int count, const bool isCPU)// 初始化CSR
238 {
239     CSR *temp;
240     if (isCPU)
241     {
242         checkNULL(temp = (CSR *)malloc(sizeof(CSR)));
243         temp->row = row;
244         temp->col = col;
245         checkNULL(temp->data = (format *)malloc(sizeof(format) * count));
246         checkNULL(temp->index = (int *)malloc(sizeof(int) * count));
247         checkNULL(temp->ptr = (int *)malloc(sizeof(int) * (row + 1)));
248     }
249     else
250     {
251         checkCudaError(cudaMalloc((void **)&temp, sizeof(CSR)));
252         //temp->row = row;
253         //temp->col = col;
254         checkCudaError(cudaMalloc((void **)&temp->data, sizeof(format) * count));
255         checkCudaError(cudaMalloc((void **)&temp->index, sizeof(int) * count));
256         checkCudaError(cudaMalloc((void **)&temp->ptr, sizeof(int) * (row + 1)));
257     }
258     return temp;
259 }
260 
261 int freeCSR(CSR *in, const bool isCPU)// 释放CSR
262 {
263     checkNULL(in);
264     if (isCPU)
265     {
266         free(in->data);
267         free(in->index);
268         free(in->ptr);
269         free(in);
270     }
271     else
272     {
273         cudaFree(in->data);
274         cudaFree(in->index);
275         cudaFree(in->ptr);
276         cudaFree(in);
277     }
278     return 0;
279 }
280 
281 ELL * initializeELL(const int row, const int col, const bool isCPU)// 初始化ELL
282 {
283     ELL *temp;
284     if (isCPU)
285     {
286         checkNULL(temp = (ELL *)malloc(sizeof(ELL)));
287         temp->row = row;
288         temp->col = col;
289         checkNULL(temp->data = (format *)malloc(sizeof(format) * row * col));
290         checkNULL(temp->index = (int *)malloc(sizeof(int) *row * col));
291     }
292     else
293     {
294         checkCudaError(cudaMalloc((void **)&temp, sizeof(ELL)));
295         //temp->row = row;
296         //temp->col = col;
297         checkCudaError(cudaMalloc((void **)&temp->data, sizeof(format) * row * col));
298         checkCudaError(cudaMalloc((void **)&temp->data, sizeof(int) * row * col));
299     }
300     return temp;
301 }
302 
303 int freeELL(ELL *in, const bool isCPU)// 释放ELL
304 {
305     checkNULL(in);
306     if (isCPU)
307     {
308         free(in->data);
309         free(in->index);
310         free(in);
311     }
312     else
313     {
314         cudaFree(in->data);
315         cudaFree(in->index);
316         cudaFree(in);
317     }
318     return 0;
319 }
320 
321 COO * initializeCOO(const int row, const int col, const int count, const bool isCPU)// 初始化COO
322 {
323     COO * temp;
324     if (isCPU)
325     {
326         checkNULL(temp = (COO *)malloc(sizeof(COO)));
327         temp->row = row;
328         temp->col = col;
329         temp->count = count;
330         checkNULL(temp->data = (format *)malloc(sizeof(format) * count));
331         checkNULL(temp->row_index = (int *)malloc(sizeof(int) * count));
332         checkNULL(temp->col_index = (int *)malloc(sizeof(int) * count));
333     }
334     else
335     {
336         checkCudaError(cudaMalloc((void **)&temp, sizeof(COO)));
337         //temp->row = row;
338         //temp->col = col;
339         //temp->count = count;
340         checkCudaError(cudaMalloc((void **)&temp->data, sizeof(format) * count));
341         checkCudaError(cudaMalloc((void **)&temp->row_index, sizeof(int) * count));
342         checkCudaError(cudaMalloc((void **)&temp->col_index, sizeof(int) * count));
343     }
344     return temp;
345 }
346 
347 int freeCOO(COO *in, const bool isCPU)// 释放COO
348 {
349     checkNULL(in);
350     if (isCPU)
351     {
352         free(in->data);
353         free(in->row_index);
354         free(in->col_index);
355         free(in);
356     }
357     else
358     {
359         cudaFree(in->data);
360         cudaFree(in->row_index);
361         cudaFree(in->col_index);
362         cudaFree(in);
363     }
364     return 0;
365 }
366 
367 // 矩阵格式的相互转化
368 int MATToCSR(const MAT *in, CSR *out)// MAT格式转化为CSR格式
369 {
370     if (in == NULL || out == NULL)
371         return 1;
372 
373     int i = 0, j = 0, k = 0;
374     out->row = in->row;                  // 行数和列数
375     out->col = in->col;
376     out->ptr[k++] = 0;
377     for (; i < in->row * in->col; i++)
378     {
379         if (in->data[i] != 0)                 // 找到非零元
380         {
381             if (j == in->count)         // 在out->data已经填满了的基础上又发现了非零元
382                 return 1;
383             out->data[j] = in->data[i];       // 填充非零元素
384             out->index[j] = i % in->col;  // 填充列号
385             j++;
386         }
387         if ((i + 1) % in->col == 0)       // 到了最后一列，写入行指针号
388             out->ptr[k++] = j;
389     }
390     return 0;
391 }
392 
393 int CSRToMAT(const CSR *in, MAT *out)// CSR转MAT
394 {
395     if (in == NULL || out == NULL)
396         return 1;
397 
398     int i, j;
399     out->row = in->row;
400     out->col = in->col;
401     for (i = 0; i < in->row; i++)
402     {
403         for (j = 0; j < in->col; j++)
404             out->data[i*in->col + j] = 0;
405         for (j = in->ptr[i]; j < in->ptr[i + 1]; j++)
406             out->data[i*in->col + in->index[j]] = in->data[j];
407     }
408     return 0;
409 }
410 
411 int MATToELL(const MAT *in, ELL *out)// MAT转ELL
412 {
413     if (in == NULL || out == NULL)
414         return 1;
415 
416     int i, j, k;
417     format temp_data[NON_ZEROS];
418     int temp_index[NON_ZEROS];
419     for (i = j = k = 0; i < in->row*in->col; i++)// 寻找最长的行，同时把in中每行的元素往左边推
420     {
421         temp_data[i] = 0;
422         temp_index[i] = 0;
423         if (in->data[i] != 0)                           // 找到非零元
424         {
425             temp_data[i / in->col*in->col + j] = in->data[i];   // 存放元素
426             temp_index[i / in->col*in->col + j] = i % in->col;  // 记录所在的列号
427             j++;                                        // j中为改行已经找到的非零元个数
428         }
429         if ((i + 1) % in->col == 0)                     // 找到了行末
430         {
431             k = MAX(j, k);                              // 更新最大非零元素个数
432             j = 0;                                      // 开始下一行之前清空j
433         }
434     }
435     out->row = k;                                       // 找到了原矩阵最大非零元个数
436     out->col = in->row;                                 // 新矩阵尺寸相当于原矩阵的转置
437     for (i = 0; i < in->row*in->col; i++)               // 使用新的列数重构原来的out
438     {
439         temp_data[i - i / in->col*(in->col - k)] = temp_data[i];// 将data和index往前挪
440         temp_index[i - i / in->col*(in->col - k)] = temp_index[i];
441         if (i >= out->row * k)
442         {
443             temp_data[i] = 0;
444             temp_index[i] = 0;
445         }
446     }
447     for (i = 0; i < out->row*out->col; i++)               // 转置
448     {
449         out->data[i] = temp_data[i % out->col * k + i / out->col];
450         out->index[i] = temp_index[i % out->col * k + i / out->col];
451     }
452     for (; i < in->row*in->col; i++)                     // 补0
453     {
454         out->data[i] = 0;
455         out->index[i] = 0;
456     }
457     return 0;
458 }
459 
460 int ELLToMAT(const ELL *in, MAT *out)// ELL转MAT
461 {
462     if (in == NULL || out == NULL)
463         return 1;
464     return 0;
465 }
466 
467 int MATToCOO(const MAT *in, COO *out)// MAT转COO
468 {
469     if (in == NULL || out == NULL)
470         return 1;
471 
472     int i, j;
473     out->row = in->row;
474     out->col = in->col;
475     for (i = j = 0; i < in->row * in->col; i++)
476     {
477         if (in->data[i] != 0)
478         {
479             out->data[j] = in->data[i];
480             out->row_index[j] = i / in->col;
481             out->col_index[j] = i % in->col;
482             j++;
483         }
484     }
485     out->count = j;
486     return 0;
487 }
488 
489 int COOToMAT(const COO *in, MAT *out)// COO转MAT
490 {
491     if (in == NULL || out == NULL)
492         return 1;
493 
494     int i;
495     out->row = in->row;
496     out->col = in->col;
497     for (i = 0; i < in->row * in->col; i++)
498         out->data[i] = 0;
499     for (i = 0; i < in->count; i++)
500         out->data[in->row_index[i] * in->col + in->col_index[i]] = in->data[i];
501     return 0;
502 }
503 
504 int CSRToELL(const CSR *in, ELL *out)// CSR转ELL
505 {
506     if (in == NULL || out == NULL)
507         return 1;
508     return 0;
509 }
510 
511 int ELLToCSR(const ELL *in, CSR *out)// ELL转CSR
512 {
513     if (in == NULL || out == NULL)
514         return 1;
515     return 0;
516 }
517 
518 int CSRToCOO(const CSR *in, COO *out)// CSR转COO
519 {
520     if (in == NULL || out == NULL)
521         return 1;
522     return 0;
523 }
524 
525 int COOToCSR(const COO *in, CSR *out)// COO转CSR
526 {
527     if (in == NULL || out == NULL)
528         return 1;
529     return 0;
530 }
531 
532 int ELLToCOO(const ELL *in, COO *out)// ELL转COO
533 {
534     if (in == NULL || out == NULL)
535         return 1;
536     return 0;
537 }
538 
539 int COOToELL(const COO *in, ELL *out)// COO转ELL
540 {
541     if (in == NULL || out == NULL)
542         return 1;
543 
544     return 0;
545 }
546 
547 // 各种形式的矩阵和一个向量的乘法
548 int dotMATCPU(const MAT *a, const MAT *x, MAT *y)// CPU MAT乘法
549 {
550     if (a == NULL || x == NULL || y == NULL || a->col != x->row)
551         return 1;
552 
553     int i;
554     format sum;
555     y->row = a->row;
556     y->col = x->col;
557     for (i = 0, sum = 0; i < a->row * a->col; i++)
558     {
559         sum += a->data[i] * x->data[i % a->col];
560         if ((i + 1) % a->col == 0)
561         {
562             y->data[i / a->col] = sum;
563             sum = 0;
564         }
565     }
566     return 0;
567 }
568 
569 int dotCSRCPU(const CSR *a, const MAT *x, MAT *y)// CPU CSR乘法
570 {
571     if (a == NULL || x == NULL || y == NULL || a->col != x->row)
572         return 1;
573 
574     int i, j;
575     format sum;
576     y->row = a->row;
577     y->col = x->col;
578     for (i = 0; i < a->row; i++)
579     {
580         for (sum = 0, j = a->ptr[i]; j < a->ptr[i + 1]; j++)
581             sum += a->data[j] * x->data[a->index[j]];
582         y->data[i] = sum;
583     }
584     return 0;
585 }
586 
587 int dotELLCPU(const ELL *a, const MAT *x, MAT *y)// CPU ELL乘法
588 {
589     if (a == NULL || x == NULL || y == NULL || a->col != x->row)
590         return 1;
591 
592     int i, j;
593     format sum;
594     y->row = a->row;
595     y->col = x->col;
596     for (i = 0; i<a->row; i++)
597     {
598         for (sum = 0, j = 0; j < a->col; j++)
599             sum += a->data[i + j*a->row] * x->data[a->index[i + j*a->row]];
600         y->data[i] = sum;
601     }
602     return 0;
603 }
604 
605 int dotCOOCPU(const COO *a, const MAT *x, MAT *y)// CPU COO乘法
606 {
607     if (a == NULL || x == NULL || y == NULL || a->col != x->row)
608         return 1;
609 
610     int i;
611     y->row = a->row;
612     y->col = x->col;
613     for (i = 0; i < a->row; i++)
614         y->data[i] = 0;
615     for (i = 0; i<a->count; i++)
616         y->data[a->row_index[i]] += a->data[i] * x->data[a->col_index[i]];
617     return 0;
618 }
619 
620 __global__ void dotMATGPU(const MAT *a, const MAT *x, MAT *y)// GPU MAT乘法
621 {
622     int id = blockIdx.x * blockDim.x + threadIdx.x;
623     if (id == 0)
624     {
625         y->row = a->row;
626         y->col = x->col;
627     }
628     if (id < a->row)
629     {
630         int i;
631         format sum = 0;
632         for (i = 0; i < a->col; i++)
633             sum += a->data[id * a->col + i] * x->data[i];
634         y->data[id] = sum;
635     }
636     return;
637 }
638 
639 __global__ void dotCSRGPU(const CSR *a, const MAT *x, MAT *y)// GPU CSR乘法
640 {
641     int id = blockIdx.x * blockDim.x + threadIdx.x;
642     if (id == 0)
643     {
644         y->row = a->row;
645         y->col = x->col;
646     }
647     if (id < a->row)
648     {
649         int j;
650         format sum;
651         for (sum = 0, j = a->ptr[id]; j < a->ptr[id + 1]; j++)
652             sum += a->data[j] * x->data[a->index[j]];
653         y->data[id] = sum;
654     }
655     return;
656 }
657 
658 __global__ void dotELLGPU(const ELL *a, const MAT *x, MAT *y)// GPU ELL乘法
659 {
660     int id = blockIdx.x * blockDim.x + threadIdx.x;
661     if (id == 0)
662     {
663         y->row = a->row;
664         y->col = x->col;
665     }
666     if (id < a->row)
667     {
668         int j;
669         format sum;
670         for (sum = 0, j = 0; j < a->col; j++)
671             sum += a->data[id + j*a->row] * x->data[a->index[id + j*a->row]];
672         y->data[id] = sum;
673     }
674     return;
675 }
676 
677 __global__ void dotCOOGPU(const COO *a, const MAT *x, MAT *y)// GPU COO乘法
678 {
679     int id = blockIdx.x * blockDim.x + threadIdx.x;
680     if (id == 0)
681     {
682         y->row = a->row;
683         y->col = x->col;
684     }
685     if (id < a->count)
686         atomicAdd(&(y->data[a->row_index[id]]), a->data[id] * x->data[a->col_index[id]]);
687     return;
688 }
689 
690 int test()// 测试矩阵打印和CPU计算的函数
691 {
692     int i;
693     MAT a, c, x, y;
694     CSR b;
695     ELL d;
696     COO e;
697 
698     a.row = 4, a.col = 5;
699     for (i = 0; i < a.row * a.col; i++)
700         a.data[i] = 0;
701     a.data[0] = 3, a.data[2] = 1, a.data[4] = 5, a.data[11] = 2;
702     a.data[12] = 4, a.data[13] = 1, a.data[15] = 1, a.data[18] = 1;
703 
704     x.row = a.col, x.col = 1;
705     for (i = 0; i < x.row; i++)
706         x.data[i] = i;
707 
708     printMAT(&a);           // MAT型
709 
710     MATToCSR(&a, &b);       // MAT转CSR
711     printCSR(&b);
712 
713     CSRToMAT(&b, &c);       // CSR转回MAT型
714     printMAT(&c);
715 
716     MATToELL(&a, &d);       // ELL型
717     printELL(&d);
718 
719     MATToCOO(&a, &e);       // MAT转COO
720     printCOO(&e);
721 
722     COOToMAT(&e, &c);       // COO转回MAT
723     printMAT(&c);
724 
725     dotMATCPU(&a, &x, &y);  // MAT乘法
726     printMAT(&y);
727 
728     dotCSRCPU(&b, &x, &y);  // CSR乘法
729     printMAT(&y);
730 
731     dotELLCPU(&d, &x, &y);  // ELL乘法
732     printMAT(&y);
733 
734     dotCOOCPU(&e, &x, &y);  // COO乘法
735     printMAT(&y);
736 
737     getchar();
738     return 0;
739 }
740 
741 int main()
742 {
743     int i;
744     MAT *h_aMAT, *d_aMAT, *h_xMAT, *d_xMAT, *h_yMAT_1, *h_yMAT_2, *d_yMAT;
745     CSR *h_aCSR, *d_aCSR;
746     ELL *h_aELL, *d_aELL;
747     COO *h_aCOO, *d_aCOO;
748 
749     clock_t timeCPU;
750     cudaEvent_t startGPU, stopGPU;
751     float timeGPU;
752     cudaEventCreate(&startGPU);
753     cudaEventCreate(&stopGPU);
754 
755     printf("\n\tStart! Matrix dimension:\t%d × %d", ROW, COL);
756 
757     h_aMAT = initializeMAT(ROW, COL, 1);
758     h_xMAT = initializeMAT(COL, 1, 1);
759     h_yMAT_1 = initializeMAT(ROW, 1, 1);
760     h_yMAT_2 = initializeMAT(ROW, 1, 1);
761 
762     // 初始化
763     timeCPU = clock();
764     srand(SEED);
765 #if INT
766     for (i = 0; i < COL * ROW; i++)
767         h_aMAT->data[i] = ((float)rand() < RAND_MAX * EPSILON) ? (rand() - RAND_MAX / 2) : 0;
768     h_aMAT->count = countMAT(h_aMAT);
769     for (i = 0; i < COL; i++)
770         h_xMAT->data[i] = (rand() - RAND_MAX / 2);
771     //h_xMAT->count = countMAT(h_xMAT);
772 #else
773     for (i = 0; i < COL * ROW; i++)
774         h_aMAT->data[i] = ((float)rand() < RAND_MAX * EPSILON) ? ((float)rand() / RAND_MAX) : 0;
775     h_aMAT->count = countMAT(h_aMAT);
776     for (i = 0; i < COL; i++)
777         h_xMAT->data[i] = ((float)rand() / RAND_MAX);
778     //h_xMAT->count = countMAT(h_xMAT);
779 #endif
780     printf("\n\tInitialized! Time:\t\t%8.3f ms\n", (format)(clock() - timeCPU));
781 
782     //CPU计算部分
783     //MAT 格式
784     timeCPU = clock();
785     dotMATCPU(h_aMAT, h_xMAT, h_yMAT_1);
786     timeCPU = clock() - timeCPU;
787     printf("\n\tdotMATCPU time:\t%8.3f ms\n", (float)timeCPU);
788 
789     // CSR格式                    
790     h_aCSR = initializeCSR(ROW, COL, h_aMAT->count, 1);
791     MATToCSR(h_aMAT, h_aCSR);
792     timeCPU = clock();
793     dotCSRCPU(h_aCSR, h_xMAT, h_yMAT_2);
794     timeCPU = clock() - timeCPU;
795     printf("\n\tdotCSRCPU time:\t%8.3f ms\n", (float)timeCPU);
796     if (i = checkResult(h_yMAT_1->data, h_yMAT_2->data, ROW))
797         printf("\n\tError at i = %d\n\tcpu_out1[i] = %10f, h_yMAT_2[i] = %10f\n",
798             i - 1, (float)h_yMAT_1->data[i - 1], (float)h_yMAT_2->data[i - 1]);
799     else
800         printf("\n\tdotCSRCPU correct!\n");
801 
802     // ELL格式
803     h_aELL = initializeELL(ROW, COL, 1);
804     MATToELL(h_aMAT, h_aELL);
805     timeCPU = clock();
806     dotELLCPU(h_aELL, h_xMAT, h_yMAT_2);
807     timeCPU = clock() - timeCPU;
808     printf("\n\tdotCELLPU time:\t%8.3f ms\n", (float)timeCPU);
809     if (i = checkResult(h_yMAT_1->data, h_yMAT_2->data, ROW))
810         printf("\n\tError at i = %d\n\tcpu_out1[i] = %10f, h_yMAT_2[i] = %10f\n",
811             i - 1, (float)h_yMAT_1->data[i - 1], (float)h_yMAT_2->data[i - 1]);
812     else
813         printf("\n\tdotELLCPU correct!\n");
814 
815     // COO格式
816     h_aCOO = initializeCOO(ROW, COL, h_aMAT->count, 1);
817     MATToCOO(h_aMAT, h_aCOO);
818     timeCPU = clock();
819     dotCOOCPU(h_aCOO, h_xMAT, h_yMAT_2);
820     timeCPU = clock() - timeCPU;
821     printf("\n\tdotCOOCPU time:\t%8.3f ms\n", (float)timeCPU);
822     if (i = checkResult(h_yMAT_1->data, h_yMAT_2->data, ROW))
823         printf("\n\tError at i = %d\n\tcpu_out1[i] = %10f, h_yMAT_2[i] = %10f\n",
824             i - 1, (float)h_yMAT_1->data[i - 1], (float)h_yMAT_2->data[i - 1]);
825     else
826         printf("\n\tdotCOOCPU correct!\n");
827 
828     // GPU计算部分
829     d_aMAT = initializeMAT(ROW, COL, 0);
830     d_xMAT = initializeMAT(ROW, COL, 0);
831     d_yMAT = initializeMAT(ROW, COL, 0);
832     d_aCSR = initializeCSR(ROW, COL, h_aMAT->count, 0);
833     d_aELL = initializeELL(ROW, COL, 0);
834     d_aCOO = initializeCOO(ROW, COL, h_aMAT->count, 0);
835 
836     cudaMemcpy(d_aMAT, h_aMAT, sizeof(MAT), cudaMemcpyHostToDevice);
837     cudaMemcpy(d_xMAT, h_xMAT, sizeof(format) * COL, cudaMemcpyHostToDevice);
838     cudaMemcpy(d_aCSR, h_aCSR, sizeof(CSR), cudaMemcpyHostToDevice);
839     cudaMemcpy(d_aELL, h_aELL, sizeof(CSR), cudaMemcpyHostToDevice);
840     cudaMemcpy(d_aCOO, h_aCOO, sizeof(CSR), cudaMemcpyHostToDevice);
841 
842     // MAT格式
843     cudaMemset(d_yMAT, 0, sizeof(MAT));
844     cudaEventRecord(startGPU, 0);
845     dotMATGPU << <CEIL(ROW, THREAD_SIZE), THREAD_SIZE >> > (d_aMAT, d_xMAT, d_yMAT);
846     cudaMemcpy(h_yMAT_2, d_yMAT, sizeof(format) * ROW, cudaMemcpyDeviceToHost);
847     cudaDeviceSynchronize();
848     cudaEventRecord(stopGPU, 0);
849     cudaEventSynchronize(stopGPU);
850     cudaEventElapsedTime(&timeGPU, startGPU, stopGPU);
851     printf("\n\tdotMATGPU time:\t%8.3f ms\n", timeGPU);
852     if (i = checkResult(h_yMAT_1->data, h_yMAT_2->data, ROW))
853         printf("\n\tError at i = %d\n\tcpu_out[i] = %10f, gpu_out[i] = %10f\n",
854             i - 1, (float)h_yMAT_1->data[i - 1], (float)h_yMAT_2->data[i - 1]);
855     else
856         printf("\n\tdotMATGPU correct!\n");
857 
858     // CSR格式
859     cudaMemset(d_yMAT, 0, sizeof(MAT));
860     cudaEventRecord(startGPU, 0);
861     dotCSRGPU << <CEIL(ROW, THREAD_SIZE), THREAD_SIZE >> > (d_aCSR, d_xMAT, d_yMAT);
862     cudaMemcpy(h_yMAT_2, d_yMAT, sizeof(format) * ROW, cudaMemcpyDeviceToHost);
863     cudaDeviceSynchronize();
864     cudaEventRecord(stopGPU, 0);
865     cudaEventSynchronize(stopGPU);
866     cudaEventElapsedTime(&timeGPU, startGPU, stopGPU);
867     printf("\n\tdotCSRGPU time:\t%8.3f ms\n", timeGPU);
868     if (i = checkResult(h_yMAT_1->data, h_yMAT_2->data, ROW))
869         printf("\n\tError at i = %d\n\tcpu_out[i] = %10f, gpu_out[i] = %10f\n",
870             i - 1, (float)h_yMAT_1->data[i - 1], (float)h_yMAT_2->data[i - 1]);
871     else
872         printf("\n\tdotCSRGPU correct!\n");
873 
874     // ELL格式
875     cudaMemset(d_yMAT, 0, sizeof(MAT));
876     cudaEventRecord(startGPU, 0);
877     dotELLGPU << <CEIL(ROW, THREAD_SIZE), THREAD_SIZE >> > (d_aELL, d_xMAT, d_yMAT);
878     cudaMemcpy(h_yMAT_2, d_yMAT, sizeof(format) * ROW, cudaMemcpyDeviceToHost);
879     cudaDeviceSynchronize();
880     cudaEventRecord(stopGPU, 0);
881     cudaEventSynchronize(stopGPU);
882     cudaEventElapsedTime(&timeGPU, startGPU, stopGPU);
883     printf("\n\tdotELLGPU time:\t%8.3f ms\n", timeGPU);
884     if (i = checkResult(h_yMAT_1->data, h_yMAT_2->data, ROW))
885         printf("\n\tError at i = %d\n\tcpu_out[i] = %10f, gpu_out[i] = %10f\n",
886             i - 1, (float)h_yMAT_1->data[i - 1], (float)h_yMAT_2->data[i - 1]);
887     else
888         printf("\n\tdotELLGPU correct!\n");
889 
890     // COO格式
891     cudaMemset(d_yMAT, 0, sizeof(MAT));
892     cudaEventRecord(startGPU, 0);
893     dotCOOGPU << <CEIL(h_aMAT->count, THREAD_SIZE), THREAD_SIZE >> > (d_aCOO, d_xMAT, d_yMAT);
894     cudaMemcpy(h_yMAT_2, d_yMAT, sizeof(format) * ROW, cudaMemcpyDeviceToHost);
895     cudaDeviceSynchronize();
896     cudaEventRecord(stopGPU, 0);
897     cudaEventSynchronize(stopGPU);
898     cudaEventElapsedTime(&timeGPU, startGPU, stopGPU);
899     printf("\n\tdotCOOGPU time:\t%8.3f ms\n", timeGPU);
900     if (i = checkResult(h_yMAT_1->data, h_yMAT_2->data, ROW))
901         printf("\n\tError at i = %d\n\tcpu_out[i] = %10f, gpu_out[i] = %10f\n",
902             i - 1, (float)h_yMAT_1->data[i - 1], (float)h_yMAT_2->data[i - 1]);
903     else
904         printf("\n\tdotCOOGPU correct!\n");
905 
906     freeMAT(h_aMAT, 1);
907     freeMAT(h_xMAT, 1);
908     freeMAT(h_yMAT_1, 1);
909     freeMAT(h_yMAT_2, 1);
910     freeCSR(h_aCSR, 1);
911     freeELL(h_aELL, 1);
912     freeCOO(h_aCOO, 1);
913     freeMAT(d_aMAT, 0); 
914     freeMAT(d_xMAT, 0); 
915     freeMAT(d_yMAT, 0); 
916     freeCSR(d_aCSR, 0); 
917     freeELL(d_aELL, 0); 
918     freeCOO(d_aCOO, 0); 
919     getchar();
920     return 0;
921 }
标签：create end seed 需要打印 ntc malloc tcp das
原文地址：http://www.cnblogs.com/cuancuancuanhao/p/7687855.html
踩
(0)
评论一句话评论（0）
分享档案
更多>
2021年07月29日 (22)
2021年07月28日 (40)
2021年07月27日 (32)
2021年07月26日 (79)
2021年07月23日 (29)
2021年07月22日 (30)
2021年07月21日 (42)
2021年07月20日 (16)
2021年07月19日 (90)
2021年07月16日 (35)
周排行