标签:tde following amp dac glob duration sign sync roo
MPS 的使用方法:主要参考官方手册 https://docs.nvidia.com/deploy/pdf/CUDA_Multi_Process_Service_Overview.pdf
这里记录一下我用到的命令:
启动 mps-control
# Restrict the CUDA devices visible to processes started from this shell to GPU 0.
export CUDA_VISIBLE_DEVICES=0
# Directory used for the MPS pipes (per the variable name; see the MPS overview PDF).
export CUDA_MPS_PIPE_DIRECTORY=/tmp/nvidia-mps
# Directory where the MPS log files are written (per the variable name).
export CUDA_MPS_LOG_DIRECTORY=/tmp/nvidia-log
# Start the MPS control program with -d (daemon mode, per the MPS overview PDF).
nvidia-cuda-mps-control -d
关闭mps-control
# Pipe the "quit" command into nvidia-cuda-mps-control to shut MPS down.
# NOTE(review): presumably this has to run with the same CUDA_MPS_PIPE_DIRECTORY
# that was exported when the daemon was started - confirm against the MPS manual.
echo quit | nvidia-cuda-mps-control
执行完成后,可以到 CUDA_MPS_LOG_DIRECTORY 指定的目录(此处为 /tmp/nvidia-log)下查看 log 文件。
下面用于验证 MPS 的程序摘抄自这个链接:
http://stackoverflow.com/questions/34709749/how-do-i-use-nvidia-multi-process-service-mps-to-run-multiple-non-mpi-cuda-app
$ cat t1034.cu
// t1034.cu
// Launches a single-thread kernel that busy-waits for roughly the requested
// number of seconds, then prints the wall-clock time the launch took.
// Usage: ./t1034 [seconds]   (values outside 1..MAX_DELAY-1 fall back to 5)
#include <stdio.h>
#include <stdlib.h>

// Exclusive upper bound, in seconds, for a delay given on the command line.
#define MAX_DELAY 30

// Checks cudaGetLastError(); on failure prints msg, the CUDA error string and
// the source location to stderr, then terminates the process with exit(1).
#define cudaCheckErrors(msg) \
  do { \
    cudaError_t __err = cudaGetLastError(); \
    if (__err != cudaSuccess) { \
      fprintf(stderr, "Fatal error: %s (%s at %s:%d)\n", \
              msg, cudaGetErrorString(__err), \
              __FILE__, __LINE__); \
      fprintf(stderr, "*** FAILED - ABORTING\n"); \
      exit(1); \
    } \
  } while (0)

#include <time.h>
#include <sys/time.h>
#define USECPSEC 1000000ULL

// Returns the current gettimeofday() time in microseconds minus `start`.
// Call with 0 to take a timestamp; call with that timestamp to get the
// elapsed microseconds.
unsigned long long dtime_usec(unsigned long long start){

  timeval tv;
  gettimeofday(&tv, 0);
  return ((tv.tv_sec*USECPSEC)+tv.tv_usec)-start;
}

// Clock ticks treated as one second by delay_kernel. The name marks it as an
// approximation, so the achieved delay is only roughly `seconds`.
#define APPRX_CLKS_PER_SEC 1000000000ULL

// Spins until clock64() has advanced by seconds*APPRX_CLKS_PER_SEC ticks.
__global__ void delay_kernel(unsigned seconds){

  unsigned long long dt = clock64();
  while (clock64() < (dt + (seconds*APPRX_CLKS_PER_SEC)));
}

int main(int argc, char *argv[]){

  unsigned delay_t = 5; // seconds, approximately (default)
  // Must start at 0: without a command-line argument nothing else assigns it,
  // and the range check below would otherwise read an indeterminate value.
  unsigned delay_t_r = 0;
  if (argc > 1) delay_t_r = atoi(argv[1]);
  // Accept the requested delay only when it lies in (0, MAX_DELAY).
  if ((delay_t_r > 0) && (delay_t_r < MAX_DELAY)) delay_t = delay_t_r;
  unsigned long long difft = dtime_usec(0);
  delay_kernel<<<1,1>>>(delay_t);
  // Block until the kernel has finished so the timing covers its execution.
  cudaDeviceSynchronize();
  cudaCheckErrors("kernel fail");
  difft = dtime_usec(difft);
  printf("kernel duration: %fs\n", difft/(float)USECPSEC);
  return 0;
}
$ nvcc -arch=sm_35 -o t1034 t1034.cu
$ ./t1034
kernel duration: 6.528574s
$
$ cat start_as_root.bash
#!/bin/bash
# Must be run with root privilege.
# Selects GPU 0, switches it to EXCLUSIVE_PROCESS compute mode and then
# starts the MPS control daemon.
gpu_id=0
export CUDA_VISIBLE_DEVICES="${gpu_id}"
nvidia-smi -i "${gpu_id}" -c EXCLUSIVE_PROCESS
nvidia-cuda-mps-control -d
$ cat mps_run
#!/bin/bash
# Runs two instances of the t1034 test program at the same time: the first
# in the background, the second in the foreground.
./t1034 &
./t1034
# Wait for the background instance too, so the script does not return before
# both "kernel duration" lines have been printed.
wait
$
$ cat stop_as_root.bash
#!/bin/bash
# Shuts down the MPS control daemon, then puts the GPU back into DEFAULT
# compute mode.
echo quit | nvidia-cuda-mps-control
# Reset GPU 0, the device start_as_root.bash switched to EXCLUSIVE_PROCESS.
nvidia-smi -i 0 -c DEFAULT
$
未启动 MPS 时的运行结果(第二个 kernel 的耗时约为第一个的两倍,说明两个进程的 kernel 没有并发执行):
$ ./mps_run
kernel duration: 6.409399s
kernel duration: 12.078304s
$
启动 MPS 之后的运行结果(两个 kernel 的耗时基本相同,说明它们并发执行):
$ ./start_as_root.bash
$ ./mps_run
kernel duration: 6.167079s
kernel duration: 6.263062s
标签:tde following amp dac glob duration sign sync roo
原文地址:http://www.cnblogs.com/xingzifei/p/6136095.html