列出 NVIDIA 顯示卡狀態

首先需要安裝 CUDA SDK。

程式部份 :

#include <stdio.h>
#include <string.h>

#include <cuda.h>
#include <cuda_runtime.h>
 
/**
 * Print a human-readable summary of one CUDA device to stdout.
 *
 * @param cdevprop  Device properties as filled in by cudaGetDeviceProperties().
 *                  A null pointer is accepted and results in no output.
 *
 * Several cudaDeviceProp fields (totalGlobalMem, sharedMemPerBlock, memPitch,
 * totalConstMem, textureAlignment) are size_t. Passing them to "%d" is
 * undefined behaviour and prints garbage on 64-bit builds or on devices with
 * 2 GiB of memory or more, so they are widened to unsigned long long and
 * printed with "%llu", which every printf implementation nvcc targets accepts.
 */
void displayProperties(cudaDeviceProp* cdevprop) {
    if (!cdevprop) return;

    printf("Device name : %s \n", cdevprop->name);
    printf("================================\n");

    /* size_t-valued fields: cast explicitly so the argument matches %llu. */
    printf("Total global memory : %llu KB\n",
           (unsigned long long)(cdevprop->totalGlobalMem / 1024));
    printf("Shared memory per block : %llu KB\n",
           (unsigned long long)(cdevprop->sharedMemPerBlock / 1024));
    printf("Registers per thread block : %d\n", cdevprop->regsPerBlock);
    printf("Warp size in threads : %d\n", cdevprop->warpSize);
    printf("Memory pitch : %llu bytes\n",
           (unsigned long long)cdevprop->memPitch);
    printf("Maximum threads per block : %d\n", cdevprop->maxThreadsPerBlock);

    /* maxThreadsDim is the per-block limit in x/y/z. */
    printf("Maximum thread dimension (block) : %d %d %d\n",
           cdevprop->maxThreadsDim[0],
           cdevprop->maxThreadsDim[1],
           cdevprop->maxThreadsDim[2]);
    /* maxGridSize is the limit on the number of blocks per grid in x/y/z. */
    printf("Maximum thread dimension (grid) : %d %d %d\n",
           cdevprop->maxGridSize[0],
           cdevprop->maxGridSize[1],
           cdevprop->maxGridSize[2]);

    printf("Total constant memory :  %llu bytes\n",
           (unsigned long long)cdevprop->totalConstMem);
    /* major.minor is the device's compute capability; the label is kept
     * unchanged so existing output consumers still match. */
    printf("CUDA version : %d.%d\n", cdevprop->major, cdevprop->minor);
    printf("Clock rate : %d KHz\n", cdevprop->clockRate);
    printf("Texture alignment : %llu bytes\n",
           (unsigned long long)cdevprop->textureAlignment);
    printf("Device overlap : %s \n",
           cdevprop->deviceOverlap ? "Allowed" : "Not Allowed");
    printf("Number of multiprocessors : %d\n", cdevprop->multiProcessorCount);
}
 
/**
 * Enumerate every CUDA device visible to the runtime and print its properties.
 *
 * @return 0 when the device count could be queried (even if it is zero or an
 *         individual device query failed), 1 when cudaGetDeviceCount() itself
 *         failed, e.g. because no driver is installed.
 */
int main(void) {
    int devCount = 0;

    /* Check the status: on failure devCount is not meaningful, and silently
     * reporting "0 devices" would hide a missing or mismatched driver. */
    cudaError_t status = cudaGetDeviceCount(&devCount);
    if (status != cudaSuccess) {
        fprintf(stderr, "cudaGetDeviceCount failed: %s\n",
                cudaGetErrorString(status));
        return 1;
    }

    printf("Device found: %d\n", devCount);
    for (int i = 0; i < devCount; ++i) {
        /* Value-initialize so every field starts zeroed for each device. */
        cudaDeviceProp dev = {};

        status = cudaGetDeviceProperties(&dev, i);
        if (status == cudaSuccess) {
            displayProperties(&dev);
        } else {
            /* Report the status actually returned by the failing call. */
            fprintf(stderr, "Device %d: %s\n", i, cudaGetErrorString(status));
        }
    }
    return 0;
}

編譯 :

# nvcc display.cu -o display

執行結果 :

Device found: 1
Device name : Quadro FX 4600 
================================
Total global memory : 785728 KB
Shared memory per block : 16 KB
Registers per thread block : 8192
Warp size in threads : 32
Memory pitch : 262144 bytes
Maximum threads per block : 512
Maximum thread dimension (block) : 512 512 64
Maximum thread dimension (grid) : 65535 65535 1
Total constant memory :  65536 bytes
CUDA version : 1.0
Clock rate : 1188000 KHz
Texture alignment : 256 bytes
Device overlap : Not Allowed 
Number of multiprocessors : 12