currybab's blog

pmpp lecture 03 Multidimensional Grids and Data 요약

source: Lecture 03 - Multidimensional Grids and Data

Today

RGB to Grayscale

    // 32 x 32 = 1024 threads per block. Any dim3 component that is not given
    // (here: z) defaults to 1.
    dim3 numThreadsPerBlock(32, 32);
    // Ceiling division: when width/height are not multiples of the block shape,
    // one extra, partially used block covers the right/bottom edge. The block
    // shape is read back from numThreadsPerBlock so it is defined in one place.
    dim3 numBlocks((width + numThreadsPerBlock.x - 1) / numThreadsPerBlock.x,
                   (height + numThreadsPerBlock.y - 1) / numThreadsPerBlock.y);
    // x dimension of the grid spans image columns, y dimension spans image rows.
    rgb2gray_kernel<<< numBlocks, numThreadsPerBlock >>>(red_d, green_d, blue_d, gray_d, width, height);

multidimensional indexing in grayscale

    // Global 2D coordinates of this thread within the launch grid.
    int row = blockIdx.y * blockDim.y + threadIdx.y; // y axis indexes image rows (height)
    int col = blockIdx.x * blockDim.x + threadIdx.x; // x axis indexes image columns (width)
    if (row < height && col < width) { // boundary check: skip threads that fall outside the image
        unsigned int idx = row * width + col; // row-major offset into the flat 1D arrays
        // Weights are 3/10, 6/10 and 1/10. The weighted channels are summed first
        // and divided once: dividing each term separately truncates three times
        // with integer channels, e.g. (255, 255, 255) would give 76 + 153 + 25 = 254.
        // NOTE(review): assumes the channel arrays hold integer (e.g. 8-bit) values;
        // their declarations are not visible in this snippet.
        gray[idx] = (red[idx] * 3 + green[idx] * 6 + blue[idx] * 1) / 10;
    }

blur

    // Box blur: each thread produces one output pixel as the average over a
    // (2 * BLUR_SIZE + 1) x (2 * BLUR_SIZE + 1) window centred on that pixel.
    //
    //   image   - input pixels, indexed as image[row * width + col]
    //   blurred - output pixels, same indexing; only in-range (row, col) are written
    //   width   - number of columns
    //   height  - number of rows
    //
    // Expects a 2D launch where the x dimension covers columns and the y
    // dimension covers rows; threads that land outside the image do nothing.
    // BLUR_SIZE is not defined in this snippet and must be provided elsewhere.
    //
    // Border behaviour: neighbours outside the image are skipped, but the sum is
    // still divided by the full window area, so pixels near the border come out
    // darker (the same result as padding the image with zeros).
    __global__ void blur_kernel(unsigned char* image, unsigned char* blurred, unsigned int width, unsigned int height) {
        // Signed on purpose: the window loops below start at outRow - BLUR_SIZE,
        // which must be able to go negative at the top/left edge.
        int outRow = blockIdx.y * blockDim.y + threadIdx.y;
        int outCol = blockIdx.x * blockDim.x + threadIdx.x;

        if (outRow < height && outCol < width) {
            // Accumulate in a wide type; the sum of the window exceeds 255 easily.
            unsigned int average = 0;
            for (int inRow = outRow - BLUR_SIZE; inRow <= outRow + BLUR_SIZE; ++inRow) {
                for (int inCol = outCol - BLUR_SIZE; inCol <= outCol + BLUR_SIZE; ++inCol) {
                    // The >= 0 tests come first so a negative index is rejected
                    // before it is compared against the unsigned width/height.
                    if (inRow >= 0 && inRow < height && inCol >= 0 && inCol < width) {
                        average += image[inRow * width + inCol];
                    }
                }
            }
            // Divide first, then narrow. A cast binds tighter than '/', so
            // "(unsigned char) average / n" would truncate the sum to 8 bits
            // before dividing and produce wrong pixel values.
            blurred[outRow * width + outCol] = (unsigned char)(average / ((BLUR_SIZE * 2 + 1) * (BLUR_SIZE * 2 + 1)));
        }
    }

boundary condition

이미지 가장자리의 픽셀은 blur 창의 일부가 이미지 밖으로 벗어나므로, 커널에서는 `inRow`/`inCol`이 이미지 범위 안에 있을 때만 값을 더한다.

Matrix-Matrix Multiplication

    // C = A * B (N x N matrices, row-major)
    // Naive dense matrix multiply C = A * B for square N x N float matrices
    // stored row-major (element (r, c) lives at index r * N + c).
    //
    //   A, B - input matrices, N * N floats each
    //   C    - output matrix, N * N floats; each in-range element is written once
    //   N    - matrix dimension
    //
    // Each thread computes one element of C as the dot product of a row of A and
    // a column of B. Expects a 2D launch where x covers columns and y covers
    // rows; threads outside the N x N range do nothing.
    __global__ void mm_kernel(float* A, float* B, float* C, unsigned int N) {
        unsigned int row = blockIdx.y * blockDim.y + threadIdx.y;
        unsigned int col = blockIdx.x * blockDim.x + threadIdx.x;

        if (row < N && col < N) {
            // Element offsets are computed in size_t: in 32-bit unsigned
            // arithmetic row * N wraps around once N reaches 65536.
            const size_t n = N;
            const size_t rowBase = row * n;
            float sum = 0.0f;
            for (size_t i = 0; i < n; ++i) {
                sum += A[rowBase + i] * B[i * n + col];
            }
            C[rowBase + col] = sum;
        }
    }

#blog #cuda #gpu #pmpp