// CuPBoP/examples/microbench/dummy_kernel.cc
//
// 40 lines
// 898 B
// C++
// Raw Normal View History
//
// 2022-05-20 06:42:30 +08:00
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
// 2022-05-25 08:43:47 +08:00
// Dummy kernel: despite its name it performs no SAXPY arithmetic and touches
// no memory. Every thread that executes it prints its own block index and
// thread index (x dimension only) via device-side printf.
__global__ void saxpy(void) {
  // The format string has two %d conversions, so supply both indices.
  // blockIdx.x / threadIdx.x are unsigned; cast to int to match %d.
  printf("block_id:%d thread_id:%d \n", (int)blockIdx.x, (int)threadIdx.x);
}
// 2022-05-25 08:43:47 +08:00
// Abort with a readable message when a CUDA runtime call reports an error.
// `what` names the operation that produced `status`.
static void checkCuda(cudaError_t status, const char *what) {
  if (status != cudaSuccess) {
    fprintf(stderr, "CUDA error during %s: %s\n", what,
            cudaGetErrorString(status));
    exit(EXIT_FAILURE);
  }
}

// Host driver for the dummy-kernel microbenchmark.
//
// Allocates two host arrays (x filled with 1.0f, y with 2.0f) and two device
// arrays of 1M floats, copies the host data to the device, launches `saxpy`
// with one block of one thread, copies d_y back into y, and prints the
// maximum absolute difference between y[i] and 4.0f.
//
// Returns 0 on success; exits with EXIT_FAILURE if a host allocation or any
// CUDA runtime call fails.
int main(void) {
  const int N = 1 << 20;
  const size_t bytes = (size_t)N * sizeof(float);

  float *x = (float *)malloc(bytes);
  float *y = (float *)malloc(bytes);
  if (x == NULL || y == NULL) {
    fprintf(stderr, "host allocation of %zu bytes failed\n", bytes);
    free(x);
    free(y);
    return EXIT_FAILURE;
  }

  float *d_x = NULL;
  float *d_y = NULL;
  checkCuda(cudaMalloc(&d_x, bytes), "cudaMalloc(d_x)");
  checkCuda(cudaMalloc(&d_y, bytes), "cudaMalloc(d_y)");

  for (int i = 0; i < N; i++) {
    x[i] = 1.0f;
    y[i] = 2.0f;
  }

  checkCuda(cudaMemcpy(d_x, x, bytes, cudaMemcpyHostToDevice),
            "cudaMemcpy(d_x <- x)");
  checkCuda(cudaMemcpy(d_y, y, bytes, cudaMemcpyHostToDevice),
            "cudaMemcpy(d_y <- y)");

  // Launch the dummy kernel with a single block containing a single thread.
  saxpy<<<1, 1>>>();
  // Launch-configuration errors are only visible through cudaGetLastError();
  // faults raised while the kernel runs surface at the next synchronization.
  checkCuda(cudaGetLastError(), "saxpy launch");
  checkCuda(cudaDeviceSynchronize(), "saxpy execution");

  checkCuda(cudaMemcpy(y, d_y, bytes, cudaMemcpyDeviceToHost),
            "cudaMemcpy(y <- d_y)");

  // NOTE(review): saxpy takes no arguments and never writes d_y, so y comes
  // back as 2.0f and this reports 2.0. The 4.0f reference looks like a
  // leftover from a real SAXPY (2 * 1 + 2) - confirm the intended check.
  float maxError = 0.0f;
  for (int i = 0; i < N; i++) {
    // fabsf/fmaxf keep the comparison in single precision and avoid
    // accidentally resolving to the integer abs().
    maxError = fmaxf(maxError, fabsf(y[i] - 4.0f));
  }
  printf("Max error: %f\n", maxError);

  checkCuda(cudaFree(d_x), "cudaFree(d_x)");
  checkCuda(cudaFree(d_y), "cudaFree(d_y)");
  free(x);
  free(y);
  return 0;
}