CuPBoP/runtime/lib/cudaRuntimeImpl.cpp

101 lines
2.8 KiB
C++

#include "cudaRuntimeImpl.h"
#include "api.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Resets the device by calling scheduler_uninit().
 *
 * Returns cudaSuccess unconditionally; scheduler_uninit() reports no status
 * to this function.
 */
cudaError_t cudaDeviceReset(void) {
  scheduler_uninit();
  // Flowing off the end of a non-void function is undefined behaviour in
  // C++, so a status is returned explicitly.
  return cudaSuccess;
}
/*
 * Synchronizes with the device by calling cuSynchronizeBarrier().
 *
 * Returns cudaSuccess unconditionally; the result of cuSynchronizeBarrier(),
 * if it has one, is not inspected.
 */
cudaError_t cudaDeviceSynchronize(void) {
  cuSynchronizeBarrier();
  // Explicit status: falling off the end of a non-void function is undefined
  // behaviour in C++.
  return cudaSuccess;
}
/*
 * Releases memory with free().
 *
 * devPtr: pointer to release; NULL is accepted because free(NULL) is a no-op.
 *
 * Returns cudaSuccess unconditionally.
 */
cudaError_t cudaFree(void *devPtr) {
  free(devPtr);
  // Explicit status: falling off the end of a non-void function is undefined
  // behaviour in C++.
  return cudaSuccess;
}
/*
 * Builds a kernel descriptor with create_kernel() and hands it to
 * cuLaunchKernel().
 *
 * func:      kernel entry point, forwarded to create_kernel().
 * gridDim:   grid dimensions, forwarded to create_kernel().
 * blockDim:  block dimensions, forwarded to create_kernel().
 * args:      kernel argument array; its address (&args) is what is passed on.
 * sharedMem: shared-memory size, forwarded to create_kernel().
 * stream:    stream to launch on, forwarded to create_kernel().
 *
 * Returns cudaSuccess unconditionally.
 */
cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
                             void **args, size_t sharedMem,
                             cudaStream_t stream) {
  // TODO: initialise the device here if the scheduler has not been set up yet.
  // NOTE(review): &args is the address of this function's own parameter;
  // confirm create_kernel() does not keep that pointer past this call.
  cu_kernel *ker =
      create_kernel(func, gridDim, blockDim, &args, sharedMem, stream);

  // The status code convention of cuLaunchKernel() is not visible from this
  // file, so its result is deliberately not mapped onto a cudaError_t.
  (void)cuLaunchKernel(&ker);

  // Explicit status: falling off the end of a non-void function is undefined
  // behaviour in C++.
  return cudaSuccess;
}
/*
 * Allocates `size` bytes with malloc() and stores the pointer in *devPtr.
 *
 * devPtr: out-parameter receiving the allocation; must not be NULL (it is
 *         dereferenced unconditionally).
 * size:   number of bytes requested.
 *
 * Returns cudaErrorMemoryAllocation when malloc() fails for a non-zero
 * request, cudaSuccess otherwise.
 */
cudaError_t cudaMalloc(void **devPtr, size_t size) {
  *devPtr = malloc(size);
  // Test the allocation result (*devPtr), not the out-parameter itself, which
  // has already been dereferenced above. malloc(0) is allowed to return NULL,
  // so a NULL result only counts as a failure for a non-zero request.
  if (*devPtr == NULL && size != 0)
    return cudaErrorMemoryAllocation;
  return cudaSuccess;
}
/*
 * Copies `count` bytes from src to dst with memcpy().
 *
 * dst:   destination buffer.
 * src:   source buffer.
 * count: number of bytes to copy.
 * kind:  copy direction. Every recognised direction performs the same plain
 *        memcpy(); the direction does not select a different code path.
 *
 * Returns cudaSuccess unconditionally. An unrecognised `kind` copies nothing
 * and still reports success.
 */
cudaError_t cudaMemcpy(void *dst, const void *src, size_t count,
                       cudaMemcpyKind kind) {
  // TODO: the code has no notion of which device owns the memory.
  const bool known_kind =
      kind == cudaMemcpyHostToHost || kind == cudaMemcpyHostToDevice ||
      kind == cudaMemcpyDeviceToHost || kind == cudaMemcpyDeviceToDevice ||
      kind == cudaMemcpyDefault;

  if (known_kind) {
    // Device-to-device must read from src as well; copying dst onto itself
    // left the destination unchanged.
    memcpy(dst, src, count);
  }
  return cudaSuccess;
}
/*
 * Initialises the device by calling init_device().
 *
 * device: requested device index; currently ignored.
 *
 * Returns cudaSuccess unconditionally.
 */
cudaError_t cudaSetDevice(int device) {
  // TODO: validate `device`; no error checking is performed yet.
  (void)device;
  init_device();
  // Explicit status: falling off the end of a non-void function is undefined
  // behaviour in C++.
  return cudaSuccess;
}
/*
 * Copies the priority and flags of one stream onto another.
 *
 * dst: stream whose stream_priority and stream_flags are overwritten.
 * src: stream the two attributes are read from.
 *
 * Returns cudaErrorInvalidValue when either handle is NULL, cudaSuccess
 * otherwise.
 */
cudaError_t cudaStreamCopyAttributes(cudaStream_t dst, cudaStream_t src) {
  cstreamData *target = (cstreamData *)dst;
  cstreamData *origin = (cstreamData *)src;

  if (target != NULL && origin != NULL) {
    target->stream_priority = origin->stream_priority;
    target->stream_flags = origin->stream_flags;
    return cudaSuccess;
  }

  // At least one of the two handles was NULL.
  return cudaErrorInvalidValue;
}
/*
 * Allocates a zero-initialised cstreamData, fills in its initial state and
 * hands it back through *pStream as an opaque stream handle.
 *
 * pStream: out-parameter receiving the new stream handle.
 *
 * Returns cudaErrorMemoryAllocation when the stream record cannot be
 * allocated, cudaSuccess otherwise.
 */
cudaError_t cudaStreamCreate(cudaStream_t *pStream) {
  cstreamData *created = (cstreamData *)calloc(1, sizeof(cstreamData));
  if (!created) {
    return cudaErrorMemoryAllocation;
  }

  created->ev.status = C_RUN;
  // Take the current counter value as this stream's id, then advance it.
  created->id = stream_counter++;
  created->stream_priority = DEFAULT;

  create_KernelQueue(&(created->kernelQueue));
  INIT_LOCK(created->stream_lock);

  *pStream = (cudaStream_t)(created);
  return cudaSuccess;
}
/*
 * Tears down a stream: frees its kernel queue, destroys its lock, then frees
 * the stream record itself.
 *
 * stream: handle to destroy; it is dereferenced without a NULL check.
 *
 * Returns cudaSuccess unconditionally.
 */
cudaError_t cudaStreamDestroy(cudaStream_t stream) {
  cstreamData *doomed = (cstreamData *)(stream);

  // Release what the record owns before releasing the record.
  free(doomed->kernelQueue);
  DESTROY_LOCK(doomed->stream_lock);

  free(doomed);
  return cudaSuccess;
}
/*
 * Marks a stream as synchronizing: under the stream lock, sets its event
 * status to C_SYNCHRONIZE and records the kernel queue's current
 * waiting_count as the number of kernels to wait for.
 *
 * stream: handle to synchronize; it is dereferenced without a NULL check.
 *
 * Returns cudaSuccess unconditionally.
 *
 * NOTE(review): this function only records the request; nothing here visibly
 * blocks until the queued kernels finish. Confirm where the wait happens.
 */
cudaError_t cudaStreamSynchronize(cudaStream_t stream) {
  cstreamData *e = ((cstreamData *)(stream));
  MUTEX_LOCK(e->stream_lock);
  e->ev.status = C_SYNCHRONIZE;
  e->ev.numKernelsToWait = e->kernelQueue->waiting_count;
  MUTEX_UNLOCK(e->stream_lock);
  // Explicit status: falling off the end of a non-void function is undefined
  // behaviour in C++.
  return cudaSuccess;
}