reconstruct the code constructure

This commit is contained in:
Han Ruobing 2022-06-07 12:53:32 -07:00
parent d17128640f
commit f6ef5436de
45 changed files with 277 additions and 13 deletions

View File

@ -7,14 +7,14 @@ project(
set(CMAKE_VERBOSE_MAKEFILE ON)
# compile kernel translator
include_directories(./KernelTranslation/include)
include_directories(./KernelTranslation/include/x86)
add_subdirectory(KernelTranslation)
add_executable(kernelTranslator KernelTranslation.cpp)
target_link_libraries(kernelTranslator spmd2mpmd ${GCC_COVERAGE_LINK_FLAGS})
# compile host translator
include_directories(./HostTranslation/include)
include_directories(./HostTranslation/include/x86)
add_subdirectory(HostTranslation)
add_executable(hostTranslator HostTranslation.cpp)

View File

@ -12,10 +12,10 @@ set(LIB_NAME cudaRuntime2cpuRuntime)
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_BUILD_TYPE Debug)
include_directories(./include)
include_directories(./include/x86)
file(GLOB proj_HEADERS "include/*.h")
file(GLOB proj_SOURCES "lib/*.cpp")
file(GLOB proj_HEADERS "include/x86/*.h")
file(GLOB proj_SOURCES "src/x86/*.cpp")
# Add core library.
add_library(${LIB_NAME} SHARED ${proj_HEADERS} ${proj_SOURCES})

View File

@ -12,10 +12,10 @@ set(LIB_NAME spmd2mpmd)
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_BUILD_TYPE Debug)
include_directories(./include)
include_directories(./include/x86)
file(GLOB proj_HEADERS "include/*.h")
file(GLOB proj_SOURCES "lib/*.cpp")
file(GLOB proj_HEADERS "include/x86/*.h")
file(GLOB proj_SOURCES "src/x86/*.cpp")
# Add core library.
add_library(${LIB_NAME} SHARED ${proj_HEADERS} ${proj_SOURCES})

View File

@ -10,7 +10,7 @@ set(CMAKE_VERBOSE_MAKEFILE ON)
add_subdirectory(threadPool)
# compile x86 runtime library
include_directories(./include)
include_directories(./threadPool/include)
file(GLOB proj_SOURCES "lib/*.cpp")
include_directories(./include/x86)
include_directories(./threadPool/include/x86)
file(GLOB proj_SOURCES "src/vortex/*.cpp")
add_library(${LIB_NAME} SHARED ${proj_SOURCES})

View File

@ -0,0 +1,19 @@
#include "cudaKernelImpl.h"
#include <math.h>
double __nv_exp(double base) { return exp(base); }
double __nv_sqrt(double v) { return sqrt(v); }
float __nv_sqrtf(float v) { return sqrt(v); }
float __nv_powif(float base, int exp) { return pow(base, exp); }
float __nv_logf(float v) { return logf(v); }
float __nv_expf(float v) { return expf(v); }
float __nv_log10f(float v) { return log10f(v); }
float __nv_fast_log2f(float v) { return log2f(v); }
double __nv_powi(double base, int exp) { return pow(base, exp); }
float __nv_powf(float base, float exp) { return pow(base, exp); }
float __nv_fast_powf(float base, float exp) { return pow(base, exp); }
float __nv_fmodf(float x, float y) { return fmod(x, y); }
int __nv_isnanf(float v) { return isnan(v); }
int __nv_isinff(float v) { return isinf(v); }
float __nv_fabsf(float v) { return abs(v); }
int __nvvm_mul24_i(int a, int b) { return a * b; }
double _ZL3expd(double base) { return exp(base); }

View File

@ -0,0 +1,245 @@
#include "cudaRuntimeImpl.h"
#include "api.h"
#include "cuda_runtime.h"
#include "def.h"
#include "macros.h"
#include "structures.h"
#include <iostream>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
cudaError_t cudaGetDevice(int *devPtr) { *devPtr = 0; }
const char *cudaGetErrorName(cudaError_t error) { return "SUCCESS\n"; }
cudaError_t cudaDeviceReset(void) { scheduler_uninit(); }
cudaError_t cudaDeviceSynchronize(void) { cuSynchronizeBarrier(); }
cudaError_t cudaThreadSynchronize(void) { cuSynchronizeBarrier(); }
cudaError_t cudaFree(void *devPtr) { free(devPtr); }
cudaError_t cudaFreeHost(void *devPtr) { free(devPtr); }
cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
void **args, size_t sharedMem,
cudaStream_t stream) {
// if scheduler is null init device
// printf(
// "cudaLaunchKernel : Grid: x:%d y:%d z:%d Block: %d, %d, %d ShMem:%lu\n
// ", gridDim.x, gridDim.y, gridDim.z, blockDim.x, blockDim.y, blockDim.z,
// sharedMem);
cu_kernel *ker =
create_kernel(func, gridDim, blockDim, args, sharedMem, stream);
int lstatus = cuLaunchKernel(&ker);
// std::cout << "ret cudaLKernel" << std::endl;
}
cudaError_t cudaMalloc(void **devPtr, size_t size) {
*devPtr = malloc(size);
if (devPtr == NULL)
return cudaErrorMemoryAllocation;
return cudaSuccess;
}
cudaError_t cudaMemset(void *devPtr, int value, size_t count) {
memset(devPtr, value, count);
return cudaSuccess;
}
cudaError_t cudaMemcpy(void *dst, const void *src, size_t count,
cudaMemcpyKind kind) {
if (kind == cudaMemcpyHostToHost) {
memcpy(dst, src, count);
} else if (kind == cudaMemcpyDeviceToHost) {
// how does the code know which device accessing the memory
memcpy(dst, src, count);
} else if (kind == cudaMemcpyHostToDevice) {
// how does the code know which device accessing the memory
memcpy(dst, src, count);
} else if (kind == cudaMemcpyDeviceToHost) {
// how does the code know which device accessing the memory
memcpy(dst, src, count);
} else if (kind == cudaMemcpyDeviceToDevice) {
memcpy(dst, dst, count);
} else if (kind == cudaMemcpyDefault) {
memcpy(dst, src, count);
}
return cudaSuccess;
}
cudaError_t cudaMemcpyToSymbol_host(void *dst, const void *src, size_t count,
size_t offset, cudaMemcpyKind kind) {
assert(offset == 0 && "DO not support offset !=0\n");
memcpy(dst, src + offset, count);
return cudaSuccess;
}
cudaError_t cudaSetDevice(int device) {
// error checking
// std::cout << "cudaSetDevice Called" << std::endl;
init_device();
// std::cout << "cudaSetDevice Ret" << std::endl;
}
cudaError_t cudaStreamCopyAttributes(cudaStream_t dst, cudaStream_t src) {
cstreamData *dst_stream = (cstreamData *)dst;
cstreamData *src_stream = (cstreamData *)src;
if (dst_stream == NULL || src_stream == NULL) {
return cudaErrorInvalidValue; // 1
}
dst_stream->stream_priority = src_stream->stream_priority;
dst_stream->stream_flags = src_stream->stream_flags;
return cudaSuccess; // 0
}
static int stream_counter = 1;
/*
cudaStream_t is a Opaque Structure
Overwrites cudaStream_t into custom cstreamData structure
(does hardware uses the cudaStream_t stream)
*/
cudaError_t cudaStreamCreate(cudaStream_t *pStream) {
cstreamData *s = (cstreamData *)calloc(1, sizeof(cstreamData));
if (s == NULL)
return cudaErrorMemoryAllocation;
s->ev.status = C_RUN;
s->id = stream_counter;
stream_counter++;
s->stream_priority = DEFAULT;
create_KernelQueue(&(s->kernelQueue));
INIT_LOCK(s->stream_lock);
*pStream = (cudaStream_t)(s);
return cudaSuccess;
}
cudaError_t cudaStreamDestroy(cudaStream_t stream) {
cstreamData *s = (cstreamData *)(stream);
free(s->kernelQueue);
DESTROY_LOCK(s->stream_lock);
free(s);
return cudaSuccess;
}
cudaError_t cudaStreamSynchronize(cudaStream_t stream) {
cstreamData *e = ((cstreamData *)(stream));
MUTEX_LOCK(e->stream_lock);
e->ev.status = C_SYNCHRONIZE;
e->ev.numKernelsToWait = e->kernelQueue->waiting_count;
MUTEX_UNLOCK(e->stream_lock);
}
cudaError_t cudaGetDeviceCount(int *count) {
// dummy value
*count = 1;
}
cudaError_t cudaGetDeviceProperties(cudaDeviceProp *deviceProp, int device) {
// dummy values
if (device == 0) {
strcpy(deviceProp->name, "pthread");
deviceProp->totalGlobalMem = 0;
deviceProp->sharedMemPerBlock = 0;
deviceProp->regsPerBlock = 0;
deviceProp->warpSize = 0;
deviceProp->memPitch = 0;
deviceProp->maxThreadsPerBlock = 0;
deviceProp->maxThreadsDim[0] = 1;
deviceProp->maxThreadsDim[1] = 1;
deviceProp->maxThreadsDim[2] = 1;
deviceProp->maxGridSize[0] = 1;
deviceProp->maxGridSize[1] = 1;
deviceProp->maxGridSize[2] = 1;
deviceProp->totalConstMem = 0;
deviceProp->major = 0;
deviceProp->minor = 0;
deviceProp->clockRate = 0;
deviceProp->textureAlignment = 0;
deviceProp->deviceOverlap = false;
deviceProp->multiProcessorCount = 0;
}
return cudaSuccess;
}
static cudaError_t lastError = cudaSuccess;
const char *cudaGetErrorString(cudaError_t error) {
if (error == cudaSuccess) {
return "Cuda Get Error Success";
}
}
cudaError_t cudaGetLastError(void) { return lastError; }
static callParams callParamTemp;
/*
Internal Cuda Library Functions
*/
extern "C" {
extern cudaError_t CUDARTAPI __cudaPopCallConfiguration(dim3 *gridDim,
dim3 *blockDim,
size_t *sharedMem,
void **stream) {
// printf("__cudaPopCallConfiguration: Grid: x:%d y:%d z:%d Block: %d, %d, %d
// ShMem: %lu\n",
// gridDim->x, gridDim->y, gridDim->z, blockDim->x, blockDim->y, blockDim->z,
// *sharedMem);
*gridDim = callParamTemp.gridDim;
*blockDim = callParamTemp.blockDim;
*sharedMem = callParamTemp.shareMem;
*stream = callParamTemp.stream;
// printf("__cudaPopCallConfiguration After : Grid: x:%d y:%d z:%d Block: %d,
// %d, %d ShMem: %lu\n", gridDim->x, gridDim->y, gridDim->z, blockDim->x,
// blockDim->y, blockDim->z, *sharedMem);
// exit(1);
return cudaSuccess;
}
extern __host__ __device__ unsigned CUDARTAPI __cudaPushCallConfiguration(
dim3 gridDim, dim3 blockDim, size_t sharedMem = 0, void *stream = 0) {
// printf("__cudaPushCallConfiguration Grid: x:%d y:%d z:%d Block: %d, %d, %d
// "
// "ShMem: %lu\n ",
// gridDim.x, gridDim.y, gridDim.z, blockDim.x, blockDim.y, blockDim.z,
// sharedMem);
// memory checks allocations
callParamTemp.gridDim = gridDim;
// std::cout << "assign gridDim" << std::endl;
callParamTemp.blockDim = blockDim;
// std::cout << "assign blockDim" << std::endl;
callParamTemp.shareMem = sharedMem;
// std::cout << "assign shareMem" << std::endl;
(callParamTemp.stream) = stream;
// printf("__cudaPushCallConfiguration After Grid: x:%d y:%d z:%d Block: %d,
// %d, %d ShMem: %lu\n",
// gridDim.x, gridDim.y, gridDim.z, blockDim.x, blockDim.y, blockDim.z,
// sharedMem);
// return 0 continues the Pop
return cudaSuccess;
// return ne 0 skips the Pop
}
}

View File

@ -11,7 +11,7 @@ set(LIB_NAME threadPool)
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_BUILD_TYPE Debug)
include_directories(./include)
include_directories(./include/x86)
file(GLOB proj_SOURCES "lib/*.cpp")
file(GLOB proj_SOURCES "src/vortex/*.cpp")
add_library(${LIB_NAME} SHARED ${proj_SOURCES})