#!/usr/bin/env bash
#
# Build and run the vecadd example.
#
# Pipeline:
#   1. clang++ compiles vecadd.cu and, via -save-temps, leaves the device and
#      host LLVM bitcode files in the working directory.
#   2. kernelTranslator / hostTranslator rewrite those bitcode files into
#      kernel.bc and host.bc.
#   3. llc lowers both to PIC object files (plus assembly listings).
#   4. gcc links the objects against the CPUruntime and threadPool libraries.
#   5. The resulting ./vecadd binary is executed.
#
# Usage: run either from inside examples/vecadd or from the directory that
# contains examples/vecadd (presumably the CuPBoP checkout root).
#
# Environment:
#   CUDA_PATH  CUDA toolkit root; defaults to /usr/local/cuda-11.7.

set -euo pipefail

# Print a message to stderr and abort the build.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Enter the example directory when started from two levels above it; when the
# script is started inside examples/vecadd there is nothing to enter.
if [[ -d examples/vecadd ]]; then
  cd examples/vecadd
fi
[[ -f vecadd.cu ]] || die "vecadd.cu not found in $(pwd)"

# Resolve the tree root AFTER changing directory so it agrees with every other
# ../.. path used below. Assignment and export are split so a failing command
# substitution is not masked by export's own exit status.
CuPBoP_PATH="$(cd ../.. && pwd)"
export CuPBoP_PATH

# Absolute entries keep the runtime libraries findable regardless of the
# working directory; the ${VAR:+...} form avoids a trailing empty entry (which
# the dynamic loader would treat as the current directory) when
# LD_LIBRARY_PATH was previously unset.
export LD_LIBRARY_PATH="${CuPBoP_PATH}/build/runtime:${CuPBoP_PATH}/build/runtime/threadPool${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"

# Allow the caller to point at a different CUDA installation.
export CUDA_PATH="${CUDA_PATH:-/usr/local/cuda-11.7}"

# Intermediate bitcode files that clang++ -save-temps is expected to leave.
readonly kernel_bc_in='vecadd-cuda-nvptx64-nvidia-cuda-sm_50.bc'
readonly host_bc_in='vecadd-host-x86_64-pc-linux-gnu.bc'

# Remove leftovers from a previous run so a failed compile cannot be hidden by
# stale bitcode.
rm -f -- "${kernel_bc_in}" "${host_bc_in}"

# Compile CUDA source code (both host and kernel) to bitcode files.
# The exit status is deliberately ignored: only the -save-temps bitcode is
# consumed by the later steps (presumably the final link of this command is
# not expected to succeed -- TODO confirm). The outputs are verified below.
clang++ -std=c++11 vecadd.cu \
  -I"${CuPBoP_PATH}" --cuda-path="${CUDA_PATH}" \
  --cuda-gpu-arch=sm_50 -L"${CUDA_PATH}/lib64" \
  -lcudart_static -ldl -lrt -pthread -save-temps -v || true

for bitcode in "${kernel_bc_in}" "${host_bc_in}"; do
  [[ -f "${bitcode}" ]] || die "clang++ did not produce ${bitcode}"
done

# Apply compilation transformations on the kernel bitcode file.
"${CuPBoP_PATH}/build/compilation/kernelTranslator" \
  "${kernel_bc_in}" kernel.bc

# Apply compilation transformations on the host bitcode file.
"${CuPBoP_PATH}/build/compilation/hostTranslator" \
  "${host_bc_in}" host.bc

# Generate object files.
llc --relocation-model=pic --filetype=obj kernel.bc -o kernel.o
llc --relocation-model=pic --filetype=obj host.bc -o host.o

# Also emit assembly listings next to the objects.
llc --relocation-model=pic --filetype=asm kernel.bc -o kernel.s
llc --relocation-model=pic --filetype=asm host.bc -o host.s

# Link with runtime libraries and generate the executable file.
gcc -o vecadd -fPIC -no-pie \
  -I"${CuPBoP_PATH}/runtime/threadPool/include" \
  -L"${CuPBoP_PATH}/build/runtime" \
  -L"${CuPBoP_PATH}/build/runtime/threadPool" \
  host.o kernel.o \
  -I"${CuPBoP_PATH}" -lc -lm -lCPUruntime -lthreadPool -lpthread

# Execute.
./vecadd