[WIP] migriate to LLVM14

This commit is contained in:
RobinHan 2022-06-17 22:20:13 -04:00
parent bcdcccecc9
commit d7668ccd86
14 changed files with 163 additions and 100 deletions

View File

@ -8,6 +8,7 @@
#include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/ToolOutputFile.h"
#include <iostream> #include <iostream>
#include <map> #include <map>
#include <regex>
#include <set> #include <set>
using namespace llvm; using namespace llvm;
@ -200,8 +201,23 @@ void ReplaceKernelLaunch(llvm::Module *M) {
prior name before _host is add prior name before _host is add
*/ */
std::string oldName = functionOperand->getName().str(); std::string oldName = functionOperand->getName().str();
// For LLVM>=14, it will add _device_stub prefix for the kernel
// name, thus, we need to remove the prefix
// example:
// from: _Z24__device_stub__HistogramPjS_jj
// to: HistogramPjS_jj
oldName = std::regex_replace(oldName,
std::regex("__device_stub__"), "");
// remove _Z24
for (int i = 2; i < oldName.length(); i++) {
if (oldName[i] >= '0' && oldName[i] <= '9')
continue;
oldName = oldName.substr(i);
break;
}
// if parent function is __host and same as the cudaKernelLaunch // if parent function is __host and same as the
// cudaKernelLaunch
std::string newName = oldName + "_wrapper"; std::string newName = oldName + "_wrapper";
if (func_name == oldName && host_changed && if (func_name == oldName && host_changed &&
oldName.find("_host") != std::string::npos) { oldName.find("_host") != std::string::npos) {
@ -220,12 +236,11 @@ void ReplaceKernelLaunch(llvm::Module *M) {
kernels.insert({functionOperand->getName().str(), F}); kernels.insert({functionOperand->getName().str(), F});
} }
} else if (cuda_register_kernel_names.find( } else if (cuda_register_kernel_names.find(
calledFunction->getName()) != calledFunction->getName().str()) !=
cuda_register_kernel_names.end()) { cuda_register_kernel_names.end()) {
// if the called function collides with kernel definiton // if the called function collides with kernel definiton
// TODO: some reason changes all occurences of the function name // TODO: some reason changes all occurences of the function name
// for both cudaKernelLaunch calls and regular function call // for both cudaKernelLaunch calls and regular function call
// errs() << *inst;
host_changed = true; host_changed = true;
calledFunction->setName(calledFunction->getName() + "_host"); calledFunction->setName(calledFunction->getName() + "_host");
std::cout << std::endl; std::cout << std::endl;

View File

@ -18,4 +18,4 @@ file(GLOB proj_HEADERS "include/x86/*.h")
file(GLOB proj_SOURCES "src/x86/*.cpp") file(GLOB proj_SOURCES "src/x86/*.cpp")
# Add core library. # Add core library.
add_library(${LIB_NAME} SHARED ${proj_HEADERS} ${proj_SOURCES}) add_library(${LIB_NAME} STATIC ${proj_HEADERS} ${proj_SOURCES})

View File

@ -1,6 +1,8 @@
#ifndef __NVVM2x86_TOOL__ #ifndef __NVVM2x86_TOOL__
#define __NVVM2x86_TOOL__ #define __NVVM2x86_TOOL__
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h" #include "llvm/IR/Module.h"
llvm::Module *LoadModuleFromFilr(char *file_name); llvm::Module *LoadModuleFromFilr(char *file_name);
void DumpModule(llvm::Module *M, char *file_name); void DumpModule(llvm::Module *M, char *file_name);
@ -22,4 +24,10 @@ bool has_barrier(llvm::BasicBlock *B);
bool has_block_barrier(llvm::BasicBlock *B); bool has_block_barrier(llvm::BasicBlock *B);
bool has_barrier(llvm::Function *F); bool has_barrier(llvm::Function *F);
void replace_dynamic_shared_memory(llvm::Module *M); void replace_dynamic_shared_memory(llvm::Module *M);
llvm::LoadInst *createLoad(llvm::IRBuilder<> &B, llvm::Value *addr,
bool isVolatile = false);
llvm::Value *createInBoundsGEP(llvm::IRBuilder<> &B, llvm::Value *ptr,
llvm::ArrayRef<llvm::Value *> idxlist);
llvm::Value *createGEP(llvm::IRBuilder<> &B, llvm::Value *ptr,
llvm::ArrayRef<llvm::Value *> idxlist);
#endif #endif

View File

@ -11,7 +11,7 @@
#include "llvm/IR/Module.h" #include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h" #include "llvm/IR/Verifier.h"
#include "llvm/IRReader/IRReader.h" #include "llvm/IRReader/IRReader.h"
#include "llvm/Support/TargetRegistry.h" #include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h" #include "llvm/Support/TargetSelect.h"
#include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/ToolOutputFile.h"
#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetMachine.h"
@ -19,6 +19,7 @@
#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h" #include "llvm/Transforms/Utils/ValueMapper.h"
#include <iostream> #include <iostream>
#include <map>
using namespace llvm; using namespace llvm;
@ -52,6 +53,14 @@ void decode_input(llvm::Module *M) {
if (!isKernelFunction(M, F)) if (!isKernelFunction(M, F))
continue; continue;
auto func_name = F->getName().str(); auto func_name = F->getName().str();
// remove mangle prefix
// remove _Z24
for (int pos = 2; pos < func_name.length(); pos++) {
if (func_name[pos] >= '0' && func_name[pos] <= '9')
continue;
func_name = func_name.substr(pos);
break;
}
llvm::IRBuilder<> Builder(M->getContext()); llvm::IRBuilder<> Builder(M->getContext());
FunctionCallee fc = FunctionCallee fc =
@ -78,7 +87,7 @@ void decode_input(llvm::Module *M) {
*M, Int32T, false, llvm::GlobalValue::ExternalLinkage, NULL, *M, Int32T, false, llvm::GlobalValue::ExternalLinkage, NULL,
"thread_memory_size", NULL, llvm::GlobalValue::GeneralDynamicTLSModel, "thread_memory_size", NULL, llvm::GlobalValue::GeneralDynamicTLSModel,
0, false); 0, false);
Value *loadedValue = Builder.CreateLoad(global_mem); Value *loadedValue = createLoad(Builder, global_mem);
llvm::FunctionType *LaunchFun2 = FunctionType::get( llvm::FunctionType *LaunchFun2 = FunctionType::get(
PointerType::get(PointerType::get(Int32T, 0), 0), NULL); PointerType::get(PointerType::get(Int32T, 0), 0), NULL);
@ -120,12 +129,12 @@ void decode_input(llvm::Module *M) {
Type *ArgType = ii->getType(); Type *ArgType = ii->getType();
// calculate addr // calculate addr
Value *GEP = Builder.CreateGEP(input_arg, ConstantInt::get(Int32T, idx)); Value *GEP = createGEP(Builder, input_arg, ConstantInt::get(Int32T, idx));
// load corresponding int* // load corresponding int*
GEP = Builder.CreateLoad(GEP); GEP = createLoad(Builder, GEP);
// bitcast // bitcast
GEP = Builder.CreateBitOrPointerCast(GEP, PointerType::get(ArgType, 0)); GEP = Builder.CreateBitOrPointerCast(GEP, PointerType::get(ArgType, 0));
Value *Arg = Builder.CreateLoad(GEP); Value *Arg = createLoad(Builder, GEP);
Arguments.push_back(Arg); Arguments.push_back(Arg);
++idx; ++idx;
} }

View File

@ -42,7 +42,7 @@ bool inline_warp_level_func(llvm::Module *M) {
if (func_name == "_Z10__any_syncji" || if (func_name == "_Z10__any_syncji" ||
func_name.find("shfl_down_sync") != std::string::npos) { func_name.find("shfl_down_sync") != std::string::npos) {
InlineFunctionInfo IFI; InlineFunctionInfo IFI;
InlineFunction(c, IFI); InlineFunction(*c, IFI);
need_remove.insert(c->getCalledFunction()); need_remove.insert(c->getCalledFunction());
changed = true; changed = true;
} }
@ -102,7 +102,7 @@ bool inline_func_with_tid(llvm::Module *M) {
} }
for (auto c : need_inline) { for (auto c : need_inline) {
InlineFunctionInfo IFI; InlineFunctionInfo IFI;
InlineFunction(c, IFI); InlineFunction(*c, IFI);
} }
return changed; return changed;
} }

View File

@ -179,7 +179,7 @@ llvm::Instruction *GetContextArray(llvm::Instruction *instruction,
llvm::AllocaInst *Alloca = nullptr; llvm::AllocaInst *Alloca = nullptr;
auto block_size_addr = M->getGlobalVariable("block_size"); auto block_size_addr = M->getGlobalVariable("block_size");
auto block_size = builder.CreateLoad(block_size_addr); auto block_size = createLoad(builder, block_size_addr);
Alloca = builder.CreateAlloca(AllocType, block_size, varName); Alloca = builder.CreateAlloca(AllocType, block_size, varName);
contextArrays[varName] = Alloca; contextArrays[varName] = Alloca;
@ -208,9 +208,9 @@ llvm::Instruction *AddContextSave(llvm::Instruction *instruction,
std::vector<llvm::Value *> gepArgs; std::vector<llvm::Value *> gepArgs;
auto inter_warp_index = auto inter_warp_index =
builder.CreateLoad(M->getGlobalVariable("inter_warp_index")); createLoad(builder, M->getGlobalVariable("inter_warp_index"));
auto intra_warp_index = auto intra_warp_index =
builder.CreateLoad(M->getGlobalVariable("intra_warp_index")); createLoad(builder, M->getGlobalVariable("intra_warp_index"));
auto thread_idx = builder.CreateBinOp( auto thread_idx = builder.CreateBinOp(
Instruction::Add, intra_warp_index, Instruction::Add, intra_warp_index,
builder.CreateBinOp(Instruction::Mul, inter_warp_index, builder.CreateBinOp(Instruction::Mul, inter_warp_index,
@ -218,7 +218,7 @@ llvm::Instruction *AddContextSave(llvm::Instruction *instruction,
"thread_idx"); "thread_idx");
gepArgs.push_back(thread_idx); gepArgs.push_back(thread_idx);
return builder.CreateStore(instruction, builder.CreateGEP(alloca, gepArgs)); return builder.CreateStore(instruction, createGEP(builder, alloca, gepArgs));
} }
llvm::Instruction *AddContextRestore(llvm::Value *val, llvm::Instruction *AddContextRestore(llvm::Value *val,
@ -242,9 +242,9 @@ llvm::Instruction *AddContextRestore(llvm::Value *val,
auto M = before->getParent()->getParent()->getParent(); auto M = before->getParent()->getParent()->getParent();
auto I32 = llvm::Type::getInt32Ty(M->getContext()); auto I32 = llvm::Type::getInt32Ty(M->getContext());
auto inter_warp_index = auto inter_warp_index =
builder.CreateLoad(M->getGlobalVariable("inter_warp_index")); createLoad(builder, M->getGlobalVariable("inter_warp_index"));
auto intra_warp_index = auto intra_warp_index =
builder.CreateLoad(M->getGlobalVariable("intra_warp_index")); createLoad(builder, M->getGlobalVariable("intra_warp_index"));
auto thread_idx = builder.CreateBinOp( auto thread_idx = builder.CreateBinOp(
Instruction::Add, intra_warp_index, Instruction::Add, intra_warp_index,
builder.CreateBinOp(Instruction::Mul, inter_warp_index, builder.CreateBinOp(Instruction::Mul, inter_warp_index,
@ -253,11 +253,11 @@ llvm::Instruction *AddContextRestore(llvm::Value *val,
gepArgs.push_back(thread_idx); gepArgs.push_back(thread_idx);
llvm::Instruction *gep = llvm::Instruction *gep =
dyn_cast<Instruction>(builder.CreateGEP(alloca, gepArgs)); dyn_cast<Instruction>(createGEP(builder, alloca, gepArgs));
if (isAlloca) { if (isAlloca) {
return gep; return gep;
} }
return builder.CreateLoad(gep); return createLoad(builder, gep);
} }
void AddContextSaveRestore(llvm::Instruction *instruction, void AddContextSaveRestore(llvm::Instruction *instruction,
@ -316,7 +316,7 @@ void handle_alloc(llvm::Function *F) {
// generate a new alloc // generate a new alloc
auto block_size_addr = M->getGlobalVariable("block_size"); auto block_size_addr = M->getGlobalVariable("block_size");
IRBuilder<> builder(inst); IRBuilder<> builder(inst);
auto block_size = builder.CreateLoad(block_size_addr); auto block_size = createLoad(builder, block_size_addr);
llvm::Type *elementType = NULL; llvm::Type *elementType = NULL;
if (dyn_cast<AllocaInst>(inst)->getType()->getElementType()) { if (dyn_cast<AllocaInst>(inst)->getType()->getElementType()) {
@ -338,16 +338,16 @@ void handle_alloc(llvm::Function *F) {
IRBuilder<> builder(user); IRBuilder<> builder(user);
// std::vector<llvm::Value *> gepArgs; // std::vector<llvm::Value *> gepArgs;
auto inter_warp_index = auto inter_warp_index =
builder.CreateLoad(M->getGlobalVariable("inter_warp_index")); createLoad(builder, M->getGlobalVariable("inter_warp_index"));
auto intra_warp_index = auto intra_warp_index =
builder.CreateLoad(M->getGlobalVariable("intra_warp_index")); createLoad(builder, M->getGlobalVariable("intra_warp_index"));
auto thread_idx = builder.CreateBinOp( auto thread_idx = builder.CreateBinOp(
Instruction::Add, intra_warp_index, Instruction::Add, intra_warp_index,
builder.CreateBinOp(Instruction::Mul, inter_warp_index, builder.CreateBinOp(Instruction::Mul, inter_warp_index,
ConstantInt::get(I32, 32)), ConstantInt::get(I32, 32)),
"thread_idx"); "thread_idx");
auto gep = builder.CreateGEP(Alloca, thread_idx); auto gep = createGEP(builder, Alloca, thread_idx);
user->replaceUsesOfWith(inst, gep); user->replaceUsesOfWith(inst, gep);
} }
@ -479,19 +479,19 @@ BasicBlock *insert_loop_cond(llvm::BasicBlock *InsertCondBefore,
auto inter_warp_index = M->getGlobalVariable("inter_warp_index"); auto inter_warp_index = M->getGlobalVariable("inter_warp_index");
auto block_size = M->getGlobalVariable("block_size"); auto block_size = M->getGlobalVariable("block_size");
auto warp_cnt = auto warp_cnt =
builder.CreateBinOp(Instruction::SDiv, builder.CreateLoad(block_size), builder.CreateBinOp(Instruction::SDiv, createLoad(builder, block_size),
ConstantInt::get(I32, 32), "warp_number"); ConstantInt::get(I32, 32), "warp_number");
cmpResult = cmpResult =
builder.CreateICmpULT(builder.CreateLoad(inter_warp_index), warp_cnt); builder.CreateICmpULT(createLoad(builder, inter_warp_index), warp_cnt);
} else { } else {
auto intra_warp_index = M->getGlobalVariable("intra_warp_index"); auto intra_warp_index = M->getGlobalVariable("intra_warp_index");
auto block_size = M->getGlobalVariable("block_size"); auto block_size = M->getGlobalVariable("block_size");
if (!need_nested_loop) { if (!need_nested_loop) {
cmpResult = builder.CreateICmpULT(builder.CreateLoad(intra_warp_index), cmpResult = builder.CreateICmpULT(createLoad(builder, intra_warp_index),
builder.CreateLoad(block_size)); createLoad(builder, block_size));
} else { } else {
cmpResult = builder.CreateICmpULT(builder.CreateLoad(intra_warp_index), cmpResult = builder.CreateICmpULT(createLoad(builder, intra_warp_index),
ConstantInt::get(I32, 32)); ConstantInt::get(I32, 32));
} }
} }
@ -513,13 +513,13 @@ BasicBlock *insert_loop_inc(llvm::BasicBlock *InsertIncBefore,
if (intra_warp_loop) { // intra warp if (intra_warp_loop) { // intra warp
auto intra_warp_index = M->getGlobalVariable("intra_warp_index"); auto intra_warp_index = M->getGlobalVariable("intra_warp_index");
auto new_index = builder.CreateBinOp( auto new_index = builder.CreateBinOp(
Instruction::Add, builder.CreateLoad(intra_warp_index), Instruction::Add, createLoad(builder, intra_warp_index),
ConstantInt::get(I32, 1), "intra_warp_index_increment"); ConstantInt::get(I32, 1), "intra_warp_index_increment");
builder.CreateStore(new_index, intra_warp_index); builder.CreateStore(new_index, intra_warp_index);
} else { // inter warp } else { // inter warp
auto inter_warp_index = M->getGlobalVariable("inter_warp_index"); auto inter_warp_index = M->getGlobalVariable("inter_warp_index");
auto new_index = builder.CreateBinOp( auto new_index = builder.CreateBinOp(
Instruction::Add, builder.CreateLoad(inter_warp_index), Instruction::Add, createLoad(builder, inter_warp_index),
ConstantInt::get(I32, 1), "inter_warp_index_increment"); ConstantInt::get(I32, 1), "inter_warp_index_increment");
builder.CreateStore(new_index, inter_warp_index); builder.CreateStore(new_index, inter_warp_index);
} }

View File

@ -99,7 +99,7 @@ void mem_share2global(llvm::Module *M) {
global_memory->setComdat(comdat); global_memory->setComdat(comdat);
global_memory->setLinkage(llvm::GlobalValue::LinkOnceODRLinkage); global_memory->setLinkage(llvm::GlobalValue::LinkOnceODRLinkage);
global_memory->setInitializer(undef); global_memory->setInitializer(undef);
global_memory->setAlignment(share_memory->getAlignment()); global_memory->setAlignment(share_memory->getAlign());
corresponding_global_memory.insert( corresponding_global_memory.insert(
std::pair<GlobalVariable *, GlobalVariable *>(share_memory, std::pair<GlobalVariable *, GlobalVariable *>(share_memory,
global_memory)); global_memory));

View File

@ -20,10 +20,11 @@
#include "llvm/IR/Module.h" #include "llvm/IR/Module.h"
#include "llvm/IR/ValueSymbolTable.h" #include "llvm/IR/ValueSymbolTable.h"
#include "llvm/InitializePasses.h" #include "llvm/InitializePasses.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/PassInfo.h" #include "llvm/PassInfo.h"
#include "llvm/PassRegistry.h" #include "llvm/PassRegistry.h"
#include "llvm/Support/CommandLine.h" #include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/Host.h"
#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h"

View File

@ -1,8 +1,10 @@
#include "tool.h" #include "tool.h"
#include "llvm/Bitcode/BitcodeWriter.h" #include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Constants.h" #include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h" #include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h" #include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h" #include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h" #include "llvm/IR/Instructions.h"
@ -10,9 +12,16 @@
#include "llvm/IR/Module.h" #include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h" #include "llvm/IR/Verifier.h"
#include "llvm/IRReader/IRReader.h" #include "llvm/IRReader/IRReader.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h" #include "llvm/Transforms/Utils/ValueMapper.h"
#include <iostream> #include <iostream>
#include <set> #include <set>
@ -41,7 +50,7 @@ void DumpModule(llvm::Module *M, char *file_name) {
std::string msg; std::string msg;
llvm::raw_string_ostream os(msg); llvm::raw_string_ostream os(msg);
std::error_code EC; std::error_code EC;
ToolOutputFile Out(file_name, EC, sys::fs::F_None); ToolOutputFile Out(file_name, EC, sys::fs::OF_None);
if (EC) { if (EC) {
errs() << "Fails to open output file: " << EC.message(); errs() << "Fails to open output file: " << EC.message();
return; return;
@ -128,7 +137,7 @@ llvm::Instruction *BreakPHIToAllocas(PHINode *phi) {
} }
builder.SetInsertPoint(phi); builder.SetInsertPoint(phi);
llvm::Instruction *loadedValue = builder.CreateLoad(alloca); llvm::Instruction *loadedValue = createLoad(builder, alloca);
phi->replaceAllUsesWith(loadedValue); phi->replaceAllUsesWith(loadedValue);
phi->eraseFromParent(); phi->eraseFromParent();
@ -219,13 +228,12 @@ void replace_dynamic_shared_memory(llvm::Module *M) {
if (!dynamic_shared_memory_addr) { if (!dynamic_shared_memory_addr) {
return; return;
} }
auto load_shared_memory = auto load_shared_memory = new LoadInst(
new LoadInst(dynamic_shared_memory_addr, "new_load"); dynamic_shared_memory_addr->getType()->getPointerElementType(),
dynamic_shared_memory_addr, "new_load", &*F->begin()->begin());
auto new_bit_cast = auto new_bit_cast =
new BitCastInst(load_shared_memory, new BitCastInst(load_shared_memory,
dynamic_shared_memory_addr->getType(), "new_bit_cast"); dynamic_shared_memory_addr->getType(), "new_bit_cast");
new_bit_cast->insertBefore(&*F->begin()->begin());
load_shared_memory->insertBefore(new_bit_cast);
dynamic_shared_memory_addr->replaceUsesWithIf(new_bit_cast, [&](Use &U) { dynamic_shared_memory_addr->replaceUsesWithIf(new_bit_cast, [&](Use &U) {
auto *Instr = dyn_cast<Instruction>(U.getUser()); auto *Instr = dyn_cast<Instruction>(U.getUser());
return Instr != new_bit_cast && Instr != load_shared_memory; return Instr != new_bit_cast && Instr != load_shared_memory;
@ -281,21 +289,21 @@ void replace_built_in_function(llvm::Module *M) {
auto block_size_addr = M->getGlobalVariable("block_size_x"); auto block_size_addr = M->getGlobalVariable("block_size_x");
IRBuilder<> builder(context); IRBuilder<> builder(context);
builder.SetInsertPoint(Call); builder.SetInsertPoint(Call);
auto val = builder.CreateLoad(block_size_addr); auto val = createLoad(builder, block_size_addr);
Call->replaceAllUsesWith(val); Call->replaceAllUsesWith(val);
need_remove.push_back(Call); need_remove.push_back(Call);
} else if (func_name == "llvm.nvvm.read.ptx.sreg.ntid.y") { } else if (func_name == "llvm.nvvm.read.ptx.sreg.ntid.y") {
auto block_size_addr = M->getGlobalVariable("block_size_y"); auto block_size_addr = M->getGlobalVariable("block_size_y");
IRBuilder<> builder(context); IRBuilder<> builder(context);
builder.SetInsertPoint(Call); builder.SetInsertPoint(Call);
auto val = builder.CreateLoad(block_size_addr); auto val = createLoad(builder, block_size_addr);
Call->replaceAllUsesWith(val); Call->replaceAllUsesWith(val);
need_remove.push_back(Call); need_remove.push_back(Call);
} else if (func_name == "llvm.nvvm.read.ptx.sreg.ntid.z") { } else if (func_name == "llvm.nvvm.read.ptx.sreg.ntid.z") {
auto block_size_addr = M->getGlobalVariable("block_size_z"); auto block_size_addr = M->getGlobalVariable("block_size_z");
IRBuilder<> builder(context); IRBuilder<> builder(context);
builder.SetInsertPoint(Call); builder.SetInsertPoint(Call);
auto val = builder.CreateLoad(block_size_addr); auto val = createLoad(builder, block_size_addr);
Call->replaceAllUsesWith(val); Call->replaceAllUsesWith(val);
need_remove.push_back(Call); need_remove.push_back(Call);
} else if (func_name == "llvm.nvvm.read.ptx.sreg.tid.x" || } else if (func_name == "llvm.nvvm.read.ptx.sreg.tid.x" ||
@ -307,15 +315,15 @@ void replace_built_in_function(llvm::Module *M) {
builder.SetInsertPoint(Call); builder.SetInsertPoint(Call);
auto thread_idx = builder.CreateBinOp( auto thread_idx = builder.CreateBinOp(
Instruction::Mul, builder.CreateLoad(local_inter_warp_idx), Instruction::Mul, createLoad(builder, local_inter_warp_idx),
ConstantInt::get(I32, 32), ""); ConstantInt::get(I32, 32), "");
thread_idx = builder.CreateBinOp( thread_idx = builder.CreateBinOp(
Instruction::Add, builder.CreateLoad(local_intra_warp_idx), Instruction::Add, createLoad(builder, local_intra_warp_idx),
thread_idx, "thread_idx"); thread_idx, "thread_idx");
thread_idx = builder.CreateBinOp( thread_idx = builder.CreateBinOp(
Instruction::SRem, thread_idx, Instruction::SRem, thread_idx,
builder.CreateLoad(M->getGlobalVariable("block_size_x")), createLoad(builder, M->getGlobalVariable("block_size_x")),
"thread_id_x"); "thread_id_x");
Call->replaceAllUsesWith(thread_idx); Call->replaceAllUsesWith(thread_idx);
@ -326,15 +334,15 @@ void replace_built_in_function(llvm::Module *M) {
builder.SetInsertPoint(Call); builder.SetInsertPoint(Call);
auto thread_idx = builder.CreateBinOp( auto thread_idx = builder.CreateBinOp(
Instruction::Mul, builder.CreateLoad(local_inter_warp_idx), Instruction::Mul, createLoad(builder, local_inter_warp_idx),
ConstantInt::get(I32, 32), ""); ConstantInt::get(I32, 32), "");
thread_idx = builder.CreateBinOp( thread_idx = builder.CreateBinOp(
Instruction::Add, builder.CreateLoad(local_intra_warp_idx), Instruction::Add, createLoad(builder, local_intra_warp_idx),
thread_idx, "thread_idx"); thread_idx, "thread_idx");
// tidy = tid / block_dim.x // tidy = tid / block_dim.x
thread_idx = builder.CreateBinOp( thread_idx = builder.CreateBinOp(
Instruction::SDiv, thread_idx, Instruction::SDiv, thread_idx,
builder.CreateLoad(M->getGlobalVariable("block_size_x")), createLoad(builder, M->getGlobalVariable("block_size_x")),
"thread_id_y"); "thread_id_y");
Call->replaceAllUsesWith(thread_idx); Call->replaceAllUsesWith(thread_idx);
need_remove.push_back(Call); need_remove.push_back(Call);
@ -350,21 +358,21 @@ void replace_built_in_function(llvm::Module *M) {
auto block_index_addr = M->getGlobalVariable("block_index_x"); auto block_index_addr = M->getGlobalVariable("block_index_x");
IRBuilder<> builder(context); IRBuilder<> builder(context);
builder.SetInsertPoint(Call); builder.SetInsertPoint(Call);
auto block_idx = builder.CreateLoad(block_index_addr); auto block_idx = createLoad(builder, block_index_addr);
Call->replaceAllUsesWith(block_idx); Call->replaceAllUsesWith(block_idx);
need_remove.push_back(Call); need_remove.push_back(Call);
} else if (func_name == "llvm.nvvm.read.ptx.sreg.ctaid.y") { } else if (func_name == "llvm.nvvm.read.ptx.sreg.ctaid.y") {
auto block_index_addr = M->getGlobalVariable("block_index_y"); auto block_index_addr = M->getGlobalVariable("block_index_y");
IRBuilder<> builder(context); IRBuilder<> builder(context);
builder.SetInsertPoint(Call); builder.SetInsertPoint(Call);
auto block_idx = builder.CreateLoad(block_index_addr); auto block_idx = createLoad(builder, block_index_addr);
Call->replaceAllUsesWith(block_idx); Call->replaceAllUsesWith(block_idx);
need_remove.push_back(Call); need_remove.push_back(Call);
} else if (func_name == "llvm.nvvm.read.ptx.sreg.ctaid.z") { } else if (func_name == "llvm.nvvm.read.ptx.sreg.ctaid.z") {
auto block_index_addr = M->getGlobalVariable("block_index_z"); auto block_index_addr = M->getGlobalVariable("block_index_z");
IRBuilder<> builder(context); IRBuilder<> builder(context);
builder.SetInsertPoint(Call); builder.SetInsertPoint(Call);
auto block_idx = builder.CreateLoad(block_index_addr); auto block_idx = createLoad(builder, block_index_addr);
Call->replaceAllUsesWith(block_idx); Call->replaceAllUsesWith(block_idx);
need_remove.push_back(Call); need_remove.push_back(Call);
} else if (func_name == "llvm.nvvm.read.ptx.sreg.nctaid.x" || } else if (func_name == "llvm.nvvm.read.ptx.sreg.nctaid.x" ||
@ -373,21 +381,21 @@ void replace_built_in_function(llvm::Module *M) {
auto grid_size_addr = M->getGlobalVariable("grid_size_x"); auto grid_size_addr = M->getGlobalVariable("grid_size_x");
IRBuilder<> builder(context); IRBuilder<> builder(context);
builder.SetInsertPoint(Call); builder.SetInsertPoint(Call);
auto grid_size = builder.CreateLoad(grid_size_addr); auto grid_size = createLoad(builder, grid_size_addr);
Call->replaceAllUsesWith(grid_size); Call->replaceAllUsesWith(grid_size);
need_remove.push_back(Call); need_remove.push_back(Call);
} else if (func_name == "llvm.nvvm.read.ptx.sreg.nctaid.y") { } else if (func_name == "llvm.nvvm.read.ptx.sreg.nctaid.y") {
auto grid_size_addr = M->getGlobalVariable("grid_size_y"); auto grid_size_addr = M->getGlobalVariable("grid_size_y");
IRBuilder<> builder(context); IRBuilder<> builder(context);
builder.SetInsertPoint(Call); builder.SetInsertPoint(Call);
auto grid_size = builder.CreateLoad(grid_size_addr); auto grid_size = createLoad(builder, grid_size_addr);
Call->replaceAllUsesWith(grid_size); Call->replaceAllUsesWith(grid_size);
need_remove.push_back(Call); need_remove.push_back(Call);
} else if (func_name == "llvm.nvvm.read.ptx.sreg.nctaid.z") { } else if (func_name == "llvm.nvvm.read.ptx.sreg.nctaid.z") {
auto grid_size_addr = M->getGlobalVariable("grid_size_z"); auto grid_size_addr = M->getGlobalVariable("grid_size_z");
IRBuilder<> builder(context); IRBuilder<> builder(context);
builder.SetInsertPoint(Call); builder.SetInsertPoint(Call);
auto grid_size = builder.CreateLoad(grid_size_addr); auto grid_size = createLoad(builder, grid_size_addr);
Call->replaceAllUsesWith(grid_size); Call->replaceAllUsesWith(grid_size);
need_remove.push_back(Call); need_remove.push_back(Call);
} }
@ -401,7 +409,7 @@ void replace_built_in_function(llvm::Module *M) {
// return the rank within the warp // return the rank within the warp
IRBuilder<> builder(context); IRBuilder<> builder(context);
builder.SetInsertPoint(Call); builder.SetInsertPoint(Call);
auto intra_warp_index = builder.CreateLoad(local_intra_warp_idx); auto intra_warp_index = createLoad(builder, local_intra_warp_idx);
Call->replaceAllUsesWith(intra_warp_index); Call->replaceAllUsesWith(intra_warp_index);
need_remove.push_back(Call); need_remove.push_back(Call);
} }
@ -460,7 +468,9 @@ void replace_built_in_function(llvm::Module *M) {
src_alloc, // Alloca src_alloc, // Alloca
Indices, // Indices Indices, // Indices
"", Call); "", Call);
auto new_load = new LoadInst(new_GEP, "", Call); auto new_load =
new LoadInst(new_GEP->getType()->getPointerElementType(),
new_GEP, "", Call);
printf_args.push_back(new_load); printf_args.push_back(new_load);
} }
} }
@ -531,7 +541,7 @@ void replace_asm_call(llvm::Module *M) {
builder.SetInsertPoint(Call); builder.SetInsertPoint(Call);
auto intra_warp_index_addr = auto intra_warp_index_addr =
M->getGlobalVariable("intra_warp_index"); M->getGlobalVariable("intra_warp_index");
auto intra_warp_index = builder.CreateLoad(intra_warp_index_addr); auto intra_warp_index = createLoad(builder, intra_warp_index_addr);
Call->replaceAllUsesWith(intra_warp_index); Call->replaceAllUsesWith(intra_warp_index);
need_remove.push_back(Call); need_remove.push_back(Call);
} }
@ -652,20 +662,18 @@ bool find_barrier_in_region(llvm::BasicBlock *start, llvm::BasicBlock *end) {
return 0; return 0;
} }
/* LoadInst *createLoad(IRBuilder<> &B, Value *addr, bool isVolatile) {
Print IR to String Output for Debugging Purposes return B.CreateLoad(addr->getType()->getPointerElementType(), addr,
*/ isVolatile);
// void printModule(llvm::Module *M) { }
// std::string str;
// llvm::raw_string_ostream ss(str); Value *createInBoundsGEP(IRBuilder<> &B, Value *ptr,
// std::cout << "### Printing Module ###" << std::endl; ArrayRef<Value *> idxlist) {
// for (Module::iterator i = M->begin(), e = M->end(); i != e; ++i) { return B.CreateInBoundsGEP(
// Function *F = &(*i); ptr->getType()->getScalarType()->getPointerElementType(), ptr, idxlist);
// auto func_name = F->getName().str(); }
// std::cout << func_name << std::endl;
// for (Function::iterator b = F->begin(); b != F->end(); ++b) { Value *createGEP(IRBuilder<> &B, Value *ptr, ArrayRef<Value *> idxlist) {
// BasicBlock *B = &(*b); return B.CreateGEP(ptr->getType()->getScalarType()->getPointerElementType(),
// errs() << *B; ptr, idxlist);
// } }
// }
// }

View File

@ -67,7 +67,8 @@ void handle_warp_vote(llvm::Module *M) {
assert(warp_vote_ptr != NULL); assert(warp_vote_ptr != NULL);
auto intra_warp_index_addr = M->getGlobalVariable("intra_warp_index"); auto intra_warp_index_addr = M->getGlobalVariable("intra_warp_index");
auto intra_warp_index = auto intra_warp_index =
new LoadInst(intra_warp_index_addr, "intra_warp_index", sync_inst); new LoadInst(intra_warp_index_addr->getType()->getPointerElementType(),
intra_warp_index_addr, "intra_warp_index", sync_inst);
auto GEP = GetElementPtrInst::Create(NULL, // Pointee type auto GEP = GetElementPtrInst::Create(NULL, // Pointee type
warp_vote_ptr, // Alloca warp_vote_ptr, // Alloca
@ -168,23 +169,22 @@ void handle_warp_shfl(llvm::Module *M) {
auto shfl_offset = shfl_inst->getArgOperand(2); auto shfl_offset = shfl_inst->getArgOperand(2);
auto intra_warp_index = auto intra_warp_index =
builder.CreateLoad(M->getGlobalVariable("intra_warp_index")); createLoad(builder, M->getGlobalVariable("intra_warp_index"));
builder.CreateStore( builder.CreateStore(shfl_variable, createGEP(builder, warp_shfl_ptr,
shfl_variable, {ZERO, intra_warp_index}));
builder.CreateGEP(warp_shfl_ptr, {ZERO, intra_warp_index}));
// we should create barrier before store // we should create barrier before store
CreateIntraWarpBarrier(intra_warp_index); CreateIntraWarpBarrier(intra_warp_index);
// load shuffled data // load shuffled data
auto new_intra_warp_index = auto new_intra_warp_index =
builder.CreateLoad(M->getGlobalVariable("intra_warp_index")); createLoad(builder, M->getGlobalVariable("intra_warp_index"));
auto shfl_name = shfl_inst->getCalledFunction()->getName().str(); auto shfl_name = shfl_inst->getCalledFunction()->getName().str();
if (shfl_name.find("down") != shfl_name.npos) { if (shfl_name.find("down") != shfl_name.npos) {
auto calculate_offset = builder.CreateBinOp( auto calculate_offset = builder.CreateBinOp(
Instruction::Add, new_intra_warp_index, shfl_offset); Instruction::Add, new_intra_warp_index, shfl_offset);
auto new_index = builder.CreateBinOp(Instruction::SRem, calculate_offset, auto new_index = builder.CreateBinOp(Instruction::SRem, calculate_offset,
ConstantInt::get(I32, 32)); ConstantInt::get(I32, 32));
auto gep = builder.CreateGEP(warp_shfl_ptr, {ZERO, new_index}); auto gep = createGEP(builder, warp_shfl_ptr, {ZERO, new_index});
auto load_inst = builder.CreateLoad(gep); auto load_inst = createLoad(builder, gep);
// create barrier // create barrier
CreateIntraWarpBarrier(new_intra_warp_index); CreateIntraWarpBarrier(new_intra_warp_index);
@ -195,8 +195,8 @@ void handle_warp_shfl(llvm::Module *M) {
Instruction::Sub, new_intra_warp_index, shfl_offset); Instruction::Sub, new_intra_warp_index, shfl_offset);
auto new_index = builder.CreateBinOp(Instruction::SRem, calculate_offset, auto new_index = builder.CreateBinOp(Instruction::SRem, calculate_offset,
ConstantInt::get(I32, 32)); ConstantInt::get(I32, 32));
auto gep = builder.CreateGEP(warp_shfl_ptr, {ZERO, new_index}); auto gep = createGEP(builder, warp_shfl_ptr, {ZERO, new_index});
auto load_inst = builder.CreateLoad(gep); auto load_inst = createLoad(builder, gep);
// create barrier // create barrier
CreateIntraWarpBarrier(new_intra_warp_index); CreateIntraWarpBarrier(new_intra_warp_index);
@ -207,8 +207,8 @@ void handle_warp_shfl(llvm::Module *M) {
Instruction::Xor, new_intra_warp_index, shfl_offset); Instruction::Xor, new_intra_warp_index, shfl_offset);
auto new_index = builder.CreateBinOp(Instruction::SRem, calculate_offset, auto new_index = builder.CreateBinOp(Instruction::SRem, calculate_offset,
ConstantInt::get(I32, 32)); ConstantInt::get(I32, 32));
auto gep = builder.CreateGEP(warp_shfl_ptr, {ZERO, new_index}); auto gep = createGEP(builder, warp_shfl_ptr, {ZERO, new_index});
auto load_inst = builder.CreateLoad(gep); auto load_inst = createLoad(builder, gep);
// create barrier // create barrier
CreateIntraWarpBarrier(new_intra_warp_index); CreateIntraWarpBarrier(new_intra_warp_index);

View File

@ -10,7 +10,9 @@ set(CMAKE_VERBOSE_MAKEFILE ON)
add_subdirectory(threadPool) add_subdirectory(threadPool)
# compile x86 runtime library # compile x86 runtime library
include_directories(./include/)
include_directories(./include/x86) include_directories(./include/x86)
include_directories(./threadPool/include/)
include_directories(./threadPool/include/x86) include_directories(./threadPool/include/x86)
file(GLOB proj_SOURCES "src/vortex/*.cpp") file(GLOB proj_SOURCES "src/x86/*.cpp")
add_library(${LIB_NAME} SHARED ${proj_SOURCES}) add_library(${LIB_NAME} SHARED ${proj_SOURCES})

View File

@ -9,13 +9,31 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
cudaError_t cudaGetDevice(int *devPtr) { *devPtr = 0; } cudaError_t cudaGetDevice(int *devPtr) {
*devPtr = 0;
return cudaSuccess;
}
const char *cudaGetErrorName(cudaError_t error) { return "SUCCESS\n"; } const char *cudaGetErrorName(cudaError_t error) { return "SUCCESS\n"; }
cudaError_t cudaDeviceReset(void) { scheduler_uninit(); } cudaError_t cudaDeviceReset(void) {
cudaError_t cudaDeviceSynchronize(void) { cuSynchronizeBarrier(); } scheduler_uninit();
cudaError_t cudaThreadSynchronize(void) { cuSynchronizeBarrier(); } return cudaSuccess;
cudaError_t cudaFree(void *devPtr) { free(devPtr); } }
cudaError_t cudaFreeHost(void *devPtr) { free(devPtr); } cudaError_t cudaDeviceSynchronize(void) {
cuSynchronizeBarrier();
return cudaSuccess;
}
cudaError_t cudaThreadSynchronize(void) {
cuSynchronizeBarrier();
return cudaSuccess;
}
cudaError_t cudaFree(void *devPtr) {
free(devPtr);
return cudaSuccess;
}
cudaError_t cudaFreeHost(void *devPtr) {
free(devPtr);
return cudaSuccess;
}
cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
void **args, size_t sharedMem, void **args, size_t sharedMem,
@ -31,7 +49,7 @@ cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
int lstatus = cuLaunchKernel(&ker); int lstatus = cuLaunchKernel(&ker);
// std::cout << "ret cudaLKernel" << std::endl; return cudaSuccess;
} }
cudaError_t cudaMalloc(void **devPtr, size_t size) { cudaError_t cudaMalloc(void **devPtr, size_t size) {
*devPtr = malloc(size); *devPtr = malloc(size);
@ -68,15 +86,13 @@ cudaError_t cudaMemcpy(void *dst, const void *src, size_t count,
cudaError_t cudaMemcpyToSymbol_host(void *dst, const void *src, size_t count, cudaError_t cudaMemcpyToSymbol_host(void *dst, const void *src, size_t count,
size_t offset, cudaMemcpyKind kind) { size_t offset, cudaMemcpyKind kind) {
assert(offset == 0 && "DO not support offset !=0\n"); assert(offset == 0 && "DO not support offset !=0\n");
memcpy(dst, src + offset, count); memcpy(dst, (char *)src + offset, count);
return cudaSuccess; return cudaSuccess;
} }
cudaError_t cudaSetDevice(int device) { cudaError_t cudaSetDevice(int device) {
// error checking
// std::cout << "cudaSetDevice Called" << std::endl;
init_device(); init_device();
// std::cout << "cudaSetDevice Ret" << std::endl; return cudaSuccess;
} }
cudaError_t cudaStreamCopyAttributes(cudaStream_t dst, cudaStream_t src) { cudaError_t cudaStreamCopyAttributes(cudaStream_t dst, cudaStream_t src) {

View File

@ -11,7 +11,8 @@ set(LIB_NAME threadPool)
set(CMAKE_CXX_STANDARD 14) set(CMAKE_CXX_STANDARD 14)
set(CMAKE_BUILD_TYPE Debug) set(CMAKE_BUILD_TYPE Debug)
include_directories(./include)
include_directories(./include/x86) include_directories(./include/x86)
file(GLOB proj_SOURCES "src/vortex/*.cpp") file(GLOB proj_SOURCES "src/x86/*.cpp")
add_library(${LIB_NAME} SHARED ${proj_SOURCES}) add_library(${LIB_NAME} SHARED ${proj_SOURCES})

View File

@ -29,7 +29,6 @@ int init_device() {
// initialize scheduler // initialize scheduler
int ret = scheduler_init(*device); int ret = scheduler_init(*device);
if (ret != C_SUCCESS) if (ret != C_SUCCESS)
return ret; return ret;
@ -182,6 +181,7 @@ int schedulerEnqueueKernel(cu_kernel **k) {
pthread_cond_broadcast(&(scheduler->wake_pool)); pthread_cond_broadcast(&(scheduler->wake_pool));
MUTEX_UNLOCK(scheduler->work_queue_lock); MUTEX_UNLOCK(scheduler->work_queue_lock);
return 0;
} }
/* /*
@ -191,6 +191,7 @@ int cuLaunchKernel(cu_kernel **k) {
if (!scheduler) { if (!scheduler) {
init_device(); init_device();
} }
std::cout << "launch\n" << std::flush;
// Calculate Block Size N/numBlocks // Calculate Block Size N/numBlocks
cu_kernel *ker = *k; cu_kernel *ker = *k;
@ -238,6 +239,7 @@ int cuLaunchKernel(cu_kernel **k) {
MUTEX_UNLOCK(((cstreamData *)(ker->stream))->stream_lock); MUTEX_UNLOCK(((cstreamData *)(ker->stream))->stream_lock);
} }
} }
return 0;
} }
/* /*
@ -346,7 +348,8 @@ RETRY:
grid_size_x = gridDim.x; grid_size_x = gridDim.x;
grid_size_y = gridDim.y; grid_size_y = gridDim.y;
grid_size_z = gridDim.z; grid_size_z = gridDim.z;
dynamic_shared_memory = (int *)malloc(dynamic_shared_mem_size); if (dynamic_shared_mem_size > 0)
dynamic_shared_memory = (int *)malloc(dynamic_shared_mem_size);
int tmp = block_index; int tmp = block_index;
block_index_x = tmp / (grid_size_y * grid_size_z); block_index_x = tmp / (grid_size_y * grid_size_z);
tmp = tmp % (grid_size_y * grid_size_z); tmp = tmp % (grid_size_y * grid_size_z);