[WIP] migrate to LLVM14

This commit is contained in:
RobinHan 2022-06-17 22:20:13 -04:00
parent bcdcccecc9
commit d7668ccd86
14 changed files with 163 additions and 100 deletions

View File

@ -8,6 +8,7 @@
#include "llvm/Support/ToolOutputFile.h"
#include <iostream>
#include <map>
#include <regex>
#include <set>
using namespace llvm;
@ -200,8 +201,23 @@ void ReplaceKernelLaunch(llvm::Module *M) {
prior name before _host is add
*/
std::string oldName = functionOperand->getName().str();
// For LLVM >= 14, the kernel name carries a "__device_stub__" marker,
// so we strip it here; the "_Z<len>" prefix is removed just below.
// example:
// from: _Z24__device_stub__HistogramPjS_jj
// to: HistogramPjS_jj
oldName = std::regex_replace(oldName,
std::regex("__device_stub__"), "");
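// remove the leading "_Z<digits>" mangling prefix (e.g. _Z24); note this
// skips the first two characters without checking that they are "_Z"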
for (int i = 2; i < oldName.length(); i++) {
if (oldName[i] >= '0' && oldName[i] <= '9')
continue;
oldName = oldName.substr(i);
break;
}
// if parent function is __host and same as the cudaKernelLaunch
// if parent function is __host and same as the
// cudaKernelLaunch
std::string newName = oldName + "_wrapper";
if (func_name == oldName && host_changed &&
oldName.find("_host") != std::string::npos) {
@ -220,12 +236,11 @@ void ReplaceKernelLaunch(llvm::Module *M) {
kernels.insert({functionOperand->getName().str(), F});
}
} else if (cuda_register_kernel_names.find(
calledFunction->getName()) !=
calledFunction->getName().str()) !=
cuda_register_kernel_names.end()) {
// if the called function collides with kernel definition
// TODO: for some reason this changes all occurrences of the function name
// for both cudaKernelLaunch calls and regular function calls
// errs() << *inst;
host_changed = true;
calledFunction->setName(calledFunction->getName() + "_host");
std::cout << std::endl;

View File

@ -18,4 +18,4 @@ file(GLOB proj_HEADERS "include/x86/*.h")
file(GLOB proj_SOURCES "src/x86/*.cpp")
# Add core library.
add_library(${LIB_NAME} SHARED ${proj_HEADERS} ${proj_SOURCES})
add_library(${LIB_NAME} STATIC ${proj_HEADERS} ${proj_SOURCES})

View File

@ -1,6 +1,8 @@
#ifndef __NVVM2x86_TOOL__
#define __NVVM2x86_TOOL__
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
llvm::Module *LoadModuleFromFilr(char *file_name);
void DumpModule(llvm::Module *M, char *file_name);
@ -22,4 +24,10 @@ bool has_barrier(llvm::BasicBlock *B);
bool has_block_barrier(llvm::BasicBlock *B);
bool has_barrier(llvm::Function *F);
void replace_dynamic_shared_memory(llvm::Module *M);
llvm::LoadInst *createLoad(llvm::IRBuilder<> &B, llvm::Value *addr,
bool isVolatile = false);
llvm::Value *createInBoundsGEP(llvm::IRBuilder<> &B, llvm::Value *ptr,
llvm::ArrayRef<llvm::Value *> idxlist);
llvm::Value *createGEP(llvm::IRBuilder<> &B, llvm::Value *ptr,
llvm::ArrayRef<llvm::Value *> idxlist);
#endif

View File

@ -11,7 +11,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Target/TargetMachine.h"
@ -19,6 +19,7 @@
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <iostream>
#include <map>
using namespace llvm;
@ -52,6 +53,14 @@ void decode_input(llvm::Module *M) {
if (!isKernelFunction(M, F))
continue;
auto func_name = F->getName().str();
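// remove the mangling prefix, i.e. the leading "_Z<digits>" (e.g. _Z24);
// the first two characters are skipped without checking that they are "_Z"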
for (int pos = 2; pos < func_name.length(); pos++) {
if (func_name[pos] >= '0' && func_name[pos] <= '9')
continue;
func_name = func_name.substr(pos);
break;
}
llvm::IRBuilder<> Builder(M->getContext());
FunctionCallee fc =
@ -78,7 +87,7 @@ void decode_input(llvm::Module *M) {
*M, Int32T, false, llvm::GlobalValue::ExternalLinkage, NULL,
"thread_memory_size", NULL, llvm::GlobalValue::GeneralDynamicTLSModel,
0, false);
Value *loadedValue = Builder.CreateLoad(global_mem);
Value *loadedValue = createLoad(Builder, global_mem);
llvm::FunctionType *LaunchFun2 = FunctionType::get(
PointerType::get(PointerType::get(Int32T, 0), 0), NULL);
@ -120,12 +129,12 @@ void decode_input(llvm::Module *M) {
Type *ArgType = ii->getType();
// calculate addr
Value *GEP = Builder.CreateGEP(input_arg, ConstantInt::get(Int32T, idx));
Value *GEP = createGEP(Builder, input_arg, ConstantInt::get(Int32T, idx));
// load corresponding int*
GEP = Builder.CreateLoad(GEP);
GEP = createLoad(Builder, GEP);
// bitcast
GEP = Builder.CreateBitOrPointerCast(GEP, PointerType::get(ArgType, 0));
Value *Arg = Builder.CreateLoad(GEP);
Value *Arg = createLoad(Builder, GEP);
Arguments.push_back(Arg);
++idx;
}

View File

@ -42,7 +42,7 @@ bool inline_warp_level_func(llvm::Module *M) {
if (func_name == "_Z10__any_syncji" ||
func_name.find("shfl_down_sync") != std::string::npos) {
InlineFunctionInfo IFI;
InlineFunction(c, IFI);
InlineFunction(*c, IFI);
need_remove.insert(c->getCalledFunction());
changed = true;
}
@ -102,7 +102,7 @@ bool inline_func_with_tid(llvm::Module *M) {
}
for (auto c : need_inline) {
InlineFunctionInfo IFI;
InlineFunction(c, IFI);
InlineFunction(*c, IFI);
}
return changed;
}

View File

@ -179,7 +179,7 @@ llvm::Instruction *GetContextArray(llvm::Instruction *instruction,
llvm::AllocaInst *Alloca = nullptr;
auto block_size_addr = M->getGlobalVariable("block_size");
auto block_size = builder.CreateLoad(block_size_addr);
auto block_size = createLoad(builder, block_size_addr);
Alloca = builder.CreateAlloca(AllocType, block_size, varName);
contextArrays[varName] = Alloca;
@ -208,9 +208,9 @@ llvm::Instruction *AddContextSave(llvm::Instruction *instruction,
std::vector<llvm::Value *> gepArgs;
auto inter_warp_index =
builder.CreateLoad(M->getGlobalVariable("inter_warp_index"));
createLoad(builder, M->getGlobalVariable("inter_warp_index"));
auto intra_warp_index =
builder.CreateLoad(M->getGlobalVariable("intra_warp_index"));
createLoad(builder, M->getGlobalVariable("intra_warp_index"));
auto thread_idx = builder.CreateBinOp(
Instruction::Add, intra_warp_index,
builder.CreateBinOp(Instruction::Mul, inter_warp_index,
@ -218,7 +218,7 @@ llvm::Instruction *AddContextSave(llvm::Instruction *instruction,
"thread_idx");
gepArgs.push_back(thread_idx);
return builder.CreateStore(instruction, builder.CreateGEP(alloca, gepArgs));
return builder.CreateStore(instruction, createGEP(builder, alloca, gepArgs));
}
llvm::Instruction *AddContextRestore(llvm::Value *val,
@ -242,9 +242,9 @@ llvm::Instruction *AddContextRestore(llvm::Value *val,
auto M = before->getParent()->getParent()->getParent();
auto I32 = llvm::Type::getInt32Ty(M->getContext());
auto inter_warp_index =
builder.CreateLoad(M->getGlobalVariable("inter_warp_index"));
createLoad(builder, M->getGlobalVariable("inter_warp_index"));
auto intra_warp_index =
builder.CreateLoad(M->getGlobalVariable("intra_warp_index"));
createLoad(builder, M->getGlobalVariable("intra_warp_index"));
auto thread_idx = builder.CreateBinOp(
Instruction::Add, intra_warp_index,
builder.CreateBinOp(Instruction::Mul, inter_warp_index,
@ -253,11 +253,11 @@ llvm::Instruction *AddContextRestore(llvm::Value *val,
gepArgs.push_back(thread_idx);
llvm::Instruction *gep =
dyn_cast<Instruction>(builder.CreateGEP(alloca, gepArgs));
dyn_cast<Instruction>(createGEP(builder, alloca, gepArgs));
if (isAlloca) {
return gep;
}
return builder.CreateLoad(gep);
return createLoad(builder, gep);
}
void AddContextSaveRestore(llvm::Instruction *instruction,
@ -316,7 +316,7 @@ void handle_alloc(llvm::Function *F) {
// generate a new alloc
auto block_size_addr = M->getGlobalVariable("block_size");
IRBuilder<> builder(inst);
auto block_size = builder.CreateLoad(block_size_addr);
auto block_size = createLoad(builder, block_size_addr);
llvm::Type *elementType = NULL;
if (dyn_cast<AllocaInst>(inst)->getType()->getElementType()) {
@ -338,16 +338,16 @@ void handle_alloc(llvm::Function *F) {
IRBuilder<> builder(user);
// std::vector<llvm::Value *> gepArgs;
auto inter_warp_index =
builder.CreateLoad(M->getGlobalVariable("inter_warp_index"));
createLoad(builder, M->getGlobalVariable("inter_warp_index"));
auto intra_warp_index =
builder.CreateLoad(M->getGlobalVariable("intra_warp_index"));
createLoad(builder, M->getGlobalVariable("intra_warp_index"));
auto thread_idx = builder.CreateBinOp(
Instruction::Add, intra_warp_index,
builder.CreateBinOp(Instruction::Mul, inter_warp_index,
ConstantInt::get(I32, 32)),
"thread_idx");
auto gep = builder.CreateGEP(Alloca, thread_idx);
auto gep = createGEP(builder, Alloca, thread_idx);
user->replaceUsesOfWith(inst, gep);
}
@ -479,19 +479,19 @@ BasicBlock *insert_loop_cond(llvm::BasicBlock *InsertCondBefore,
auto inter_warp_index = M->getGlobalVariable("inter_warp_index");
auto block_size = M->getGlobalVariable("block_size");
auto warp_cnt =
builder.CreateBinOp(Instruction::SDiv, builder.CreateLoad(block_size),
builder.CreateBinOp(Instruction::SDiv, createLoad(builder, block_size),
ConstantInt::get(I32, 32), "warp_number");
cmpResult =
builder.CreateICmpULT(builder.CreateLoad(inter_warp_index), warp_cnt);
builder.CreateICmpULT(createLoad(builder, inter_warp_index), warp_cnt);
} else {
auto intra_warp_index = M->getGlobalVariable("intra_warp_index");
auto block_size = M->getGlobalVariable("block_size");
if (!need_nested_loop) {
cmpResult = builder.CreateICmpULT(builder.CreateLoad(intra_warp_index),
builder.CreateLoad(block_size));
cmpResult = builder.CreateICmpULT(createLoad(builder, intra_warp_index),
createLoad(builder, block_size));
} else {
cmpResult = builder.CreateICmpULT(builder.CreateLoad(intra_warp_index),
cmpResult = builder.CreateICmpULT(createLoad(builder, intra_warp_index),
ConstantInt::get(I32, 32));
}
}
@ -513,13 +513,13 @@ BasicBlock *insert_loop_inc(llvm::BasicBlock *InsertIncBefore,
if (intra_warp_loop) { // intra warp
auto intra_warp_index = M->getGlobalVariable("intra_warp_index");
auto new_index = builder.CreateBinOp(
Instruction::Add, builder.CreateLoad(intra_warp_index),
Instruction::Add, createLoad(builder, intra_warp_index),
ConstantInt::get(I32, 1), "intra_warp_index_increment");
builder.CreateStore(new_index, intra_warp_index);
} else { // inter warp
auto inter_warp_index = M->getGlobalVariable("inter_warp_index");
auto new_index = builder.CreateBinOp(
Instruction::Add, builder.CreateLoad(inter_warp_index),
Instruction::Add, createLoad(builder, inter_warp_index),
ConstantInt::get(I32, 1), "inter_warp_index_increment");
builder.CreateStore(new_index, inter_warp_index);
}

View File

@ -99,7 +99,7 @@ void mem_share2global(llvm::Module *M) {
global_memory->setComdat(comdat);
global_memory->setLinkage(llvm::GlobalValue::LinkOnceODRLinkage);
global_memory->setInitializer(undef);
global_memory->setAlignment(share_memory->getAlignment());
global_memory->setAlignment(share_memory->getAlign());
corresponding_global_memory.insert(
std::pair<GlobalVariable *, GlobalVariable *>(share_memory,
global_memory));

View File

@ -20,10 +20,11 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/PassInfo.h"
#include "llvm/PassRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/Host.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"

View File

@ -1,8 +1,10 @@
#include "tool.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
@ -10,9 +12,16 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <iostream>
#include <set>
@ -41,7 +50,7 @@ void DumpModule(llvm::Module *M, char *file_name) {
std::string msg;
llvm::raw_string_ostream os(msg);
std::error_code EC;
ToolOutputFile Out(file_name, EC, sys::fs::F_None);
ToolOutputFile Out(file_name, EC, sys::fs::OF_None);
if (EC) {
errs() << "Fails to open output file: " << EC.message();
return;
@ -128,7 +137,7 @@ llvm::Instruction *BreakPHIToAllocas(PHINode *phi) {
}
builder.SetInsertPoint(phi);
llvm::Instruction *loadedValue = builder.CreateLoad(alloca);
llvm::Instruction *loadedValue = createLoad(builder, alloca);
phi->replaceAllUsesWith(loadedValue);
phi->eraseFromParent();
@ -219,13 +228,12 @@ void replace_dynamic_shared_memory(llvm::Module *M) {
if (!dynamic_shared_memory_addr) {
return;
}
auto load_shared_memory =
new LoadInst(dynamic_shared_memory_addr, "new_load");
auto load_shared_memory = new LoadInst(
dynamic_shared_memory_addr->getType()->getPointerElementType(),
dynamic_shared_memory_addr, "new_load", &*F->begin()->begin());
auto new_bit_cast =
new BitCastInst(load_shared_memory,
dynamic_shared_memory_addr->getType(), "new_bit_cast");
new_bit_cast->insertBefore(&*F->begin()->begin());
load_shared_memory->insertBefore(new_bit_cast);
dynamic_shared_memory_addr->replaceUsesWithIf(new_bit_cast, [&](Use &U) {
auto *Instr = dyn_cast<Instruction>(U.getUser());
return Instr != new_bit_cast && Instr != load_shared_memory;
@ -281,21 +289,21 @@ void replace_built_in_function(llvm::Module *M) {
auto block_size_addr = M->getGlobalVariable("block_size_x");
IRBuilder<> builder(context);
builder.SetInsertPoint(Call);
auto val = builder.CreateLoad(block_size_addr);
auto val = createLoad(builder, block_size_addr);
Call->replaceAllUsesWith(val);
need_remove.push_back(Call);
} else if (func_name == "llvm.nvvm.read.ptx.sreg.ntid.y") {
auto block_size_addr = M->getGlobalVariable("block_size_y");
IRBuilder<> builder(context);
builder.SetInsertPoint(Call);
auto val = builder.CreateLoad(block_size_addr);
auto val = createLoad(builder, block_size_addr);
Call->replaceAllUsesWith(val);
need_remove.push_back(Call);
} else if (func_name == "llvm.nvvm.read.ptx.sreg.ntid.z") {
auto block_size_addr = M->getGlobalVariable("block_size_z");
IRBuilder<> builder(context);
builder.SetInsertPoint(Call);
auto val = builder.CreateLoad(block_size_addr);
auto val = createLoad(builder, block_size_addr);
Call->replaceAllUsesWith(val);
need_remove.push_back(Call);
} else if (func_name == "llvm.nvvm.read.ptx.sreg.tid.x" ||
@ -307,15 +315,15 @@ void replace_built_in_function(llvm::Module *M) {
builder.SetInsertPoint(Call);
auto thread_idx = builder.CreateBinOp(
Instruction::Mul, builder.CreateLoad(local_inter_warp_idx),
Instruction::Mul, createLoad(builder, local_inter_warp_idx),
ConstantInt::get(I32, 32), "");
thread_idx = builder.CreateBinOp(
Instruction::Add, builder.CreateLoad(local_intra_warp_idx),
Instruction::Add, createLoad(builder, local_intra_warp_idx),
thread_idx, "thread_idx");
thread_idx = builder.CreateBinOp(
Instruction::SRem, thread_idx,
builder.CreateLoad(M->getGlobalVariable("block_size_x")),
createLoad(builder, M->getGlobalVariable("block_size_x")),
"thread_id_x");
Call->replaceAllUsesWith(thread_idx);
@ -326,15 +334,15 @@ void replace_built_in_function(llvm::Module *M) {
builder.SetInsertPoint(Call);
auto thread_idx = builder.CreateBinOp(
Instruction::Mul, builder.CreateLoad(local_inter_warp_idx),
Instruction::Mul, createLoad(builder, local_inter_warp_idx),
ConstantInt::get(I32, 32), "");
thread_idx = builder.CreateBinOp(
Instruction::Add, builder.CreateLoad(local_intra_warp_idx),
Instruction::Add, createLoad(builder, local_intra_warp_idx),
thread_idx, "thread_idx");
// tidy = tid / block_dim.x
thread_idx = builder.CreateBinOp(
Instruction::SDiv, thread_idx,
builder.CreateLoad(M->getGlobalVariable("block_size_x")),
createLoad(builder, M->getGlobalVariable("block_size_x")),
"thread_id_y");
Call->replaceAllUsesWith(thread_idx);
need_remove.push_back(Call);
@ -350,21 +358,21 @@ void replace_built_in_function(llvm::Module *M) {
auto block_index_addr = M->getGlobalVariable("block_index_x");
IRBuilder<> builder(context);
builder.SetInsertPoint(Call);
auto block_idx = builder.CreateLoad(block_index_addr);
auto block_idx = createLoad(builder, block_index_addr);
Call->replaceAllUsesWith(block_idx);
need_remove.push_back(Call);
} else if (func_name == "llvm.nvvm.read.ptx.sreg.ctaid.y") {
auto block_index_addr = M->getGlobalVariable("block_index_y");
IRBuilder<> builder(context);
builder.SetInsertPoint(Call);
auto block_idx = builder.CreateLoad(block_index_addr);
auto block_idx = createLoad(builder, block_index_addr);
Call->replaceAllUsesWith(block_idx);
need_remove.push_back(Call);
} else if (func_name == "llvm.nvvm.read.ptx.sreg.ctaid.z") {
auto block_index_addr = M->getGlobalVariable("block_index_z");
IRBuilder<> builder(context);
builder.SetInsertPoint(Call);
auto block_idx = builder.CreateLoad(block_index_addr);
auto block_idx = createLoad(builder, block_index_addr);
Call->replaceAllUsesWith(block_idx);
need_remove.push_back(Call);
} else if (func_name == "llvm.nvvm.read.ptx.sreg.nctaid.x" ||
@ -373,21 +381,21 @@ void replace_built_in_function(llvm::Module *M) {
auto grid_size_addr = M->getGlobalVariable("grid_size_x");
IRBuilder<> builder(context);
builder.SetInsertPoint(Call);
auto grid_size = builder.CreateLoad(grid_size_addr);
auto grid_size = createLoad(builder, grid_size_addr);
Call->replaceAllUsesWith(grid_size);
need_remove.push_back(Call);
} else if (func_name == "llvm.nvvm.read.ptx.sreg.nctaid.y") {
auto grid_size_addr = M->getGlobalVariable("grid_size_y");
IRBuilder<> builder(context);
builder.SetInsertPoint(Call);
auto grid_size = builder.CreateLoad(grid_size_addr);
auto grid_size = createLoad(builder, grid_size_addr);
Call->replaceAllUsesWith(grid_size);
need_remove.push_back(Call);
} else if (func_name == "llvm.nvvm.read.ptx.sreg.nctaid.z") {
auto grid_size_addr = M->getGlobalVariable("grid_size_z");
IRBuilder<> builder(context);
builder.SetInsertPoint(Call);
auto grid_size = builder.CreateLoad(grid_size_addr);
auto grid_size = createLoad(builder, grid_size_addr);
Call->replaceAllUsesWith(grid_size);
need_remove.push_back(Call);
}
@ -401,7 +409,7 @@ void replace_built_in_function(llvm::Module *M) {
// return the rank within the warp
IRBuilder<> builder(context);
builder.SetInsertPoint(Call);
auto intra_warp_index = builder.CreateLoad(local_intra_warp_idx);
auto intra_warp_index = createLoad(builder, local_intra_warp_idx);
Call->replaceAllUsesWith(intra_warp_index);
need_remove.push_back(Call);
}
@ -460,7 +468,9 @@ void replace_built_in_function(llvm::Module *M) {
src_alloc, // Alloca
Indices, // Indices
"", Call);
auto new_load = new LoadInst(new_GEP, "", Call);
auto new_load =
new LoadInst(new_GEP->getType()->getPointerElementType(),
new_GEP, "", Call);
printf_args.push_back(new_load);
}
}
@ -531,7 +541,7 @@ void replace_asm_call(llvm::Module *M) {
builder.SetInsertPoint(Call);
auto intra_warp_index_addr =
M->getGlobalVariable("intra_warp_index");
auto intra_warp_index = builder.CreateLoad(intra_warp_index_addr);
auto intra_warp_index = createLoad(builder, intra_warp_index_addr);
Call->replaceAllUsesWith(intra_warp_index);
need_remove.push_back(Call);
}
@ -652,20 +662,18 @@ bool find_barrier_in_region(llvm::BasicBlock *start, llvm::BasicBlock *end) {
return 0;
}
/*
Print IR to String Output for Debugging Purposes
*/
// void printModule(llvm::Module *M) {
// std::string str;
// llvm::raw_string_ostream ss(str);
// std::cout << "### Printing Module ###" << std::endl;
// for (Module::iterator i = M->begin(), e = M->end(); i != e; ++i) {
// Function *F = &(*i);
// auto func_name = F->getName().str();
// std::cout << func_name << std::endl;
// for (Function::iterator b = F->begin(); b != F->end(); ++b) {
// BasicBlock *B = &(*b);
// errs() << *B;
// }
// }
// }
// Emits a load from `addr` through builder `B`.
// The loaded type is taken from the pointee type of `addr`
// (getPointerElementType), and `isVolatile` is forwarded to CreateLoad.
// NOTE(review): getPointerElementType() presumably requires typed (non-opaque)
// pointers -- confirm before moving past LLVM 14.
LoadInst *createLoad(IRBuilder<> &B, Value *addr, bool isVolatile) {
return B.CreateLoad(addr->getType()->getPointerElementType(), addr,
isVolatile);
}
// Emits an in-bounds GEP on `ptr` with indices `idxlist` through builder `B`.
// The source element type is derived from `ptr` itself: the scalar type of
// its type, then that pointer's pointee type.
// NOTE(review): getPointerElementType() presumably requires typed (non-opaque)
// pointers -- confirm before moving past LLVM 14.
Value *createInBoundsGEP(IRBuilder<> &B, Value *ptr,
ArrayRef<Value *> idxlist) {
return B.CreateInBoundsGEP(
ptr->getType()->getScalarType()->getPointerElementType(), ptr, idxlist);
}
// Emits a GEP on `ptr` with indices `idxlist` through builder `B`.
// The source element type is derived from `ptr` itself: the scalar type of
// its type, then that pointer's pointee type.
// NOTE(review): getPointerElementType() presumably requires typed (non-opaque)
// pointers -- confirm before moving past LLVM 14.
Value *createGEP(IRBuilder<> &B, Value *ptr, ArrayRef<Value *> idxlist) {
return B.CreateGEP(ptr->getType()->getScalarType()->getPointerElementType(),
ptr, idxlist);
}

View File

@ -67,7 +67,8 @@ void handle_warp_vote(llvm::Module *M) {
assert(warp_vote_ptr != NULL);
auto intra_warp_index_addr = M->getGlobalVariable("intra_warp_index");
auto intra_warp_index =
new LoadInst(intra_warp_index_addr, "intra_warp_index", sync_inst);
new LoadInst(intra_warp_index_addr->getType()->getPointerElementType(),
intra_warp_index_addr, "intra_warp_index", sync_inst);
auto GEP = GetElementPtrInst::Create(NULL, // Pointee type
warp_vote_ptr, // Alloca
@ -168,23 +169,22 @@ void handle_warp_shfl(llvm::Module *M) {
auto shfl_offset = shfl_inst->getArgOperand(2);
auto intra_warp_index =
builder.CreateLoad(M->getGlobalVariable("intra_warp_index"));
builder.CreateStore(
shfl_variable,
builder.CreateGEP(warp_shfl_ptr, {ZERO, intra_warp_index}));
createLoad(builder, M->getGlobalVariable("intra_warp_index"));
builder.CreateStore(shfl_variable, createGEP(builder, warp_shfl_ptr,
{ZERO, intra_warp_index}));
// we should create barrier before store
CreateIntraWarpBarrier(intra_warp_index);
// load shuffled data
auto new_intra_warp_index =
builder.CreateLoad(M->getGlobalVariable("intra_warp_index"));
createLoad(builder, M->getGlobalVariable("intra_warp_index"));
auto shfl_name = shfl_inst->getCalledFunction()->getName().str();
if (shfl_name.find("down") != shfl_name.npos) {
auto calculate_offset = builder.CreateBinOp(
Instruction::Add, new_intra_warp_index, shfl_offset);
auto new_index = builder.CreateBinOp(Instruction::SRem, calculate_offset,
ConstantInt::get(I32, 32));
auto gep = builder.CreateGEP(warp_shfl_ptr, {ZERO, new_index});
auto load_inst = builder.CreateLoad(gep);
auto gep = createGEP(builder, warp_shfl_ptr, {ZERO, new_index});
auto load_inst = createLoad(builder, gep);
// create barrier
CreateIntraWarpBarrier(new_intra_warp_index);
@ -195,8 +195,8 @@ void handle_warp_shfl(llvm::Module *M) {
Instruction::Sub, new_intra_warp_index, shfl_offset);
auto new_index = builder.CreateBinOp(Instruction::SRem, calculate_offset,
ConstantInt::get(I32, 32));
auto gep = builder.CreateGEP(warp_shfl_ptr, {ZERO, new_index});
auto load_inst = builder.CreateLoad(gep);
auto gep = createGEP(builder, warp_shfl_ptr, {ZERO, new_index});
auto load_inst = createLoad(builder, gep);
// create barrier
CreateIntraWarpBarrier(new_intra_warp_index);
@ -207,8 +207,8 @@ void handle_warp_shfl(llvm::Module *M) {
Instruction::Xor, new_intra_warp_index, shfl_offset);
auto new_index = builder.CreateBinOp(Instruction::SRem, calculate_offset,
ConstantInt::get(I32, 32));
auto gep = builder.CreateGEP(warp_shfl_ptr, {ZERO, new_index});
auto load_inst = builder.CreateLoad(gep);
auto gep = createGEP(builder, warp_shfl_ptr, {ZERO, new_index});
auto load_inst = createLoad(builder, gep);
// create barrier
CreateIntraWarpBarrier(new_intra_warp_index);

View File

@ -10,7 +10,9 @@ set(CMAKE_VERBOSE_MAKEFILE ON)
add_subdirectory(threadPool)
# compile x86 runtime library
include_directories(./include/)
include_directories(./include/x86)
include_directories(./threadPool/include/)
include_directories(./threadPool/include/x86)
file(GLOB proj_SOURCES "src/vortex/*.cpp")
file(GLOB proj_SOURCES "src/x86/*.cpp")
add_library(${LIB_NAME} SHARED ${proj_SOURCES})

View File

@ -9,13 +9,31 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
cudaError_t cudaGetDevice(int *devPtr) { *devPtr = 0; }
// Writes device index 0 to *devPtr and returns cudaSuccess.
// devPtr is dereferenced unconditionally, so it must be non-null.
cudaError_t cudaGetDevice(int *devPtr) {
*devPtr = 0;
return cudaSuccess;
}
// Ignores `error` and always returns the literal "SUCCESS\n".
const char *cudaGetErrorName(cudaError_t error) { return "SUCCESS\n"; }
cudaError_t cudaDeviceReset(void) { scheduler_uninit(); }
cudaError_t cudaDeviceSynchronize(void) { cuSynchronizeBarrier(); }
cudaError_t cudaThreadSynchronize(void) { cuSynchronizeBarrier(); }
cudaError_t cudaFree(void *devPtr) { free(devPtr); }
cudaError_t cudaFreeHost(void *devPtr) { free(devPtr); }
// Calls scheduler_uninit() and returns cudaSuccess.
// Any result of scheduler_uninit() is not inspected here.
cudaError_t cudaDeviceReset(void) {
scheduler_uninit();
return cudaSuccess;
}
// Calls cuSynchronizeBarrier() and then returns cudaSuccess.
// NOTE(review): presumably the barrier blocks until queued kernels finish --
// confirm against the threadPool implementation.
cudaError_t cudaDeviceSynchronize(void) {
cuSynchronizeBarrier();
return cudaSuccess;
}
// Same body as cudaDeviceSynchronize: calls cuSynchronizeBarrier() and
// returns cudaSuccess.
cudaError_t cudaThreadSynchronize(void) {
cuSynchronizeBarrier();
return cudaSuccess;
}
// Releases devPtr with free() and returns cudaSuccess.
// This matches cudaMalloc in this file, which allocates with malloc().
cudaError_t cudaFree(void *devPtr) {
free(devPtr);
return cudaSuccess;
}
// Releases devPtr with free() and returns cudaSuccess.
// NOTE(review): presumably the matching host allocation uses malloc() --
// confirm against the host-allocation function, which is not visible here.
cudaError_t cudaFreeHost(void *devPtr) {
free(devPtr);
return cudaSuccess;
}
cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
void **args, size_t sharedMem,
@ -31,7 +49,7 @@ cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
int lstatus = cuLaunchKernel(&ker);
// std::cout << "ret cudaLKernel" << std::endl;
return cudaSuccess;
}
cudaError_t cudaMalloc(void **devPtr, size_t size) {
*devPtr = malloc(size);
@ -68,15 +86,13 @@ cudaError_t cudaMemcpy(void *dst, const void *src, size_t count,
cudaError_t cudaMemcpyToSymbol_host(void *dst, const void *src, size_t count,
size_t offset, cudaMemcpyKind kind) {
assert(offset == 0 && "DO not support offset !=0\n");
memcpy(dst, src + offset, count);
memcpy(dst, (char *)src + offset, count);
return cudaSuccess;
}
cudaError_t cudaSetDevice(int device) {
// error checking
// std::cout << "cudaSetDevice Called" << std::endl;
init_device();
// std::cout << "cudaSetDevice Ret" << std::endl;
return cudaSuccess;
}
cudaError_t cudaStreamCopyAttributes(cudaStream_t dst, cudaStream_t src) {

View File

@ -11,7 +11,8 @@ set(LIB_NAME threadPool)
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_BUILD_TYPE Debug)
include_directories(./include)
include_directories(./include/x86)
file(GLOB proj_SOURCES "src/vortex/*.cpp")
file(GLOB proj_SOURCES "src/x86/*.cpp")
add_library(${LIB_NAME} SHARED ${proj_SOURCES})

View File

@ -29,7 +29,6 @@ int init_device() {
// initialize scheduler
int ret = scheduler_init(*device);
if (ret != C_SUCCESS)
return ret;
@ -182,6 +181,7 @@ int schedulerEnqueueKernel(cu_kernel **k) {
pthread_cond_broadcast(&(scheduler->wake_pool));
MUTEX_UNLOCK(scheduler->work_queue_lock);
return 0;
}
/*
@ -191,6 +191,7 @@ int cuLaunchKernel(cu_kernel **k) {
if (!scheduler) {
init_device();
}
std::cout << "launch\n" << std::flush;
// Calculate Block Size N/numBlocks
cu_kernel *ker = *k;
@ -238,6 +239,7 @@ int cuLaunchKernel(cu_kernel **k) {
MUTEX_UNLOCK(((cstreamData *)(ker->stream))->stream_lock);
}
}
return 0;
}
/*
@ -346,7 +348,8 @@ RETRY:
grid_size_x = gridDim.x;
grid_size_y = gridDim.y;
grid_size_z = gridDim.z;
dynamic_shared_memory = (int *)malloc(dynamic_shared_mem_size);
if (dynamic_shared_mem_size > 0)
dynamic_shared_memory = (int *)malloc(dynamic_shared_mem_size);
int tmp = block_index;
block_index_x = tmp / (grid_size_y * grid_size_z);
tmp = tmp % (grid_size_y * grid_size_z);