[WIP] migrate to LLVM14
This commit is contained in:
parent
bcdcccecc9
commit
d7668ccd86
|
@ -8,6 +8,7 @@
|
||||||
#include "llvm/Support/ToolOutputFile.h"
|
#include "llvm/Support/ToolOutputFile.h"
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <map>
|
#include <map>
|
||||||
|
#include <regex>
|
||||||
#include <set>
|
#include <set>
|
||||||
|
|
||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
|
@ -200,8 +201,23 @@ void ReplaceKernelLaunch(llvm::Module *M) {
|
||||||
prior name before _host is add
|
prior name before _host is add
|
||||||
*/
|
*/
|
||||||
std::string oldName = functionOperand->getName().str();
|
std::string oldName = functionOperand->getName().str();
|
||||||
|
// For LLVM>=14, it will add _device_stub prefix for the kernel
|
||||||
|
// name, thus, we need to remove the prefix
|
||||||
|
// example:
|
||||||
|
// from: _Z24__device_stub__HistogramPjS_jj
|
||||||
|
// to: HistogramPjS_jj
|
||||||
|
oldName = std::regex_replace(oldName,
|
||||||
|
std::regex("__device_stub__"), "");
|
||||||
|
// remove _Z24
|
||||||
|
for (int i = 2; i < oldName.length(); i++) {
|
||||||
|
if (oldName[i] >= '0' && oldName[i] <= '9')
|
||||||
|
continue;
|
||||||
|
oldName = oldName.substr(i);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
// if parent function is __host and same as the cudaKernelLaunch
|
// if parent function is __host and same as the
|
||||||
|
// cudaKernelLaunch
|
||||||
std::string newName = oldName + "_wrapper";
|
std::string newName = oldName + "_wrapper";
|
||||||
if (func_name == oldName && host_changed &&
|
if (func_name == oldName && host_changed &&
|
||||||
oldName.find("_host") != std::string::npos) {
|
oldName.find("_host") != std::string::npos) {
|
||||||
|
@ -220,12 +236,11 @@ void ReplaceKernelLaunch(llvm::Module *M) {
|
||||||
kernels.insert({functionOperand->getName().str(), F});
|
kernels.insert({functionOperand->getName().str(), F});
|
||||||
}
|
}
|
||||||
} else if (cuda_register_kernel_names.find(
|
} else if (cuda_register_kernel_names.find(
|
||||||
calledFunction->getName()) !=
|
calledFunction->getName().str()) !=
|
||||||
cuda_register_kernel_names.end()) {
|
cuda_register_kernel_names.end()) {
|
||||||
// if the called function collides with kernel definition
|
// if the called function collides with kernel definition
|
||||||
// TODO: for some reason changes all occurrences of the function name
|
// TODO: for some reason changes all occurrences of the function name
|
||||||
// for both cudaKernelLaunch calls and regular function call
|
// for both cudaKernelLaunch calls and regular function call
|
||||||
// errs() << *inst;
|
|
||||||
host_changed = true;
|
host_changed = true;
|
||||||
calledFunction->setName(calledFunction->getName() + "_host");
|
calledFunction->setName(calledFunction->getName() + "_host");
|
||||||
std::cout << std::endl;
|
std::cout << std::endl;
|
||||||
|
|
|
@ -18,4 +18,4 @@ file(GLOB proj_HEADERS "include/x86/*.h")
|
||||||
file(GLOB proj_SOURCES "src/x86/*.cpp")
|
file(GLOB proj_SOURCES "src/x86/*.cpp")
|
||||||
|
|
||||||
# Add core library.
|
# Add core library.
|
||||||
add_library(${LIB_NAME} SHARED ${proj_HEADERS} ${proj_SOURCES})
|
add_library(${LIB_NAME} STATIC ${proj_HEADERS} ${proj_SOURCES})
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
#ifndef __NVVM2x86_TOOL__
|
#ifndef __NVVM2x86_TOOL__
|
||||||
#define __NVVM2x86_TOOL__
|
#define __NVVM2x86_TOOL__
|
||||||
|
|
||||||
|
#include "llvm/IR/IRBuilder.h"
|
||||||
|
#include "llvm/IR/Instructions.h"
|
||||||
#include "llvm/IR/Module.h"
|
#include "llvm/IR/Module.h"
|
||||||
llvm::Module *LoadModuleFromFilr(char *file_name);
|
llvm::Module *LoadModuleFromFilr(char *file_name);
|
||||||
void DumpModule(llvm::Module *M, char *file_name);
|
void DumpModule(llvm::Module *M, char *file_name);
|
||||||
|
@ -22,4 +24,10 @@ bool has_barrier(llvm::BasicBlock *B);
|
||||||
bool has_block_barrier(llvm::BasicBlock *B);
|
bool has_block_barrier(llvm::BasicBlock *B);
|
||||||
bool has_barrier(llvm::Function *F);
|
bool has_barrier(llvm::Function *F);
|
||||||
void replace_dynamic_shared_memory(llvm::Module *M);
|
void replace_dynamic_shared_memory(llvm::Module *M);
|
||||||
|
llvm::LoadInst *createLoad(llvm::IRBuilder<> &B, llvm::Value *addr,
|
||||||
|
bool isVolatile = false);
|
||||||
|
llvm::Value *createInBoundsGEP(llvm::IRBuilder<> &B, llvm::Value *ptr,
|
||||||
|
llvm::ArrayRef<llvm::Value *> idxlist);
|
||||||
|
llvm::Value *createGEP(llvm::IRBuilder<> &B, llvm::Value *ptr,
|
||||||
|
llvm::ArrayRef<llvm::Value *> idxlist);
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -11,7 +11,7 @@
|
||||||
#include "llvm/IR/Module.h"
|
#include "llvm/IR/Module.h"
|
||||||
#include "llvm/IR/Verifier.h"
|
#include "llvm/IR/Verifier.h"
|
||||||
#include "llvm/IRReader/IRReader.h"
|
#include "llvm/IRReader/IRReader.h"
|
||||||
#include "llvm/Support/TargetRegistry.h"
|
#include "llvm/MC/TargetRegistry.h"
|
||||||
#include "llvm/Support/TargetSelect.h"
|
#include "llvm/Support/TargetSelect.h"
|
||||||
#include "llvm/Support/ToolOutputFile.h"
|
#include "llvm/Support/ToolOutputFile.h"
|
||||||
#include "llvm/Target/TargetMachine.h"
|
#include "llvm/Target/TargetMachine.h"
|
||||||
|
@ -19,6 +19,7 @@
|
||||||
#include "llvm/Transforms/Utils/Cloning.h"
|
#include "llvm/Transforms/Utils/Cloning.h"
|
||||||
#include "llvm/Transforms/Utils/ValueMapper.h"
|
#include "llvm/Transforms/Utils/ValueMapper.h"
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#include <map>
|
||||||
|
|
||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
|
|
||||||
|
@ -52,6 +53,14 @@ void decode_input(llvm::Module *M) {
|
||||||
if (!isKernelFunction(M, F))
|
if (!isKernelFunction(M, F))
|
||||||
continue;
|
continue;
|
||||||
auto func_name = F->getName().str();
|
auto func_name = F->getName().str();
|
||||||
|
// remove mangle prefix
|
||||||
|
// remove _Z24
|
||||||
|
for (int pos = 2; pos < func_name.length(); pos++) {
|
||||||
|
if (func_name[pos] >= '0' && func_name[pos] <= '9')
|
||||||
|
continue;
|
||||||
|
func_name = func_name.substr(pos);
|
||||||
|
break;
|
||||||
|
}
|
||||||
llvm::IRBuilder<> Builder(M->getContext());
|
llvm::IRBuilder<> Builder(M->getContext());
|
||||||
|
|
||||||
FunctionCallee fc =
|
FunctionCallee fc =
|
||||||
|
@ -78,7 +87,7 @@ void decode_input(llvm::Module *M) {
|
||||||
*M, Int32T, false, llvm::GlobalValue::ExternalLinkage, NULL,
|
*M, Int32T, false, llvm::GlobalValue::ExternalLinkage, NULL,
|
||||||
"thread_memory_size", NULL, llvm::GlobalValue::GeneralDynamicTLSModel,
|
"thread_memory_size", NULL, llvm::GlobalValue::GeneralDynamicTLSModel,
|
||||||
0, false);
|
0, false);
|
||||||
Value *loadedValue = Builder.CreateLoad(global_mem);
|
Value *loadedValue = createLoad(Builder, global_mem);
|
||||||
|
|
||||||
llvm::FunctionType *LaunchFun2 = FunctionType::get(
|
llvm::FunctionType *LaunchFun2 = FunctionType::get(
|
||||||
PointerType::get(PointerType::get(Int32T, 0), 0), NULL);
|
PointerType::get(PointerType::get(Int32T, 0), 0), NULL);
|
||||||
|
@ -120,12 +129,12 @@ void decode_input(llvm::Module *M) {
|
||||||
Type *ArgType = ii->getType();
|
Type *ArgType = ii->getType();
|
||||||
|
|
||||||
// calculate addr
|
// calculate addr
|
||||||
Value *GEP = Builder.CreateGEP(input_arg, ConstantInt::get(Int32T, idx));
|
Value *GEP = createGEP(Builder, input_arg, ConstantInt::get(Int32T, idx));
|
||||||
// load corresponding int*
|
// load corresponding int*
|
||||||
GEP = Builder.CreateLoad(GEP);
|
GEP = createLoad(Builder, GEP);
|
||||||
// bitcast
|
// bitcast
|
||||||
GEP = Builder.CreateBitOrPointerCast(GEP, PointerType::get(ArgType, 0));
|
GEP = Builder.CreateBitOrPointerCast(GEP, PointerType::get(ArgType, 0));
|
||||||
Value *Arg = Builder.CreateLoad(GEP);
|
Value *Arg = createLoad(Builder, GEP);
|
||||||
Arguments.push_back(Arg);
|
Arguments.push_back(Arg);
|
||||||
++idx;
|
++idx;
|
||||||
}
|
}
|
||||||
|
|
|
@ -42,7 +42,7 @@ bool inline_warp_level_func(llvm::Module *M) {
|
||||||
if (func_name == "_Z10__any_syncji" ||
|
if (func_name == "_Z10__any_syncji" ||
|
||||||
func_name.find("shfl_down_sync") != std::string::npos) {
|
func_name.find("shfl_down_sync") != std::string::npos) {
|
||||||
InlineFunctionInfo IFI;
|
InlineFunctionInfo IFI;
|
||||||
InlineFunction(c, IFI);
|
InlineFunction(*c, IFI);
|
||||||
need_remove.insert(c->getCalledFunction());
|
need_remove.insert(c->getCalledFunction());
|
||||||
changed = true;
|
changed = true;
|
||||||
}
|
}
|
||||||
|
@ -102,7 +102,7 @@ bool inline_func_with_tid(llvm::Module *M) {
|
||||||
}
|
}
|
||||||
for (auto c : need_inline) {
|
for (auto c : need_inline) {
|
||||||
InlineFunctionInfo IFI;
|
InlineFunctionInfo IFI;
|
||||||
InlineFunction(c, IFI);
|
InlineFunction(*c, IFI);
|
||||||
}
|
}
|
||||||
return changed;
|
return changed;
|
||||||
}
|
}
|
||||||
|
|
|
@ -179,7 +179,7 @@ llvm::Instruction *GetContextArray(llvm::Instruction *instruction,
|
||||||
llvm::AllocaInst *Alloca = nullptr;
|
llvm::AllocaInst *Alloca = nullptr;
|
||||||
|
|
||||||
auto block_size_addr = M->getGlobalVariable("block_size");
|
auto block_size_addr = M->getGlobalVariable("block_size");
|
||||||
auto block_size = builder.CreateLoad(block_size_addr);
|
auto block_size = createLoad(builder, block_size_addr);
|
||||||
Alloca = builder.CreateAlloca(AllocType, block_size, varName);
|
Alloca = builder.CreateAlloca(AllocType, block_size, varName);
|
||||||
|
|
||||||
contextArrays[varName] = Alloca;
|
contextArrays[varName] = Alloca;
|
||||||
|
@ -208,9 +208,9 @@ llvm::Instruction *AddContextSave(llvm::Instruction *instruction,
|
||||||
std::vector<llvm::Value *> gepArgs;
|
std::vector<llvm::Value *> gepArgs;
|
||||||
|
|
||||||
auto inter_warp_index =
|
auto inter_warp_index =
|
||||||
builder.CreateLoad(M->getGlobalVariable("inter_warp_index"));
|
createLoad(builder, M->getGlobalVariable("inter_warp_index"));
|
||||||
auto intra_warp_index =
|
auto intra_warp_index =
|
||||||
builder.CreateLoad(M->getGlobalVariable("intra_warp_index"));
|
createLoad(builder, M->getGlobalVariable("intra_warp_index"));
|
||||||
auto thread_idx = builder.CreateBinOp(
|
auto thread_idx = builder.CreateBinOp(
|
||||||
Instruction::Add, intra_warp_index,
|
Instruction::Add, intra_warp_index,
|
||||||
builder.CreateBinOp(Instruction::Mul, inter_warp_index,
|
builder.CreateBinOp(Instruction::Mul, inter_warp_index,
|
||||||
|
@ -218,7 +218,7 @@ llvm::Instruction *AddContextSave(llvm::Instruction *instruction,
|
||||||
"thread_idx");
|
"thread_idx");
|
||||||
gepArgs.push_back(thread_idx);
|
gepArgs.push_back(thread_idx);
|
||||||
|
|
||||||
return builder.CreateStore(instruction, builder.CreateGEP(alloca, gepArgs));
|
return builder.CreateStore(instruction, createGEP(builder, alloca, gepArgs));
|
||||||
}
|
}
|
||||||
|
|
||||||
llvm::Instruction *AddContextRestore(llvm::Value *val,
|
llvm::Instruction *AddContextRestore(llvm::Value *val,
|
||||||
|
@ -242,9 +242,9 @@ llvm::Instruction *AddContextRestore(llvm::Value *val,
|
||||||
auto M = before->getParent()->getParent()->getParent();
|
auto M = before->getParent()->getParent()->getParent();
|
||||||
auto I32 = llvm::Type::getInt32Ty(M->getContext());
|
auto I32 = llvm::Type::getInt32Ty(M->getContext());
|
||||||
auto inter_warp_index =
|
auto inter_warp_index =
|
||||||
builder.CreateLoad(M->getGlobalVariable("inter_warp_index"));
|
createLoad(builder, M->getGlobalVariable("inter_warp_index"));
|
||||||
auto intra_warp_index =
|
auto intra_warp_index =
|
||||||
builder.CreateLoad(M->getGlobalVariable("intra_warp_index"));
|
createLoad(builder, M->getGlobalVariable("intra_warp_index"));
|
||||||
auto thread_idx = builder.CreateBinOp(
|
auto thread_idx = builder.CreateBinOp(
|
||||||
Instruction::Add, intra_warp_index,
|
Instruction::Add, intra_warp_index,
|
||||||
builder.CreateBinOp(Instruction::Mul, inter_warp_index,
|
builder.CreateBinOp(Instruction::Mul, inter_warp_index,
|
||||||
|
@ -253,11 +253,11 @@ llvm::Instruction *AddContextRestore(llvm::Value *val,
|
||||||
gepArgs.push_back(thread_idx);
|
gepArgs.push_back(thread_idx);
|
||||||
|
|
||||||
llvm::Instruction *gep =
|
llvm::Instruction *gep =
|
||||||
dyn_cast<Instruction>(builder.CreateGEP(alloca, gepArgs));
|
dyn_cast<Instruction>(createGEP(builder, alloca, gepArgs));
|
||||||
if (isAlloca) {
|
if (isAlloca) {
|
||||||
return gep;
|
return gep;
|
||||||
}
|
}
|
||||||
return builder.CreateLoad(gep);
|
return createLoad(builder, gep);
|
||||||
}
|
}
|
||||||
|
|
||||||
void AddContextSaveRestore(llvm::Instruction *instruction,
|
void AddContextSaveRestore(llvm::Instruction *instruction,
|
||||||
|
@ -316,7 +316,7 @@ void handle_alloc(llvm::Function *F) {
|
||||||
// generate a new alloc
|
// generate a new alloc
|
||||||
auto block_size_addr = M->getGlobalVariable("block_size");
|
auto block_size_addr = M->getGlobalVariable("block_size");
|
||||||
IRBuilder<> builder(inst);
|
IRBuilder<> builder(inst);
|
||||||
auto block_size = builder.CreateLoad(block_size_addr);
|
auto block_size = createLoad(builder, block_size_addr);
|
||||||
|
|
||||||
llvm::Type *elementType = NULL;
|
llvm::Type *elementType = NULL;
|
||||||
if (dyn_cast<AllocaInst>(inst)->getType()->getElementType()) {
|
if (dyn_cast<AllocaInst>(inst)->getType()->getElementType()) {
|
||||||
|
@ -338,16 +338,16 @@ void handle_alloc(llvm::Function *F) {
|
||||||
IRBuilder<> builder(user);
|
IRBuilder<> builder(user);
|
||||||
// std::vector<llvm::Value *> gepArgs;
|
// std::vector<llvm::Value *> gepArgs;
|
||||||
auto inter_warp_index =
|
auto inter_warp_index =
|
||||||
builder.CreateLoad(M->getGlobalVariable("inter_warp_index"));
|
createLoad(builder, M->getGlobalVariable("inter_warp_index"));
|
||||||
auto intra_warp_index =
|
auto intra_warp_index =
|
||||||
builder.CreateLoad(M->getGlobalVariable("intra_warp_index"));
|
createLoad(builder, M->getGlobalVariable("intra_warp_index"));
|
||||||
auto thread_idx = builder.CreateBinOp(
|
auto thread_idx = builder.CreateBinOp(
|
||||||
Instruction::Add, intra_warp_index,
|
Instruction::Add, intra_warp_index,
|
||||||
builder.CreateBinOp(Instruction::Mul, inter_warp_index,
|
builder.CreateBinOp(Instruction::Mul, inter_warp_index,
|
||||||
ConstantInt::get(I32, 32)),
|
ConstantInt::get(I32, 32)),
|
||||||
"thread_idx");
|
"thread_idx");
|
||||||
|
|
||||||
auto gep = builder.CreateGEP(Alloca, thread_idx);
|
auto gep = createGEP(builder, Alloca, thread_idx);
|
||||||
|
|
||||||
user->replaceUsesOfWith(inst, gep);
|
user->replaceUsesOfWith(inst, gep);
|
||||||
}
|
}
|
||||||
|
@ -479,19 +479,19 @@ BasicBlock *insert_loop_cond(llvm::BasicBlock *InsertCondBefore,
|
||||||
auto inter_warp_index = M->getGlobalVariable("inter_warp_index");
|
auto inter_warp_index = M->getGlobalVariable("inter_warp_index");
|
||||||
auto block_size = M->getGlobalVariable("block_size");
|
auto block_size = M->getGlobalVariable("block_size");
|
||||||
auto warp_cnt =
|
auto warp_cnt =
|
||||||
builder.CreateBinOp(Instruction::SDiv, builder.CreateLoad(block_size),
|
builder.CreateBinOp(Instruction::SDiv, createLoad(builder, block_size),
|
||||||
ConstantInt::get(I32, 32), "warp_number");
|
ConstantInt::get(I32, 32), "warp_number");
|
||||||
|
|
||||||
cmpResult =
|
cmpResult =
|
||||||
builder.CreateICmpULT(builder.CreateLoad(inter_warp_index), warp_cnt);
|
builder.CreateICmpULT(createLoad(builder, inter_warp_index), warp_cnt);
|
||||||
} else {
|
} else {
|
||||||
auto intra_warp_index = M->getGlobalVariable("intra_warp_index");
|
auto intra_warp_index = M->getGlobalVariable("intra_warp_index");
|
||||||
auto block_size = M->getGlobalVariable("block_size");
|
auto block_size = M->getGlobalVariable("block_size");
|
||||||
if (!need_nested_loop) {
|
if (!need_nested_loop) {
|
||||||
cmpResult = builder.CreateICmpULT(builder.CreateLoad(intra_warp_index),
|
cmpResult = builder.CreateICmpULT(createLoad(builder, intra_warp_index),
|
||||||
builder.CreateLoad(block_size));
|
createLoad(builder, block_size));
|
||||||
} else {
|
} else {
|
||||||
cmpResult = builder.CreateICmpULT(builder.CreateLoad(intra_warp_index),
|
cmpResult = builder.CreateICmpULT(createLoad(builder, intra_warp_index),
|
||||||
ConstantInt::get(I32, 32));
|
ConstantInt::get(I32, 32));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -513,13 +513,13 @@ BasicBlock *insert_loop_inc(llvm::BasicBlock *InsertIncBefore,
|
||||||
if (intra_warp_loop) { // intra warp
|
if (intra_warp_loop) { // intra warp
|
||||||
auto intra_warp_index = M->getGlobalVariable("intra_warp_index");
|
auto intra_warp_index = M->getGlobalVariable("intra_warp_index");
|
||||||
auto new_index = builder.CreateBinOp(
|
auto new_index = builder.CreateBinOp(
|
||||||
Instruction::Add, builder.CreateLoad(intra_warp_index),
|
Instruction::Add, createLoad(builder, intra_warp_index),
|
||||||
ConstantInt::get(I32, 1), "intra_warp_index_increment");
|
ConstantInt::get(I32, 1), "intra_warp_index_increment");
|
||||||
builder.CreateStore(new_index, intra_warp_index);
|
builder.CreateStore(new_index, intra_warp_index);
|
||||||
} else { // inter warp
|
} else { // inter warp
|
||||||
auto inter_warp_index = M->getGlobalVariable("inter_warp_index");
|
auto inter_warp_index = M->getGlobalVariable("inter_warp_index");
|
||||||
auto new_index = builder.CreateBinOp(
|
auto new_index = builder.CreateBinOp(
|
||||||
Instruction::Add, builder.CreateLoad(inter_warp_index),
|
Instruction::Add, createLoad(builder, inter_warp_index),
|
||||||
ConstantInt::get(I32, 1), "inter_warp_index_increment");
|
ConstantInt::get(I32, 1), "inter_warp_index_increment");
|
||||||
builder.CreateStore(new_index, inter_warp_index);
|
builder.CreateStore(new_index, inter_warp_index);
|
||||||
}
|
}
|
||||||
|
|
|
@ -99,7 +99,7 @@ void mem_share2global(llvm::Module *M) {
|
||||||
global_memory->setComdat(comdat);
|
global_memory->setComdat(comdat);
|
||||||
global_memory->setLinkage(llvm::GlobalValue::LinkOnceODRLinkage);
|
global_memory->setLinkage(llvm::GlobalValue::LinkOnceODRLinkage);
|
||||||
global_memory->setInitializer(undef);
|
global_memory->setInitializer(undef);
|
||||||
global_memory->setAlignment(share_memory->getAlignment());
|
global_memory->setAlignment(share_memory->getAlign());
|
||||||
corresponding_global_memory.insert(
|
corresponding_global_memory.insert(
|
||||||
std::pair<GlobalVariable *, GlobalVariable *>(share_memory,
|
std::pair<GlobalVariable *, GlobalVariable *>(share_memory,
|
||||||
global_memory));
|
global_memory));
|
||||||
|
|
|
@ -20,10 +20,11 @@
|
||||||
#include "llvm/IR/Module.h"
|
#include "llvm/IR/Module.h"
|
||||||
#include "llvm/IR/ValueSymbolTable.h"
|
#include "llvm/IR/ValueSymbolTable.h"
|
||||||
#include "llvm/InitializePasses.h"
|
#include "llvm/InitializePasses.h"
|
||||||
|
#include "llvm/MC/TargetRegistry.h"
|
||||||
#include "llvm/PassInfo.h"
|
#include "llvm/PassInfo.h"
|
||||||
#include "llvm/PassRegistry.h"
|
#include "llvm/PassRegistry.h"
|
||||||
#include "llvm/Support/CommandLine.h"
|
#include "llvm/Support/CommandLine.h"
|
||||||
#include "llvm/Support/TargetRegistry.h"
|
#include "llvm/Support/Host.h"
|
||||||
#include "llvm/Target/TargetMachine.h"
|
#include "llvm/Target/TargetMachine.h"
|
||||||
#include "llvm/Target/TargetOptions.h"
|
#include "llvm/Target/TargetOptions.h"
|
||||||
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
|
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
|
||||||
|
|
|
@ -1,8 +1,10 @@
|
||||||
#include "tool.h"
|
#include "tool.h"
|
||||||
#include "llvm/Bitcode/BitcodeWriter.h"
|
#include "llvm/Bitcode/BitcodeWriter.h"
|
||||||
|
#include "llvm/Config/llvm-config.h"
|
||||||
#include "llvm/IR/Constants.h"
|
#include "llvm/IR/Constants.h"
|
||||||
#include "llvm/IR/Function.h"
|
#include "llvm/IR/Function.h"
|
||||||
#include "llvm/IR/GlobalValue.h"
|
#include "llvm/IR/GlobalValue.h"
|
||||||
|
#include "llvm/IR/GlobalVariable.h"
|
||||||
#include "llvm/IR/IRBuilder.h"
|
#include "llvm/IR/IRBuilder.h"
|
||||||
#include "llvm/IR/InlineAsm.h"
|
#include "llvm/IR/InlineAsm.h"
|
||||||
#include "llvm/IR/Instructions.h"
|
#include "llvm/IR/Instructions.h"
|
||||||
|
@ -10,9 +12,16 @@
|
||||||
#include "llvm/IR/Module.h"
|
#include "llvm/IR/Module.h"
|
||||||
#include "llvm/IR/Verifier.h"
|
#include "llvm/IR/Verifier.h"
|
||||||
#include "llvm/IRReader/IRReader.h"
|
#include "llvm/IRReader/IRReader.h"
|
||||||
|
#include "llvm/Support/CommandLine.h"
|
||||||
|
#include "llvm/Support/ErrorOr.h"
|
||||||
|
#include "llvm/Support/FileSystem.h"
|
||||||
|
#include "llvm/Support/ManagedStatic.h"
|
||||||
|
#include "llvm/Support/MemoryBuffer.h"
|
||||||
#include "llvm/Support/ToolOutputFile.h"
|
#include "llvm/Support/ToolOutputFile.h"
|
||||||
|
#include "llvm/Support/raw_ostream.h"
|
||||||
#include "llvm/Transforms/Utils/Cloning.h"
|
#include "llvm/Transforms/Utils/Cloning.h"
|
||||||
#include "llvm/Transforms/Utils/ValueMapper.h"
|
#include "llvm/Transforms/Utils/ValueMapper.h"
|
||||||
|
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <set>
|
#include <set>
|
||||||
|
|
||||||
|
@ -41,7 +50,7 @@ void DumpModule(llvm::Module *M, char *file_name) {
|
||||||
std::string msg;
|
std::string msg;
|
||||||
llvm::raw_string_ostream os(msg);
|
llvm::raw_string_ostream os(msg);
|
||||||
std::error_code EC;
|
std::error_code EC;
|
||||||
ToolOutputFile Out(file_name, EC, sys::fs::F_None);
|
ToolOutputFile Out(file_name, EC, sys::fs::OF_None);
|
||||||
if (EC) {
|
if (EC) {
|
||||||
errs() << "Fails to open output file: " << EC.message();
|
errs() << "Fails to open output file: " << EC.message();
|
||||||
return;
|
return;
|
||||||
|
@ -128,7 +137,7 @@ llvm::Instruction *BreakPHIToAllocas(PHINode *phi) {
|
||||||
}
|
}
|
||||||
builder.SetInsertPoint(phi);
|
builder.SetInsertPoint(phi);
|
||||||
|
|
||||||
llvm::Instruction *loadedValue = builder.CreateLoad(alloca);
|
llvm::Instruction *loadedValue = createLoad(builder, alloca);
|
||||||
phi->replaceAllUsesWith(loadedValue);
|
phi->replaceAllUsesWith(loadedValue);
|
||||||
phi->eraseFromParent();
|
phi->eraseFromParent();
|
||||||
|
|
||||||
|
@ -219,13 +228,12 @@ void replace_dynamic_shared_memory(llvm::Module *M) {
|
||||||
if (!dynamic_shared_memory_addr) {
|
if (!dynamic_shared_memory_addr) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
auto load_shared_memory =
|
auto load_shared_memory = new LoadInst(
|
||||||
new LoadInst(dynamic_shared_memory_addr, "new_load");
|
dynamic_shared_memory_addr->getType()->getPointerElementType(),
|
||||||
|
dynamic_shared_memory_addr, "new_load", &*F->begin()->begin());
|
||||||
auto new_bit_cast =
|
auto new_bit_cast =
|
||||||
new BitCastInst(load_shared_memory,
|
new BitCastInst(load_shared_memory,
|
||||||
dynamic_shared_memory_addr->getType(), "new_bit_cast");
|
dynamic_shared_memory_addr->getType(), "new_bit_cast");
|
||||||
new_bit_cast->insertBefore(&*F->begin()->begin());
|
|
||||||
load_shared_memory->insertBefore(new_bit_cast);
|
|
||||||
dynamic_shared_memory_addr->replaceUsesWithIf(new_bit_cast, [&](Use &U) {
|
dynamic_shared_memory_addr->replaceUsesWithIf(new_bit_cast, [&](Use &U) {
|
||||||
auto *Instr = dyn_cast<Instruction>(U.getUser());
|
auto *Instr = dyn_cast<Instruction>(U.getUser());
|
||||||
return Instr != new_bit_cast && Instr != load_shared_memory;
|
return Instr != new_bit_cast && Instr != load_shared_memory;
|
||||||
|
@ -281,21 +289,21 @@ void replace_built_in_function(llvm::Module *M) {
|
||||||
auto block_size_addr = M->getGlobalVariable("block_size_x");
|
auto block_size_addr = M->getGlobalVariable("block_size_x");
|
||||||
IRBuilder<> builder(context);
|
IRBuilder<> builder(context);
|
||||||
builder.SetInsertPoint(Call);
|
builder.SetInsertPoint(Call);
|
||||||
auto val = builder.CreateLoad(block_size_addr);
|
auto val = createLoad(builder, block_size_addr);
|
||||||
Call->replaceAllUsesWith(val);
|
Call->replaceAllUsesWith(val);
|
||||||
need_remove.push_back(Call);
|
need_remove.push_back(Call);
|
||||||
} else if (func_name == "llvm.nvvm.read.ptx.sreg.ntid.y") {
|
} else if (func_name == "llvm.nvvm.read.ptx.sreg.ntid.y") {
|
||||||
auto block_size_addr = M->getGlobalVariable("block_size_y");
|
auto block_size_addr = M->getGlobalVariable("block_size_y");
|
||||||
IRBuilder<> builder(context);
|
IRBuilder<> builder(context);
|
||||||
builder.SetInsertPoint(Call);
|
builder.SetInsertPoint(Call);
|
||||||
auto val = builder.CreateLoad(block_size_addr);
|
auto val = createLoad(builder, block_size_addr);
|
||||||
Call->replaceAllUsesWith(val);
|
Call->replaceAllUsesWith(val);
|
||||||
need_remove.push_back(Call);
|
need_remove.push_back(Call);
|
||||||
} else if (func_name == "llvm.nvvm.read.ptx.sreg.ntid.z") {
|
} else if (func_name == "llvm.nvvm.read.ptx.sreg.ntid.z") {
|
||||||
auto block_size_addr = M->getGlobalVariable("block_size_z");
|
auto block_size_addr = M->getGlobalVariable("block_size_z");
|
||||||
IRBuilder<> builder(context);
|
IRBuilder<> builder(context);
|
||||||
builder.SetInsertPoint(Call);
|
builder.SetInsertPoint(Call);
|
||||||
auto val = builder.CreateLoad(block_size_addr);
|
auto val = createLoad(builder, block_size_addr);
|
||||||
Call->replaceAllUsesWith(val);
|
Call->replaceAllUsesWith(val);
|
||||||
need_remove.push_back(Call);
|
need_remove.push_back(Call);
|
||||||
} else if (func_name == "llvm.nvvm.read.ptx.sreg.tid.x" ||
|
} else if (func_name == "llvm.nvvm.read.ptx.sreg.tid.x" ||
|
||||||
|
@ -307,15 +315,15 @@ void replace_built_in_function(llvm::Module *M) {
|
||||||
builder.SetInsertPoint(Call);
|
builder.SetInsertPoint(Call);
|
||||||
|
|
||||||
auto thread_idx = builder.CreateBinOp(
|
auto thread_idx = builder.CreateBinOp(
|
||||||
Instruction::Mul, builder.CreateLoad(local_inter_warp_idx),
|
Instruction::Mul, createLoad(builder, local_inter_warp_idx),
|
||||||
ConstantInt::get(I32, 32), "");
|
ConstantInt::get(I32, 32), "");
|
||||||
thread_idx = builder.CreateBinOp(
|
thread_idx = builder.CreateBinOp(
|
||||||
Instruction::Add, builder.CreateLoad(local_intra_warp_idx),
|
Instruction::Add, createLoad(builder, local_intra_warp_idx),
|
||||||
thread_idx, "thread_idx");
|
thread_idx, "thread_idx");
|
||||||
|
|
||||||
thread_idx = builder.CreateBinOp(
|
thread_idx = builder.CreateBinOp(
|
||||||
Instruction::SRem, thread_idx,
|
Instruction::SRem, thread_idx,
|
||||||
builder.CreateLoad(M->getGlobalVariable("block_size_x")),
|
createLoad(builder, M->getGlobalVariable("block_size_x")),
|
||||||
"thread_id_x");
|
"thread_id_x");
|
||||||
|
|
||||||
Call->replaceAllUsesWith(thread_idx);
|
Call->replaceAllUsesWith(thread_idx);
|
||||||
|
@ -326,15 +334,15 @@ void replace_built_in_function(llvm::Module *M) {
|
||||||
builder.SetInsertPoint(Call);
|
builder.SetInsertPoint(Call);
|
||||||
|
|
||||||
auto thread_idx = builder.CreateBinOp(
|
auto thread_idx = builder.CreateBinOp(
|
||||||
Instruction::Mul, builder.CreateLoad(local_inter_warp_idx),
|
Instruction::Mul, createLoad(builder, local_inter_warp_idx),
|
||||||
ConstantInt::get(I32, 32), "");
|
ConstantInt::get(I32, 32), "");
|
||||||
thread_idx = builder.CreateBinOp(
|
thread_idx = builder.CreateBinOp(
|
||||||
Instruction::Add, builder.CreateLoad(local_intra_warp_idx),
|
Instruction::Add, createLoad(builder, local_intra_warp_idx),
|
||||||
thread_idx, "thread_idx");
|
thread_idx, "thread_idx");
|
||||||
// tidy = tid / block_dim.x
|
// tidy = tid / block_dim.x
|
||||||
thread_idx = builder.CreateBinOp(
|
thread_idx = builder.CreateBinOp(
|
||||||
Instruction::SDiv, thread_idx,
|
Instruction::SDiv, thread_idx,
|
||||||
builder.CreateLoad(M->getGlobalVariable("block_size_x")),
|
createLoad(builder, M->getGlobalVariable("block_size_x")),
|
||||||
"thread_id_y");
|
"thread_id_y");
|
||||||
Call->replaceAllUsesWith(thread_idx);
|
Call->replaceAllUsesWith(thread_idx);
|
||||||
need_remove.push_back(Call);
|
need_remove.push_back(Call);
|
||||||
|
@ -350,21 +358,21 @@ void replace_built_in_function(llvm::Module *M) {
|
||||||
auto block_index_addr = M->getGlobalVariable("block_index_x");
|
auto block_index_addr = M->getGlobalVariable("block_index_x");
|
||||||
IRBuilder<> builder(context);
|
IRBuilder<> builder(context);
|
||||||
builder.SetInsertPoint(Call);
|
builder.SetInsertPoint(Call);
|
||||||
auto block_idx = builder.CreateLoad(block_index_addr);
|
auto block_idx = createLoad(builder, block_index_addr);
|
||||||
Call->replaceAllUsesWith(block_idx);
|
Call->replaceAllUsesWith(block_idx);
|
||||||
need_remove.push_back(Call);
|
need_remove.push_back(Call);
|
||||||
} else if (func_name == "llvm.nvvm.read.ptx.sreg.ctaid.y") {
|
} else if (func_name == "llvm.nvvm.read.ptx.sreg.ctaid.y") {
|
||||||
auto block_index_addr = M->getGlobalVariable("block_index_y");
|
auto block_index_addr = M->getGlobalVariable("block_index_y");
|
||||||
IRBuilder<> builder(context);
|
IRBuilder<> builder(context);
|
||||||
builder.SetInsertPoint(Call);
|
builder.SetInsertPoint(Call);
|
||||||
auto block_idx = builder.CreateLoad(block_index_addr);
|
auto block_idx = createLoad(builder, block_index_addr);
|
||||||
Call->replaceAllUsesWith(block_idx);
|
Call->replaceAllUsesWith(block_idx);
|
||||||
need_remove.push_back(Call);
|
need_remove.push_back(Call);
|
||||||
} else if (func_name == "llvm.nvvm.read.ptx.sreg.ctaid.z") {
|
} else if (func_name == "llvm.nvvm.read.ptx.sreg.ctaid.z") {
|
||||||
auto block_index_addr = M->getGlobalVariable("block_index_z");
|
auto block_index_addr = M->getGlobalVariable("block_index_z");
|
||||||
IRBuilder<> builder(context);
|
IRBuilder<> builder(context);
|
||||||
builder.SetInsertPoint(Call);
|
builder.SetInsertPoint(Call);
|
||||||
auto block_idx = builder.CreateLoad(block_index_addr);
|
auto block_idx = createLoad(builder, block_index_addr);
|
||||||
Call->replaceAllUsesWith(block_idx);
|
Call->replaceAllUsesWith(block_idx);
|
||||||
need_remove.push_back(Call);
|
need_remove.push_back(Call);
|
||||||
} else if (func_name == "llvm.nvvm.read.ptx.sreg.nctaid.x" ||
|
} else if (func_name == "llvm.nvvm.read.ptx.sreg.nctaid.x" ||
|
||||||
|
@ -373,21 +381,21 @@ void replace_built_in_function(llvm::Module *M) {
|
||||||
auto grid_size_addr = M->getGlobalVariable("grid_size_x");
|
auto grid_size_addr = M->getGlobalVariable("grid_size_x");
|
||||||
IRBuilder<> builder(context);
|
IRBuilder<> builder(context);
|
||||||
builder.SetInsertPoint(Call);
|
builder.SetInsertPoint(Call);
|
||||||
auto grid_size = builder.CreateLoad(grid_size_addr);
|
auto grid_size = createLoad(builder, grid_size_addr);
|
||||||
Call->replaceAllUsesWith(grid_size);
|
Call->replaceAllUsesWith(grid_size);
|
||||||
need_remove.push_back(Call);
|
need_remove.push_back(Call);
|
||||||
} else if (func_name == "llvm.nvvm.read.ptx.sreg.nctaid.y") {
|
} else if (func_name == "llvm.nvvm.read.ptx.sreg.nctaid.y") {
|
||||||
auto grid_size_addr = M->getGlobalVariable("grid_size_y");
|
auto grid_size_addr = M->getGlobalVariable("grid_size_y");
|
||||||
IRBuilder<> builder(context);
|
IRBuilder<> builder(context);
|
||||||
builder.SetInsertPoint(Call);
|
builder.SetInsertPoint(Call);
|
||||||
auto grid_size = builder.CreateLoad(grid_size_addr);
|
auto grid_size = createLoad(builder, grid_size_addr);
|
||||||
Call->replaceAllUsesWith(grid_size);
|
Call->replaceAllUsesWith(grid_size);
|
||||||
need_remove.push_back(Call);
|
need_remove.push_back(Call);
|
||||||
} else if (func_name == "llvm.nvvm.read.ptx.sreg.nctaid.z") {
|
} else if (func_name == "llvm.nvvm.read.ptx.sreg.nctaid.z") {
|
||||||
auto grid_size_addr = M->getGlobalVariable("grid_size_z");
|
auto grid_size_addr = M->getGlobalVariable("grid_size_z");
|
||||||
IRBuilder<> builder(context);
|
IRBuilder<> builder(context);
|
||||||
builder.SetInsertPoint(Call);
|
builder.SetInsertPoint(Call);
|
||||||
auto grid_size = builder.CreateLoad(grid_size_addr);
|
auto grid_size = createLoad(builder, grid_size_addr);
|
||||||
Call->replaceAllUsesWith(grid_size);
|
Call->replaceAllUsesWith(grid_size);
|
||||||
need_remove.push_back(Call);
|
need_remove.push_back(Call);
|
||||||
}
|
}
|
||||||
|
@ -401,7 +409,7 @@ void replace_built_in_function(llvm::Module *M) {
|
||||||
// return the rank within the warp
|
// return the rank within the warp
|
||||||
IRBuilder<> builder(context);
|
IRBuilder<> builder(context);
|
||||||
builder.SetInsertPoint(Call);
|
builder.SetInsertPoint(Call);
|
||||||
auto intra_warp_index = builder.CreateLoad(local_intra_warp_idx);
|
auto intra_warp_index = createLoad(builder, local_intra_warp_idx);
|
||||||
Call->replaceAllUsesWith(intra_warp_index);
|
Call->replaceAllUsesWith(intra_warp_index);
|
||||||
need_remove.push_back(Call);
|
need_remove.push_back(Call);
|
||||||
}
|
}
|
||||||
|
@ -460,7 +468,9 @@ void replace_built_in_function(llvm::Module *M) {
|
||||||
src_alloc, // Alloca
|
src_alloc, // Alloca
|
||||||
Indices, // Indices
|
Indices, // Indices
|
||||||
"", Call);
|
"", Call);
|
||||||
auto new_load = new LoadInst(new_GEP, "", Call);
|
auto new_load =
|
||||||
|
new LoadInst(new_GEP->getType()->getPointerElementType(),
|
||||||
|
new_GEP, "", Call);
|
||||||
printf_args.push_back(new_load);
|
printf_args.push_back(new_load);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -531,7 +541,7 @@ void replace_asm_call(llvm::Module *M) {
|
||||||
builder.SetInsertPoint(Call);
|
builder.SetInsertPoint(Call);
|
||||||
auto intra_warp_index_addr =
|
auto intra_warp_index_addr =
|
||||||
M->getGlobalVariable("intra_warp_index");
|
M->getGlobalVariable("intra_warp_index");
|
||||||
auto intra_warp_index = builder.CreateLoad(intra_warp_index_addr);
|
auto intra_warp_index = createLoad(builder, intra_warp_index_addr);
|
||||||
Call->replaceAllUsesWith(intra_warp_index);
|
Call->replaceAllUsesWith(intra_warp_index);
|
||||||
need_remove.push_back(Call);
|
need_remove.push_back(Call);
|
||||||
}
|
}
|
||||||
|
@ -652,20 +662,18 @@ bool find_barrier_in_region(llvm::BasicBlock *start, llvm::BasicBlock *end) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
LoadInst *createLoad(IRBuilder<> &B, Value *addr, bool isVolatile) {
|
||||||
Print IR to String Output for Debugging Purposes
|
return B.CreateLoad(addr->getType()->getPointerElementType(), addr,
|
||||||
*/
|
isVolatile);
|
||||||
// void printModule(llvm::Module *M) {
|
}
|
||||||
// std::string str;
|
|
||||||
// llvm::raw_string_ostream ss(str);
|
Value *createInBoundsGEP(IRBuilder<> &B, Value *ptr,
|
||||||
// std::cout << "### Printing Module ###" << std::endl;
|
ArrayRef<Value *> idxlist) {
|
||||||
// for (Module::iterator i = M->begin(), e = M->end(); i != e; ++i) {
|
return B.CreateInBoundsGEP(
|
||||||
// Function *F = &(*i);
|
ptr->getType()->getScalarType()->getPointerElementType(), ptr, idxlist);
|
||||||
// auto func_name = F->getName().str();
|
}
|
||||||
// std::cout << func_name << std::endl;
|
|
||||||
// for (Function::iterator b = F->begin(); b != F->end(); ++b) {
|
Value *createGEP(IRBuilder<> &B, Value *ptr, ArrayRef<Value *> idxlist) {
|
||||||
// BasicBlock *B = &(*b);
|
return B.CreateGEP(ptr->getType()->getScalarType()->getPointerElementType(),
|
||||||
// errs() << *B;
|
ptr, idxlist);
|
||||||
// }
|
}
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
|
@ -67,7 +67,8 @@ void handle_warp_vote(llvm::Module *M) {
|
||||||
assert(warp_vote_ptr != NULL);
|
assert(warp_vote_ptr != NULL);
|
||||||
auto intra_warp_index_addr = M->getGlobalVariable("intra_warp_index");
|
auto intra_warp_index_addr = M->getGlobalVariable("intra_warp_index");
|
||||||
auto intra_warp_index =
|
auto intra_warp_index =
|
||||||
new LoadInst(intra_warp_index_addr, "intra_warp_index", sync_inst);
|
new LoadInst(intra_warp_index_addr->getType()->getPointerElementType(),
|
||||||
|
intra_warp_index_addr, "intra_warp_index", sync_inst);
|
||||||
|
|
||||||
auto GEP = GetElementPtrInst::Create(NULL, // Pointee type
|
auto GEP = GetElementPtrInst::Create(NULL, // Pointee type
|
||||||
warp_vote_ptr, // Alloca
|
warp_vote_ptr, // Alloca
|
||||||
|
@ -168,23 +169,22 @@ void handle_warp_shfl(llvm::Module *M) {
|
||||||
auto shfl_offset = shfl_inst->getArgOperand(2);
|
auto shfl_offset = shfl_inst->getArgOperand(2);
|
||||||
|
|
||||||
auto intra_warp_index =
|
auto intra_warp_index =
|
||||||
builder.CreateLoad(M->getGlobalVariable("intra_warp_index"));
|
createLoad(builder, M->getGlobalVariable("intra_warp_index"));
|
||||||
builder.CreateStore(
|
builder.CreateStore(shfl_variable, createGEP(builder, warp_shfl_ptr,
|
||||||
shfl_variable,
|
{ZERO, intra_warp_index}));
|
||||||
builder.CreateGEP(warp_shfl_ptr, {ZERO, intra_warp_index}));
|
|
||||||
// we should create barrier before store
|
// we should create barrier before store
|
||||||
CreateIntraWarpBarrier(intra_warp_index);
|
CreateIntraWarpBarrier(intra_warp_index);
|
||||||
// load shuffled data
|
// load shuffled data
|
||||||
auto new_intra_warp_index =
|
auto new_intra_warp_index =
|
||||||
builder.CreateLoad(M->getGlobalVariable("intra_warp_index"));
|
createLoad(builder, M->getGlobalVariable("intra_warp_index"));
|
||||||
auto shfl_name = shfl_inst->getCalledFunction()->getName().str();
|
auto shfl_name = shfl_inst->getCalledFunction()->getName().str();
|
||||||
if (shfl_name.find("down") != shfl_name.npos) {
|
if (shfl_name.find("down") != shfl_name.npos) {
|
||||||
auto calculate_offset = builder.CreateBinOp(
|
auto calculate_offset = builder.CreateBinOp(
|
||||||
Instruction::Add, new_intra_warp_index, shfl_offset);
|
Instruction::Add, new_intra_warp_index, shfl_offset);
|
||||||
auto new_index = builder.CreateBinOp(Instruction::SRem, calculate_offset,
|
auto new_index = builder.CreateBinOp(Instruction::SRem, calculate_offset,
|
||||||
ConstantInt::get(I32, 32));
|
ConstantInt::get(I32, 32));
|
||||||
auto gep = builder.CreateGEP(warp_shfl_ptr, {ZERO, new_index});
|
auto gep = createGEP(builder, warp_shfl_ptr, {ZERO, new_index});
|
||||||
auto load_inst = builder.CreateLoad(gep);
|
auto load_inst = createLoad(builder, gep);
|
||||||
|
|
||||||
// create barrier
|
// create barrier
|
||||||
CreateIntraWarpBarrier(new_intra_warp_index);
|
CreateIntraWarpBarrier(new_intra_warp_index);
|
||||||
|
@ -195,8 +195,8 @@ void handle_warp_shfl(llvm::Module *M) {
|
||||||
Instruction::Sub, new_intra_warp_index, shfl_offset);
|
Instruction::Sub, new_intra_warp_index, shfl_offset);
|
||||||
auto new_index = builder.CreateBinOp(Instruction::SRem, calculate_offset,
|
auto new_index = builder.CreateBinOp(Instruction::SRem, calculate_offset,
|
||||||
ConstantInt::get(I32, 32));
|
ConstantInt::get(I32, 32));
|
||||||
auto gep = builder.CreateGEP(warp_shfl_ptr, {ZERO, new_index});
|
auto gep = createGEP(builder, warp_shfl_ptr, {ZERO, new_index});
|
||||||
auto load_inst = builder.CreateLoad(gep);
|
auto load_inst = createLoad(builder, gep);
|
||||||
|
|
||||||
// create barrier
|
// create barrier
|
||||||
CreateIntraWarpBarrier(new_intra_warp_index);
|
CreateIntraWarpBarrier(new_intra_warp_index);
|
||||||
|
@ -207,8 +207,8 @@ void handle_warp_shfl(llvm::Module *M) {
|
||||||
Instruction::Xor, new_intra_warp_index, shfl_offset);
|
Instruction::Xor, new_intra_warp_index, shfl_offset);
|
||||||
auto new_index = builder.CreateBinOp(Instruction::SRem, calculate_offset,
|
auto new_index = builder.CreateBinOp(Instruction::SRem, calculate_offset,
|
||||||
ConstantInt::get(I32, 32));
|
ConstantInt::get(I32, 32));
|
||||||
auto gep = builder.CreateGEP(warp_shfl_ptr, {ZERO, new_index});
|
auto gep = createGEP(builder, warp_shfl_ptr, {ZERO, new_index});
|
||||||
auto load_inst = builder.CreateLoad(gep);
|
auto load_inst = createLoad(builder, gep);
|
||||||
|
|
||||||
// create barrier
|
// create barrier
|
||||||
CreateIntraWarpBarrier(new_intra_warp_index);
|
CreateIntraWarpBarrier(new_intra_warp_index);
|
||||||
|
|
|
@ -10,7 +10,9 @@ set(CMAKE_VERBOSE_MAKEFILE ON)
|
||||||
add_subdirectory(threadPool)
|
add_subdirectory(threadPool)
|
||||||
|
|
||||||
# compile x86 runtime library
|
# compile x86 runtime library
|
||||||
|
include_directories(./include/)
|
||||||
include_directories(./include/x86)
|
include_directories(./include/x86)
|
||||||
|
include_directories(./threadPool/include/)
|
||||||
include_directories(./threadPool/include/x86)
|
include_directories(./threadPool/include/x86)
|
||||||
file(GLOB proj_SOURCES "src/vortex/*.cpp")
|
file(GLOB proj_SOURCES "src/x86/*.cpp")
|
||||||
add_library(${LIB_NAME} SHARED ${proj_SOURCES})
|
add_library(${LIB_NAME} SHARED ${proj_SOURCES})
|
||||||
|
|
|
@ -9,13 +9,31 @@
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
cudaError_t cudaGetDevice(int *devPtr) { *devPtr = 0; }
|
cudaError_t cudaGetDevice(int *devPtr) {
|
||||||
|
*devPtr = 0;
|
||||||
|
return cudaSuccess;
|
||||||
|
}
|
||||||
const char *cudaGetErrorName(cudaError_t error) { return "SUCCESS\n"; }
|
const char *cudaGetErrorName(cudaError_t error) { return "SUCCESS\n"; }
|
||||||
cudaError_t cudaDeviceReset(void) { scheduler_uninit(); }
|
cudaError_t cudaDeviceReset(void) {
|
||||||
cudaError_t cudaDeviceSynchronize(void) { cuSynchronizeBarrier(); }
|
scheduler_uninit();
|
||||||
cudaError_t cudaThreadSynchronize(void) { cuSynchronizeBarrier(); }
|
return cudaSuccess;
|
||||||
cudaError_t cudaFree(void *devPtr) { free(devPtr); }
|
}
|
||||||
cudaError_t cudaFreeHost(void *devPtr) { free(devPtr); }
|
cudaError_t cudaDeviceSynchronize(void) {
|
||||||
|
cuSynchronizeBarrier();
|
||||||
|
return cudaSuccess;
|
||||||
|
}
|
||||||
|
cudaError_t cudaThreadSynchronize(void) {
|
||||||
|
cuSynchronizeBarrier();
|
||||||
|
return cudaSuccess;
|
||||||
|
}
|
||||||
|
cudaError_t cudaFree(void *devPtr) {
|
||||||
|
free(devPtr);
|
||||||
|
return cudaSuccess;
|
||||||
|
}
|
||||||
|
cudaError_t cudaFreeHost(void *devPtr) {
|
||||||
|
free(devPtr);
|
||||||
|
return cudaSuccess;
|
||||||
|
}
|
||||||
|
|
||||||
cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
|
cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
|
||||||
void **args, size_t sharedMem,
|
void **args, size_t sharedMem,
|
||||||
|
@ -31,7 +49,7 @@ cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim,
|
||||||
|
|
||||||
int lstatus = cuLaunchKernel(&ker);
|
int lstatus = cuLaunchKernel(&ker);
|
||||||
|
|
||||||
// std::cout << "ret cudaLKernel" << std::endl;
|
return cudaSuccess;
|
||||||
}
|
}
|
||||||
cudaError_t cudaMalloc(void **devPtr, size_t size) {
|
cudaError_t cudaMalloc(void **devPtr, size_t size) {
|
||||||
*devPtr = malloc(size);
|
*devPtr = malloc(size);
|
||||||
|
@ -68,15 +86,13 @@ cudaError_t cudaMemcpy(void *dst, const void *src, size_t count,
|
||||||
cudaError_t cudaMemcpyToSymbol_host(void *dst, const void *src, size_t count,
|
cudaError_t cudaMemcpyToSymbol_host(void *dst, const void *src, size_t count,
|
||||||
size_t offset, cudaMemcpyKind kind) {
|
size_t offset, cudaMemcpyKind kind) {
|
||||||
assert(offset == 0 && "DO not support offset !=0\n");
|
assert(offset == 0 && "DO not support offset !=0\n");
|
||||||
memcpy(dst, src + offset, count);
|
memcpy(dst, (char *)src + offset, count);
|
||||||
return cudaSuccess;
|
return cudaSuccess;
|
||||||
}
|
}
|
||||||
|
|
||||||
cudaError_t cudaSetDevice(int device) {
|
cudaError_t cudaSetDevice(int device) {
|
||||||
// error checking
|
|
||||||
// std::cout << "cudaSetDevice Called" << std::endl;
|
|
||||||
init_device();
|
init_device();
|
||||||
// std::cout << "cudaSetDevice Ret" << std::endl;
|
return cudaSuccess;
|
||||||
}
|
}
|
||||||
|
|
||||||
cudaError_t cudaStreamCopyAttributes(cudaStream_t dst, cudaStream_t src) {
|
cudaError_t cudaStreamCopyAttributes(cudaStream_t dst, cudaStream_t src) {
|
||||||
|
|
|
@ -11,7 +11,8 @@ set(LIB_NAME threadPool)
|
||||||
|
|
||||||
set(CMAKE_CXX_STANDARD 14)
|
set(CMAKE_CXX_STANDARD 14)
|
||||||
set(CMAKE_BUILD_TYPE Debug)
|
set(CMAKE_BUILD_TYPE Debug)
|
||||||
|
include_directories(./include)
|
||||||
include_directories(./include/x86)
|
include_directories(./include/x86)
|
||||||
|
|
||||||
file(GLOB proj_SOURCES "src/vortex/*.cpp")
|
file(GLOB proj_SOURCES "src/x86/*.cpp")
|
||||||
add_library(${LIB_NAME} SHARED ${proj_SOURCES})
|
add_library(${LIB_NAME} SHARED ${proj_SOURCES})
|
||||||
|
|
|
@ -29,7 +29,6 @@ int init_device() {
|
||||||
|
|
||||||
// initialize scheduler
|
// initialize scheduler
|
||||||
int ret = scheduler_init(*device);
|
int ret = scheduler_init(*device);
|
||||||
|
|
||||||
if (ret != C_SUCCESS)
|
if (ret != C_SUCCESS)
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
|
@ -182,6 +181,7 @@ int schedulerEnqueueKernel(cu_kernel **k) {
|
||||||
|
|
||||||
pthread_cond_broadcast(&(scheduler->wake_pool));
|
pthread_cond_broadcast(&(scheduler->wake_pool));
|
||||||
MUTEX_UNLOCK(scheduler->work_queue_lock);
|
MUTEX_UNLOCK(scheduler->work_queue_lock);
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -191,6 +191,7 @@ int cuLaunchKernel(cu_kernel **k) {
|
||||||
if (!scheduler) {
|
if (!scheduler) {
|
||||||
init_device();
|
init_device();
|
||||||
}
|
}
|
||||||
|
std::cout << "launch\n" << std::flush;
|
||||||
// Calculate Block Size N/numBlocks
|
// Calculate Block Size N/numBlocks
|
||||||
|
|
||||||
cu_kernel *ker = *k;
|
cu_kernel *ker = *k;
|
||||||
|
@ -238,6 +239,7 @@ int cuLaunchKernel(cu_kernel **k) {
|
||||||
MUTEX_UNLOCK(((cstreamData *)(ker->stream))->stream_lock);
|
MUTEX_UNLOCK(((cstreamData *)(ker->stream))->stream_lock);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -346,7 +348,8 @@ RETRY:
|
||||||
grid_size_x = gridDim.x;
|
grid_size_x = gridDim.x;
|
||||||
grid_size_y = gridDim.y;
|
grid_size_y = gridDim.y;
|
||||||
grid_size_z = gridDim.z;
|
grid_size_z = gridDim.z;
|
||||||
dynamic_shared_memory = (int *)malloc(dynamic_shared_mem_size);
|
if (dynamic_shared_mem_size > 0)
|
||||||
|
dynamic_shared_memory = (int *)malloc(dynamic_shared_mem_size);
|
||||||
int tmp = block_index;
|
int tmp = block_index;
|
||||||
block_index_x = tmp / (grid_size_y * grid_size_z);
|
block_index_x = tmp / (grid_size_y * grid_size_z);
|
||||||
tmp = tmp % (grid_size_y * grid_size_z);
|
tmp = tmp % (grid_size_y * grid_size_z);
|
||||||
|
|
Loading…
Reference in New Issue