support CloverLeaf on LLVM14
This commit is contained in:
parent
8fddb647bd
commit
cf12d604eb
|
@ -9,20 +9,77 @@
|
|||
#include "llvm/IR/LLVMContext.h"
|
||||
#include "llvm/IR/Module.h"
|
||||
#include "llvm/Support/ToolOutputFile.h"
|
||||
#include "llvm/Transforms/Utils/CtorUtils.h"
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <set>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
/// Given a llvm.global_ctors list that we can understand,
|
||||
/// return a list of the functions and null terminator as a vector.
|
||||
/// Given a llvm.global_ctors list that we can understand, return the
/// constructor functions it references as a vector. An element may be null
/// (dyn_cast of a non-Function operand), mirroring the list's terminator.
static std::vector<Function *> parseGlobalCtors(GlobalVariable *GV) {
  std::vector<Function *> Ctors;

  // A null initializer means there are no constructors registered.
  if (GV->getInitializer()->isNullValue())
    return Ctors;

  auto *InitList = cast<ConstantArray>(GV->getInitializer());
  Ctors.reserve(InitList->getNumOperands());

  // Each entry is a { priority, function, data } struct; operand 1 is the
  // constructor function itself.
  for (auto &Entry : InitList->operands()) {
    auto *Record = cast<ConstantStruct>(Entry);
    Ctors.push_back(dyn_cast<Function>(Record->getOperand(1)));
  }
  return Ctors;
}
|
||||
|
||||
void RemoveCudaBuiltin(llvm::Module *M) {
|
||||
|
||||
std::set<llvm::Function *> need_remove;
|
||||
|
||||
if (GlobalVariable *gv = M->getGlobalVariable("llvm.global_ctors")) {
|
||||
gv->dropAllReferences();
|
||||
gv->eraseFromParent();
|
||||
// remove cuda built-in from Ctors
|
||||
if (GlobalVariable *GV = M->getGlobalVariable("llvm.global_ctors")) {
|
||||
std::vector<Function *> Ctors = parseGlobalCtors(GV);
|
||||
if (!Ctors.empty()) {
|
||||
ConstantArray *OldCA = cast<ConstantArray>(GV->getInitializer());
|
||||
SmallVector<Constant *, 10> CAList;
|
||||
for (int i = 0; i < OldCA->getNumOperands(); i++) {
|
||||
if (!Ctors[i])
|
||||
continue;
|
||||
if (Ctors[i]->hasName() &&
|
||||
Ctors[i]->getName().str().find("__cuda") == std::string::npos) {
|
||||
std::cout << "keep: " << Ctors[i]->getName().str() << std::endl
|
||||
<< std::flush;
|
||||
CAList.push_back(OldCA->getOperand(i));
|
||||
}
|
||||
}
|
||||
|
||||
// Create the new array initializer.
|
||||
ArrayType *ATy =
|
||||
ArrayType::get(OldCA->getType()->getElementType(), CAList.size());
|
||||
Constant *CA = ConstantArray::get(ATy, CAList);
|
||||
|
||||
// If we didn't change the number of elements, don't create a new GV.
|
||||
if (CA->getType() == OldCA->getType()) {
|
||||
GV->setInitializer(CA);
|
||||
} else {
|
||||
// Create the new global and insert it next to the existing list.
|
||||
GlobalVariable *NGV = new GlobalVariable(
|
||||
CA->getType(), GV->isConstant(), GV->getLinkage(), CA, "",
|
||||
GV->getThreadLocalMode());
|
||||
GV->getParent()->getGlobalList().insert(GV->getIterator(), NGV);
|
||||
NGV->takeName(GV);
|
||||
|
||||
// Nuke the old list, replacing any uses with the new one.
|
||||
if (!GV->use_empty()) {
|
||||
Constant *V = NGV;
|
||||
if (V->getType() != GV->getType())
|
||||
V = ConstantExpr::getBitCast(V, GV->getType());
|
||||
GV->replaceAllUsesWith(V);
|
||||
}
|
||||
GV->eraseFromParent();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Function *c_tor = NULL;
|
||||
if (c_tor = M->getFunction("__cuda_module_ctor")) {
|
||||
c_tor->dropAllReferences();
|
||||
|
|
|
@ -360,6 +360,12 @@ void replace_cuda_math_built_in(llvm::Module *M) {
|
|||
if (func_name.find("_ZL3expd") != std::string::npos) {
|
||||
F->deleteBody();
|
||||
}
|
||||
if (func_name.find("_ZL8copysigndd") != std::string::npos) {
|
||||
F->deleteBody();
|
||||
}
|
||||
if (func_name.find("_ZL8copysigndd.8") != std::string::npos) {
|
||||
F->deleteBody();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -370,6 +376,8 @@ void init_block(llvm::Module *M, std::ofstream &fout) {
|
|||
remove_cuda_built_in(M);
|
||||
// replace CUDA math function, like expf
|
||||
replace_cuda_math_built_in(M);
|
||||
// replace CUDA math function, like expf
|
||||
replace_cuda_math_built_in(M);
|
||||
|
||||
// lower ConstantExpression
|
||||
bool modified;
|
||||
|
|
|
@ -464,10 +464,12 @@ void replace_built_in_function(llvm::Module *M) {
|
|||
std::vector<Value *> Indices;
|
||||
Indices.push_back(ConstantInt::get(I32, 0));
|
||||
Indices.push_back(ConstantInt::get(I32, i));
|
||||
auto new_GEP = GetElementPtrInst::Create(NULL, // Pointee type
|
||||
src_alloc, // Alloca
|
||||
Indices, // Indices
|
||||
"", Call);
|
||||
auto new_GEP = GetElementPtrInst::Create(
|
||||
cast<PointerType>(src_alloc->getType()->getScalarType())
|
||||
->getElementType(),
|
||||
src_alloc, // Alloca
|
||||
Indices, // Indices
|
||||
"", Call);
|
||||
auto new_load =
|
||||
new LoadInst(new_GEP->getType()->getPointerElementType(),
|
||||
new_GEP, "", Call);
|
||||
|
@ -503,8 +505,14 @@ void replace_built_in_function(llvm::Module *M) {
|
|||
Call->getCalledFunction()->setName("__nvvm_lohi_i2d");
|
||||
} else if (func_name == "llvm.nvvm.fabs.f") {
|
||||
Call->getCalledFunction()->setName("__nvvm_fabs_f");
|
||||
} else if (func_name == "llvm.nvvm.fabs.d") {
|
||||
Call->getCalledFunction()->setName("__nv_fabsd");
|
||||
} else if (func_name == "llvm.nvvm.mul24.i") {
|
||||
Call->getCalledFunction()->setName("__nvvm_mul24_i");
|
||||
} else if (func_name == "llvm.nvvm.fmin.d") {
|
||||
Call->getCalledFunction()->setName("__nv_fmind");
|
||||
} else if (func_name == "llvm.nvvm.fmax.d") {
|
||||
Call->getCalledFunction()->setName("__nv_fmaxd");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -70,10 +70,12 @@ void handle_warp_vote(llvm::Module *M) {
|
|||
new LoadInst(intra_warp_index_addr->getType()->getPointerElementType(),
|
||||
intra_warp_index_addr, "intra_warp_index", sync_inst);
|
||||
|
||||
auto GEP = GetElementPtrInst::Create(NULL, // Pointee type
|
||||
warp_vote_ptr, // Alloca
|
||||
{zero, intra_warp_index}, // Indices
|
||||
"", sync_inst);
|
||||
auto GEP = GetElementPtrInst::Create(
|
||||
cast<PointerType>(warp_vote_ptr->getType()->getScalarType())
|
||||
->getElementType(),
|
||||
warp_vote_ptr, // Alloca
|
||||
{zero, intra_warp_index}, // Indices
|
||||
"", sync_inst);
|
||||
|
||||
// as AVX only support 8bit for each thread
|
||||
// so we have to cast the predict into int8
|
||||
|
|
|
@ -19,7 +19,11 @@ float __nv_fmodf(float, float);
|
|||
int __nv_isnanf(float);
|
||||
int __nv_isinff(float);
|
||||
float __nv_fabsf(float);
|
||||
double __nv_fabsd(double);
|
||||
double __nv_fmind(double, double);
|
||||
double __nv_fmaxd(double, double);
|
||||
int __nvvm_mul24_i(int, int);
|
||||
double _ZL3expd(double);
|
||||
double _ZL8copysigndd(double, double);
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -15,5 +15,9 @@ float __nv_fmodf(float x, float y) { return fmod(x, y); }
|
|||
/// CPU stand-in for the CUDA device function isnan(float).
/// Returns nonzero (1) when v is NaN, 0 otherwise.
int __nv_isnanf(float v) { return std::isnan(v) ? 1 : 0; }
|
||||
/// CPU stand-in for the CUDA device function isinf(float).
/// Returns nonzero (1) when v is +/-infinity, 0 otherwise.
int __nv_isinff(float v) { return std::isinf(v) ? 1 : 0; }
|
||||
/// CPU stand-in for the CUDA device function fabsf.
/// Uses std::fabs instead of unqualified abs(): if only <cstdlib> overloads
/// are visible, abs(float) silently binds to int abs(int) and truncates.
float __nv_fabsf(float v) { return std::fabs(v); }
|
||||
/// CPU stand-in for the CUDA device function fabs(double).
/// Uses std::fabs instead of unqualified abs(): if only <cstdlib> overloads
/// are visible, abs(double) silently binds to int abs(int) and truncates.
double __nv_fabsd(double v) { return std::fabs(v); }
|
||||
/// CPU stand-in for the CUDA device function fmin(double).
/// std::fmin matches fmin semantics: if exactly one operand is NaN, the
/// non-NaN operand is returned. The previous (a < b) ? a : b ternary
/// returned NaN whenever b was NaN.
double __nv_fmind(double a, double b) { return std::fmin(a, b); }
|
||||
/// CPU stand-in for the CUDA device function fmax(double).
/// std::fmax matches fmax semantics: if exactly one operand is NaN, the
/// non-NaN operand is returned. The previous (a > b) ? a : b ternary
/// returned NaN whenever b was NaN.
double __nv_fmaxd(double a, double b) { return std::fmax(a, b); }
|
||||
/// CPU stand-in for CUDA's __mul24 intrinsic. Performs a full 32-bit
/// multiply; it does NOT truncate the operands to their low 24 bits the
/// way the GPU intrinsic does — presumably callers stay within the
/// 24-bit range (thread/block indices). TODO(review): confirm.
int __nvvm_mul24_i(int a, int b) {
  const int product = a * b;
  return product;
}
|
||||
/// CPU stand-in for the mangled file-local CUDA helper exp(double)
/// (the symbol name is the Itanium mangling produced by the device
/// front end); simply forwards to the host exp().
double _ZL3expd(double base) {
  return std::exp(base);
}
|
||||
/// CPU stand-in for the mangled file-local CUDA helper copysign(double,
/// double): returns |x| carrying the sign of y. The previous
/// `y > 0 ? abs(x) : -abs(x)` was wrong for y == +0.0 (it produced -|x|,
/// while copysign(x, +0.0) is +|x|) and risked the int-abs truncation
/// pitfall; std::copysign handles signed zeros and NaN signs correctly.
double _ZL8copysigndd(double x, double y) { return std::copysign(x, y); }
|
||||
|
|
|
@ -17,7 +17,11 @@
|
|||
Initialize the device
|
||||
*/
|
||||
int device_max_compute_units = 1;
|
||||
bool device_initilized = false;
|
||||
int init_device() {
|
||||
if (device_initilized)
|
||||
return 0;
|
||||
device_initilized = true;
|
||||
cu_device *device = (cu_device *)calloc(1, sizeof(cu_device));
|
||||
if (device == NULL)
|
||||
return C_ERROR_MEMALLOC;
|
||||
|
@ -231,6 +235,9 @@ void scheduler_uninit() {
|
|||
Counting Barrier basically
|
||||
*/
|
||||
void cuSynchronizeBarrier() {
|
||||
if (!device_initilized) {
|
||||
init_device();
|
||||
}
|
||||
while (1) {
|
||||
// (TODO): currently, we assume each kernel launch will have a
|
||||
// following sync
|
||||
|
|
Loading…
Reference in New Issue