From 7d29a409f6c99c5ccfc9f090e197e21b27094d98 Mon Sep 17 00:00:00 2001 From: RobinHan Date: Sat, 18 Jun 2022 13:39:26 -0400 Subject: [PATCH] fix bug for inserting sync after kernelLaunch --- .../src/x86/ReplaceCudaBuiltin.cpp | 27 ++++--------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/compilation/HostTranslation/src/x86/ReplaceCudaBuiltin.cpp b/compilation/HostTranslation/src/x86/ReplaceCudaBuiltin.cpp index 57cba77..7352566 100644 --- a/compilation/HostTranslation/src/x86/ReplaceCudaBuiltin.cpp +++ b/compilation/HostTranslation/src/x86/ReplaceCudaBuiltin.cpp @@ -26,7 +26,7 @@ void InsertSyncAfterKernelLaunch(llvm::Module *M) { llvm::FunctionCallee _f = M->getOrInsertFunction("cudaDeviceSynchronize", LauncherFuncT); llvm::Function *func_launch = llvm::cast(_f.getCallee()); - std::set launch_function_name; + std::set kernel_launch_instruction; for (Module::iterator i = M->begin(), e = M->end(); i != e; ++i) { Function *F = &(*i); auto func_name = F->getName().str(); @@ -40,33 +40,16 @@ void InsertSyncAfterKernelLaunch(llvm::Module *M) { if (Function *calledFunction = callInst->getCalledFunction()) { if (calledFunction->getName().startswith("cudaLaunchKernel")) { // F is a kernel launch function - launch_function_name.insert(func_name); + kernel_launch_instruction.insert(callInst); } } } } } } - for (Module::iterator i = M->begin(), e = M->end(); i != e; ++i) { - Function *F = &(*i); - for (Function::iterator b = F->begin(); b != F->end(); ++b) { - BasicBlock *B = &(*b); - - for (BasicBlock::iterator i = B->begin(); i != B->end(); ++i) { - Instruction *inst = &(*i); - if (llvm::CallBase *callInst = llvm::dyn_cast(inst)) { - if (Function *calledFunction = callInst->getCalledFunction()) { - if (launch_function_name.find(calledFunction->getName().str()) != - launch_function_name.end()) { - // insert a sync after launch - auto sync_call = - llvm::CallInst::Create(func_launch, "inserted_sync"); - sync_call->insertAfter(callInst); - } - } - } - } - } + for (auto call : kernel_launch_instruction) { + auto sync_call = llvm::CallInst::Create(func_launch, "inserted_sync"); + sync_call->insertAfter(call); } }