CuPBoP/compilation/KernelTranslation.cpp

62 lines
1.6 KiB
C++

#include "generate_cpu_format.h"
#include "handle_sync.h"
#include "init.h"
#include "insert_sync.h"
#include "insert_warp_loop.h"
#include "performance.h"
#include "tool.h"
#include "warp_func.h"
#include <assert.h>
#include <fstream>
#include <iostream>
#include <string>
using namespace llvm;
// To support constant memory variables, information has to be passed from
// KernelTranslator to HostTranslator. Since HostTranslator knows nothing
// about the kernel functions, KernelTranslator writes that information to a
// file and HostTranslator reads it back.
//
// PATH is the name of that file. It is a relative path, so it resolves
// against the current working directory of the process.
std::string PATH = "kernel_meta.log";
/// Entry point of the kernel translator.
///
/// Usage: <program> <input-file> <output-file>
///
/// Loads the module named by argv[1], runs the transformation steps below in
/// a fixed order, verifies the result and dumps it to argv[2]. The stream
/// opened on PATH is handed to init_block so that kernel metadata can be
/// recorded while the module is processed.
///
/// @param argc number of command-line arguments; must be exactly 3.
/// @param argv argv[1] is the input file, argv[2] is the output file.
/// @return 0 on success; 1 when the arguments are wrong, the module could
///         not be loaded, or the metadata file could not be opened.
int main(int argc, char **argv) {
  // Use a real runtime check instead of assert(): with NDEBUG defined the
  // assert disappears and argv[1] / argv[2] would be read out of bounds.
  if (argc != 3) {
    const char *self = (argc > 0 && argv[0]) ? argv[0] : "kernelTranslator";
    std::cerr << "incorrect number of arguments\n"
              << "usage: " << self << " <input-file> <output-file>\n";
    return 1;
  }

  llvm::Module *program = LoadModuleFromFilr(argv[1]);
  // NOTE(review): defensive check; how LoadModuleFromFilr reports failure is
  // not visible from this file.
  if (program == nullptr) {
    std::cerr << "failed to load module from '" << argv[1] << "'\n";
    return 1;
  }

  // Without this file the metadata written during init_block would be
  // silently lost, so treat a failed open as a hard error.
  std::ofstream fout(PATH);
  if (!fout.is_open()) {
    std::cerr << "failed to open '" << PATH << "' for writing\n";
    return 1;
  }

  // inline __device__ functions, and create auxiliary global variables
  init_block(program, fout);
  // insert sync before each vote, and replace the original vote function
  // with the warp vote
  handle_warp_vote(program);
  // replace warp shuffle
  handle_warp_shfl(program);
  // insert sync
  insert_sync(program);
  // split blocks by sync
  split_block_by_sync(program);
  // add loops for intra- and inter-warp threads; this corresponds to
  // 'hierarchical collapsing' in the COX paper
  insert_warp_loop(program);
  replace_built_in_function(program);
  // the input kernel programs carry NVIDIA metadata, which needs to be
  // replaced with CPU metadata
  generate_cpu_format(program);
  // execute the O3 pipeline on the transformed program
  performance_optimization(program);

  VerifyModule(program);
  DumpModule(program, argv[2]);

  fout.close();
  return 0;
}