2023-12-14 03:29:17 +08:00
|
|
|
#include "generate_cpu_format.h"
#include "handle_sync.h"
#include "init.h"
#include "insert_sync.h"
#include "insert_warp_loop.h"
#include "performance.h"
#include "tool.h"
#include "warp_func.h"
#include <assert.h>
#include <cstdio>
#include <fstream>
#include <string>
|
|
|
|
|
|
|
|
using namespace llvm;
|
|
|
|
|
2023-12-14 03:29:17 +08:00
|
|
|
// To support constant memory variables, information must be passed from
// KernelTranslator to HostTranslator. Since HostTranslator knows nothing
// about the kernel functions, KernelTranslator writes the information to a
// file, which HostTranslator then reads.
|
2022-05-04 20:59:38 +08:00
|
|
|
// Name of the metadata file produced by this program: main() opens it for
// writing and hands the resulting stream to init_block().
std::string PATH{"kernel_meta.log"};
|
|
|
|
|
2022-01-12 00:01:42 +08:00
|
|
|
int main(int argc, char **argv) {
|
2022-05-04 20:59:38 +08:00
|
|
|
assert(argc == 3 && "incorrect number of arguments\n");
|
2022-01-12 00:01:42 +08:00
|
|
|
llvm::Module *program = LoadModuleFromFilr(argv[1]);
|
2022-05-04 20:59:38 +08:00
|
|
|
|
|
|
|
std::ofstream fout;
|
|
|
|
fout.open(PATH);
|
2022-01-12 00:01:42 +08:00
|
|
|
|
2023-12-14 03:29:17 +08:00
|
|
|
// inline __device__ functions, and create auxiliary global variables
|
2022-05-04 20:59:38 +08:00
|
|
|
init_block(program, fout);
|
2023-12-14 03:29:17 +08:00
|
|
|
|
2022-01-12 00:01:42 +08:00
|
|
|
// insert sync before each vote, and replace the
|
|
|
|
// original vote function to warp vote
|
|
|
|
handle_warp_vote(program);
|
2022-05-04 20:59:38 +08:00
|
|
|
|
2022-01-12 00:01:42 +08:00
|
|
|
// replace warp shuffle
|
|
|
|
handle_warp_shfl(program);
|
2022-09-16 00:33:28 +08:00
|
|
|
|
2022-01-12 00:01:42 +08:00
|
|
|
// insert sync
|
|
|
|
insert_sync(program);
|
2022-09-16 00:33:28 +08:00
|
|
|
|
2022-01-12 00:01:42 +08:00
|
|
|
// split block by sync
|
|
|
|
split_block_by_sync(program);
|
2023-12-14 03:29:17 +08:00
|
|
|
|
|
|
|
// add loop for intra&intera thread, it refers 'hierarchical collapsing' in
|
|
|
|
// COX paper.
|
2022-01-12 00:01:42 +08:00
|
|
|
insert_warp_loop(program);
|
2022-05-04 20:59:38 +08:00
|
|
|
|
|
|
|
replace_built_in_function(program);
|
|
|
|
|
2023-12-14 03:29:17 +08:00
|
|
|
// the input kernel programs have NVIDIA metadata, they need to be replaced to
|
|
|
|
// CPU metadata
|
|
|
|
generate_cpu_format(program);
|
2022-05-04 20:59:38 +08:00
|
|
|
|
2023-12-14 03:29:17 +08:00
|
|
|
// execute O3 pipeline on the transformed program
|
2022-01-12 00:01:42 +08:00
|
|
|
performance_optimization(program);
|
|
|
|
|
2022-05-04 20:59:38 +08:00
|
|
|
VerifyModule(program);
|
|
|
|
|
2022-01-12 00:01:42 +08:00
|
|
|
DumpModule(program, argv[2]);
|
2022-05-04 20:59:38 +08:00
|
|
|
|
|
|
|
fout.close();
|
2022-01-12 00:01:42 +08:00
|
|
|
return 0;
|
|
|
|
}
|