; LLVM IR listing (1219 lines, 187 KiB)
|
; ModuleID = 'needle-host-x86_64-unknown-linux-gnu.bc'
|
||
|
source_filename = "needle.cu"
|
||
|
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
|
||
|
target triple = "x86_64-unknown-linux-gnu"
|
||
|
|
||
|
; Named struct types used by this module.
; %struct._IO_FILE is the pointee type of @stderr and of the stream argument
; handed to fprintf.
%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
|
||
|
; Only referenced through a pointer field of %struct._IO_FILE above.
%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
|
||
|
; Three i32 fields (12 bytes). The launch stubs copy it into a { i64, i32 }
; temporary before passing it to cudaLaunchKernel.
%struct.dim3 = type { i32, i32, i32 }
|
||
|
; Opaque: this module only ever handles pointers to it (stream argument of
; cudaLaunchKernel).
%struct.CUstream_st = type opaque
|
||
|
; Two i64 fields; @_Z7gettimev reads them as %tv_sec (index 0) and %tv_usec
; (index 1).
%struct.timeval = type { i64, i64 }
|
||
|
; In the code visible here it is only passed as a null pointer to gettimeofday.
%struct.timezone = type { i32, i32 }
|
||
|
|
||
|
$_ZN4dim3C2Ejjj = comdat any
|
||
|
|
||
|
@blosum62 = dso_local global [24 x [24 x i32]] [[24 x i32] [i32 4, i32 -1, i32 -2, i32 -2, i32 0, i32 -1, i32 -1, i32 0, i32 -2, i32 -1, i32 -1, i32 -1, i32 -1, i32 -2, i32 -1, i32 1, i32 0, i32 -3, i32 -2, i32 0, i32 -2, i32 -1, i32 0, i32 -4], [24 x i32] [i32 -1, i32 5, i32 0, i32 -2, i32 -3, i32 1, i32 0, i32 -2, i32 0, i32 -3, i32 -2, i32 2, i32 -1, i32 -3, i32 -2, i32 -1, i32 -1, i32 -3, i32 -2, i32 -3, i32 -1, i32 0, i32 -1, i32 -4], [24 x i32] [i32 -2, i32 0, i32 6, i32 1, i32 -3, i32 0, i32 0, i32 0, i32 1, i32 -3, i32 -3, i32 0, i32 -2, i32 -3, i32 -2, i32 1, i32 0, i32 -4, i32 -2, i32 -3, i32 3, i32 0, i32 -1, i32 -4], [24 x i32] [i32 -2, i32 -2, i32 1, i32 6, i32 -3, i32 0, i32 2, i32 -1, i32 -1, i32 -3, i32 -4, i32 -1, i32 -3, i32 -3, i32 -1, i32 0, i32 -1, i32 -4, i32 -3, i32 -3, i32 4, i32 1, i32 -1, i32 -4], [24 x i32] [i32 0, i32 -3, i32 -3, i32 -3, i32 9, i32 -3, i32 -4, i32 -3, i32 -3, i32 -1, i32 -1, i32 -3, i32 -1, i32 -2, i32 -3, i32 -1, i32 -1, i32 -2, i32 -2, i32 -1, i32 -3, i32 -3, i32 -2, i32 -4], [24 x i32] [i32 -1, i32 1, i32 0, i32 0, i32 -3, i32 5, i32 2, i32 -2, i32 0, i32 -3, i32 -2, i32 1, i32 0, i32 -3, i32 -1, i32 0, i32 -1, i32 -2, i32 -1, i32 -2, i32 0, i32 3, i32 -1, i32 -4], [24 x i32] [i32 -1, i32 0, i32 0, i32 2, i32 -4, i32 2, i32 5, i32 -2, i32 0, i32 -3, i32 -3, i32 1, i32 -2, i32 -3, i32 -1, i32 0, i32 -1, i32 -3, i32 -2, i32 -2, i32 1, i32 4, i32 -1, i32 -4], [24 x i32] [i32 0, i32 -2, i32 0, i32 -1, i32 -3, i32 -2, i32 -2, i32 6, i32 -2, i32 -4, i32 -4, i32 -2, i32 -3, i32 -3, i32 -2, i32 0, i32 -2, i32 -2, i32 -3, i32 -3, i32 -1, i32 -2, i32 -1, i32 -4], [24 x i32] [i32 -2, i32 0, i32 1, i32 -1, i32 -3, i32 0, i32 0, i32 -2, i32 8, i32 -3, i32 -3, i32 -1, i32 -2, i32 -1, i32 -2, i32 -1, i32 -2, i32 -2, i32 2, i32 -3, i32 0, i32 0, i32 -1, i32 -4], [24 x i32] [i32 -1, i32 -3, i32 -3, i32 -3, i32 -1, i32 -3, i32 -3, i32 -4, i32 -3, i32 4, i32 2, i32 -3, i32 1, i32 0, i32 -3, i32 -2, i32 -1, i32 -3, i32 -1, i32 3, i32 -3, 
i32 -3, i32 -1, i32 -4], [24 x i32] [i32 -1, i32 -2, i32 -3, i32 -4, i32 -1, i32 -2, i32 -3, i32 -4, i32 -3, i32 2, i32 4, i32 -2, i32 2, i32 0, i32 -3, i32 -2, i32 -1, i32 -2, i32 -1, i32 1, i32 -4, i32 -3, i32 -1, i32 -4], [24 x i32] [i32 -1, i32 2, i32 0, i32 -1, i32 -3, i32 1, i32 1, i32 -2, i32 -1, i32 -3, i32 -2, i32 5, i32 -1, i32 -3, i32 -1, i32 0, i32 -1, i32 -3, i32 -2, i32 -2, i32 0, i32 1, i32 -1, i32 -4], [24 x i32] [i32 -1, i32 -1, i32 -2, i32 -3, i32 -1, i32 0, i32 -2, i32 -3, i32 -2, i32 1, i32 2, i32 -1, i32 5, i32 0, i32 -2, i32 -1, i32 -1, i32 -1, i32 -1, i32 1, i32 -3, i32 -1, i32 -1, i32 -4], [24 x i32] [i32 -2, i32 -3, i32 -3, i32 -3, i32 -2, i32 -3, i32 -3, i32 -3, i32 -1, i32 0, i32 0, i32 -3, i32 0, i32 6, i32 -4, i32 -2, i32 -2, i32 1, i32 3, i32 -1, i32 -3, i32 -3, i32 -1, i32 -4], [24 x i32] [i32 -1, i32 -2, i32 -2, i32 -1, i32 -3, i32 -1, i32 -1, i32 -2, i32 -2, i32 -3, i32 -3, i32 -1, i32 -2, i32 -4, i32 7, i32 -1, i32 -1, i32 -4, i32 -3, i32 -2, i32 -2, i32 -1, i32 -2, i32 -4], [24 x i32] [i32 1, i32 -1, i32 1, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 -1, i32 -2, i32 -2, i32 0, i32 -1, i32 -2, i32 -1, i32 4, i32 1, i32 -3, i32 -2, i32 -2, i32 0, i32 0, i32 0, i32 -4], [24 x i32] [i32 0, i32 -1, i32 0, i32 -1, i32 -1, i32 -1, i32 -1, i32 -2, i32 -2, i32 -1, i32 -1, i32 -1, i32 -1, i32 -2, i32 -1, i32 1, i32 5, i32 -2, i32 -2, i32 0, i32 -1, i32 -1, i32 0, i32 -4], [24 x i32] [i32 -3, i32 -3, i32 -4, i32 -4, i32 -2, i32 -2, i32 -3, i32 -2, i32 -2, i32 -3, i32 -2, i32 -3, i32 -1, i32 1, i32 -4, i32 -3, i32 -2, i32 11, i32 2, i32 -3, i32 -4, i32 -3, i32 -2, i32 -4], [24 x i32] [i32 -2, i32 -2, i32 -2, i32 -3, i32 -2, i32 -1, i32 -2, i32 -3, i32 2, i32 -1, i32 -1, i32 -2, i32 -1, i32 3, i32 -3, i32 -2, i32 -2, i32 2, i32 7, i32 -1, i32 -3, i32 -2, i32 -1, i32 -4], [24 x i32] [i32 0, i32 -3, i32 -3, i32 -3, i32 -1, i32 -2, i32 -2, i32 -3, i32 -3, i32 3, i32 1, i32 -2, i32 1, i32 -1, i32 -2, i32 -2, i32 0, i32 -3, i32 -1, i32 4, i32 -3, i32 
-2, i32 -1, i32 -4], [24 x i32] [i32 -2, i32 -1, i32 3, i32 4, i32 -3, i32 0, i32 1, i32 -1, i32
|
||
|
; printf format used by @main, which passes the constant i32 16 for %d.
@.str = private unnamed_addr constant [25 x i8] c"WG size of kernel = %d \0A\00", align 1
|
||
|
; C stdio stderr stream; loaded in @_Z7runTestiPPc before each fprintf call.
@stderr = external dso_local global %struct._IO_FILE*, align 8
|
||
|
; Usage text (.str.1 - .str.3). Their use sites are outside the visible part of
; the file; presumably printed by @_Z5usageiPPc - TODO confirm.
@.str.1 = private unnamed_addr constant [42 x i8] c"Usage: %s <max_rows/max_cols> <penalty> \0A\00", align 1
|
||
|
@.str.2 = private unnamed_addr constant [36 x i8] c"\09<dimension> - x and y dimensions\0A\00", align 1
|
||
|
@.str.3 = private unnamed_addr constant [40 x i8] c"\09<penalty> - penalty(positive integer)\0A\00", align 1
|
||
|
; Written to stderr by @_Z7runTestiPPc when atoi(argv[1]) srem 16 is non-zero;
; that path then calls exit(1).
@.str.4 = private unnamed_addr constant [47 x i8] c"The dimension values must be a multiple of 16\0A\00", align 1
|
||
|
; Written to stderr by @_Z7runTestiPPc when the input_itemsets pointer is null.
; NOTE(review): the string has no trailing newline, and the visible code
; branches on to %if.end24 afterwards instead of exiting - confirm intended.
@.str.5 = private unnamed_addr constant [31 x i8] c"error: can not allocate memory\00", align 1
|
||
|
; Printed by @_Z7runTestiPPc after the zero-fill loop over input_itemsets.
@.str.6 = private unnamed_addr constant [24 x i8] c"Start Needleman-Wunsch\0A\00", align 1
|
||
|
; .str.7 - .str.12: use sites are outside the visible part of the file.
@.str.7 = private unnamed_addr constant [28 x i8] c"Processing top-left matrix\0A\00", align 1
|
||
|
@.str.8 = private unnamed_addr constant [32 x i8] c"Processing bottom-right matrix\0A\00", align 1
|
||
|
@.str.9 = private unnamed_addr constant [11 x i8] c"result.txt\00", align 1
|
||
|
@.str.10 = private unnamed_addr constant [2 x i8] c"w\00", align 1
|
||
|
@.str.11 = private unnamed_addr constant [28 x i8] c"print traceback value GPU:\0A\00", align 1
|
||
|
@.str.12 = private unnamed_addr constant [4 x i8] c"%d \00", align 1
|
||
|
; NUL-terminated copies of the two launch-stub symbol names defined below.
; Their use sites are not visible here; presumably consumed during kernel
; registration in @__cuda_module_ctor - TODO confirm.
@0 = private unnamed_addr constant [33 x i8] c"_Z20needle_cuda_shared_1PiS_iiii\00", align 1
|
||
|
@1 = private unnamed_addr constant [33 x i8] c"_Z20needle_cuda_shared_2PiS_iiii\00", align 1
|
||
|
@2 = private constant [48849 x i8] c"P\EDU\BA\01\00\10\00\C0\BE\00\00\00\00\00\00\02\00\01\01@\00\00\00\88\A8\00\00\00\00\00\00\00\00\00\00\00\00\00\00\07\00\01\00=\00\00\00\00\00\00\00\00\00\00\00\11\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\7FELF\02\01\013\07\00\00\00\00\00\00\00\02\00\BE\00e\00\00\00\00\00\00\00\00\00\00\00\E0\A7\00\00\00\00\00\00`\A4\00\00\00\00\00\00=\05=\00@\008\00\03\00@\00\0E\00\01\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00.text._Z20needle_cuda_shared_2PiS_iiii\00.nv.info._Z20needle_cuda_shared_2PiS_iiii\00.nv.shared._Z20needle_cuda_shared_2PiS_iiii\00.nv.global\00.nv.constant0._Z20needle_cuda_shared_2PiS_iiii\00.text._Z20needle_cuda_shared_1PiS_iiii\00.nv.info._Z20needle_cuda_shared_1PiS_iiii\00.nv.shared._Z20needle_cuda_shared_1PiS_iiii\00.nv.constant0._Z20needle_cuda_shared_1PiS_iiii\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00_Z20needle_cuda_shared_2PiS_iiii\00.text._Z20needle_cuda_shared_2PiS_iiii\00.nv.info._Z20needle_cuda_shared_2PiS_iiii\00.nv.shared._Z20needle_cuda_shared_2PiS_iiii\00.nv.global\00blockIdx\00threadIdx\00$_Z20needle_cuda_shared_2PiS_iiii$_Z14maximum_deviceiii\00$___ZZ20needle_cuda_shared_2PiS_iiiiE4temp__635\00$___ZZ20needle_cuda_shared_2PiS_iiiiE3ref__637\00.nv.constant0._Z20needle_cuda_shared_2PiS_iiii\00_param\00_Z20needle_cuda_shared_1PiS_iiii\00.text._Z20needle_cuda_shared_1PiS_iiii\00.nv.info._Z20needle_cuda_shared_1PiS_iiii\00.nv.shared._Z20needle_cuda_shared_1PiS_iiii\00$_Z20needle_cuda_shared_1PiS_iiii$_Z14maximum_deviceiii\00$___ZZ20needle_cuda_shared_1PiS_iiiiE4temp__240\00$___ZZ20needle_cuda_shared_1PiS_iiiiE3ref__242\00.nv.constant0._Z20needle_cuda_shared_1PiS_iiii\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00S\00\00\00\03\00\09\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\A4\00\00\00\03\00\0B\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\D0\00\00\00\03\00\0C\00\00\00\00\00\00\00\00\00\
00\00\00\00\00\00\00\00\DB\00\00\00\01\00\0C\00\00\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\E4\00\00\00\01\00\0C\00\01\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\85\01\00\00\03\00\07\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\DC\01\00\00\03\00\0A\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00-\02\00\00\03\00\0D\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\F0\02\00\00\03\00\08\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\002\00\00\00\12\10\09\00\00\00\00\00\00\00\00\00@M\00\00\00\00\00\00\EE\00\00\00\12\02\09\00\B0E\00\00\00\00\00\00\90\07\00\00\00\00\00\00\BB\01\00\00\12\10\0A\00\00\00\00\00\00\00\00\00@L\00\00\00\00\00\00Y\02\00\00\12\02\0A\00\D8D\00\00\00\00\00\00h\07\00\00\00\00\00\00\04/\08\00\0C\00\00\00\13\00\00\00\04#\08\00\0D\00\00\00\00\00\00\00\04\12\08\00\0D\00\00\00\00\00\00\00\04\11\08\00\0D\00\00\00\00\00\00\00\04#\08\00\0C\00\00\00\00\00\00\00\04\12\08\00\0C\00\00\00x\00\00\00\04\11\08\00\0C\00\00\00x\00\00\00\04/\08\00\0A\00\00\00\13\00\00\00\04#\08\00\0B\00\00\00\00\00\00\00\04\12\08\00\0B\00\00\00\00\00\00\00\04\11\08\00\0B\00\00\00\00\00\00\00\04#\08\00\0A\00\00\00\00\00\00\00\04\12\08\00\0A\00\00\00x\00\00\00\04\11\08\00\0A\00\00\00x\00\00\00\010\00\00\01*\00\00\04\0A\08\00\06\00\00\00@\01 \00\03\19 \00\04\17\0C\00\00\00\00\00\05\00\1C\00\00\F0\11\00\04\17\0C\00\00\00\00\00\04\00\18\00\00\F0\11\00\04\17\0C\00\00\00\00\00\03\00\14\00\00\F0\11\00\04\17\0C\00\00\00\00\00\02\00\10\00\00\F0\11\00\04\17\0C\00\00\00\00\00\01\00\08\00\00\F0!\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0!\00\03\1B\FF\00\04\1D\04\00\88\04\00\00\04\1C\04\00\A8E\00\00\04\1E\04\00`\00\00\00\010\00\00\01*\00\00\04\0A\08\00\09\00\00\00@\01 \00\03\19 
\00\04\17\0C\00\00\00\00\00\05\00\1C\00\00\F0\11\00\04\17\0C\00\00\00\00\00\04\00\18\00\00\F0\11\00\04\17\0C\00\00\00\00\00\03\00\14\00\00\F0\11\00\04\17\0C\00\00\00\00\00\02\00\10\00\00\F0\11\00\04\17\0C\00\00\00\00\00\01\00\08\00\00\F0!\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0!\00\03\1B\FF\00\04\1D\04\00\88\04\00\00\04\1C\04\00\D0D\00\00\04\1E\04\00`\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00
|
||
|
; Descriptor placed in section ".nvFatBinSegment": two i32 header words
; (1180844977 = 0x466243B1, then 1), a pointer to the first byte of the
; embedded blob @2, and a null trailing pointer.
; NOTE(review): the header words look like the fatbin wrapper magic/version
; expected by the CUDA runtime - confirm against the runtime documentation.
@__cuda_fatbin_wrapper = internal constant { i32, i32, i8*, i8* } { i32 1180844977, i32 1, i8* getelementptr inbounds ([48849 x i8], [48849 x i8]* @2, i64 0, i64 0), i8* null }, section ".nvFatBinSegment", align 8
|
||
|
; Module-private i8** slot, null-initialised. Its writers are outside the
; visible part of the file.
@__cuda_gpubin_handle = internal global i8** null, align 8
|
||
|
; One static constructor entry: priority 65535, function @__cuda_module_ctor
; (declared as void (i8*) and bitcast to the void () type the ctor list
; requires), no associated data pointer.
@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* bitcast (void (i8*)* @__cuda_module_ctor to void ()*), i8* null }]
|
||
|
|
||
|
; Function Attrs: noinline optnone uwtable
|
||
|
; Host-side launch stub for needle_cuda_shared_1.
;
; Spills the six kernel parameters to the stack, builds an array of pointers
; to those spill slots, retrieves the pending launch configuration via
; __cudaPopCallConfiguration, and forwards everything to cudaLaunchKernel
; using this stub's own address as the kernel handle. The status values
; returned by both runtime calls are not inspected.
define dso_local void @_Z20needle_cuda_shared_1PiS_iiii(i32* %referrence, i32* %matrix_cuda, i32 %cols, i32 %penalty, i32 %i, i32 %block_width) #0 {
entry:
  ; Stack homes for the kernel parameters; the launch takes their addresses.
  %referrence.addr = alloca i32*, align 8
  %matrix_cuda.addr = alloca i32*, align 8
  %cols.addr = alloca i32, align 4
  %penalty.addr = alloca i32, align 4
  %i.addr = alloca i32, align 4
  %block_width.addr = alloca i32, align 4

  ; Launch configuration, passed by pointer to __cudaPopCallConfiguration and
  ; read back afterwards.
  %grid_dim = alloca %struct.dim3, align 8
  %block_dim = alloca %struct.dim3, align 8
  %shmem_size = alloca i64, align 8
  %stream = alloca i8*, align 8

  ; Scratch used to re-express each 12-byte dim3 as an (i64, i32) pair.
  %grid_dim.coerce = alloca { i64, i32 }, align 8
  %block_dim.coerce = alloca { i64, i32 }, align 8

  ; Six-entry array of untyped pointers, one per kernel parameter.
  %kernel_args = alloca i8*, i64 6, align 16

  store i32* %referrence, i32** %referrence.addr, align 8
  store i32* %matrix_cuda, i32** %matrix_cuda.addr, align 8
  store i32 %cols, i32* %cols.addr, align 4
  store i32 %penalty, i32* %penalty.addr, align 4
  store i32 %i, i32* %i.addr, align 4
  store i32 %block_width, i32* %block_width.addr, align 4

  ; kernel_args[k] = address of the k-th parameter's stack home.
  %arg0.ptr = bitcast i32** %referrence.addr to i8*
  %arg0.slot = getelementptr i8*, i8** %kernel_args, i32 0
  store i8* %arg0.ptr, i8** %arg0.slot
  %arg1.ptr = bitcast i32** %matrix_cuda.addr to i8*
  %arg1.slot = getelementptr i8*, i8** %kernel_args, i32 1
  store i8* %arg1.ptr, i8** %arg1.slot
  %arg2.ptr = bitcast i32* %cols.addr to i8*
  %arg2.slot = getelementptr i8*, i8** %kernel_args, i32 2
  store i8* %arg2.ptr, i8** %arg2.slot
  %arg3.ptr = bitcast i32* %penalty.addr to i8*
  %arg3.slot = getelementptr i8*, i8** %kernel_args, i32 3
  store i8* %arg3.ptr, i8** %arg3.slot
  %arg4.ptr = bitcast i32* %i.addr to i8*
  %arg4.slot = getelementptr i8*, i8** %kernel_args, i32 4
  store i8* %arg4.ptr, i8** %arg4.slot
  %arg5.ptr = bitcast i32* %block_width.addr to i8*
  %arg5.slot = getelementptr i8*, i8** %kernel_args, i32 5
  store i8* %arg5.ptr, i8** %arg5.slot

  %pop.status = call i32 @__cudaPopCallConfiguration(%struct.dim3* %grid_dim, %struct.dim3* %block_dim, i64* %shmem_size, i8** %stream)
  %shmem.bytes = load i64, i64* %shmem_size, align 8
  %stream.raw = load i8*, i8** %stream, align 8

  ; Grid dimensions: copy the 12-byte dim3, then read bytes 0-7 as one i64
  ; (first two fields) and bytes 8-11 as an i32 (third field).
  %grid.dst = bitcast { i64, i32 }* %grid_dim.coerce to i8*
  %grid.src = bitcast %struct.dim3* %grid_dim to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %grid.dst, i8* align 8 %grid.src, i64 12, i1 false)
  %grid.lo.ptr = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 0
  %grid.lo = load i64, i64* %grid.lo.ptr, align 8
  %grid.hi.ptr = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 1
  %grid.hi = load i32, i32* %grid.hi.ptr, align 8

  ; Block dimensions: same repacking as for the grid.
  %block.dst = bitcast { i64, i32 }* %block_dim.coerce to i8*
  %block.src = bitcast %struct.dim3* %block_dim to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %block.dst, i8* align 8 %block.src, i64 12, i1 false)
  %block.lo.ptr = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 0
  %block.lo = load i64, i64* %block.lo.ptr, align 8
  %block.hi.ptr = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 1
  %block.hi = load i32, i32* %block.hi.ptr, align 8

  %stream.typed = bitcast i8* %stream.raw to %struct.CUstream_st*
  %launch.status = call i32 @cudaLaunchKernel(i8* bitcast (void (i32*, i32*, i32, i32, i32, i32)* @_Z20needle_cuda_shared_1PiS_iiii to i8*), i64 %grid.lo, i32 %grid.hi, i64 %block.lo, i32 %block.hi, i8** %kernel_args, i64 %shmem.bytes, %struct.CUstream_st* %stream.typed)
  ret void
}
|
||
|
|
||
|
declare dso_local i32 @__cudaPopCallConfiguration(%struct.dim3*, %struct.dim3*, i64*, i8**)
|
||
|
|
||
|
declare dso_local i32 @cudaLaunchKernel(i8*, i64, i32, i64, i32, i8**, i64, %struct.CUstream_st*)
|
||
|
|
||
|
; Function Attrs: argmemonly nounwind willreturn
|
||
|
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1
|
||
|
|
||
|
; Function Attrs: noinline optnone uwtable
|
||
|
; Host-side launch stub for needle_cuda_shared_2.
;
; Spills the six kernel parameters to the stack, builds an array of pointers
; to those spill slots, retrieves the pending launch configuration via
; __cudaPopCallConfiguration, and forwards everything to cudaLaunchKernel
; using this stub's own address as the kernel handle. The status values
; returned by both runtime calls are not inspected.
define dso_local void @_Z20needle_cuda_shared_2PiS_iiii(i32* %referrence, i32* %matrix_cuda, i32 %cols, i32 %penalty, i32 %i, i32 %block_width) #0 {
entry:
  ; Stack homes for the kernel parameters; the launch takes their addresses.
  %referrence.addr = alloca i32*, align 8
  %matrix_cuda.addr = alloca i32*, align 8
  %cols.addr = alloca i32, align 4
  %penalty.addr = alloca i32, align 4
  %i.addr = alloca i32, align 4
  %block_width.addr = alloca i32, align 4

  ; Launch configuration, passed by pointer to __cudaPopCallConfiguration and
  ; read back afterwards.
  %grid_dim = alloca %struct.dim3, align 8
  %block_dim = alloca %struct.dim3, align 8
  %shmem_size = alloca i64, align 8
  %stream = alloca i8*, align 8

  ; Scratch used to re-express each 12-byte dim3 as an (i64, i32) pair.
  %grid_dim.coerce = alloca { i64, i32 }, align 8
  %block_dim.coerce = alloca { i64, i32 }, align 8

  ; Six-entry array of untyped pointers, one per kernel parameter.
  %kernel_args = alloca i8*, i64 6, align 16

  store i32* %referrence, i32** %referrence.addr, align 8
  store i32* %matrix_cuda, i32** %matrix_cuda.addr, align 8
  store i32 %cols, i32* %cols.addr, align 4
  store i32 %penalty, i32* %penalty.addr, align 4
  store i32 %i, i32* %i.addr, align 4
  store i32 %block_width, i32* %block_width.addr, align 4

  ; kernel_args[k] = address of the k-th parameter's stack home.
  %arg0.ptr = bitcast i32** %referrence.addr to i8*
  %arg0.slot = getelementptr i8*, i8** %kernel_args, i32 0
  store i8* %arg0.ptr, i8** %arg0.slot
  %arg1.ptr = bitcast i32** %matrix_cuda.addr to i8*
  %arg1.slot = getelementptr i8*, i8** %kernel_args, i32 1
  store i8* %arg1.ptr, i8** %arg1.slot
  %arg2.ptr = bitcast i32* %cols.addr to i8*
  %arg2.slot = getelementptr i8*, i8** %kernel_args, i32 2
  store i8* %arg2.ptr, i8** %arg2.slot
  %arg3.ptr = bitcast i32* %penalty.addr to i8*
  %arg3.slot = getelementptr i8*, i8** %kernel_args, i32 3
  store i8* %arg3.ptr, i8** %arg3.slot
  %arg4.ptr = bitcast i32* %i.addr to i8*
  %arg4.slot = getelementptr i8*, i8** %kernel_args, i32 4
  store i8* %arg4.ptr, i8** %arg4.slot
  %arg5.ptr = bitcast i32* %block_width.addr to i8*
  %arg5.slot = getelementptr i8*, i8** %kernel_args, i32 5
  store i8* %arg5.ptr, i8** %arg5.slot

  %pop.status = call i32 @__cudaPopCallConfiguration(%struct.dim3* %grid_dim, %struct.dim3* %block_dim, i64* %shmem_size, i8** %stream)
  %shmem.bytes = load i64, i64* %shmem_size, align 8
  %stream.raw = load i8*, i8** %stream, align 8

  ; Grid dimensions: copy the 12-byte dim3, then read bytes 0-7 as one i64
  ; (first two fields) and bytes 8-11 as an i32 (third field).
  %grid.dst = bitcast { i64, i32 }* %grid_dim.coerce to i8*
  %grid.src = bitcast %struct.dim3* %grid_dim to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %grid.dst, i8* align 8 %grid.src, i64 12, i1 false)
  %grid.lo.ptr = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 0
  %grid.lo = load i64, i64* %grid.lo.ptr, align 8
  %grid.hi.ptr = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 1
  %grid.hi = load i32, i32* %grid.hi.ptr, align 8

  ; Block dimensions: same repacking as for the grid.
  %block.dst = bitcast { i64, i32 }* %block_dim.coerce to i8*
  %block.src = bitcast %struct.dim3* %block_dim to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %block.dst, i8* align 8 %block.src, i64 12, i1 false)
  %block.lo.ptr = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 0
  %block.lo = load i64, i64* %block.lo.ptr, align 8
  %block.hi.ptr = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 1
  %block.hi = load i32, i32* %block.hi.ptr, align 8

  %stream.typed = bitcast i8* %stream.raw to %struct.CUstream_st*
  %launch.status = call i32 @cudaLaunchKernel(i8* bitcast (void (i32*, i32*, i32, i32, i32, i32)* @_Z20needle_cuda_shared_2PiS_iiii to i8*), i64 %grid.lo, i32 %grid.hi, i64 %block.lo, i32 %block.hi, i8** %kernel_args, i64 %shmem.bytes, %struct.CUstream_st* %stream.typed)
  ret void
}
|
||
|
|
||
|
; Function Attrs: noinline nounwind optnone uwtable
|
||
|
; maximum(a, b, c): returns the largest of three signed 32-bit integers.
;
; Evaluated in two steps, each using a signed "less than or equal" compare:
;   k      = (a <= b) ? b : a
;   result = (k <= c) ? c : k
define dso_local i32 @_Z7maximumiii(i32 %a, i32 %b, i32 %c) #2 {
entry:
  ; Larger of the first two operands.
  %a.le.b = icmp sle i32 %a, %b
  %k = select i1 %a.le.b, i32 %b, i32 %a
  ; Larger of that intermediate value and the third operand.
  %k.le.c = icmp sle i32 %k, %c
  %largest = select i1 %k.le.c, i32 %c, i32 %k
  ret i32 %largest
}
|
||
|
|
||
|
; Function Attrs: noinline nounwind optnone uwtable
|
||
|
; gettime(): returns the current time from gettimeofday as a double, computed
; as tv_sec + tv_usec * 1e-6.
;
; The status returned by gettimeofday is not inspected. The timezone argument
; is passed as null.
define dso_local double @_Z7gettimev() #2 {
entry:
  %now = alloca %struct.timeval, align 8
  %status = call i32 @gettimeofday(%struct.timeval* %now, %struct.timezone* null) #8

  ; Read both fields of the timeval filled in by the call above.
  %sec.ptr = getelementptr inbounds %struct.timeval, %struct.timeval* %now, i32 0, i32 0
  %usec.ptr = getelementptr inbounds %struct.timeval, %struct.timeval* %now, i32 0, i32 1
  %sec = load i64, i64* %sec.ptr, align 8
  %usec = load i64, i64* %usec.ptr, align 8

  %sec.f = sitofp i64 %sec to double
  %usec.f = sitofp i64 %usec to double
  ; 0x3EB0C6F7A0B5ED8D is the double closest to 1e-6.
  %frac = fmul contract double %usec.f, 0x3EB0C6F7A0B5ED8D
  %total = fadd contract double %sec.f, %frac
  ret double %total
}
|
||
|
|
||
|
; Function Attrs: nounwind
|
||
|
declare dso_local i32 @gettimeofday(%struct.timeval*, %struct.timezone*) #3
|
||
|
|
||
|
; Function Attrs: noinline norecurse optnone uwtable
|
||
|
; Program entry point.
;
; Calls cudaSetDevice(0), prints the "WG size of kernel" line with the
; constant 16, hands argc/argv to runTest, and returns 0. The values returned
; by cudaSetDevice and printf are not inspected.
define dso_local i32 @main(i32 %argc, i8** %argv) #4 {
entry:
  %dev.status = call i32 @cudaSetDevice(i32 0)
  %printed = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([25 x i8], [25 x i8]* @.str, i64 0, i64 0), i32 16)
  ; All remaining work happens in runTest(argc, argv).
  call void @_Z7runTestiPPc(i32 %argc, i8** %argv)
  ret i32 0
}
|
||
|
|
||
|
declare dso_local i32 @cudaSetDevice(i32) #5
|
||
|
|
||
|
declare dso_local i32 @printf(i8*, ...) #5
|
||
|
|
||
|
; Function Attrs: noinline optnone uwtable
|
||
|
define dso_local void @_Z7runTestiPPc(i32 %argc, i8** %argv) #0 {
|
||
|
entry:
|
||
|
%argc.addr = alloca i32, align 4
|
||
|
%argv.addr = alloca i8**, align 8
|
||
|
%max_rows = alloca i32, align 4
|
||
|
%max_cols = alloca i32, align 4
|
||
|
%penalty = alloca i32, align 4
|
||
|
%input_itemsets = alloca i32*, align 8
|
||
|
%output_itemsets = alloca i32*, align 8
|
||
|
%referrence = alloca i32*, align 8
|
||
|
%matrix_cuda = alloca i32*, align 8
|
||
|
%referrence_cuda = alloca i32*, align 8
|
||
|
%size = alloca i32, align 4
|
||
|
%i = alloca i32, align 4
|
||
|
%j = alloca i32, align 4
|
||
|
%i36 = alloca i32, align 4
|
||
|
%j49 = alloca i32, align 4
|
||
|
%i61 = alloca i32, align 4
|
||
|
%j65 = alloca i32, align 4
|
||
|
%i88 = alloca i32, align 4
|
||
|
%j99 = alloca i32, align 4
|
||
|
%dimGrid = alloca %struct.dim3, align 4
|
||
|
%dimBlock = alloca %struct.dim3, align 4
|
||
|
%block_width = alloca i32, align 4
|
||
|
%i126 = alloca i32, align 4
|
||
|
%agg.tmp = alloca %struct.dim3, align 4
|
||
|
%agg.tmp130 = alloca %struct.dim3, align 4
|
||
|
%agg.tmp.coerce = alloca { i64, i32 }, align 4
|
||
|
%agg.tmp130.coerce = alloca { i64, i32 }, align 4
|
||
|
%i142 = alloca i32, align 4
|
||
|
%agg.tmp149 = alloca %struct.dim3, align 4
|
||
|
%agg.tmp150 = alloca %struct.dim3, align 4
|
||
|
%agg.tmp149.coerce = alloca { i64, i32 }, align 4
|
||
|
%agg.tmp150.coerce = alloca { i64, i32 }, align 4
|
||
|
%fpo = alloca %struct._IO_FILE*, align 8
|
||
|
%i163 = alloca i32, align 4
|
||
|
%j165 = alloca i32, align 4
|
||
|
%nw = alloca i32, align 4
|
||
|
%n = alloca i32, align 4
|
||
|
%w = alloca i32, align 4
|
||
|
%traceback = alloca i32, align 4
|
||
|
%new_nw = alloca i32, align 4
|
||
|
%new_w = alloca i32, align 4
|
||
|
%new_n = alloca i32, align 4
|
||
|
store i32 %argc, i32* %argc.addr, align 4
|
||
|
store i8** %argv, i8*** %argv.addr, align 8
|
||
|
%0 = load i32, i32* %argc.addr, align 4
|
||
|
%cmp = icmp eq i32 %0, 3
|
||
|
br i1 %cmp, label %if.then, label %if.else
|
||
|
|
||
|
if.then: ; preds = %entry
|
||
|
%1 = load i8**, i8*** %argv.addr, align 8
|
||
|
%arrayidx = getelementptr inbounds i8*, i8** %1, i64 1
|
||
|
%2 = load i8*, i8** %arrayidx, align 8
|
||
|
%call = call i32 @atoi(i8* %2) #9
|
||
|
store i32 %call, i32* %max_rows, align 4
|
||
|
%3 = load i8**, i8*** %argv.addr, align 8
|
||
|
%arrayidx1 = getelementptr inbounds i8*, i8** %3, i64 1
|
||
|
%4 = load i8*, i8** %arrayidx1, align 8
|
||
|
%call2 = call i32 @atoi(i8* %4) #9
|
||
|
store i32 %call2, i32* %max_cols, align 4
|
||
|
%5 = load i8**, i8*** %argv.addr, align 8
|
||
|
%arrayidx3 = getelementptr inbounds i8*, i8** %5, i64 2
|
||
|
%6 = load i8*, i8** %arrayidx3, align 8
|
||
|
%call4 = call i32 @atoi(i8* %6) #9
|
||
|
store i32 %call4, i32* %penalty, align 4
|
||
|
br label %if.end
|
||
|
|
||
|
if.else: ; preds = %entry
|
||
|
%7 = load i32, i32* %argc.addr, align 4
|
||
|
%8 = load i8**, i8*** %argv.addr, align 8
|
||
|
call void @_Z5usageiPPc(i32 %7, i8** %8)
|
||
|
br label %if.end
|
||
|
|
||
|
if.end: ; preds = %if.else, %if.then
|
||
|
%9 = load i8**, i8*** %argv.addr, align 8
|
||
|
%arrayidx5 = getelementptr inbounds i8*, i8** %9, i64 1
|
||
|
%10 = load i8*, i8** %arrayidx5, align 8
|
||
|
%call6 = call i32 @atoi(i8* %10) #9
|
||
|
%rem = srem i32 %call6, 16
|
||
|
%cmp7 = icmp ne i32 %rem, 0
|
||
|
br i1 %cmp7, label %if.then8, label %if.end10
|
||
|
|
||
|
if.then8: ; preds = %if.end
|
||
|
%11 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
|
||
|
%call9 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %11, i8* getelementptr inbounds ([47 x i8], [47 x i8]* @.str.4, i64 0, i64 0))
|
||
|
call void @exit(i32 1) #10
|
||
|
unreachable
|
||
|
|
||
|
if.end10: ; preds = %if.end
|
||
|
%12 = load i32, i32* %max_rows, align 4
|
||
|
%add = add nsw i32 %12, 1
|
||
|
store i32 %add, i32* %max_rows, align 4
|
||
|
%13 = load i32, i32* %max_cols, align 4
|
||
|
%add11 = add nsw i32 %13, 1
|
||
|
store i32 %add11, i32* %max_cols, align 4
|
||
|
%14 = load i32, i32* %max_rows, align 4
|
||
|
%15 = load i32, i32* %max_cols, align 4
|
||
|
%mul = mul nsw i32 %14, %15
|
||
|
%conv = sext i32 %mul to i64
|
||
|
%mul12 = mul i64 %conv, 4
|
||
|
%call13 = call noalias i8* @malloc(i64 %mul12) #8
|
||
|
%16 = bitcast i8* %call13 to i32*
|
||
|
store i32* %16, i32** %referrence, align 8
|
||
|
%17 = load i32, i32* %max_rows, align 4
|
||
|
%18 = load i32, i32* %max_cols, align 4
|
||
|
%mul14 = mul nsw i32 %17, %18
|
||
|
%conv15 = sext i32 %mul14 to i64
|
||
|
%mul16 = mul i64 %conv15, 4
|
||
|
%call17 = call noalias i8* @malloc(i64 %mul16) #8
|
||
|
%19 = bitcast i8* %call17 to i32*
|
||
|
store i32* %19, i32** %input_itemsets, align 8
|
||
|
%20 = load i32, i32* %max_rows, align 4
|
||
|
%21 = load i32, i32* %max_cols, align 4
|
||
|
%mul18 = mul nsw i32 %20, %21
|
||
|
%conv19 = sext i32 %mul18 to i64
|
||
|
%mul20 = mul i64 %conv19, 4
|
||
|
%call21 = call noalias i8* @malloc(i64 %mul20) #8
|
||
|
%22 = bitcast i8* %call21 to i32*
|
||
|
store i32* %22, i32** %output_itemsets, align 8
|
||
|
%23 = load i32*, i32** %input_itemsets, align 8
|
||
|
%tobool = icmp ne i32* %23, null
|
||
|
br i1 %tobool, label %if.end24, label %if.then22
|
||
|
|
||
|
if.then22: ; preds = %if.end10
|
||
|
%24 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
|
||
|
%call23 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %24, i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str.5, i64 0, i64 0))
|
||
|
br label %if.end24
|
||
|
|
||
|
if.end24: ; preds = %if.then22, %if.end10
|
||
|
call void @srand(i32 7) #8
|
||
|
store i32 0, i32* %i, align 4
|
||
|
br label %for.cond
|
||
|
|
||
|
for.cond: ; preds = %for.inc32, %if.end24
|
||
|
%25 = load i32, i32* %i, align 4
|
||
|
%26 = load i32, i32* %max_cols, align 4
|
||
|
%cmp25 = icmp slt i32 %25, %26
|
||
|
br i1 %cmp25, label %for.body, label %for.end34
|
||
|
|
||
|
for.body: ; preds = %for.cond
|
||
|
store i32 0, i32* %j, align 4
|
||
|
br label %for.cond26
|
||
|
|
||
|
for.cond26: ; preds = %for.inc, %for.body
|
||
|
%27 = load i32, i32* %j, align 4
|
||
|
%28 = load i32, i32* %max_rows, align 4
|
||
|
%cmp27 = icmp slt i32 %27, %28
|
||
|
br i1 %cmp27, label %for.body28, label %for.end
|
||
|
|
||
|
for.body28: ; preds = %for.cond26
|
||
|
%29 = load i32*, i32** %input_itemsets, align 8
|
||
|
%30 = load i32, i32* %i, align 4
|
||
|
%31 = load i32, i32* %max_cols, align 4
|
||
|
%mul29 = mul nsw i32 %30, %31
|
||
|
%32 = load i32, i32* %j, align 4
|
||
|
%add30 = add nsw i32 %mul29, %32
|
||
|
%idxprom = sext i32 %add30 to i64
|
||
|
%arrayidx31 = getelementptr inbounds i32, i32* %29, i64 %idxprom
|
||
|
store i32 0, i32* %arrayidx31, align 4
|
||
|
br label %for.inc
|
||
|
|
||
|
for.inc: ; preds = %for.body28
|
||
|
%33 = load i32, i32* %j, align 4
|
||
|
%inc = add nsw i32 %33, 1
|
||
|
store i32 %inc, i32* %j, align 4
|
||
|
br label %for.cond26
|
||
|
|
||
|
for.end: ; preds = %for.cond26
|
||
|
br label %for.inc32
|
||
|
|
||
|
for.inc32: ; preds = %for.end
|
||
|
%34 = load i32, i32* %i, align 4
|
||
|
%inc33 = add nsw i32 %34, 1
|
||
|
store i32 %inc33, i32* %i, align 4
|
||
|
br label %for.cond
|
||
|
|
||
|
for.end34: ; preds = %for.cond
|
||
|
%call35 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.6, i64 0, i64 0))
|
||
|
store i32 1, i32* %i36, align 4
|
||
|
br label %for.cond37
|
||
|
|
||
|
for.cond37: ; preds = %for.inc46, %for.end34
|
||
|
%35 = load i32, i32* %i36, align 4
|
||
|
%36 = load i32, i32* %max_rows, align 4
|
||
|
%cmp38 = icmp slt i32 %35, %36
|
||
|
br i1 %cmp38, label %for.body39, label %for.end48
|
||
|
|
||
|
for.body39: ; preds = %for.cond37
|
||
|
%call40 = call i32 @rand() #8
|
||
|
%rem41 = srem i32 %call40, 10
|
||
|
%add42 = add nsw i32 %rem41, 1
|
||
|
%37 = load i32*, i32** %input_itemsets, align 8
|
||
|
%38 = load i32, i32* %i36, align 4
|
||
|
%39 = load i32, i32* %max_cols, align 4
|
||
|
%mul43 = mul nsw i32 %38, %39
|
||
|
%idxprom44 = sext i32 %mul43 to i64
|
||
|
%arrayidx45 = getelementptr inbounds i32, i32* %37, i64 %idxprom44
|
||
|
store i32 %add42, i32* %arrayidx45, align 4
|
||
|
br label %for.inc46
|
||
|
|
||
|
for.inc46: ; preds = %for.body39
|
||
|
%40 = load i32, i32* %i36, align 4
|
||
|
%inc47 = add nsw i32 %40, 1
|
||
|
store i32 %inc47, i32* %i36, align 4
|
||
|
br label %for.cond37
|
||
|
|
||
|
for.end48: ; preds = %for.cond37
|
||
|
store i32 1, i32* %j49, align 4
|
||
|
br label %for.cond50
|
||
|
|
||
|
for.cond50: ; preds = %for.inc58, %for.end48
|
||
|
%41 = load i32, i32* %j49, align 4
|
||
|
%42 = load i32, i32* %max_cols, align 4
|
||
|
%cmp51 = icmp slt i32 %41, %42
|
||
|
br i1 %cmp51, label %for.body52, label %for.end60
|
||
|
|
||
|
for.body52: ; preds = %for.cond50
|
||
|
%call53 = call i32 @rand() #8
|
||
|
%rem54 = srem i32 %call53, 10
|
||
|
%add55 = add nsw i32 %rem54, 1
|
||
|
%43 = load i32*, i32** %input_itemsets, align 8
|
||
|
%44 = load i32, i32* %j49, align 4
|
||
|
%idxprom56 = sext i32 %44 to i64
|
||
|
%arrayidx57 = getelementptr inbounds i32, i32* %43, i64 %idxprom56
|
||
|
store i32 %add55, i32* %arrayidx57, align 4
|
||
|
br label %for.inc58
|
||
|
|
||
|
for.inc58: ; preds = %for.body52
|
||
|
%45 = load i32, i32* %j49, align 4
|
||
|
%inc59 = add nsw i32 %45, 1
|
||
|
store i32 %inc59, i32* %j49, align 4
|
||
|
br label %for.cond50
|
||
|
|
||
|
for.end60: ; preds = %for.cond50
|
||
|
store i32 1, i32* %i61, align 4
|
||
|
br label %for.cond62
|
||
|
|
||
|
for.cond62: ; preds = %for.inc85, %for.end60
|
||
|
%46 = load i32, i32* %i61, align 4
|
||
|
%47 = load i32, i32* %max_cols, align 4
|
||
|
%cmp63 = icmp slt i32 %46, %47
|
||
|
br i1 %cmp63, label %for.body64, label %for.end87
|
||
|
|
||
|
for.body64: ; preds = %for.cond62
|
||
|
store i32 1, i32* %j65, align 4
|
||
|
br label %for.cond66
|
||
|
|
||
|
for.cond66: ; preds = %for.inc82, %for.body64
|
||
|
%48 = load i32, i32* %j65, align 4
|
||
|
%49 = load i32, i32* %max_rows, align 4
|
||
|
%cmp67 = icmp slt i32 %48, %49
|
||
|
br i1 %cmp67, label %for.body68, label %for.end84
|
||
|
|
||
|
for.body68: ; preds = %for.cond66
|
||
|
%50 = load i32*, i32** %input_itemsets, align 8
|
||
|
%51 = load i32, i32* %i61, align 4
|
||
|
%52 = load i32, i32* %max_cols, align 4
|
||
|
%mul69 = mul nsw i32 %51, %52
|
||
|
%idxprom70 = sext i32 %mul69 to i64
|
||
|
%arrayidx71 = getelementptr inbounds i32, i32* %50, i64 %idxprom70
|
||
|
%53 = load i32, i32* %arrayidx71, align 4
|
||
|
%idxprom72 = sext i32 %53 to i64
|
||
|
%arrayidx73 = getelementptr inbounds [24 x [24 x i32]], [24 x [24 x i32]]* @blosum62, i64 0, i64 %idxprom72
|
||
|
%54 = load i32*, i32** %input_itemsets, align 8
|
||
|
%55 = load i32, i32* %j65, align 4
|
||
|
%idxprom74 = sext i32 %55 to i64
|
||
|
%arrayidx75 = getelementptr inbounds i32, i32* %54, i64 %idxprom74
|
||
|
%56 = load i32, i32* %arrayidx75, align 4
|
||
|
%idxprom76 = sext i32 %56 to i64
|
||
|
%arrayidx77 = getelementptr inbounds [24 x i32], [24 x i32]* %arrayidx73, i64 0, i64 %idxprom76
|
||
|
%57 = load i32, i32* %arrayidx77, align 4
|
||
|
%58 = load i32*, i32** %referrence, align 8
|
||
|
%59 = load i32, i32* %i61, align 4
|
||
|
%60 = load i32, i32* %max_cols, align 4
|
||
|
%mul78 = mul nsw i32 %59, %60
|
||
|
%61 = load i32, i32* %j65, align 4
|
||
|
%add79 = add nsw i32 %mul78, %61
|
||
|
%idxprom80 = sext i32 %add79 to i64
|
||
|
%arrayidx81 = getelementptr inbounds i32, i32* %58, i64 %idxprom80
|
||
|
store i32 %57, i32* %arrayidx81, align 4
|
||
|
br label %for.inc82
|
||
|
|
||
|
for.inc82: ; preds = %for.body68
|
||
|
%62 = load i32, i32* %j65, align 4
|
||
|
%inc83 = add nsw i32 %62, 1
|
||
|
store i32 %inc83, i32* %j65, align 4
|
||
|
br label %for.cond66
|
||
|
|
||
|
for.end84: ; preds = %for.cond66
|
||
|
br label %for.inc85
|
||
|
|
||
|
for.inc85: ; preds = %for.end84
|
||
|
%63 = load i32, i32* %i61, align 4
|
||
|
%inc86 = add nsw i32 %63, 1
|
||
|
store i32 %inc86, i32* %i61, align 4
|
||
|
br label %for.cond62
|
||
|
|
||
|
for.end87: ; preds = %for.cond62
|
||
|
store i32 1, i32* %i88, align 4
|
||
|
br label %for.cond89
|
||
|
|
||
|
for.cond89: ; preds = %for.inc96, %for.end87
|
||
|
%64 = load i32, i32* %i88, align 4
|
||
|
%65 = load i32, i32* %max_rows, align 4
|
||
|
%cmp90 = icmp slt i32 %64, %65
|
||
|
br i1 %cmp90, label %for.body91, label %for.end98
|
||
|
|
||
|
for.body91: ; preds = %for.cond89
|
||
|
%66 = load i32, i32* %i88, align 4
|
||
|
%sub = sub nsw i32 0, %66
|
||
|
%67 = load i32, i32* %penalty, align 4
|
||
|
%mul92 = mul nsw i32 %sub, %67
|
||
|
%68 = load i32*, i32** %input_itemsets, align 8
|
||
|
%69 = load i32, i32* %i88, align 4
|
||
|
%70 = load i32, i32* %max_cols, align 4
|
||
|
%mul93 = mul nsw i32 %69, %70
|
||
|
%idxprom94 = sext i32 %mul93 to i64
|
||
|
%arrayidx95 = getelementptr inbounds i32, i32* %68, i64 %idxprom94
|
||
|
store i32 %mul92, i32* %arrayidx95, align 4
|
||
|
br label %for.inc96
|
||
|
|
||
|
for.inc96: ; preds = %for.body91
|
||
|
%71 = load i32, i32* %i88, align 4
|
||
|
%inc97 = add nsw i32 %71, 1
|
||
|
store i32 %inc97, i32* %i88, align 4
|
||
|
br label %for.cond89
|
||
|
|
||
|
for.end98: ; preds = %for.cond89
|
||
|
store i32 1, i32* %j99, align 4
|
||
|
br label %for.cond100
|
||
|
|
||
|
for.cond100: ; preds = %for.inc107, %for.end98
|
||
|
%72 = load i32, i32* %j99, align 4
|
||
|
%73 = load i32, i32* %max_cols, align 4
|
||
|
%cmp101 = icmp slt i32 %72, %73
|
||
|
br i1 %cmp101, label %for.body102, label %for.end109
|
||
|
|
||
|
for.body102: ; preds = %for.cond100
|
||
|
%74 = load i32, i32* %j99, align 4
|
||
|
%sub103 = sub nsw i32 0, %74
|
||
|
%75 = load i32, i32* %penalty, align 4
|
||
|
%mul104 = mul nsw i32 %sub103, %75
|
||
|
%76 = load i32*, i32** %input_itemsets, align 8
|
||
|
%77 = load i32, i32* %j99, align 4
|
||
|
%idxprom105 = sext i32 %77 to i64
|
||
|
%arrayidx106 = getelementptr inbounds i32, i32* %76, i64 %idxprom105
|
||
|
store i32 %mul104, i32* %arrayidx106, align 4
|
||
|
br label %for.inc107
|
||
|
|
||
|
for.inc107: ; preds = %for.body102
|
||
|
%78 = load i32, i32* %j99, align 4
|
||
|
%inc108 = add nsw i32 %78, 1
|
||
|
store i32 %inc108, i32* %j99, align 4
|
||
|
br label %for.cond100
|
||
|
|
||
|
for.end109: ; preds = %for.cond100
|
||
|
%79 = load i32, i32* %max_cols, align 4
|
||
|
%80 = load i32, i32* %max_rows, align 4
|
||
|
%mul110 = mul nsw i32 %79, %80
|
||
|
store i32 %mul110, i32* %size, align 4
|
||
|
%81 = bitcast i32** %referrence_cuda to i8**
|
||
|
%82 = load i32, i32* %size, align 4
|
||
|
%conv111 = sext i32 %82 to i64
|
||
|
%mul112 = mul i64 4, %conv111
|
||
|
%call113 = call i32 @cudaMalloc(i8** %81, i64 %mul112)
|
||
|
%83 = bitcast i32** %matrix_cuda to i8**
|
||
|
%84 = load i32, i32* %size, align 4
|
||
|
%conv114 = sext i32 %84 to i64
|
||
|
%mul115 = mul i64 4, %conv114
|
||
|
%call116 = call i32 @cudaMalloc(i8** %83, i64 %mul115)
|
||
|
%85 = load i32*, i32** %referrence_cuda, align 8
|
||
|
%86 = bitcast i32* %85 to i8*
|
||
|
%87 = load i32*, i32** %referrence, align 8
|
||
|
%88 = bitcast i32* %87 to i8*
|
||
|
%89 = load i32, i32* %size, align 4
|
||
|
%conv117 = sext i32 %89 to i64
|
||
|
%mul118 = mul i64 4, %conv117
|
||
|
%call119 = call i32 @cudaMemcpy(i8* %86, i8* %88, i64 %mul118, i32 1)
|
||
|
%90 = load i32*, i32** %matrix_cuda, align 8
|
||
|
%91 = bitcast i32* %90 to i8*
|
||
|
%92 = load i32*, i32** %input_itemsets, align 8
|
||
|
%93 = bitcast i32* %92 to i8*
|
||
|
%94 = load i32, i32* %size, align 4
|
||
|
%conv120 = sext i32 %94 to i64
|
||
|
%mul121 = mul i64 4, %conv120
|
||
|
%call122 = call i32 @cudaMemcpy(i8* %91, i8* %93, i64 %mul121, i32 1)
|
||
|
%call123 = call i32 @cudaDeviceSynchronize()
|
||
|
call void @_ZN4dim3C2Ejjj(%struct.dim3* %dimGrid, i32 1, i32 1, i32 1)
|
||
|
call void @_ZN4dim3C2Ejjj(%struct.dim3* %dimBlock, i32 16, i32 1, i32 1)
|
||
|
%95 = load i32, i32* %max_cols, align 4
|
||
|
%sub124 = sub nsw i32 %95, 1
|
||
|
%div = sdiv i32 %sub124, 16
|
||
|
store i32 %div, i32* %block_width, align 4
|
||
|
%call125 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([28 x i8], [28 x i8]* @.str.7, i64 0, i64 0))
|
||
|
store i32 1, i32* %i126, align 4
|
||
|
br label %for.cond127
|
||
|
|
||
|
for.cond127: ; preds = %for.inc134, %for.end109
|
||
|
%96 = load i32, i32* %i126, align 4
|
||
|
%97 = load i32, i32* %block_width, align 4
|
||
|
%cmp128 = icmp sle i32 %96, %97
|
||
|
br i1 %cmp128, label %for.body129, label %for.end136
|
||
|
|
||
|
for.body129: ; preds = %for.cond127
|
||
|
%98 = load i32, i32* %i126, align 4
|
||
|
%x = getelementptr inbounds %struct.dim3, %struct.dim3* %dimGrid, i32 0, i32 0
|
||
|
store i32 %98, i32* %x, align 4
|
||
|
%y = getelementptr inbounds %struct.dim3, %struct.dim3* %dimGrid, i32 0, i32 1
|
||
|
store i32 1, i32* %y, align 4
|
||
|
%99 = bitcast %struct.dim3* %agg.tmp to i8*
|
||
|
%100 = bitcast %struct.dim3* %dimGrid to i8*
|
||
|
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %99, i8* align 4 %100, i64 12, i1 false)
|
||
|
%101 = bitcast %struct.dim3* %agg.tmp130 to i8*
|
||
|
%102 = bitcast %struct.dim3* %dimBlock to i8*
|
||
|
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %101, i8* align 4 %102, i64 12, i1 false)
|
||
|
%103 = bitcast { i64, i32 }* %agg.tmp.coerce to i8*
|
||
|
%104 = bitcast %struct.dim3* %agg.tmp to i8*
|
||
|
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %103, i8* align 4 %104, i64 12, i1 false)
|
||
|
%105 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 0
|
||
|
%106 = load i64, i64* %105, align 4
|
||
|
%107 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 1
|
||
|
%108 = load i32, i32* %107, align 4
|
||
|
%109 = bitcast { i64, i32 }* %agg.tmp130.coerce to i8*
|
||
|
%110 = bitcast %struct.dim3* %agg.tmp130 to i8*
|
||
|
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %109, i8* align 4 %110, i64 12, i1 false)
|
||
|
%111 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp130.coerce, i32 0, i32 0
|
||
|
%112 = load i64, i64* %111, align 4
|
||
|
%113 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp130.coerce, i32 0, i32 1
|
||
|
%114 = load i32, i32* %113, align 4
|
||
|
%call131 = call i32 @__cudaPushCallConfiguration(i64 %106, i32 %108, i64 %112, i32 %114, i64 0, i8* null)
|
||
|
%tobool132 = icmp ne i32 %call131, 0
|
||
|
br i1 %tobool132, label %kcall.end, label %kcall.configok
|
||
|
|
||
|
kcall.configok: ; preds = %for.body129
|
||
|
%115 = load i32*, i32** %referrence_cuda, align 8
|
||
|
%116 = load i32*, i32** %matrix_cuda, align 8
|
||
|
%117 = load i32, i32* %max_cols, align 4
|
||
|
%118 = load i32, i32* %penalty, align 4
|
||
|
%119 = load i32, i32* %i126, align 4
|
||
|
%120 = load i32, i32* %block_width, align 4
|
||
|
call void @_Z20needle_cuda_shared_1PiS_iiii(i32* %115, i32* %116, i32 %117, i32 %118, i32 %119, i32 %120)
|
||
|
br label %kcall.end
|
||
|
|
||
|
kcall.end: ; preds = %kcall.configok, %for.body129
|
||
|
%call133 = call i32 @cudaDeviceSynchronize()
|
||
|
br label %for.inc134
|
||
|
|
||
|
for.inc134: ; preds = %kcall.end
|
||
|
%121 = load i32, i32* %i126, align 4
|
||
|
%inc135 = add nsw i32 %121, 1
|
||
|
store i32 %inc135, i32* %i126, align 4
|
||
|
br label %for.cond127
|
||
|
|
||
|
for.end136: ; preds = %for.cond127
|
||
|
%122 = load i32*, i32** %output_itemsets, align 8
|
||
|
%123 = bitcast i32* %122 to i8*
|
||
|
%124 = load i32*, i32** %matrix_cuda, align 8
|
||
|
%125 = bitcast i32* %124 to i8*
|
||
|
%126 = load i32, i32* %size, align 4
|
||
|
%conv137 = sext i32 %126 to i64
|
||
|
%mul138 = mul i64 4, %conv137
|
||
|
%call139 = call i32 @cudaMemcpy(i8* %123, i8* %125, i64 %mul138, i32 2)
|
||
|
%call140 = call i32 @cudaDeviceSynchronize()
|
||
|
%call141 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([32 x i8], [32 x i8]* @.str.8, i64 0, i64 0))
|
||
|
%127 = load i32, i32* %block_width, align 4
|
||
|
%sub143 = sub nsw i32 %127, 1
|
||
|
store i32 %sub143, i32* %i142, align 4
|
||
|
br label %for.cond144
|
||
|
|
||
|
for.cond144: ; preds = %for.inc156, %for.end136
|
||
|
%128 = load i32, i32* %i142, align 4
|
||
|
%cmp145 = icmp sge i32 %128, 1
|
||
|
br i1 %cmp145, label %for.body146, label %for.end157
|
||
|
|
||
|
for.body146: ; preds = %for.cond144
|
||
|
%129 = load i32, i32* %i142, align 4
|
||
|
%x147 = getelementptr inbounds %struct.dim3, %struct.dim3* %dimGrid, i32 0, i32 0
|
||
|
store i32 %129, i32* %x147, align 4
|
||
|
%y148 = getelementptr inbounds %struct.dim3, %struct.dim3* %dimGrid, i32 0, i32 1
|
||
|
store i32 1, i32* %y148, align 4
|
||
|
%130 = bitcast %struct.dim3* %agg.tmp149 to i8*
|
||
|
%131 = bitcast %struct.dim3* %dimGrid to i8*
|
||
|
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %130, i8* align 4 %131, i64 12, i1 false)
|
||
|
%132 = bitcast %struct.dim3* %agg.tmp150 to i8*
|
||
|
%133 = bitcast %struct.dim3* %dimBlock to i8*
|
||
|
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %132, i8* align 4 %133, i64 12, i1 false)
|
||
|
%134 = bitcast { i64, i32 }* %agg.tmp149.coerce to i8*
|
||
|
%135 = bitcast %struct.dim3* %agg.tmp149 to i8*
|
||
|
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %134, i8* align 4 %135, i64 12, i1 false)
|
||
|
%136 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp149.coerce, i32 0, i32 0
|
||
|
%137 = load i64, i64* %136, align 4
|
||
|
%138 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp149.coerce, i32 0, i32 1
|
||
|
%139 = load i32, i32* %138, align 4
|
||
|
%140 = bitcast { i64, i32 }* %agg.tmp150.coerce to i8*
|
||
|
%141 = bitcast %struct.dim3* %agg.tmp150 to i8*
|
||
|
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %140, i8* align 4 %141, i64 12, i1 false)
|
||
|
%142 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp150.coerce, i32 0, i32 0
|
||
|
%143 = load i64, i64* %142, align 4
|
||
|
%144 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp150.coerce, i32 0, i32 1
|
||
|
%145 = load i32, i32* %144, align 4
|
||
|
%call151 = call i32 @__cudaPushCallConfiguration(i64 %137, i32 %139, i64 %143, i32 %145, i64 0, i8* null)
|
||
|
%tobool152 = icmp ne i32 %call151, 0
|
||
|
br i1 %tobool152, label %kcall.end154, label %kcall.configok153
|
||
|
|
||
|
kcall.configok153: ; preds = %for.body146
|
||
|
%146 = load i32*, i32** %referrence_cuda, align 8
|
||
|
%147 = load i32*, i32** %matrix_cuda, align 8
|
||
|
%148 = load i32, i32* %max_cols, align 4
|
||
|
%149 = load i32, i32* %penalty, align 4
|
||
|
%150 = load i32, i32* %i142, align 4
|
||
|
%151 = load i32, i32* %block_width, align 4
|
||
|
call void @_Z20needle_cuda_shared_2PiS_iiii(i32* %146, i32* %147, i32 %148, i32 %149, i32 %150, i32 %151)
|
||
|
br label %kcall.end154
|
||
|
|
||
|
kcall.end154: ; preds = %kcall.configok153, %for.body146
|
||
|
%call155 = call i32 @cudaDeviceSynchronize()
|
||
|
br label %for.inc156
|
||
|
|
||
|
for.inc156: ; preds = %kcall.end154
|
||
|
%152 = load i32, i32* %i142, align 4
|
||
|
%dec = add nsw i32 %152, -1
|
||
|
store i32 %dec, i32* %i142, align 4
|
||
|
br label %for.cond144
|
||
|
|
||
|
for.end157: ; preds = %for.cond144
|
||
|
%153 = load i32*, i32** %output_itemsets, align 8
|
||
|
%154 = bitcast i32* %153 to i8*
|
||
|
%155 = load i32*, i32** %matrix_cuda, align 8
|
||
|
%156 = bitcast i32* %155 to i8*
|
||
|
%157 = load i32, i32* %size, align 4
|
||
|
%conv158 = sext i32 %157 to i64
|
||
|
%mul159 = mul i64 4, %conv158
|
||
|
%call160 = call i32 @cudaMemcpy(i8* %154, i8* %156, i64 %mul159, i32 2)
|
||
|
%call161 = call %struct._IO_FILE* @fopen(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.9, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.10, i64 0, i64 0))
|
||
|
store %struct._IO_FILE* %call161, %struct._IO_FILE** %fpo, align 8
|
||
|
%158 = load %struct._IO_FILE*, %struct._IO_FILE** %fpo, align 8
|
||
|
%call162 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %158, i8* getelementptr inbounds ([28 x i8], [28 x i8]* @.str.11, i64 0, i64 0))
|
||
|
%159 = load i32, i32* %max_rows, align 4
|
||
|
%sub164 = sub nsw i32 %159, 2
|
||
|
store i32 %sub164, i32* %i163, align 4
|
||
|
%160 = load i32, i32* %max_rows, align 4
|
||
|
%sub166 = sub nsw i32 %160, 2
|
||
|
store i32 %sub166, i32* %j165, align 4
|
||
|
br label %for.cond167
|
||
|
|
||
|
for.cond167: ; preds = %if.end260, %if.then255, %if.then251, %if.then246, %for.end157
|
||
|
%161 = load i32, i32* %i163, align 4
|
||
|
%cmp168 = icmp sge i32 %161, 0
|
||
|
%162 = load i32, i32* %j165, align 4
|
||
|
%cmp169 = icmp sge i32 %162, 0
|
||
|
br i1 %cmp169, label %for.body170, label %for.end261
|
||
|
|
||
|
for.body170: ; preds = %for.cond167
|
||
|
%163 = load i32, i32* %i163, align 4
|
||
|
%164 = load i32, i32* %max_rows, align 4
|
||
|
%sub171 = sub nsw i32 %164, 2
|
||
|
%cmp172 = icmp eq i32 %163, %sub171
|
||
|
br i1 %cmp172, label %land.lhs.true, label %if.end181
|
||
|
|
||
|
land.lhs.true: ; preds = %for.body170
|
||
|
%165 = load i32, i32* %j165, align 4
|
||
|
%166 = load i32, i32* %max_rows, align 4
|
||
|
%sub173 = sub nsw i32 %166, 2
|
||
|
%cmp174 = icmp eq i32 %165, %sub173
|
||
|
br i1 %cmp174, label %if.then175, label %if.end181
|
||
|
|
||
|
if.then175: ; preds = %land.lhs.true
|
||
|
%167 = load %struct._IO_FILE*, %struct._IO_FILE** %fpo, align 8
|
||
|
%168 = load i32*, i32** %output_itemsets, align 8
|
||
|
%169 = load i32, i32* %i163, align 4
|
||
|
%170 = load i32, i32* %max_cols, align 4
|
||
|
%mul176 = mul nsw i32 %169, %170
|
||
|
%171 = load i32, i32* %j165, align 4
|
||
|
%add177 = add nsw i32 %mul176, %171
|
||
|
%idxprom178 = sext i32 %add177 to i64
|
||
|
%arrayidx179 = getelementptr inbounds i32, i32* %168, i64 %idxprom178
|
||
|
%172 = load i32, i32* %arrayidx179, align 4
|
||
|
%call180 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %167, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.12, i64 0, i64 0), i32 %172)
|
||
|
br label %if.end181
|
||
|
|
||
|
if.end181: ; preds = %if.then175, %land.lhs.true, %for.body170
|
||
|
%173 = load i32, i32* %i163, align 4
|
||
|
%cmp182 = icmp eq i32 %173, 0
|
||
|
br i1 %cmp182, label %land.lhs.true183, label %if.end186
|
||
|
|
||
|
land.lhs.true183: ; preds = %if.end181
|
||
|
%174 = load i32, i32* %j165, align 4
|
||
|
%cmp184 = icmp eq i32 %174, 0
|
||
|
br i1 %cmp184, label %if.then185, label %if.end186
|
||
|
|
||
|
if.then185: ; preds = %land.lhs.true183
|
||
|
br label %for.end261
|
||
|
|
||
|
if.end186: ; preds = %land.lhs.true183, %if.end181
|
||
|
%175 = load i32, i32* %i163, align 4
|
||
|
%cmp187 = icmp sgt i32 %175, 0
|
||
|
br i1 %cmp187, label %land.lhs.true188, label %if.else207
|
||
|
|
||
|
land.lhs.true188: ; preds = %if.end186
|
||
|
%176 = load i32, i32* %j165, align 4
|
||
|
%cmp189 = icmp sgt i32 %176, 0
|
||
|
br i1 %cmp189, label %if.then190, label %if.else207
|
||
|
|
||
|
if.then190: ; preds = %land.lhs.true188
|
||
|
%177 = load i32*, i32** %output_itemsets, align 8
|
||
|
%178 = load i32, i32* %i163, align 4
|
||
|
%sub191 = sub nsw i32 %178, 1
|
||
|
%179 = load i32, i32* %max_cols, align 4
|
||
|
%mul192 = mul nsw i32 %sub191, %179
|
||
|
%180 = load i32, i32* %j165, align 4
|
||
|
%add193 = add nsw i32 %mul192, %180
|
||
|
%sub194 = sub nsw i32 %add193, 1
|
||
|
%idxprom195 = sext i32 %sub194 to i64
|
||
|
%arrayidx196 = getelementptr inbounds i32, i32* %177, i64 %idxprom195
|
||
|
%181 = load i32, i32* %arrayidx196, align 4
|
||
|
store i32 %181, i32* %nw, align 4
|
||
|
%182 = load i32*, i32** %output_itemsets, align 8
|
||
|
%183 = load i32, i32* %i163, align 4
|
||
|
%184 = load i32, i32* %max_cols, align 4
|
||
|
%mul197 = mul nsw i32 %183, %184
|
||
|
%185 = load i32, i32* %j165, align 4
|
||
|
%add198 = add nsw i32 %mul197, %185
|
||
|
%sub199 = sub nsw i32 %add198, 1
|
||
|
%idxprom200 = sext i32 %sub199 to i64
|
||
|
%arrayidx201 = getelementptr inbounds i32, i32* %182, i64 %idxprom200
|
||
|
%186 = load i32, i32* %arrayidx201, align 4
|
||
|
store i32 %186, i32* %w, align 4
|
||
|
%187 = load i32*, i32** %output_itemsets, align 8
|
||
|
%188 = load i32, i32* %i163, align 4
|
||
|
%sub202 = sub nsw i32 %188, 1
|
||
|
%189 = load i32, i32* %max_cols, align 4
|
||
|
%mul203 = mul nsw i32 %sub202, %189
|
||
|
%190 = load i32, i32* %j165, align 4
|
||
|
%add204 = add nsw i32 %mul203, %190
|
||
|
%idxprom205 = sext i32 %add204 to i64
|
||
|
%arrayidx206 = getelementptr inbounds i32, i32* %187, i64 %idxprom205
|
||
|
%191 = load i32, i32* %arrayidx206, align 4
|
||
|
store i32 %191, i32* %n, align 4
|
||
|
br label %if.end226
|
||
|
|
||
|
if.else207: ; preds = %land.lhs.true188, %if.end186
|
||
|
%192 = load i32, i32* %i163, align 4
|
||
|
%cmp208 = icmp eq i32 %192, 0
|
||
|
br i1 %cmp208, label %if.then209, label %if.else215
|
||
|
|
||
|
if.then209: ; preds = %if.else207
|
||
|
store i32 -999, i32* %n, align 4
|
||
|
store i32 -999, i32* %nw, align 4
|
||
|
%193 = load i32*, i32** %output_itemsets, align 8
|
||
|
%194 = load i32, i32* %i163, align 4
|
||
|
%195 = load i32, i32* %max_cols, align 4
|
||
|
%mul210 = mul nsw i32 %194, %195
|
||
|
%196 = load i32, i32* %j165, align 4
|
||
|
%add211 = add nsw i32 %mul210, %196
|
||
|
%sub212 = sub nsw i32 %add211, 1
|
||
|
%idxprom213 = sext i32 %sub212 to i64
|
||
|
%arrayidx214 = getelementptr inbounds i32, i32* %193, i64 %idxprom213
|
||
|
%197 = load i32, i32* %arrayidx214, align 4
|
||
|
store i32 %197, i32* %w, align 4
|
||
|
br label %if.end225
|
||
|
|
||
|
if.else215: ; preds = %if.else207
|
||
|
%198 = load i32, i32* %j165, align 4
|
||
|
%cmp216 = icmp eq i32 %198, 0
|
||
|
br i1 %cmp216, label %if.then217, label %if.else223
|
||
|
|
||
|
if.then217: ; preds = %if.else215
|
||
|
store i32 -999, i32* %w, align 4
|
||
|
store i32 -999, i32* %nw, align 4
|
||
|
%199 = load i32*, i32** %output_itemsets, align 8
|
||
|
%200 = load i32, i32* %i163, align 4
|
||
|
%sub218 = sub nsw i32 %200, 1
|
||
|
%201 = load i32, i32* %max_cols, align 4
|
||
|
%mul219 = mul nsw i32 %sub218, %201
|
||
|
%202 = load i32, i32* %j165, align 4
|
||
|
%add220 = add nsw i32 %mul219, %202
|
||
|
%idxprom221 = sext i32 %add220 to i64
|
||
|
%arrayidx222 = getelementptr inbounds i32, i32* %199, i64 %idxprom221
|
||
|
%203 = load i32, i32* %arrayidx222, align 4
|
||
|
store i32 %203, i32* %n, align 4
|
||
|
br label %if.end224
|
||
|
|
||
|
if.else223: ; preds = %if.else215
|
||
|
br label %if.end224
|
||
|
|
||
|
if.end224: ; preds = %if.else223, %if.then217
|
||
|
br label %if.end225
|
||
|
|
||
|
if.end225: ; preds = %if.end224, %if.then209
|
||
|
br label %if.end226
|
||
|
|
||
|
if.end226: ; preds = %if.end225, %if.then190
|
||
|
%204 = load i32, i32* %nw, align 4
|
||
|
%205 = load i32*, i32** %referrence, align 8
|
||
|
%206 = load i32, i32* %i163, align 4
|
||
|
%207 = load i32, i32* %max_cols, align 4
|
||
|
%mul227 = mul nsw i32 %206, %207
|
||
|
%208 = load i32, i32* %j165, align 4
|
||
|
%add228 = add nsw i32 %mul227, %208
|
||
|
%idxprom229 = sext i32 %add228 to i64
|
||
|
%arrayidx230 = getelementptr inbounds i32, i32* %205, i64 %idxprom229
|
||
|
%209 = load i32, i32* %arrayidx230, align 4
|
||
|
%add231 = add nsw i32 %204, %209
|
||
|
store i32 %add231, i32* %new_nw, align 4
|
||
|
%210 = load i32, i32* %w, align 4
|
||
|
%211 = load i32, i32* %penalty, align 4
|
||
|
%sub232 = sub nsw i32 %210, %211
|
||
|
store i32 %sub232, i32* %new_w, align 4
|
||
|
%212 = load i32, i32* %n, align 4
|
||
|
%213 = load i32, i32* %penalty, align 4
|
||
|
%sub233 = sub nsw i32 %212, %213
|
||
|
store i32 %sub233, i32* %new_n, align 4
|
||
|
%214 = load i32, i32* %new_nw, align 4
|
||
|
%215 = load i32, i32* %new_w, align 4
|
||
|
%216 = load i32, i32* %new_n, align 4
|
||
|
%call234 = call i32 @_Z7maximumiii(i32 %214, i32 %215, i32 %216)
|
||
|
store i32 %call234, i32* %traceback, align 4
|
||
|
%217 = load i32, i32* %traceback, align 4
|
||
|
%218 = load i32, i32* %new_nw, align 4
|
||
|
%cmp235 = icmp eq i32 %217, %218
|
||
|
br i1 %cmp235, label %if.then236, label %if.end237
|
||
|
|
||
|
if.then236: ; preds = %if.end226
|
||
|
%219 = load i32, i32* %nw, align 4
|
||
|
store i32 %219, i32* %traceback, align 4
|
||
|
br label %if.end237
|
||
|
|
||
|
if.end237: ; preds = %if.then236, %if.end226
|
||
|
%220 = load i32, i32* %traceback, align 4
|
||
|
%221 = load i32, i32* %new_w, align 4
|
||
|
%cmp238 = icmp eq i32 %220, %221
|
||
|
br i1 %cmp238, label %if.then239, label %if.end240
|
||
|
|
||
|
if.then239: ; preds = %if.end237
|
||
|
%222 = load i32, i32* %w, align 4
|
||
|
store i32 %222, i32* %traceback, align 4
|
||
|
br label %if.end240
|
||
|
|
||
|
if.end240: ; preds = %if.then239, %if.end237
|
||
|
%223 = load i32, i32* %traceback, align 4
|
||
|
%224 = load i32, i32* %new_n, align 4
|
||
|
%cmp241 = icmp eq i32 %223, %224
|
||
|
br i1 %cmp241, label %if.then242, label %if.end243
|
||
|
|
||
|
if.then242: ; preds = %if.end240
|
||
|
%225 = load i32, i32* %n, align 4
|
||
|
store i32 %225, i32* %traceback, align 4
|
||
|
br label %if.end243
|
||
|
|
||
|
if.end243: ; preds = %if.then242, %if.end240
|
||
|
%226 = load %struct._IO_FILE*, %struct._IO_FILE** %fpo, align 8
|
||
|
%227 = load i32, i32* %traceback, align 4
|
||
|
%call244 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %226, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.12, i64 0, i64 0), i32 %227)
|
||
|
%228 = load i32, i32* %traceback, align 4
|
||
|
%229 = load i32, i32* %nw, align 4
|
||
|
%cmp245 = icmp eq i32 %228, %229
|
||
|
br i1 %cmp245, label %if.then246, label %if.else249
|
||
|
|
||
|
if.then246: ; preds = %if.end243
|
||
|
%230 = load i32, i32* %i163, align 4
|
||
|
%dec247 = add nsw i32 %230, -1
|
||
|
store i32 %dec247, i32* %i163, align 4
|
||
|
%231 = load i32, i32* %j165, align 4
|
||
|
%dec248 = add nsw i32 %231, -1
|
||
|
store i32 %dec248, i32* %j165, align 4
|
||
|
br label %for.cond167
|
||
|
|
||
|
if.else249: ; preds = %if.end243
|
||
|
%232 = load i32, i32* %traceback, align 4
|
||
|
%233 = load i32, i32* %w, align 4
|
||
|
%cmp250 = icmp eq i32 %232, %233
|
||
|
br i1 %cmp250, label %if.then251, label %if.else253
|
||
|
|
||
|
if.then251: ; preds = %if.else249
|
||
|
%234 = load i32, i32* %j165, align 4
|
||
|
%dec252 = add nsw i32 %234, -1
|
||
|
store i32 %dec252, i32* %j165, align 4
|
||
|
br label %for.cond167
|
||
|
|
||
|
if.else253: ; preds = %if.else249
|
||
|
%235 = load i32, i32* %traceback, align 4
|
||
|
%236 = load i32, i32* %n, align 4
|
||
|
%cmp254 = icmp eq i32 %235, %236
|
||
|
br i1 %cmp254, label %if.then255, label %if.else257
|
||
|
|
||
|
if.then255: ; preds = %if.else253
|
||
|
%237 = load i32, i32* %i163, align 4
|
||
|
%dec256 = add nsw i32 %237, -1
|
||
|
store i32 %dec256, i32* %i163, align 4
|
||
|
br label %for.cond167
|
||
|
|
||
|
if.else257: ; preds = %if.else253
|
||
|
br label %if.end258
|
||
|
|
||
|
if.end258: ; preds = %if.else257
|
||
|
br label %if.end259
|
||
|
|
||
|
if.end259: ; preds = %if.end258
|
||
|
br label %if.end260
|
||
|
|
||
|
if.end260: ; preds = %if.end259
|
||
|
br label %for.cond167
|
||
|
|
||
|
for.end261: ; preds = %if.then185, %for.cond167
|
||
|
%238 = load %struct._IO_FILE*, %struct._IO_FILE** %fpo, align 8
|
||
|
%call262 = call i32 @fclose(%struct._IO_FILE* %238)
|
||
|
%239 = load i32*, i32** %referrence_cuda, align 8
|
||
|
%240 = bitcast i32* %239 to i8*
|
||
|
%call263 = call i32 @cudaFree(i8* %240)
|
||
|
%241 = load i32*, i32** %matrix_cuda, align 8
|
||
|
%242 = bitcast i32* %241 to i8*
|
||
|
%call264 = call i32 @cudaFree(i8* %242)
|
||
|
%243 = load i32*, i32** %referrence, align 8
|
||
|
%244 = bitcast i32* %243 to i8*
|
||
|
call void @free(i8* %244) #8
|
||
|
%245 = load i32*, i32** %input_itemsets, align 8
|
||
|
%246 = bitcast i32* %245 to i8*
|
||
|
call void @free(i8* %246) #8
|
||
|
%247 = load i32*, i32** %output_itemsets, align 8
|
||
|
%248 = bitcast i32* %247 to i8*
|
||
|
call void @free(i8* %248) #8
|
||
|
ret void
|
||
|
}
|
||
|
|
||
|
; Function Attrs: noinline optnone uwtable
|
||
|
; usage(int argc, char **argv) -- mangled name _Z5usageiPPc.
; Writes three messages to stderr with fprintf and then terminates the process
; via exit(1). The function never returns to its caller.
;   %argc : stored to a stack slot but not read again in this body.
;   %argv : only element 0 is read; it is passed as the single variadic
;           argument of the first fprintf call.
; The text of the format strings (@.str.1, @.str.2, @.str.3) is defined
; elsewhere in the module.
define dso_local void @_Z5usageiPPc(i32 %argc, i8** %argv) #0 {
|
||
|
entry:
|
||
|
; Stack slots holding copies of the two incoming arguments.
%argc.addr = alloca i32, align 4
|
||
|
%argv.addr = alloca i8**, align 8
|
||
|
store i32 %argc, i32* %argc.addr, align 4
|
||
|
store i8** %argv, i8*** %argv.addr, align 8
|
||
|
; First message: format @.str.1 with argv[0] as its argument.
%0 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
|
||
|
%1 = load i8**, i8*** %argv.addr, align 8
|
||
|
%arrayidx = getelementptr inbounds i8*, i8** %1, i64 0
|
||
|
%2 = load i8*, i8** %arrayidx, align 8
|
||
|
%call = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %0, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str.1, i64 0, i64 0), i8* %2)
|
||
|
; Second message: format @.str.2, no extra arguments. @stderr is reloaded before each call.
%3 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
|
||
|
%call1 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %3, i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str.2, i64 0, i64 0))
|
||
|
; Third message: format @.str.3, no extra arguments.
%4 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
|
||
|
%call2 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %4, i8* getelementptr inbounds ([40 x i8], [40 x i8]* @.str.3, i64 0, i64 0))
|
||
|
; Terminate with status 1; the fprintf return values above are ignored.
call void @exit(i32 1) #10
|
||
|
; @exit is declared noreturn, so control cannot reach past the call.
unreachable
|
||
|
}
|
||
|
|
||
|
declare dso_local i32 @fprintf(%struct._IO_FILE*, i8*, ...) #5
|
||
|
|
||
|
; Function Attrs: noreturn nounwind
|
||
|
declare dso_local void @exit(i32) #6
|
||
|
|
||
|
; Function Attrs: nounwind readonly
|
||
|
declare dso_local i32 @atoi(i8*) #7
|
||
|
|
||
|
; Function Attrs: nounwind
|
||
|
declare dso_local noalias i8* @malloc(i64) #3
|
||
|
|
||
|
; Function Attrs: nounwind
|
||
|
declare dso_local void @srand(i32) #3
|
||
|
|
||
|
; Function Attrs: nounwind
|
||
|
declare dso_local i32 @rand() #3
|
||
|
|
||
|
declare dso_local i32 @cudaMalloc(i8**, i64) #5
|
||
|
|
||
|
declare dso_local i32 @cudaMemcpy(i8*, i8*, i64, i32) #5
|
||
|
|
||
|
declare dso_local i32 @cudaDeviceSynchronize() #5
|
||
|
|
||
|
; Function Attrs: noinline nounwind optnone uwtable
|
||
|
; dim3 constructor taking three 32-bit values (the Itanium-mangled name
; _ZN4dim3C2Ejjj reads as dim3::dim3(unsigned, unsigned, unsigned), base-object variant).
; Writes %vx, %vy and %vz into fields 0, 1 and 2 of the %struct.dim3 that %this
; points to. Nothing else is read or written through %this.
; Emitted linkonce_odr in a comdat, so duplicate copies from other translation
; units can be merged at link time.
define linkonce_odr dso_local void @_ZN4dim3C2Ejjj(%struct.dim3* %this, i32 %vx, i32 %vy, i32 %vz) unnamed_addr #2 comdat align 2 {
|
||
|
entry:
|
||
|
; Stack slots for the four incoming arguments; each is stored once and loaded once below.
%this.addr = alloca %struct.dim3*, align 8
|
||
|
%vx.addr = alloca i32, align 4
|
||
|
%vy.addr = alloca i32, align 4
|
||
|
%vz.addr = alloca i32, align 4
|
||
|
store %struct.dim3* %this, %struct.dim3** %this.addr, align 8
|
||
|
store i32 %vx, i32* %vx.addr, align 4
|
||
|
store i32 %vy, i32* %vy.addr, align 4
|
||
|
store i32 %vz, i32* %vz.addr, align 4
|
||
|
%this1 = load %struct.dim3*, %struct.dim3** %this.addr, align 8
|
||
|
; field 0 <- vx
%x = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 0
|
||
|
%0 = load i32, i32* %vx.addr, align 4
|
||
|
store i32 %0, i32* %x, align 4
|
||
|
; field 1 <- vy
%y = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 1
|
||
|
%1 = load i32, i32* %vy.addr, align 4
|
||
|
store i32 %1, i32* %y, align 4
|
||
|
; field 2 <- vz
%z = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 2
|
||
|
%2 = load i32, i32* %vz.addr, align 4
|
||
|
store i32 %2, i32* %z, align 4
|
||
|
ret void
|
||
|
}
|
||
|
|
||
|
declare dso_local i32 @__cudaPushCallConfiguration(i64, i32, i64, i32, i64, i8*) #5
|
||
|
|
||
|
declare dso_local %struct._IO_FILE* @fopen(i8*, i8*) #5
|
||
|
|
||
|
declare dso_local i32 @fclose(%struct._IO_FILE*) #5
|
||
|
|
||
|
declare dso_local i32 @cudaFree(i8*) #5
|
||
|
|
||
|
; Function Attrs: nounwind
|
||
|
declare dso_local void @free(i8*) #3
|
||
|
|
||
|
; Registers the two host-side kernel entry points of this module with the CUDA
; runtime by calling __cudaRegisterFunction once for each.
;   %0 : handle forwarded as the first argument of both registration calls;
;        the only caller in this file, __cuda_module_ctor, passes the value
;        returned by __cudaRegisterFatBinary.
; Both i32 results are discarded, so a failed registration is not detected here.
define internal void @__cuda_register_globals(i8** %0) {
|
||
|
entry:
|
||
|
; Host function @_Z20needle_cuda_shared_1PiS_iiii paired with string global @0, which is passed
; for both name parameters (presumably the kernel's device-side name -- @0 is defined elsewhere).
; The remaining arguments are -1 and null pointers.
%1 = call i32 @__cudaRegisterFunction(i8** %0, i8* bitcast (void (i32*, i32*, i32, i32, i32, i32)* @_Z20needle_cuda_shared_1PiS_iiii to i8*), i8* getelementptr inbounds ([33 x i8], [33 x i8]* @0, i64 0, i64 0), i8* getelementptr inbounds ([33 x i8], [33 x i8]* @0, i64 0, i64 0), i32 -1, i8* null, i8* null, i8* null, i8* null, i32* null)
|
||
|
; Same registration for @_Z20needle_cuda_shared_2PiS_iiii, using string global @1.
%2 = call i32 @__cudaRegisterFunction(i8** %0, i8* bitcast (void (i32*, i32*, i32, i32, i32, i32)* @_Z20needle_cuda_shared_2PiS_iiii to i8*), i8* getelementptr inbounds ([33 x i8], [33 x i8]* @1, i64 0, i64 0), i8* getelementptr inbounds ([33 x i8], [33 x i8]* @1, i64 0, i64 0), i32 -1, i8* null, i8* null, i8* null, i8* null, i32* null)
|
||
|
ret void
|
||
|
}
|
||
|
|
||
|
declare dso_local i32 @__cudaRegisterFunction(i8**, i8*, i8*, i8*, i32, i8*, i8*, i8*, i8*, i32*)
|
||
|
|
||
|
declare dso_local i32 @__cudaRegisterVar(i8**, i8*, i8*, i8*, i32, i32, i32, i32)
|
||
|
|
||
|
declare dso_local i8** @__cudaRegisterFatBinary(i8*)
|
||
|
|
||
|
; Module start-up hook for the embedded GPU binary. In order, it:
;   1. passes @__cuda_fatbin_wrapper to __cudaRegisterFatBinary,
;   2. saves the returned handle in the global @__cuda_gpubin_handle,
;   3. registers this module's kernels via @__cuda_register_globals,
;   4. calls __cudaRegisterFatBinaryEnd with the same handle,
;   5. schedules @__cuda_module_dtor through atexit.
; The i8* parameter %0 is never used in the body.
; NOTE(review): presumably invoked through @llvm.global_ctors; that table is not
; visible in this part of the file -- confirm.
define internal void @__cuda_module_ctor(i8* %0) {
|
||
|
entry:
|
||
|
%1 = call i8** @__cudaRegisterFatBinary(i8* bitcast ({ i32, i32, i8*, i8* }* @__cuda_fatbin_wrapper to i8*))
|
||
|
; Keep the handle in a global so @__cuda_module_dtor can load it later.
store i8** %1, i8*** @__cuda_gpubin_handle, align 8
|
||
|
call void @__cuda_register_globals(i8** %1)
|
||
|
call void @__cudaRegisterFatBinaryEnd(i8** %1)
|
||
|
; The atexit result (%2) is ignored, so a failure to schedule the destructor goes unnoticed.
%2 = call i32 @atexit(void (i8*)* @__cuda_module_dtor)
|
||
|
ret void
|
||
|
}
|
||
|
|
||
|
declare dso_local void @__cudaRegisterFatBinaryEnd(i8**)
|
||
|
|
||
|
declare dso_local void @__cudaUnregisterFatBinary(i8**)
|
||
|
|
||
|
; Module tear-down hook: loads the handle stored in @__cuda_gpubin_handle and
; passes it to __cudaUnregisterFatBinary. @__cuda_module_ctor stores that handle
; and registers this function with atexit.
; The i8* parameter %0 is never used in the body.
define internal void @__cuda_module_dtor(i8* %0) {
|
||
|
entry:
|
||
|
%1 = load i8**, i8*** @__cuda_gpubin_handle, align 8
|
||
|
call void @__cudaUnregisterFatBinary(i8** %1)
|
||
|
ret void
|
||
|
}
|
||
|
|
||
|
declare dso_local i32 @atexit(void (i8*)*)
|
||
|
|
||
|
; Attribute groups #0-#10. Functions and call sites refer to these by number;
; those uses are outside this part of the file.
;   #0, #2, #4  carry noinline + optnone + uwtable (plus nounwind on #2 and
;               norecurse on #4) along with the full string-attribute set,
;               including "min-legal-vector-width" and "no-jump-tables".
;   #3, #5-#7   carry the string-attribute set without those two keys, combined
;               with nounwind / (none) / noreturn nounwind / nounwind readonly.
;   #1, #8-#10  are short groups with enum attributes only.
; All groups with string attributes use "target-cpu"="x86-64" and
; "frame-pointer"="all".
attributes #0 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||
|
attributes #1 = { argmemonly nounwind willreturn }
|
||
|
attributes #2 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||
|
attributes #3 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||
|
attributes #4 = { noinline norecurse optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||
|
attributes #5 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||
|
attributes #6 = { noreturn nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||
|
attributes #7 = { nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||
|
attributes #8 = { nounwind }
|
||
|
attributes #9 = { nounwind readonly }
|
||
|
attributes #10 = { noreturn nounwind }
|
||
|
|
||
|
; Module-level metadata.
;   !llvm.module.flags lists !0 and !1; each flag is {behavior, key, value}.
;     !0: key "SDK Version", value [10, 1], behavior 2
;     !1: key "wchar_size",  value 4,       behavior 1
;     (per the LLVM LangRef, behavior 1 = error and 2 = warning when linked
;      modules disagree on the flag's value)
;   !llvm.ident lists !2, the producer string (clang 10.0.1 with its git hash).
!llvm.module.flags = !{!0, !1}
|
||
|
!llvm.ident = !{!2}
|
||
|
|
||
|
!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]}
|
||
|
!1 = !{i32 1, !"wchar_size", i32 4}
|
||
|
!2 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"}
|