; pathfinder host-side LLVM IR module (x86_64-unknown-linux-gnu), generated
; from pathfinder.cu by the clang CUDA host compilation pass.
|
; ModuleID = 'pathfinder-host-x86_64-unknown-linux-gnu.bc'
|
||
|
source_filename = "pathfinder.cu"
|
||
|
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
|
||
|
target triple = "x86_64-unknown-linux-gnu"
|
||
|
|
||
|
%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
|
||
|
%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
|
||
|
%struct.dim3 = type { i32, i32, i32 }
|
||
|
%struct.CUstream_st = type opaque
|
||
|
|
||
|
$_ZN4dim3C2Ejjj = comdat any
|
||
|
|
||
|
@rows = dso_local global i32 0, align 4
|
||
|
@cols = dso_local global i32 0, align 4
|
||
|
@data = dso_local global i32* null, align 8
|
||
|
@wall = dso_local global i32** null, align 8
|
||
|
@result = dso_local global i32* null, align 8
|
||
|
@pyramid_height = dso_local global i32 0, align 4
|
||
|
@.str = private unnamed_addr constant [47 x i8] c"Usage: dynproc row_len col_len pyramid_height\0A\00", align 1
|
||
|
@stderr = external dso_local global %struct._IO_FILE*, align 8
|
||
|
@.str.1 = private unnamed_addr constant [11 x i8] c"error: %s\0A\00", align 1
|
||
|
@.str.2 = private unnamed_addr constant [92 x i8] c"pyramidHeight: %d\0AgridSize: [%d]\0Aborder:[%d]\0AblockSize: %d\0AblockGrid:[%d]\0AtargetBlock:[%d]\0A\00", align 1
|
||
|
@.str.3 = private unnamed_addr constant [4 x i8] c"%d \00", align 1
|
||
|
@.str.4 = private unnamed_addr constant [2 x i8] c"\0A\00", align 1
|
||
|
@0 = private unnamed_addr constant [30 x i8] c"_Z14dynproc_kerneliPiS_S_iiii\00", align 1
|
||
|
@1 = private constant [20737 x i8] c"P\EDU\BA\01\00\10\00\F0P\00\00\00\00\00\00\02\00\01\01@\00\00\00\E8B\00\00\00\00\00\00\00\00\00\00\00\00\00\00\07\00\01\00=\00\00\00\00\00\00\00\00\00\00\00\11\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\7FELF\02\01\013\07\00\00\00\00\00\00\00\02\00\BE\00e\00\00\00\00\00\00\00\00\00\00\00@B\00\00\00\00\00\00\C0?\00\00\00\00\00\00=\05=\00@\008\00\03\00@\00\0A\00\01\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00.text._Z14dynproc_kerneliPiS_S_iiii\00.nv.info._Z14dynproc_kerneliPiS_S_iiii\00.nv.shared._Z14dynproc_kerneliPiS_S_iiii\00.nv.global\00.nv.constant0._Z14dynproc_kerneliPiS_S_iiii\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00_Z14dynproc_kerneliPiS_S_iiii\00.text._Z14dynproc_kerneliPiS_S_iiii\00.nv.info._Z14dynproc_kerneliPiS_S_iiii\00.nv.shared._Z14dynproc_kerneliPiS_S_iiii\00.nv.global\00blockIdx\00threadIdx\00$___ZZ14dynproc_kerneliPiS_S_iiiiE4prev__187\00$___ZZ14dynproc_kerneliPiS_S_iiiiE6result__189\00.nv.constant0._Z14dynproc_kerneliPiS_S_iiii\00_param\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00P\00\00\00\03\00\07\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\9B\00\00\00\03\00\08\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\C4\00\00\00\03\00\09\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\CF\00\00\00\01\00\09\00\00\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\D8\00\00\00\01\00\09\00\01\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00>\01\00\00\03\00\06\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\002\00\00\00\12\10\07\00\00\00\00\00\00\00\00\00\00:\00\00\00\00\00\00\04/\08\00\07\00\00\00\11\00\00\00\04#\08\00\07\00\00\00\00\00\00\00\04\12\08\00\07\00\00\00x\00\00\00\04\11\08\00\07\00\00\00x\00\00\00\010\00\00\01*\00\00\04\0A\08\00\06\00\00\00@\010\00\03\190\00\04\17\0C\00\00\00\00\00\07\00,\00\00\F0\11\00\04\17\0C\00\00\00\00\00\06\00(\00\00\F0\11\00\04\17\0C\00\00\00\00\00\05\00$\00\00\F0\11\00\04\17\0C\00\
00\00\00\00\04\00 \00\00\F0\11\00\04\17\0C\00\00\00\00\00\03\00\18\00\00\F0!\00\04\17\0C\00\00\00\00\00\02\00\10\00\00\F0!\00\04\17\0C\00\00\00\00\00\01\00\08\00\00\F0!\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0\11\00\03\1B\FF\00\04\1D\04\00\E8\05\00\00\04\1C\04\00\D89\00\00\04\1E\04\00\80\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\EF\1F\E0\FD\03<d\00\01\00\87\00\80\07\98L\01\01\87\F8\FF\FF\0F\1C\00\00w\03\00\00\C8\F0\EF\1F\E0\FD\03\BC\7F\00\07\01\07\00\80\03l[\0F\00\80\00\00\00@\E2\C0\00\10\00\00\00\A0\E3\EF\1F\E0!\03\BC\7F\00\00\01\F7\0F\00\00\10\\\00\0A\07\00\00\00\E0\\\02\00\07\00\80\07\98\\\EF\1F\E0\FD\03\BC\7F\00\03\00\F7\0F\80\07\98\\\00\00'\00\80\07\98\\\04\007\00\80\07\98\\\EF\1F\E0\FD\03\BC\7F\00\03\00\07\00\80\07\98\\\04\00G\00\80\07\98\\\02\00\17\00\80\07\98L\EF\1F\E0\FD\03\BC\7F\00\00\00\17\04\80\07\98L\02\03'\00\00\80\
10\\\00\04\07\00\00\08\10\\\EF\1F\E0!\03\BC\7F\00\03\F0\C7\16\00\00\00\01\03\03\07\00\00\00\94\
|
||
|
@__cuda_fatbin_wrapper = internal constant { i32, i32, i8*, i8* } { i32 1180844977, i32 1, i8* getelementptr inbounds ([20737 x i8], [20737 x i8]* @1, i64 0, i64 0), i8* null }, section ".nvFatBinSegment", align 8
|
||
|
@__cuda_gpubin_handle = internal global i8** null, align 8
|
||
|
@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* bitcast (void (i8*)* @__cuda_module_ctor to void ()*), i8* null }]
|
||
|
|
||
|
; Function Attrs: noinline optnone uwtable
|
||
|
define dso_local void @_Z4initiPPc(i32 %argc, i8** %argv) #0 {
|
||
|
entry:
|
||
|
%argc.addr = alloca i32, align 4
|
||
|
%argv.addr = alloca i8**, align 8
|
||
|
%n = alloca i32, align 4
|
||
|
%seed = alloca i32, align 4
|
||
|
%i = alloca i32, align 4
|
||
|
%j = alloca i32, align 4
|
||
|
store i32 %argc, i32* %argc.addr, align 4
|
||
|
store i8** %argv, i8*** %argv.addr, align 8
|
||
|
%0 = load i32, i32* %argc.addr, align 4
|
||
|
%cmp = icmp eq i32 %0, 4
|
||
|
br i1 %cmp, label %if.then, label %if.else
|
||
|
|
||
|
if.then: ; preds = %entry
|
||
|
%1 = load i8**, i8*** %argv.addr, align 8
|
||
|
%arrayidx = getelementptr inbounds i8*, i8** %1, i64 1
|
||
|
%2 = load i8*, i8** %arrayidx, align 8
|
||
|
%call = call i32 @atoi(i8* %2) #11
|
||
|
store i32 %call, i32* @cols, align 4
|
||
|
%3 = load i8**, i8*** %argv.addr, align 8
|
||
|
%arrayidx1 = getelementptr inbounds i8*, i8** %3, i64 2
|
||
|
%4 = load i8*, i8** %arrayidx1, align 8
|
||
|
%call2 = call i32 @atoi(i8* %4) #11
|
||
|
store i32 %call2, i32* @rows, align 4
|
||
|
%5 = load i8**, i8*** %argv.addr, align 8
|
||
|
%arrayidx3 = getelementptr inbounds i8*, i8** %5, i64 3
|
||
|
%6 = load i8*, i8** %arrayidx3, align 8
|
||
|
%call4 = call i32 @atoi(i8* %6) #11
|
||
|
store i32 %call4, i32* @pyramid_height, align 4
|
||
|
br label %if.end
|
||
|
|
||
|
if.else: ; preds = %entry
|
||
|
%call5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([47 x i8], [47 x i8]* @.str, i64 0, i64 0))
|
||
|
call void @exit(i32 0) #12
|
||
|
unreachable
|
||
|
|
||
|
if.end: ; preds = %if.then
|
||
|
%7 = load i32, i32* @rows, align 4
|
||
|
%8 = load i32, i32* @cols, align 4
|
||
|
%mul = mul nsw i32 %7, %8
|
||
|
%9 = sext i32 %mul to i64
|
||
|
%10 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %9, i64 4)
|
||
|
%11 = extractvalue { i64, i1 } %10, 1
|
||
|
%12 = extractvalue { i64, i1 } %10, 0
|
||
|
%13 = select i1 %11, i64 -1, i64 %12
|
||
|
%call6 = call i8* @_Znam(i64 %13) #13
|
||
|
%14 = bitcast i8* %call6 to i32*
|
||
|
store i32* %14, i32** @data, align 8
|
||
|
%15 = load i32, i32* @rows, align 4
|
||
|
%16 = sext i32 %15 to i64
|
||
|
%17 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %16, i64 8)
|
||
|
%18 = extractvalue { i64, i1 } %17, 1
|
||
|
%19 = extractvalue { i64, i1 } %17, 0
|
||
|
%20 = select i1 %18, i64 -1, i64 %19
|
||
|
%call7 = call i8* @_Znam(i64 %20) #13
|
||
|
%21 = bitcast i8* %call7 to i32**
|
||
|
store i32** %21, i32*** @wall, align 8
|
||
|
store i32 0, i32* %n, align 4
|
||
|
br label %for.cond
|
||
|
|
||
|
for.cond: ; preds = %for.inc, %if.end
|
||
|
%22 = load i32, i32* %n, align 4
|
||
|
%23 = load i32, i32* @rows, align 4
|
||
|
%cmp8 = icmp slt i32 %22, %23
|
||
|
br i1 %cmp8, label %for.body, label %for.end
|
||
|
|
||
|
for.body: ; preds = %for.cond
|
||
|
%24 = load i32*, i32** @data, align 8
|
||
|
%25 = load i32, i32* @cols, align 4
|
||
|
%26 = load i32, i32* %n, align 4
|
||
|
%mul9 = mul nsw i32 %25, %26
|
||
|
%idx.ext = sext i32 %mul9 to i64
|
||
|
%add.ptr = getelementptr inbounds i32, i32* %24, i64 %idx.ext
|
||
|
%27 = load i32**, i32*** @wall, align 8
|
||
|
%28 = load i32, i32* %n, align 4
|
||
|
%idxprom = sext i32 %28 to i64
|
||
|
%arrayidx10 = getelementptr inbounds i32*, i32** %27, i64 %idxprom
|
||
|
store i32* %add.ptr, i32** %arrayidx10, align 8
|
||
|
br label %for.inc
|
||
|
|
||
|
for.inc: ; preds = %for.body
|
||
|
%29 = load i32, i32* %n, align 4
|
||
|
%inc = add nsw i32 %29, 1
|
||
|
store i32 %inc, i32* %n, align 4
|
||
|
br label %for.cond
|
||
|
|
||
|
for.end: ; preds = %for.cond
|
||
|
%30 = load i32, i32* @cols, align 4
|
||
|
%31 = sext i32 %30 to i64
|
||
|
%32 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %31, i64 4)
|
||
|
%33 = extractvalue { i64, i1 } %32, 1
|
||
|
%34 = extractvalue { i64, i1 } %32, 0
|
||
|
%35 = select i1 %33, i64 -1, i64 %34
|
||
|
%call11 = call i8* @_Znam(i64 %35) #13
|
||
|
%36 = bitcast i8* %call11 to i32*
|
||
|
store i32* %36, i32** @result, align 8
|
||
|
store i32 9, i32* %seed, align 4
|
||
|
%37 = load i32, i32* %seed, align 4
|
||
|
call void @srand(i32 %37) #14
|
||
|
store i32 0, i32* %i, align 4
|
||
|
br label %for.cond12
|
||
|
|
||
|
for.cond12: ; preds = %for.inc26, %for.end
|
||
|
%38 = load i32, i32* %i, align 4
|
||
|
%39 = load i32, i32* @rows, align 4
|
||
|
%cmp13 = icmp slt i32 %38, %39
|
||
|
br i1 %cmp13, label %for.body14, label %for.end28
|
||
|
|
||
|
for.body14: ; preds = %for.cond12
|
||
|
store i32 0, i32* %j, align 4
|
||
|
br label %for.cond15
|
||
|
|
||
|
for.cond15: ; preds = %for.inc23, %for.body14
|
||
|
%40 = load i32, i32* %j, align 4
|
||
|
%41 = load i32, i32* @cols, align 4
|
||
|
%cmp16 = icmp slt i32 %40, %41
|
||
|
br i1 %cmp16, label %for.body17, label %for.end25
|
||
|
|
||
|
for.body17: ; preds = %for.cond15
|
||
|
%call18 = call i32 @rand() #14
|
||
|
%rem = srem i32 %call18, 10
|
||
|
%42 = load i32**, i32*** @wall, align 8
|
||
|
%43 = load i32, i32* %i, align 4
|
||
|
%idxprom19 = sext i32 %43 to i64
|
||
|
%arrayidx20 = getelementptr inbounds i32*, i32** %42, i64 %idxprom19
|
||
|
%44 = load i32*, i32** %arrayidx20, align 8
|
||
|
%45 = load i32, i32* %j, align 4
|
||
|
%idxprom21 = sext i32 %45 to i64
|
||
|
%arrayidx22 = getelementptr inbounds i32, i32* %44, i64 %idxprom21
|
||
|
store i32 %rem, i32* %arrayidx22, align 4
|
||
|
br label %for.inc23
|
||
|
|
||
|
for.inc23: ; preds = %for.body17
|
||
|
%46 = load i32, i32* %j, align 4
|
||
|
%inc24 = add nsw i32 %46, 1
|
||
|
store i32 %inc24, i32* %j, align 4
|
||
|
br label %for.cond15
|
||
|
|
||
|
for.end25: ; preds = %for.cond15
|
||
|
br label %for.inc26
|
||
|
|
||
|
for.inc26: ; preds = %for.end25
|
||
|
%47 = load i32, i32* %i, align 4
|
||
|
%inc27 = add nsw i32 %47, 1
|
||
|
store i32 %inc27, i32* %i, align 4
|
||
|
br label %for.cond12
|
||
|
|
||
|
for.end28: ; preds = %for.cond12
|
||
|
ret void
|
||
|
}
|
||
|
|
||
|
; Function Attrs: nounwind readonly
|
||
|
declare dso_local i32 @atoi(i8*) #1
|
||
|
|
||
|
declare dso_local i32 @printf(i8*, ...) #2
|
||
|
|
||
|
; Function Attrs: noreturn nounwind
|
||
|
declare dso_local void @exit(i32) #3
|
||
|
|
||
|
; Function Attrs: nounwind readnone speculatable willreturn
|
||
|
declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64) #4
|
||
|
|
||
|
; Function Attrs: nobuiltin
|
||
|
declare dso_local noalias i8* @_Znam(i64) #5
|
||
|
|
||
|
; Function Attrs: nounwind
|
||
|
declare dso_local void @srand(i32) #6
|
||
|
|
||
|
; Function Attrs: nounwind
|
||
|
declare dso_local i32 @rand() #6
|
||
|
|
||
|
; Function Attrs: noinline optnone uwtable
|
||
|
define dso_local void @_Z5fatalPc(i8* %s) #0 {
|
||
|
entry:
|
||
|
%s.addr = alloca i8*, align 8
|
||
|
store i8* %s, i8** %s.addr, align 8
|
||
|
%0 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
|
||
|
%1 = load i8*, i8** %s.addr, align 8
|
||
|
%call = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %0, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.1, i64 0, i64 0), i8* %1)
|
||
|
ret void
|
||
|
}
|
||
|
|
||
|
declare dso_local i32 @fprintf(%struct._IO_FILE*, i8*, ...) #2
|
||
|
|
||
|
; Function Attrs: noinline optnone uwtable
|
||
|
define dso_local void @_Z14dynproc_kerneliPiS_S_iiii(i32 %iteration, i32* %gpuWall, i32* %gpuSrc, i32* %gpuResults, i32 %cols, i32 %rows, i32 %startStep, i32 %border) #0 {
|
||
|
entry:
|
||
|
%iteration.addr = alloca i32, align 4
|
||
|
%gpuWall.addr = alloca i32*, align 8
|
||
|
%gpuSrc.addr = alloca i32*, align 8
|
||
|
%gpuResults.addr = alloca i32*, align 8
|
||
|
%cols.addr = alloca i32, align 4
|
||
|
%rows.addr = alloca i32, align 4
|
||
|
%startStep.addr = alloca i32, align 4
|
||
|
%border.addr = alloca i32, align 4
|
||
|
%grid_dim = alloca %struct.dim3, align 8
|
||
|
%block_dim = alloca %struct.dim3, align 8
|
||
|
%shmem_size = alloca i64, align 8
|
||
|
%stream = alloca i8*, align 8
|
||
|
%grid_dim.coerce = alloca { i64, i32 }, align 8
|
||
|
%block_dim.coerce = alloca { i64, i32 }, align 8
|
||
|
store i32 %iteration, i32* %iteration.addr, align 4
|
||
|
store i32* %gpuWall, i32** %gpuWall.addr, align 8
|
||
|
store i32* %gpuSrc, i32** %gpuSrc.addr, align 8
|
||
|
store i32* %gpuResults, i32** %gpuResults.addr, align 8
|
||
|
store i32 %cols, i32* %cols.addr, align 4
|
||
|
store i32 %rows, i32* %rows.addr, align 4
|
||
|
store i32 %startStep, i32* %startStep.addr, align 4
|
||
|
store i32 %border, i32* %border.addr, align 4
|
||
|
%kernel_args = alloca i8*, i64 8, align 16
|
||
|
%0 = bitcast i32* %iteration.addr to i8*
|
||
|
%1 = getelementptr i8*, i8** %kernel_args, i32 0
|
||
|
store i8* %0, i8** %1
|
||
|
%2 = bitcast i32** %gpuWall.addr to i8*
|
||
|
%3 = getelementptr i8*, i8** %kernel_args, i32 1
|
||
|
store i8* %2, i8** %3
|
||
|
%4 = bitcast i32** %gpuSrc.addr to i8*
|
||
|
%5 = getelementptr i8*, i8** %kernel_args, i32 2
|
||
|
store i8* %4, i8** %5
|
||
|
%6 = bitcast i32** %gpuResults.addr to i8*
|
||
|
%7 = getelementptr i8*, i8** %kernel_args, i32 3
|
||
|
store i8* %6, i8** %7
|
||
|
%8 = bitcast i32* %cols.addr to i8*
|
||
|
%9 = getelementptr i8*, i8** %kernel_args, i32 4
|
||
|
store i8* %8, i8** %9
|
||
|
%10 = bitcast i32* %rows.addr to i8*
|
||
|
%11 = getelementptr i8*, i8** %kernel_args, i32 5
|
||
|
store i8* %10, i8** %11
|
||
|
%12 = bitcast i32* %startStep.addr to i8*
|
||
|
%13 = getelementptr i8*, i8** %kernel_args, i32 6
|
||
|
store i8* %12, i8** %13
|
||
|
%14 = bitcast i32* %border.addr to i8*
|
||
|
%15 = getelementptr i8*, i8** %kernel_args, i32 7
|
||
|
store i8* %14, i8** %15
|
||
|
%16 = call i32 @__cudaPopCallConfiguration(%struct.dim3* %grid_dim, %struct.dim3* %block_dim, i64* %shmem_size, i8** %stream)
|
||
|
%17 = load i64, i64* %shmem_size, align 8
|
||
|
%18 = load i8*, i8** %stream, align 8
|
||
|
%19 = bitcast { i64, i32 }* %grid_dim.coerce to i8*
|
||
|
%20 = bitcast %struct.dim3* %grid_dim to i8*
|
||
|
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %19, i8* align 8 %20, i64 12, i1 false)
|
||
|
%21 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 0
|
||
|
%22 = load i64, i64* %21, align 8
|
||
|
%23 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 1
|
||
|
%24 = load i32, i32* %23, align 8
|
||
|
%25 = bitcast { i64, i32 }* %block_dim.coerce to i8*
|
||
|
%26 = bitcast %struct.dim3* %block_dim to i8*
|
||
|
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %25, i8* align 8 %26, i64 12, i1 false)
|
||
|
%27 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 0
|
||
|
%28 = load i64, i64* %27, align 8
|
||
|
%29 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 1
|
||
|
%30 = load i32, i32* %29, align 8
|
||
|
%31 = bitcast i8* %18 to %struct.CUstream_st*
|
||
|
%call = call i32 @cudaLaunchKernel(i8* bitcast (void (i32, i32*, i32*, i32*, i32, i32, i32, i32)* @_Z14dynproc_kerneliPiS_S_iiii to i8*), i64 %22, i32 %24, i64 %28, i32 %30, i8** %kernel_args, i64 %17, %struct.CUstream_st* %31)
|
||
|
br label %setup.end
|
||
|
|
||
|
setup.end: ; preds = %entry
|
||
|
ret void
|
||
|
}
|
||
|
|
||
|
declare dso_local i32 @__cudaPopCallConfiguration(%struct.dim3*, %struct.dim3*, i64*, i8**)
|
||
|
|
||
|
declare dso_local i32 @cudaLaunchKernel(i8*, i64, i32, i64, i32, i8**, i64, %struct.CUstream_st*)
|
||
|
|
||
|
; Function Attrs: argmemonly nounwind willreturn
|
||
|
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #7
|
||
|
|
||
|
; Function Attrs: noinline optnone uwtable
|
||
|
define dso_local i32 @_Z9calc_pathPiPS_iiiii(i32* %gpuWall, i32** %gpuResult, i32 %rows, i32 %cols, i32 %pyramid_height, i32 %blockCols, i32 %borderCols) #0 {
|
||
|
entry:
|
||
|
%gpuWall.addr = alloca i32*, align 8
|
||
|
%gpuResult.addr = alloca i32**, align 8
|
||
|
%rows.addr = alloca i32, align 4
|
||
|
%cols.addr = alloca i32, align 4
|
||
|
%pyramid_height.addr = alloca i32, align 4
|
||
|
%blockCols.addr = alloca i32, align 4
|
||
|
%borderCols.addr = alloca i32, align 4
|
||
|
%dimBlock = alloca %struct.dim3, align 4
|
||
|
%dimGrid = alloca %struct.dim3, align 4
|
||
|
%src = alloca i32, align 4
|
||
|
%dst = alloca i32, align 4
|
||
|
%t = alloca i32, align 4
|
||
|
%temp = alloca i32, align 4
|
||
|
%agg.tmp = alloca %struct.dim3, align 4
|
||
|
%agg.tmp1 = alloca %struct.dim3, align 4
|
||
|
%agg.tmp.coerce = alloca { i64, i32 }, align 4
|
||
|
%agg.tmp1.coerce = alloca { i64, i32 }, align 4
|
||
|
store i32* %gpuWall, i32** %gpuWall.addr, align 8
|
||
|
store i32** %gpuResult, i32*** %gpuResult.addr, align 8
|
||
|
store i32 %rows, i32* %rows.addr, align 4
|
||
|
store i32 %cols, i32* %cols.addr, align 4
|
||
|
store i32 %pyramid_height, i32* %pyramid_height.addr, align 4
|
||
|
store i32 %blockCols, i32* %blockCols.addr, align 4
|
||
|
store i32 %borderCols, i32* %borderCols.addr, align 4
|
||
|
call void @_ZN4dim3C2Ejjj(%struct.dim3* %dimBlock, i32 256, i32 1, i32 1)
|
||
|
%0 = load i32, i32* %blockCols.addr, align 4
|
||
|
call void @_ZN4dim3C2Ejjj(%struct.dim3* %dimGrid, i32 %0, i32 1, i32 1)
|
||
|
store i32 1, i32* %src, align 4
|
||
|
store i32 0, i32* %dst, align 4
|
||
|
store i32 0, i32* %t, align 4
|
||
|
br label %for.cond
|
||
|
|
||
|
for.cond: ; preds = %for.inc, %entry
|
||
|
%1 = load i32, i32* %t, align 4
|
||
|
%2 = load i32, i32* %rows.addr, align 4
|
||
|
%sub = sub nsw i32 %2, 1
|
||
|
%cmp = icmp slt i32 %1, %sub
|
||
|
br i1 %cmp, label %for.body, label %for.end
|
||
|
|
||
|
for.body: ; preds = %for.cond
|
||
|
%3 = load i32, i32* %src, align 4
|
||
|
store i32 %3, i32* %temp, align 4
|
||
|
%4 = load i32, i32* %dst, align 4
|
||
|
store i32 %4, i32* %src, align 4
|
||
|
%5 = load i32, i32* %temp, align 4
|
||
|
store i32 %5, i32* %dst, align 4
|
||
|
%6 = bitcast %struct.dim3* %agg.tmp to i8*
|
||
|
%7 = bitcast %struct.dim3* %dimGrid to i8*
|
||
|
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %6, i8* align 4 %7, i64 12, i1 false)
|
||
|
%8 = bitcast %struct.dim3* %agg.tmp1 to i8*
|
||
|
%9 = bitcast %struct.dim3* %dimBlock to i8*
|
||
|
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %8, i8* align 4 %9, i64 12, i1 false)
|
||
|
%10 = bitcast { i64, i32 }* %agg.tmp.coerce to i8*
|
||
|
%11 = bitcast %struct.dim3* %agg.tmp to i8*
|
||
|
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %10, i8* align 4 %11, i64 12, i1 false)
|
||
|
%12 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 0
|
||
|
%13 = load i64, i64* %12, align 4
|
||
|
%14 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 1
|
||
|
%15 = load i32, i32* %14, align 4
|
||
|
%16 = bitcast { i64, i32 }* %agg.tmp1.coerce to i8*
|
||
|
%17 = bitcast %struct.dim3* %agg.tmp1 to i8*
|
||
|
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %16, i8* align 4 %17, i64 12, i1 false)
|
||
|
%18 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp1.coerce, i32 0, i32 0
|
||
|
%19 = load i64, i64* %18, align 4
|
||
|
%20 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp1.coerce, i32 0, i32 1
|
||
|
%21 = load i32, i32* %20, align 4
|
||
|
%call = call i32 @__cudaPushCallConfiguration(i64 %13, i32 %15, i64 %19, i32 %21, i64 0, i8* null)
|
||
|
%tobool = icmp ne i32 %call, 0
|
||
|
br i1 %tobool, label %kcall.end, label %kcall.configok
|
||
|
|
||
|
kcall.configok: ; preds = %for.body
|
||
|
%22 = load i32, i32* %pyramid_height.addr, align 4
|
||
|
%23 = load i32, i32* %rows.addr, align 4
|
||
|
%24 = load i32, i32* %t, align 4
|
||
|
%sub2 = sub nsw i32 %23, %24
|
||
|
%sub3 = sub nsw i32 %sub2, 1
|
||
|
%cmp4 = icmp sle i32 %22, %sub3
|
||
|
br i1 %cmp4, label %cond.true, label %cond.false
|
||
|
|
||
|
cond.true: ; preds = %kcall.configok
|
||
|
%25 = load i32, i32* %pyramid_height.addr, align 4
|
||
|
br label %cond.end
|
||
|
|
||
|
cond.false: ; preds = %kcall.configok
|
||
|
%26 = load i32, i32* %rows.addr, align 4
|
||
|
%27 = load i32, i32* %t, align 4
|
||
|
%sub5 = sub nsw i32 %26, %27
|
||
|
%sub6 = sub nsw i32 %sub5, 1
|
||
|
br label %cond.end
|
||
|
|
||
|
cond.end: ; preds = %cond.false, %cond.true
|
||
|
%cond = phi i32 [ %25, %cond.true ], [ %sub6, %cond.false ]
|
||
|
%28 = load i32*, i32** %gpuWall.addr, align 8
|
||
|
%29 = load i32**, i32*** %gpuResult.addr, align 8
|
||
|
%30 = load i32, i32* %src, align 4
|
||
|
%idxprom = sext i32 %30 to i64
|
||
|
%arrayidx = getelementptr inbounds i32*, i32** %29, i64 %idxprom
|
||
|
%31 = load i32*, i32** %arrayidx, align 8
|
||
|
%32 = load i32**, i32*** %gpuResult.addr, align 8
|
||
|
%33 = load i32, i32* %dst, align 4
|
||
|
%idxprom7 = sext i32 %33 to i64
|
||
|
%arrayidx8 = getelementptr inbounds i32*, i32** %32, i64 %idxprom7
|
||
|
%34 = load i32*, i32** %arrayidx8, align 8
|
||
|
%35 = load i32, i32* %cols.addr, align 4
|
||
|
%36 = load i32, i32* %rows.addr, align 4
|
||
|
%37 = load i32, i32* %t, align 4
|
||
|
%38 = load i32, i32* %borderCols.addr, align 4
|
||
|
call void @_Z14dynproc_kerneliPiS_S_iiii(i32 %cond, i32* %28, i32* %31, i32* %34, i32 %35, i32 %36, i32 %37, i32 %38)
|
||
|
br label %kcall.end
|
||
|
|
||
|
kcall.end: ; preds = %cond.end, %for.body
|
||
|
%call9 = call i32 @cudaDeviceSynchronize()
|
||
|
br label %for.inc
|
||
|
|
||
|
for.inc: ; preds = %kcall.end
|
||
|
%39 = load i32, i32* %pyramid_height.addr, align 4
|
||
|
%40 = load i32, i32* %t, align 4
|
||
|
%add = add nsw i32 %40, %39
|
||
|
store i32 %add, i32* %t, align 4
|
||
|
br label %for.cond
|
||
|
|
||
|
for.end: ; preds = %for.cond
|
||
|
%41 = load i32, i32* %dst, align 4
|
||
|
ret i32 %41
|
||
|
}
|
||
|
|
||
|
; Function Attrs: noinline nounwind optnone uwtable
|
||
|
define linkonce_odr dso_local void @_ZN4dim3C2Ejjj(%struct.dim3* %this, i32 %vx, i32 %vy, i32 %vz) unnamed_addr #8 comdat align 2 {
|
||
|
entry:
|
||
|
%this.addr = alloca %struct.dim3*, align 8
|
||
|
%vx.addr = alloca i32, align 4
|
||
|
%vy.addr = alloca i32, align 4
|
||
|
%vz.addr = alloca i32, align 4
|
||
|
store %struct.dim3* %this, %struct.dim3** %this.addr, align 8
|
||
|
store i32 %vx, i32* %vx.addr, align 4
|
||
|
store i32 %vy, i32* %vy.addr, align 4
|
||
|
store i32 %vz, i32* %vz.addr, align 4
|
||
|
%this1 = load %struct.dim3*, %struct.dim3** %this.addr, align 8
|
||
|
%x = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 0
|
||
|
%0 = load i32, i32* %vx.addr, align 4
|
||
|
store i32 %0, i32* %x, align 4
|
||
|
%y = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 1
|
||
|
%1 = load i32, i32* %vy.addr, align 4
|
||
|
store i32 %1, i32* %y, align 4
|
||
|
%z = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 2
|
||
|
%2 = load i32, i32* %vz.addr, align 4
|
||
|
store i32 %2, i32* %z, align 4
|
||
|
ret void
|
||
|
}
|
||
|
|
||
|
declare dso_local i32 @__cudaPushCallConfiguration(i64, i32, i64, i32, i64, i8*) #2
|
||
|
|
||
|
declare dso_local i32 @cudaDeviceSynchronize() #2
|
||
|
|
||
|
; Function Attrs: noinline norecurse optnone uwtable
|
||
|
define dso_local i32 @main(i32 %argc, i8** %argv) #9 {
|
||
|
entry:
|
||
|
%retval = alloca i32, align 4
|
||
|
%argc.addr = alloca i32, align 4
|
||
|
%argv.addr = alloca i8**, align 8
|
||
|
store i32 0, i32* %retval, align 4
|
||
|
store i32 %argc, i32* %argc.addr, align 4
|
||
|
store i8** %argv, i8*** %argv.addr, align 8
|
||
|
%call = call i32 @cudaSetDevice(i32 0)
|
||
|
%0 = load i32, i32* %argc.addr, align 4
|
||
|
%1 = load i8**, i8*** %argv.addr, align 8
|
||
|
call void @_Z3runiPPc(i32 %0, i8** %1)
|
||
|
ret i32 0
|
||
|
}
|
||
|
|
||
|
declare dso_local i32 @cudaSetDevice(i32) #2
|
||
|
|
||
|
; Function Attrs: noinline optnone uwtable
|
||
|
define dso_local void @_Z3runiPPc(i32 %argc, i8** %argv) #0 {
|
||
|
entry:
|
||
|
%argc.addr = alloca i32, align 4
|
||
|
%argv.addr = alloca i8**, align 8
|
||
|
%borderCols = alloca i32, align 4
|
||
|
%smallBlockCol = alloca i32, align 4
|
||
|
%blockCols = alloca i32, align 4
|
||
|
%gpuWall = alloca i32*, align 8
|
||
|
%gpuResult = alloca [2 x i32*], align 16
|
||
|
%size = alloca i32, align 4
|
||
|
%final_ret = alloca i32, align 4
|
||
|
%i = alloca i32, align 4
|
||
|
%i32 = alloca i32, align 4
|
||
|
store i32 %argc, i32* %argc.addr, align 4
|
||
|
store i8** %argv, i8*** %argv.addr, align 8
|
||
|
%0 = load i32, i32* %argc.addr, align 4
|
||
|
%1 = load i8**, i8*** %argv.addr, align 8
|
||
|
call void @_Z4initiPPc(i32 %0, i8** %1)
|
||
|
%2 = load i32, i32* @pyramid_height, align 4
|
||
|
%mul = mul nsw i32 %2, 1
|
||
|
store i32 %mul, i32* %borderCols, align 4
|
||
|
%3 = load i32, i32* @pyramid_height, align 4
|
||
|
%mul1 = mul nsw i32 %3, 1
|
||
|
%mul2 = mul nsw i32 %mul1, 2
|
||
|
%sub = sub nsw i32 256, %mul2
|
||
|
store i32 %sub, i32* %smallBlockCol, align 4
|
||
|
%4 = load i32, i32* @cols, align 4
|
||
|
%5 = load i32, i32* %smallBlockCol, align 4
|
||
|
%div = sdiv i32 %4, %5
|
||
|
%6 = load i32, i32* @cols, align 4
|
||
|
%7 = load i32, i32* %smallBlockCol, align 4
|
||
|
%rem = srem i32 %6, %7
|
||
|
%cmp = icmp eq i32 %rem, 0
|
||
|
%8 = zext i1 %cmp to i64
|
||
|
%cond = select i1 %cmp, i32 0, i32 1
|
||
|
%add = add nsw i32 %div, %cond
|
||
|
store i32 %add, i32* %blockCols, align 4
|
||
|
%9 = load i32, i32* @pyramid_height, align 4
|
||
|
%10 = load i32, i32* @cols, align 4
|
||
|
%11 = load i32, i32* %borderCols, align 4
|
||
|
%12 = load i32, i32* %blockCols, align 4
|
||
|
%13 = load i32, i32* %smallBlockCol, align 4
|
||
|
%call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([92 x i8], [92 x i8]* @.str.2, i64 0, i64 0), i32 %9, i32 %10, i32 %11, i32 256, i32 %12, i32 %13)
|
||
|
%14 = load i32, i32* @rows, align 4
|
||
|
%15 = load i32, i32* @cols, align 4
|
||
|
%mul3 = mul nsw i32 %14, %15
|
||
|
store i32 %mul3, i32* %size, align 4
|
||
|
%arrayidx = getelementptr inbounds [2 x i32*], [2 x i32*]* %gpuResult, i64 0, i64 0
|
||
|
%16 = bitcast i32** %arrayidx to i8**
|
||
|
%17 = load i32, i32* @cols, align 4
|
||
|
%conv = sext i32 %17 to i64
|
||
|
%mul4 = mul i64 4, %conv
|
||
|
%call5 = call i32 @cudaMalloc(i8** %16, i64 %mul4)
|
||
|
%arrayidx6 = getelementptr inbounds [2 x i32*], [2 x i32*]* %gpuResult, i64 0, i64 1
|
||
|
%18 = bitcast i32** %arrayidx6 to i8**
|
||
|
%19 = load i32, i32* @cols, align 4
|
||
|
%conv7 = sext i32 %19 to i64
|
||
|
%mul8 = mul i64 4, %conv7
|
||
|
%call9 = call i32 @cudaMalloc(i8** %18, i64 %mul8)
|
||
|
%arrayidx10 = getelementptr inbounds [2 x i32*], [2 x i32*]* %gpuResult, i64 0, i64 0
|
||
|
%20 = load i32*, i32** %arrayidx10, align 16
|
||
|
%21 = bitcast i32* %20 to i8*
|
||
|
%22 = load i32*, i32** @data, align 8
|
||
|
%23 = bitcast i32* %22 to i8*
|
||
|
%24 = load i32, i32* @cols, align 4
|
||
|
%conv11 = sext i32 %24 to i64
|
||
|
%mul12 = mul i64 4, %conv11
|
||
|
%call13 = call i32 @cudaMemcpy(i8* %21, i8* %23, i64 %mul12, i32 1)
|
||
|
%25 = bitcast i32** %gpuWall to i8**
|
||
|
%26 = load i32, i32* %size, align 4
|
||
|
%27 = load i32, i32* @cols, align 4
|
||
|
%sub14 = sub nsw i32 %26, %27
|
||
|
%conv15 = sext i32 %sub14 to i64
|
||
|
%mul16 = mul i64 4, %conv15
|
||
|
%call17 = call i32 @cudaMalloc(i8** %25, i64 %mul16)
|
||
|
%28 = load i32*, i32** %gpuWall, align 8
|
||
|
%29 = bitcast i32* %28 to i8*
|
||
|
%30 = load i32*, i32** @data, align 8
|
||
|
%31 = load i32, i32* @cols, align 4
|
||
|
%idx.ext = sext i32 %31 to i64
|
||
|
%add.ptr = getelementptr inbounds i32, i32* %30, i64 %idx.ext
|
||
|
%32 = bitcast i32* %add.ptr to i8*
|
||
|
%33 = load i32, i32* %size, align 4
|
||
|
%34 = load i32, i32* @cols, align 4
|
||
|
%sub18 = sub nsw i32 %33, %34
|
||
|
%conv19 = sext i32 %sub18 to i64
|
||
|
%mul20 = mul i64 4, %conv19
|
||
|
%call21 = call i32 @cudaMemcpy(i8* %29, i8* %32, i64 %mul20, i32 1)
|
||
|
%35 = load i32*, i32** %gpuWall, align 8
|
||
|
%arraydecay = getelementptr inbounds [2 x i32*], [2 x i32*]* %gpuResult, i64 0, i64 0
|
||
|
%36 = load i32, i32* @rows, align 4
|
||
|
%37 = load i32, i32* @cols, align 4
|
||
|
%38 = load i32, i32* @pyramid_height, align 4
|
||
|
%39 = load i32, i32* %blockCols, align 4
|
||
|
%40 = load i32, i32* %borderCols, align 4
|
||
|
%call22 = call i32 @_Z9calc_pathPiPS_iiiii(i32* %35, i32** %arraydecay, i32 %36, i32 %37, i32 %38, i32 %39, i32 %40)
|
||
|
store i32 %call22, i32* %final_ret, align 4
|
||
|
%41 = load i32*, i32** @result, align 8
|
||
|
%42 = bitcast i32* %41 to i8*
|
||
|
%43 = load i32, i32* %final_ret, align 4
|
||
|
%idxprom = sext i32 %43 to i64
|
||
|
%arrayidx23 = getelementptr inbounds [2 x i32*], [2 x i32*]* %gpuResult, i64 0, i64 %idxprom
|
||
|
%44 = load i32*, i32** %arrayidx23, align 8
|
||
|
%45 = bitcast i32* %44 to i8*
|
||
|
%46 = load i32, i32* @cols, align 4
|
||
|
%conv24 = sext i32 %46 to i64
|
||
|
%mul25 = mul i64 4, %conv24
|
||
|
%call26 = call i32 @cudaMemcpy(i8* %42, i8* %45, i64 %mul25, i32 2)
|
||
|
store i32 0, i32* %i, align 4
|
||
|
br label %for.cond
|
||
|
|
||
|
for.cond: ; preds = %for.inc, %entry
|
||
|
%47 = load i32, i32* %i, align 4
|
||
|
%48 = load i32, i32* @cols, align 4
|
||
|
%cmp27 = icmp slt i32 %47, %48
|
||
|
br i1 %cmp27, label %for.body, label %for.end
|
||
|
|
||
|
for.body: ; preds = %for.cond
|
||
|
%49 = load i32*, i32** @data, align 8
|
||
|
%50 = load i32, i32* %i, align 4
|
||
|
%idxprom28 = sext i32 %50 to i64
|
||
|
%arrayidx29 = getelementptr inbounds i32, i32* %49, i64 %idxprom28
|
||
|
%51 = load i32, i32* %arrayidx29, align 4
|
||
|
%call30 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.3, i64 0, i64 0), i32 %51)
|
||
|
br label %for.inc
|
||
|
|
||
|
for.inc: ; preds = %for.body
|
||
|
%52 = load i32, i32* %i, align 4
|
||
|
%inc = add nsw i32 %52, 1
|
||
|
store i32 %inc, i32* %i, align 4
|
||
|
br label %for.cond
|
||
|
|
||
|
for.end: ; preds = %for.cond
|
||
|
%call31 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.4, i64 0, i64 0))
|
||
|
store i32 0, i32* %i32, align 4
|
||
|
br label %for.cond33
|
||
|
|
||
|
for.cond33: ; preds = %for.inc39, %for.end
|
||
|
%53 = load i32, i32* %i32, align 4
|
||
|
%54 = load i32, i32* @cols, align 4
|
||
|
%cmp34 = icmp slt i32 %53, %54
|
||
|
br i1 %cmp34, label %for.body35, label %for.end41
|
||
|
|
||
|
for.body35: ; preds = %for.cond33
|
||
|
%55 = load i32*, i32** @result, align 8
|
||
|
%56 = load i32, i32* %i32, align 4
|
||
|
%idxprom36 = sext i32 %56 to i64
|
||
|
%arrayidx37 = getelementptr inbounds i32, i32* %55, i64 %idxprom36
|
||
|
%57 = load i32, i32* %arrayidx37, align 4
|
||
|
%call38 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.3, i64 0, i64 0), i32 %57)
|
||
|
br label %for.inc39
|
||
|
|
||
|
for.inc39: ; preds = %for.body35
|
||
|
%58 = load i32, i32* %i32, align 4
|
||
|
%inc40 = add nsw i32 %58, 1
|
||
|
store i32 %inc40, i32* %i32, align 4
|
||
|
br label %for.cond33
|
||
|
|
||
|
for.end41: ; preds = %for.cond33
|
||
|
%call42 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.4, i64 0, i64 0))
|
||
|
%59 = load i32*, i32** %gpuWall, align 8
|
||
|
%60 = bitcast i32* %59 to i8*
|
||
|
%call43 = call i32 @cudaFree(i8* %60)
|
||
|
%arrayidx44 = getelementptr inbounds [2 x i32*], [2 x i32*]* %gpuResult, i64 0, i64 0
|
||
|
%61 = load i32*, i32** %arrayidx44, align 16
|
||
|
%62 = bitcast i32* %61 to i8*
|
||
|
%call45 = call i32 @cudaFree(i8* %62)
|
||
|
%arrayidx46 = getelementptr inbounds [2 x i32*], [2 x i32*]* %gpuResult, i64 0, i64 1
|
||
|
%63 = load i32*, i32** %arrayidx46, align 8
|
||
|
%64 = bitcast i32* %63 to i8*
|
||
|
%call47 = call i32 @cudaFree(i8* %64)
|
||
|
%65 = load i32*, i32** @data, align 8
|
||
|
%isnull = icmp eq i32* %65, null
|
||
|
br i1 %isnull, label %delete.end, label %delete.notnull
|
||
|
|
||
|
delete.notnull: ; preds = %for.end41
|
||
|
%66 = bitcast i32* %65 to i8*
|
||
|
call void @_ZdaPv(i8* %66) #15
|
||
|
br label %delete.end
|
||
|
|
||
|
delete.end: ; preds = %delete.notnull, %for.end41
|
||
|
%67 = load i32**, i32*** @wall, align 8
|
||
|
%isnull48 = icmp eq i32** %67, null
|
||
|
br i1 %isnull48, label %delete.end50, label %delete.notnull49
|
||
|
|
||
|
delete.notnull49: ; preds = %delete.end
|
||
|
%68 = bitcast i32** %67 to i8*
|
||
|
call void @_ZdaPv(i8* %68) #15
|
||
|
br label %delete.end50
|
||
|
|
||
|
delete.end50: ; preds = %delete.notnull49, %delete.end
|
||
|
%69 = load i32*, i32** @result, align 8
|
||
|
%isnull51 = icmp eq i32* %69, null
|
||
|
br i1 %isnull51, label %delete.end53, label %delete.notnull52
|
||
|
|
||
|
delete.notnull52: ; preds = %delete.end50
|
||
|
%70 = bitcast i32* %69 to i8*
|
||
|
call void @_ZdaPv(i8* %70) #15
|
||
|
br label %delete.end53
|
||
|
|
||
|
delete.end53: ; preds = %delete.notnull52, %delete.end50
|
||
|
ret void
|
||
|
}
|
||
|
|
||
|
; External CUDA runtime API prototypes used by the host-side code above.
; Each returns a cudaError_t as i32 (0 == cudaSuccess); attrs #2 are the
; default host target attributes emitted by clang.
declare dso_local i32 @cudaMalloc(i8**, i64) #2
|
||
|
|
||
|
; cudaMemcpy(dst, src, bytes, kind) — kind 1 = HostToDevice, 2 = DeviceToHost
; (matches the literal i32 1 / i32 2 passed at the call sites above).
declare dso_local i32 @cudaMemcpy(i8*, i8*, i64, i32) #2
|
||
|
|
||
|
declare dso_local i32 @cudaFree(i8*) #2
|
||
|
|
||
|
; Function Attrs: nobuiltin nounwind
; _ZdaPv demangles to `operator delete[](void*)` — frees the host arrays
; (@data, @wall, @result) in the cleanup path above.
declare dso_local void @_ZdaPv(i8*) #10
|
||
|
|
||
|
; Clang-generated CUDA registration stub: binds this module's device kernel
; to the host runtime. %0 is the fatbin handle returned by
; __cudaRegisterFatBinary (passed in by __cuda_module_ctor).
define internal void @__cuda_register_globals(i8** %0) {
|
||
|
entry:
|
||
|
; Register _Z14dynproc_kerneliPiS_S_iiii (dynproc_kernel) under its mangled
; name (string @0, used for both deviceFun and deviceName). i32 -1 is the
; thread-limit slot per clang's registration ABI; the trailing nulls leave
; the tid/bid/blockDim/gridDim/wSize slots unused. The i32 status result is
; intentionally discarded, exactly as clang emits it.
%1 = call i32 @__cudaRegisterFunction(i8** %0, i8* bitcast (void (i32, i32*, i32*, i32*, i32, i32, i32, i32)* @_Z14dynproc_kerneliPiS_S_iiii to i8*), i8* getelementptr inbounds ([30 x i8], [30 x i8]* @0, i64 0, i64 0), i8* getelementptr inbounds ([30 x i8], [30 x i8]* @0, i64 0, i64 0), i32 -1, i8* null, i8* null, i8* null, i8* null, i32* null)
|
||
|
ret void
|
||
|
}
|
||
|
|
||
|
declare dso_local i32 @__cudaRegisterFunction(i8**, i8*, i8*, i8*, i32, i8*, i8*, i8*, i8*, i32*)
|
||
|
|
||
|
declare dso_local i32 @__cudaRegisterVar(i8**, i8*, i8*, i8*, i32, i32, i32, i32)
|
||
|
|
||
|
declare dso_local i8** @__cudaRegisterFatBinary(i8*)
|
||
|
|
||
|
; Module constructor (run at program startup via the global ctor list):
; registers the embedded fatbinary with the CUDA runtime, registers this
; module's globals/kernels against it, and schedules teardown at exit.
define internal void @__cuda_module_ctor(i8* %0) {
|
||
|
entry:
|
||
|
; @__cuda_fatbin_wrapper wraps the embedded device image (@1); its
; definition lies outside this chunk — presumably the usual
; { magic, version, fatbin-ptr, null } record; confirm in the full file.
%1 = call i8** @__cudaRegisterFatBinary(i8* bitcast ({ i32, i32, i8*, i8* }* @__cuda_fatbin_wrapper to i8*))
|
||
|
; Stash the runtime handle so __cuda_module_dtor can unregister it later.
store i8** %1, i8*** @__cuda_gpubin_handle, align 8
|
||
|
call void @__cuda_register_globals(i8** %1)
|
||
|
call void @__cudaRegisterFatBinaryEnd(i8** %1)
|
||
|
; atexit return code ignored (clang's stub does not check it).
%2 = call i32 @atexit(void (i8*)* @__cuda_module_dtor)
|
||
|
ret void
|
||
|
}
|
||
|
|
||
|
declare dso_local void @__cudaRegisterFatBinaryEnd(i8**)
|
||
|
|
||
|
declare dso_local void @__cudaUnregisterFatBinary(i8**)
|
||
|
|
||
|
; Module destructor (installed via atexit by __cuda_module_ctor):
; unregisters the fatbinary handle captured at startup. The i8* argument
; is unused — atexit handlers take no args; clang keeps the dtor's
; signature uniform with the ctor.
define internal void @__cuda_module_dtor(i8* %0) {
|
||
|
entry:
|
||
|
; Reload the handle stored by __cuda_module_ctor.
%1 = load i8**, i8*** @__cuda_gpubin_handle, align 8
|
||
|
call void @__cudaUnregisterFatBinary(i8** %1)
|
||
|
ret void
|
||
|
}
|
||
|
|
||
|
declare dso_local i32 @atexit(void (i8*)*)
|
||
|
|
||
|
attributes #0 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||
|
attributes #1 = { nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||
|
attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||
|
attributes #3 = { noreturn nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||
|
attributes #4 = { nounwind readnone speculatable willreturn }
|
||
|
attributes #5 = { nobuiltin "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||
|
attributes #6 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||
|
attributes #7 = { argmemonly nounwind willreturn }
|
||
|
attributes #8 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||
|
attributes #9 = { noinline norecurse optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||
|
attributes #10 = { nobuiltin nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||
|
attributes #11 = { nounwind readonly }
|
||
|
attributes #12 = { noreturn nounwind }
|
||
|
attributes #13 = { builtin }
|
||
|
attributes #14 = { nounwind }
|
||
|
attributes #15 = { builtin nounwind }
|
||
|
|
||
|
!llvm.module.flags = !{!0, !1}
|
||
|
!llvm.ident = !{!2}
|
||
|
|
||
|
!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]}
|
||
|
!1 = !{i32 1, !"wchar_size", i32 4}
|
||
|
!2 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"}
|