CuPBoP/examples/hotspot3D/3D-host-x86_64-unknown-linu...

1508 lines
137 KiB
LLVM
Raw Normal View History

2022-05-04 20:59:38 +08:00
; Host-side LLVM IR module compiled from 3D.cu for x86-64 Linux.
; ModuleID = '3D-host-x86_64-unknown-linux-gnu.bc'
source_filename = "3D.cu"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; C library stream object; functions in this module only handle it through pointers
; passed to fopen/fgets/feof/fputs/fprintf/fclose.
%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
; Two i64 fields; @_Z8get_timev reads field 0 as %tv_sec and field 1 as %tv_usec.
%struct.timeval = type { i64, i64 }
; Only ever passed as a null pointer to gettimeofday in this module's visible code.
%struct.timezone = type { i32, i32 }
; Three i32 fields, written as x, y, z (in that order) by @_ZN4dim3C2Ejjj.
%struct.dim3 = type { i32, i32, i32 }
; Opaque stream type; only used as a pointer argument to cudaLaunchKernel.
%struct.CUstream_st = type opaque
; COMDAT keys for functions emitted with linkonce_odr linkage.
$_ZN4dim3C2Ejjj = comdat any
$_ZSt4sqrtf = comdat any
; printf format used by @_Z12hotspot_opt1PfS_S_iiifffffi to report elapsed time.
@.str = private unnamed_addr constant [16 x i8] c"Time: %.3f (s)\0A\00", align 1
; Mutable float globals. The hex initializers are LLVM's double-hex spelling of float
; values: 0x3F40624DE0000000 is approximately 0.0005 and 0x3F90624DE0000000 is
; approximately 0.016.
@t_chip = dso_local global float 0x3F40624DE0000000, align 4
@chip_height = dso_local global float 0x3F90624DE0000000, align 4
@chip_width = dso_local global float 0x3F90624DE0000000, align 4
@amb_temp = dso_local global float 8.000000e+01, align 4
; C library stderr stream, defined outside this module.
@stderr = external dso_local global %struct._IO_FILE*, align 8
; Format used by @_Z5fatalPKc.
@.str.1 = private unnamed_addr constant [11 x i8] c"Error: %s\0A\00", align 1
; fopen mode and diagnostic messages used by @_Z9readinputPfiiiPc (.str.2 - .str.7).
@.str.2 = private unnamed_addr constant [2 x i8] c"r\00", align 1
@.str.3 = private unnamed_addr constant [24 x i8] c"The file was not opened\00", align 1
@.str.4 = private unnamed_addr constant [20 x i8] c"Error reading file\0A\00", align 1
@.str.5 = private unnamed_addr constant [25 x i8] c"not enough lines in file\00", align 1
@.str.6 = private unnamed_addr constant [3 x i8] c"%f\00", align 1
@.str.7 = private unnamed_addr constant [20 x i8] c"invalid file format\00", align 1
; fopen mode, failure message and per-element line format used by
; @_Z11writeoutputPfiiiPc (.str.8 - .str.10).
@.str.8 = private unnamed_addr constant [2 x i8] c"w\00", align 1
@.str.9 = private unnamed_addr constant [25 x i8] c"The file was not opened\0A\00", align 1
@.str.10 = private unnamed_addr constant [7 x i8] c"%d\09%g\0A\00", align 1
; Usage/help text lines (.str.11 - .str.17); their users are not in the visible part
; of the module.
@.str.11 = private unnamed_addr constant [81 x i8] c"Usage: %s <rows/cols> <layers> <iterations> <powerFile> <tempFile> <outputFile>\0A\00", align 1
@.str.12 = private unnamed_addr constant [68 x i8] c"\09<rows/cols> - number of rows/cols in the grid (positive integer)\0A\00", align 1
@.str.13 = private unnamed_addr constant [62 x i8] c"\09<layers> - number of layers in the grid (positive integer)\0A\00", align 1
@.str.14 = private unnamed_addr constant [37 x i8] c"\09<iteration> - number of iterations\0A\00", align 1
@.str.15 = private unnamed_addr constant [83 x i8] c"\09<powerFile> - name of the file containing the initial power values of each cell\0A\00", align 1
@.str.16 = private unnamed_addr constant [88 x i8] c"\09<tempFile> - name of the file containing the initial temperature values of each cell\0A\00", align 1
@.str.17 = private unnamed_addr constant [28 x i8] c"\09<outputFile - output file\0A\00", align 1
; Format for an accuracy report; its user is not in the visible part of the module.
@.str.18 = private unnamed_addr constant [14 x i8] c"Accuracy: %e\0A\00", align 1
; Mangled name of the hotspotOpt1 kernel as a NUL-terminated string.
; NOTE(review): presumably passed to the kernel registration call in the module
; constructor - confirm against @__cuda_module_ctor.
@0 = private unnamed_addr constant [33 x i8] c"_Z11hotspotOpt1PfS_S_fiiifffffff\00", align 1
@1 = private constant [27433 x i8] c"P\EDU\BA\01\00\10\00\18k\00\00\00\00\00\00\02\00\01\01@\00\00\00([\00\00\00\00\00\00\00\00\00\00\00\00\00\00\07\00\01\00=\00\00\00\00\00\00\00\00\00\00\00\11\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\7FELF\02\01\013\07\00\00\00\00\00\00\00\02\00\BE\00e\00\00\00\00\00\00\00\00\00\00\00\80Z\00\00\00\00\00\00@X\00\00\00\00\00\00=\05=\00@\008\00\03\00@\00\09\00\01\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00.text._Z11hotspotOpt1PfS_S_fiiifffffff\00.nv.info._Z11hotspotOpt1PfS_S_fiiifffffff\00.nv.shared._Z11hotspotOpt1PfS_S_fiiifffffff\00.nv.global\00.nv.constant0._Z11hotspotOpt1PfS_S_fiiifffffff\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00_Z11hotspotOpt1PfS_S_fiiifffffff\00.text._Z11hotspotOpt1PfS_S_fiiifffffff\00.nv.info._Z11hotspotOpt1PfS_S_fiiifffffff\00.nv.shared._Z11hotspotOpt1PfS_S_fiiifffffff\00.nv.global\00blockDim\00blockIdx\00threadIdx\00.nv.constant0._Z11hotspotOpt1PfS_S_fiiifffffff\00_param\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00S\00\00\00\03\00\07\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\D0\00\00\00\03\00\08\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\DB\00\00\00\01\00\08\00\01\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\E4\00\00\00\01\00\08\00\02\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\ED\00\00\00\01\00\08\00\00\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\F7\00\00\00\03\00\06\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\002\00\00\00\12\10\07\00\00\00\00\00\00\00\00\00@R\00\00\00\00\00\00\04/\08\00\07\00\00\00\17\00\00\00\04#\08\00\07\00\00\00\00\00\00\00\04\12\08\00\07\00\00\00x\00\00\00\04\11\08\00\07\00\00\00x\00\00\00\010\00\00\01*\00\00\04\0A\08\00\06\00\00\00@\01D\00\03\19D\00\04\17\0C\00\00\00\00\00\0D\00@\00\00\F0\11\00\04\17\0C\00\00\00\00\00\0C\00<\00\00\F0\11\00\04\17\0C\00\00\00\00\00\0B\008\00\00\F0\11\00\04\17\0C\00\00\00\00\00\0A\004\00\00\F0\11\00\04\17\0C\00\00\00\00\00\09
\000\00\00\F0\11\00\04\17\0C\00\00\00\00\00\08\00,\00\00\F0\11\00\04\17\0C\00\00\00\00\00\07\00(\00\00\F0\11\00\04\17\0C\00\00\00\00\00\06\00$\00\00\F0\11\00\04\17\0C\00\00\00\00\00\05\00 \00\00\F0\11\00\04\17\0C\00\00\00\00\00\04\00\1C\00\00\F0\11\00\04\17\0C\00\00\00\00\00\03\00\18\00\00\F0\11\00\04\17\0C\00\00\00\00\00\02\00\10\00\00\F0!\00\04\17\0C\00\00\00\00\00\01\00\08\00\00\F0!\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0!\00\03\1B\FF\00\04\1D\08\00X\09\00\00\08\0A\00\00\04\1C\04\00\18R\00\00\04\1E\04\00 \00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\EF\1F\E0\FD\03<d\00\01\00\87\00\80\07\98L\01\01\87\F8\FF\FF\0F\1C\00\00w\03\00\00\C8\F0\EF\1F\E0\FD\03\BC\7F\00\07\01\07\00\80\03l[\0F\00\80\00\00\00@\E2\C0\00\10\00\00\00\A0\E3\EF\1F\E0!\03\BC\7F\00\00\01\F7\0F\00\00\10\\\0
0\0A\07\00\00\00\E0\\\02\00\07\00\80\07\98\\\EF\1F\E0\FD\03\BC\7F\00\03\00\F7\0F\80\07\98\\\00\
; Fat binary wrapper record placed in section ".nvFatBinSegment": magic value
; 1180844977 (0x466243B1), version 1, pointer to the embedded device image @1, and a
; null fourth field.
@__cuda_fatbin_wrapper = internal constant { i32, i32, i8*, i8* } { i32 1180844977, i32 1, i8* getelementptr inbounds ([27433 x i8], [27433 x i8]* @1, i64 0, i64 0), i8* null }, section ".nvFatBinSegment", align 8
; Module-local handle slot, null until something stores to it.
; NOTE(review): presumably filled in by @__cuda_module_ctor - confirm.
@__cuda_gpubin_handle = internal global i8** null, align 8
; Registers @__cuda_module_ctor as a global constructor with priority 65535. The
; function is declared as taking i8* and is bitcast to the void() constructor type.
@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* bitcast (void (i8*)* @__cuda_module_ctor to void ()*), i8* null }]
; get_time(): returns the current wall-clock time as a single i64 count of
; microseconds, computed as tv_sec * 1000000 + tv_usec from gettimeofday.
; The return value of gettimeofday itself is not inspected.
; Function Attrs: noinline nounwind optnone uwtable
define dso_local i64 @_Z8get_timev() #0 {
entry:
  ; Stack storage for the timeval filled in by gettimeofday.
  %now = alloca %struct.timeval, align 8
  %rc = call i32 @gettimeofday(%struct.timeval* %now, %struct.timezone* null) #8

  ; Field addresses: index 0 is seconds, index 1 is microseconds.
  %sec.ptr = getelementptr inbounds %struct.timeval, %struct.timeval* %now, i32 0, i32 0
  %usec.ptr = getelementptr inbounds %struct.timeval, %struct.timeval* %now, i32 0, i32 1

  ; Scale whole seconds to microseconds, then add the sub-second part.
  %sec = load i64, i64* %sec.ptr, align 8
  %sec.as.usec = mul nsw i64 %sec, 1000000
  %usec = load i64, i64* %usec.ptr, align 8
  %total.usec = add nsw i64 %sec.as.usec, %usec
  ret i64 %total.usec
}
; Function Attrs: nounwind
declare dso_local i32 @gettimeofday(%struct.timeval*, %struct.timezone*) #1
; Host-side launch stub for the hotspotOpt1 kernel.
;
; Spills its 14 arguments to stack slots, builds an array of 14 pointers to those
; slots, retrieves the launch configuration with __cudaPopCallConfiguration, and
; calls cudaLaunchKernel using this function's own address as the kernel handle.
; The i32 results of __cudaPopCallConfiguration and cudaLaunchKernel are not
; inspected; the function always returns void.
; Function Attrs: noinline optnone uwtable
define dso_local void @_Z11hotspotOpt1PfS_S_fiiifffffff(float* %p, float* %tIn, float* %tOut, float %sdc, i32 %nx, i32 %ny, i32 %nz, float %ce, float %cw, float %cn, float %cs, float %ct, float %cb, float %cc) #2 {
entry:
; One stack slot per kernel argument; the argument array below points at these.
%p.addr = alloca float*, align 8
%tIn.addr = alloca float*, align 8
%tOut.addr = alloca float*, align 8
%sdc.addr = alloca float, align 4
%nx.addr = alloca i32, align 4
%ny.addr = alloca i32, align 4
%nz.addr = alloca i32, align 4
%ce.addr = alloca float, align 4
%cw.addr = alloca float, align 4
%cn.addr = alloca float, align 4
%cs.addr = alloca float, align 4
%ct.addr = alloca float, align 4
%cb.addr = alloca float, align 4
%cc.addr = alloca float, align 4
; Outputs of __cudaPopCallConfiguration.
%grid_dim = alloca %struct.dim3, align 8
%block_dim = alloca %struct.dim3, align 8
%shmem_size = alloca i64, align 8
%stream = alloca i8*, align 8
; Scratch used to reinterpret each 12-byte dim3 as an { i64, i32 } pair, which is
; the form in which cudaLaunchKernel is declared to take the dimensions.
%grid_dim.coerce = alloca { i64, i32 }, align 8
%block_dim.coerce = alloca { i64, i32 }, align 8
store float* %p, float** %p.addr, align 8
store float* %tIn, float** %tIn.addr, align 8
store float* %tOut, float** %tOut.addr, align 8
store float %sdc, float* %sdc.addr, align 4
store i32 %nx, i32* %nx.addr, align 4
store i32 %ny, i32* %ny.addr, align 4
store i32 %nz, i32* %nz.addr, align 4
store float %ce, float* %ce.addr, align 4
store float %cw, float* %cw.addr, align 4
store float %cn, float* %cn.addr, align 4
store float %cs, float* %cs.addr, align 4
store float %ct, float* %ct.addr, align 4
store float %cb, float* %cb.addr, align 4
store float %cc, float* %cc.addr, align 4
; kernel_args[i] = address of the i-th argument's stack slot, in declaration order.
%kernel_args = alloca i8*, i64 14, align 16
%0 = bitcast float** %p.addr to i8*
%1 = getelementptr i8*, i8** %kernel_args, i32 0
store i8* %0, i8** %1
%2 = bitcast float** %tIn.addr to i8*
%3 = getelementptr i8*, i8** %kernel_args, i32 1
store i8* %2, i8** %3
%4 = bitcast float** %tOut.addr to i8*
%5 = getelementptr i8*, i8** %kernel_args, i32 2
store i8* %4, i8** %5
%6 = bitcast float* %sdc.addr to i8*
%7 = getelementptr i8*, i8** %kernel_args, i32 3
store i8* %6, i8** %7
%8 = bitcast i32* %nx.addr to i8*
%9 = getelementptr i8*, i8** %kernel_args, i32 4
store i8* %8, i8** %9
%10 = bitcast i32* %ny.addr to i8*
%11 = getelementptr i8*, i8** %kernel_args, i32 5
store i8* %10, i8** %11
%12 = bitcast i32* %nz.addr to i8*
%13 = getelementptr i8*, i8** %kernel_args, i32 6
store i8* %12, i8** %13
%14 = bitcast float* %ce.addr to i8*
%15 = getelementptr i8*, i8** %kernel_args, i32 7
store i8* %14, i8** %15
%16 = bitcast float* %cw.addr to i8*
%17 = getelementptr i8*, i8** %kernel_args, i32 8
store i8* %16, i8** %17
%18 = bitcast float* %cn.addr to i8*
%19 = getelementptr i8*, i8** %kernel_args, i32 9
store i8* %18, i8** %19
%20 = bitcast float* %cs.addr to i8*
%21 = getelementptr i8*, i8** %kernel_args, i32 10
store i8* %20, i8** %21
%22 = bitcast float* %ct.addr to i8*
%23 = getelementptr i8*, i8** %kernel_args, i32 11
store i8* %22, i8** %23
%24 = bitcast float* %cb.addr to i8*
%25 = getelementptr i8*, i8** %kernel_args, i32 12
store i8* %24, i8** %25
%26 = bitcast float* %cc.addr to i8*
%27 = getelementptr i8*, i8** %kernel_args, i32 13
store i8* %26, i8** %27
; Fetch grid/block dimensions, shared-memory size and stream; result %28 is unused.
%28 = call i32 @__cudaPopCallConfiguration(%struct.dim3* %grid_dim, %struct.dim3* %block_dim, i64* %shmem_size, i8** %stream)
%29 = load i64, i64* %shmem_size, align 8
%30 = load i8*, i8** %stream, align 8
; Copy the 12-byte grid dim3 into the coerce slot and read it back as i64 + i32.
%31 = bitcast { i64, i32 }* %grid_dim.coerce to i8*
%32 = bitcast %struct.dim3* %grid_dim to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %31, i8* align 8 %32, i64 12, i1 false)
%33 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 0
%34 = load i64, i64* %33, align 8
%35 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 1
%36 = load i32, i32* %35, align 8
; Same reinterpretation for the block dim3.
%37 = bitcast { i64, i32 }* %block_dim.coerce to i8*
%38 = bitcast %struct.dim3* %block_dim to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %37, i8* align 8 %38, i64 12, i1 false)
%39 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 0
%40 = load i64, i64* %39, align 8
%41 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 1
%42 = load i32, i32* %41, align 8
%43 = bitcast i8* %30 to %struct.CUstream_st*
; The first argument is this stub's own address; the launch result %call is unused.
%call = call i32 @cudaLaunchKernel(i8* bitcast (void (float*, float*, float*, float, i32, i32, i32, float, float, float, float, float, float, float)* @_Z11hotspotOpt1PfS_S_fiiifffffff to i8*), i64 %34, i32 %36, i64 %40, i32 %42, i8** %kernel_args, i64 %29, %struct.CUstream_st* %43)
br label %setup.end
setup.end: ; preds = %entry
ret void
}
declare dso_local i32 @__cudaPopCallConfiguration(%struct.dim3*, %struct.dim3*, i64*, i8**)
declare dso_local i32 @cudaLaunchKernel(i8*, i64, i32, i64, i32, i8**, i64, %struct.CUstream_st*)
; Function Attrs: argmemonly nounwind willreturn
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #3
; hotspot_opt1(p, tIn, tOut, nx, ny, nz, Cap, Rx, Ry, Rz, dt, numiter)
;
; Host driver for the hotspotOpt1 kernel:
;   1. Derives the coefficients stepDivCap = dt/Cap, ce = cw = stepDivCap/Rx,
;      cn = cs = stepDivCap/Ry, ct = cb = stepDivCap/Rz and
;      cc = 1.0 - (2*ce + 2*cn + 3*ct), evaluated in double and truncated to float.
;   2. Allocates three buffers of s = 4*nx*ny*nz bytes with cudaMalloc and copies
;      tIn and p into two of them with cudaMemcpy (kind argument 1).
;   3. Launches the kernel stub numiter times with block (64,4,1) and grid
;      (nx/64, ny/4, 1), swapping the %tIn_d/%tOut_d pointers after each launch.
;   4. Calls cudaDeviceSynchronize, prints the elapsed time in seconds, copies s
;      bytes from the pointer held in %tOut_d to tOut (kind argument 2), and frees
;      the three buffers.
;
; The i32 results of every cudaMalloc/cudaMemcpy/cudaDeviceSynchronize/cudaFree
; call are ignored.
; Function Attrs: noinline optnone uwtable
define dso_local void @_Z12hotspot_opt1PfS_S_iiifffffi(float* %p, float* %tIn, float* %tOut, i32 %nx, i32 %ny, i32 %nz, float %Cap, float %Rx, float %Ry, float %Rz, float %dt, i32 %numiter) #2 {
entry:
%p.addr = alloca float*, align 8
%tIn.addr = alloca float*, align 8
%tOut.addr = alloca float*, align 8
%nx.addr = alloca i32, align 4
%ny.addr = alloca i32, align 4
%nz.addr = alloca i32, align 4
%Cap.addr = alloca float, align 4
%Rx.addr = alloca float, align 4
%Ry.addr = alloca float, align 4
%Rz.addr = alloca float, align 4
%dt.addr = alloca float, align 4
%numiter.addr = alloca i32, align 4
%ce = alloca float, align 4
%cw = alloca float, align 4
%cn = alloca float, align 4
%cs = alloca float, align 4
%ct = alloca float, align 4
%cb = alloca float, align 4
%cc = alloca float, align 4
%stepDivCap = alloca float, align 4
%s = alloca i64, align 8
%tIn_d = alloca float*, align 8
%tOut_d = alloca float*, align 8
%p_d = alloca float*, align 8
%block_dim = alloca %struct.dim3, align 4
%grid_dim = alloca %struct.dim3, align 4
%start = alloca i64, align 8
%i = alloca i32, align 4
%agg.tmp = alloca %struct.dim3, align 4
%agg.tmp23 = alloca %struct.dim3, align 4
%agg.tmp.coerce = alloca { i64, i32 }, align 4
%agg.tmp23.coerce = alloca { i64, i32 }, align 4
%t = alloca float*, align 8
%stop = alloca i64, align 8
%time = alloca float, align 4
store float* %p, float** %p.addr, align 8
store float* %tIn, float** %tIn.addr, align 8
store float* %tOut, float** %tOut.addr, align 8
store i32 %nx, i32* %nx.addr, align 4
store i32 %ny, i32* %ny.addr, align 4
store i32 %nz, i32* %nz.addr, align 4
store float %Cap, float* %Cap.addr, align 4
store float %Rx, float* %Rx.addr, align 4
store float %Ry, float* %Ry.addr, align 4
store float %Rz, float* %Rz.addr, align 4
store float %dt, float* %dt.addr, align 4
store i32 %numiter, i32* %numiter.addr, align 4
; stepDivCap = dt / Cap
%0 = load float, float* %dt.addr, align 4
%1 = load float, float* %Cap.addr, align 4
%div = fdiv float %0, %1
store float %div, float* %stepDivCap, align 4
; ce = cw = stepDivCap / Rx
%2 = load float, float* %stepDivCap, align 4
%3 = load float, float* %Rx.addr, align 4
%div1 = fdiv float %2, %3
store float %div1, float* %cw, align 4
store float %div1, float* %ce, align 4
; cn = cs = stepDivCap / Ry
%4 = load float, float* %stepDivCap, align 4
%5 = load float, float* %Ry.addr, align 4
%div2 = fdiv float %4, %5
store float %div2, float* %cs, align 4
store float %div2, float* %cn, align 4
; ct = cb = stepDivCap / Rz
%6 = load float, float* %stepDivCap, align 4
%7 = load float, float* %Rz.addr, align 4
%div3 = fdiv float %6, %7
store float %div3, float* %cb, align 4
store float %div3, float* %ct, align 4
; cc = (float)(1.0 - (2.0*ce + 2.0*cn + 3.0*ct)), with the arithmetic done in double.
%8 = load float, float* %ce, align 4
%conv = fpext float %8 to double
%mul = fmul contract double 2.000000e+00, %conv
%9 = load float, float* %cn, align 4
%conv4 = fpext float %9 to double
%mul5 = fmul contract double 2.000000e+00, %conv4
%add = fadd contract double %mul, %mul5
%10 = load float, float* %ct, align 4
%conv6 = fpext float %10 to double
%mul7 = fmul contract double 3.000000e+00, %conv6
%add8 = fadd contract double %add, %mul7
%sub = fsub contract double 1.000000e+00, %add8
%conv9 = fptrunc double %sub to float
store float %conv9, float* %cc, align 4
; s = 4 * nx * ny * nz as i64, with each dimension sign-extended first.
; NOTE(review): the constant 4 is presumably sizeof(float) - confirm against 3D.cu.
%11 = load i32, i32* %nx.addr, align 4
%conv10 = sext i32 %11 to i64
%mul11 = mul i64 4, %conv10
%12 = load i32, i32* %ny.addr, align 4
%conv12 = sext i32 %12 to i64
%mul13 = mul i64 %mul11, %conv12
%13 = load i32, i32* %nz.addr, align 4
%conv14 = sext i32 %13 to i64
%mul15 = mul i64 %mul13, %conv14
store i64 %mul15, i64* %s, align 8
; Allocate p_d, tIn_d and tOut_d, each of s bytes. Results are not checked.
%14 = bitcast float** %p_d to i8**
%15 = load i64, i64* %s, align 8
%call = call i32 @cudaMalloc(i8** %14, i64 %15)
%16 = bitcast float** %tIn_d to i8**
%17 = load i64, i64* %s, align 8
%call16 = call i32 @cudaMalloc(i8** %16, i64 %17)
%18 = bitcast float** %tOut_d to i8**
%19 = load i64, i64* %s, align 8
%call17 = call i32 @cudaMalloc(i8** %18, i64 %19)
; Copy tIn -> tIn_d and p -> p_d with kind argument 1.
; NOTE(review): 1 is cudaMemcpyHostToDevice in the CUDA runtime enum - confirm
; against the headers used to build this file.
%20 = load float*, float** %tIn_d, align 8
%21 = bitcast float* %20 to i8*
%22 = load float*, float** %tIn.addr, align 8
%23 = bitcast float* %22 to i8*
%24 = load i64, i64* %s, align 8
%call18 = call i32 @cudaMemcpy(i8* %21, i8* %23, i64 %24, i32 1)
%25 = load float*, float** %p_d, align 8
%26 = bitcast float* %25 to i8*
%27 = load float*, float** %p.addr, align 8
%28 = bitcast float* %27 to i8*
%29 = load i64, i64* %s, align 8
%call19 = call i32 @cudaMemcpy(i8* %26, i8* %28, i64 %29, i32 1)
; block_dim = (64, 4, 1); grid_dim = (nx / 64, ny / 4, 1).
; The divisions are signed integer divisions, so any remainder is discarded.
call void @_ZN4dim3C2Ejjj(%struct.dim3* %block_dim, i32 64, i32 4, i32 1)
%30 = load i32, i32* %nx.addr, align 4
%div20 = sdiv i32 %30, 64
%31 = load i32, i32* %ny.addr, align 4
%div21 = sdiv i32 %31, 4
call void @_ZN4dim3C2Ejjj(%struct.dim3* %grid_dim, i32 %div20, i32 %div21, i32 1)
; Start timestamp (microseconds) and loop counter i = 0.
%call22 = call i64 @_Z8get_timev()
store i64 %call22, i64* %start, align 8
store i32 0, i32* %i, align 4
br label %for.cond
; Loop while i < numiter.
for.cond: ; preds = %for.inc, %entry
%32 = load i32, i32* %i, align 4
%33 = load i32, i32* %numiter.addr, align 4
%cmp = icmp slt i32 %32, %33
br i1 %cmp, label %for.body, label %for.end
for.body: ; preds = %for.cond
; Copy grid_dim/block_dim into temporaries, then reinterpret each 12-byte dim3 as
; an { i64, i32 } pair for __cudaPushCallConfiguration.
%34 = bitcast %struct.dim3* %agg.tmp to i8*
%35 = bitcast %struct.dim3* %grid_dim to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %34, i8* align 4 %35, i64 12, i1 false)
%36 = bitcast %struct.dim3* %agg.tmp23 to i8*
%37 = bitcast %struct.dim3* %block_dim to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %36, i8* align 4 %37, i64 12, i1 false)
%38 = bitcast { i64, i32 }* %agg.tmp.coerce to i8*
%39 = bitcast %struct.dim3* %agg.tmp to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %38, i8* align 4 %39, i64 12, i1 false)
%40 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 0
%41 = load i64, i64* %40, align 4
%42 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 1
%43 = load i32, i32* %42, align 4
%44 = bitcast { i64, i32 }* %agg.tmp23.coerce to i8*
%45 = bitcast %struct.dim3* %agg.tmp23 to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %44, i8* align 4 %45, i64 12, i1 false)
%46 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp23.coerce, i32 0, i32 0
%47 = load i64, i64* %46, align 4
%48 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp23.coerce, i32 0, i32 1
%49 = load i32, i32* %48, align 4
; Push the launch configuration with the last two arguments 0 and null. A non-zero
; result skips the kernel stub call for this iteration.
%call24 = call i32 @__cudaPushCallConfiguration(i64 %41, i32 %43, i64 %47, i32 %49, i64 0, i8* null)
%tobool = icmp ne i32 %call24, 0
br i1 %tobool, label %kcall.end, label %kcall.configok
kcall.configok: ; preds = %for.body
%50 = load float*, float** %p_d, align 8
%51 = load float*, float** %tIn_d, align 8
%52 = load float*, float** %tOut_d, align 8
%53 = load float, float* %stepDivCap, align 4
%54 = load i32, i32* %nx.addr, align 4
%55 = load i32, i32* %ny.addr, align 4
%56 = load i32, i32* %nz.addr, align 4
%57 = load float, float* %ce, align 4
%58 = load float, float* %cw, align 4
%59 = load float, float* %cn, align 4
%60 = load float, float* %cs, align 4
%61 = load float, float* %ct, align 4
%62 = load float, float* %cb, align 4
%63 = load float, float* %cc, align 4
call void @_Z11hotspotOpt1PfS_S_fiiifffffff(float* %50, float* %51, float* %52, float %53, i32 %54, i32 %55, i32 %56, float %57, float %58, float %59, float %60, float %61, float %62, float %63)
br label %kcall.end
kcall.end: ; preds = %kcall.configok, %for.body
; Swap the tIn_d and tOut_d pointers through the temporary %t. This runs on both
; paths into this block, including when the launch was skipped.
%64 = load float*, float** %tIn_d, align 8
store float* %64, float** %t, align 8
%65 = load float*, float** %tOut_d, align 8
store float* %65, float** %tIn_d, align 8
%66 = load float*, float** %t, align 8
store float* %66, float** %tOut_d, align 8
br label %for.inc
for.inc: ; preds = %kcall.end
%67 = load i32, i32* %i, align 4
%inc = add nsw i32 %67, 1
store i32 %inc, i32* %i, align 4
br label %for.cond
for.end: ; preds = %for.cond
; Wait for the device, then time = (float)((stop - start) / 1e6) seconds.
%call25 = call i32 @cudaDeviceSynchronize()
%call26 = call i64 @_Z8get_timev()
store i64 %call26, i64* %stop, align 8
%68 = load i64, i64* %stop, align 8
%69 = load i64, i64* %start, align 8
%sub27 = sub nsw i64 %68, %69
%conv28 = sitofp i64 %sub27 to double
%div29 = fdiv double %conv28, 1.000000e+06
%conv30 = fptrunc double %div29 to float
store float %conv30, float* %time, align 4
%70 = load float, float* %time, align 4
%conv31 = fpext float %70 to double
%call32 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str, i64 0, i64 0), double %conv31)
; Copy s bytes from the pointer currently held in %tOut_d to the caller's tOut
; (kind argument 2).
; NOTE(review): because of the swap at the end of every iteration, %tOut_d here is
; the buffer that was the kernel's input on the final iteration, not the one it
; last wrote. Confirm this is intended and matches the reference computation
; before changing it.
%71 = load float*, float** %tOut.addr, align 8
%72 = bitcast float* %71 to i8*
%73 = load float*, float** %tOut_d, align 8
%74 = bitcast float* %73 to i8*
%75 = load i64, i64* %s, align 8
%call33 = call i32 @cudaMemcpy(i8* %72, i8* %74, i64 %75, i32 2)
; Release the three buffers.
%76 = load float*, float** %p_d, align 8
%77 = bitcast float* %76 to i8*
%call34 = call i32 @cudaFree(i8* %77)
%78 = load float*, float** %tIn_d, align 8
%79 = bitcast float* %78 to i8*
%call35 = call i32 @cudaFree(i8* %79)
%80 = load float*, float** %tOut_d, align 8
%81 = bitcast float* %80 to i8*
%call36 = call i32 @cudaFree(i8* %81)
ret void
}
declare dso_local i32 @cudaMalloc(i8**, i64) #4
declare dso_local i32 @cudaMemcpy(i8*, i8*, i64, i32) #4
; dim3::dim3(unsigned vx, unsigned vy, unsigned vz)
; Stores vx, vy and vz into fields 0, 1 and 2 of the dim3 object addressed by %this.
; Function Attrs: noinline nounwind optnone uwtable
define linkonce_odr dso_local void @_ZN4dim3C2Ejjj(%struct.dim3* %this, i32 %vx, i32 %vy, i32 %vz) unnamed_addr #0 comdat align 2 {
entry:
  ; Spill the incoming arguments to stack slots.
  %self.slot = alloca %struct.dim3*, align 8
  %vx.slot = alloca i32, align 4
  %vy.slot = alloca i32, align 4
  %vz.slot = alloca i32, align 4
  store %struct.dim3* %this, %struct.dim3** %self.slot, align 8
  store i32 %vx, i32* %vx.slot, align 4
  store i32 %vy, i32* %vy.slot, align 4
  store i32 %vz, i32* %vz.slot, align 4

  ; Reload the object pointer and compute the three field addresses.
  %self = load %struct.dim3*, %struct.dim3** %self.slot, align 8
  %x.field = getelementptr inbounds %struct.dim3, %struct.dim3* %self, i32 0, i32 0
  %y.field = getelementptr inbounds %struct.dim3, %struct.dim3* %self, i32 0, i32 1
  %z.field = getelementptr inbounds %struct.dim3, %struct.dim3* %self, i32 0, i32 2

  ; Write the fields in x, y, z order.
  %vx.val = load i32, i32* %vx.slot, align 4
  store i32 %vx.val, i32* %x.field, align 4
  %vy.val = load i32, i32* %vy.slot, align 4
  store i32 %vy.val, i32* %y.field, align 4
  %vz.val = load i32, i32* %vz.slot, align 4
  store i32 %vz.val, i32* %z.field, align 4
  ret void
}
declare dso_local i32 @__cudaPushCallConfiguration(i64, i32, i64, i32, i64, i8*) #4
declare dso_local i32 @cudaDeviceSynchronize() #4
declare dso_local i32 @printf(i8*, ...) #4
declare dso_local i32 @cudaFree(i8*) #4
; fatal(const char *s)
; Writes "Error: <s>\n" to stderr with fprintf and then returns to the caller.
; It does not terminate the program.
; Function Attrs: noinline optnone uwtable
define dso_local void @_Z5fatalPKc(i8* %s) #2 {
entry:
  %msg.slot = alloca i8*, align 8
  store i8* %s, i8** %msg.slot, align 8
  %err.stream = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
  %msg = load i8*, i8** %msg.slot, align 8
  ; The fprintf result is not inspected.
  %written = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %err.stream, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.1, i64 0, i64 0), i8* %msg)
  ret void
}
declare dso_local i32 @fprintf(%struct._IO_FILE*, i8*, ...) #4
; readinput(float *vect, int grid_rows, int grid_cols, int layers, char *file)
;
; Opens `file` for reading and, for every (i, j, k) with i in [0, grid_rows),
; j in [0, grid_cols), k in [0, layers) (k innermost), reads one line of up to
; 255 characters with fgets, parses a float from it with sscanf("%f"), and
; stores it at vect[i*grid_cols + j + k*grid_rows*grid_cols]. Closes the file
; at the end.
;
; NOTE(review): every error path calls @_Z5fatalPKc, which in this module only
; prints a message and returns. Execution therefore continues after each
; failure: a failed fopen leaves a null FILE* that is then passed to
; fgets/feof/fclose, and a failed fgets/sscanf still stores %val.
; Function Attrs: noinline optnone uwtable
define dso_local void @_Z9readinputPfiiiPc(float* %vect, i32 %grid_rows, i32 %grid_cols, i32 %layers, i8* %file) #2 {
entry:
%vect.addr = alloca float*, align 8
%grid_rows.addr = alloca i32, align 4
%grid_cols.addr = alloca i32, align 4
%layers.addr = alloca i32, align 4
%file.addr = alloca i8*, align 8
%i = alloca i32, align 4
%j = alloca i32, align 4
%k = alloca i32, align 4
%fp = alloca %struct._IO_FILE*, align 8
; 256-byte line buffer for fgets, and the parsed value written by sscanf.
%str = alloca [256 x i8], align 16
%val = alloca float, align 4
store float* %vect, float** %vect.addr, align 8
store i32 %grid_rows, i32* %grid_rows.addr, align 4
store i32 %grid_cols, i32* %grid_cols.addr, align 4
store i32 %layers, i32* %layers.addr, align 4
store i8* %file, i8** %file.addr, align 8
; fp = fopen(file, "r"); report a null result, then continue regardless.
%0 = load i8*, i8** %file.addr, align 8
%call = call %struct._IO_FILE* @fopen(i8* %0, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.2, i64 0, i64 0))
store %struct._IO_FILE* %call, %struct._IO_FILE** %fp, align 8
%cmp = icmp eq %struct._IO_FILE* %call, null
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
call void @_Z5fatalPKc(i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.3, i64 0, i64 0))
br label %if.end
if.end: ; preds = %if.then, %entry
store i32 0, i32* %i, align 4
br label %for.cond
; Outer loop: i <= grid_rows - 1.
for.cond: ; preds = %for.inc28, %if.end
%1 = load i32, i32* %i, align 4
%2 = load i32, i32* %grid_rows.addr, align 4
%sub = sub nsw i32 %2, 1
%cmp1 = icmp sle i32 %1, %sub
br i1 %cmp1, label %for.body, label %for.end30
for.body: ; preds = %for.cond
store i32 0, i32* %j, align 4
br label %for.cond2
; Middle loop: j <= grid_cols - 1.
for.cond2: ; preds = %for.inc25, %for.body
%3 = load i32, i32* %j, align 4
%4 = load i32, i32* %grid_cols.addr, align 4
%sub3 = sub nsw i32 %4, 1
%cmp4 = icmp sle i32 %3, %sub3
br i1 %cmp4, label %for.body5, label %for.end27
for.body5: ; preds = %for.cond2
store i32 0, i32* %k, align 4
br label %for.cond6
; Inner loop: k <= layers - 1.
for.cond6: ; preds = %for.inc, %for.body5
%5 = load i32, i32* %k, align 4
%6 = load i32, i32* %layers.addr, align 4
%sub7 = sub nsw i32 %6, 1
%cmp8 = icmp sle i32 %5, %sub7
br i1 %cmp8, label %for.body9, label %for.end
for.body9: ; preds = %for.cond6
; Read the next line into %str; a null result is reported as a read error.
%arraydecay = getelementptr inbounds [256 x i8], [256 x i8]* %str, i64 0, i64 0
%7 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8
%call10 = call i8* @fgets(i8* %arraydecay, i32 256, %struct._IO_FILE* %7)
%cmp11 = icmp eq i8* %call10, null
br i1 %cmp11, label %if.then12, label %if.end13
if.then12: ; preds = %for.body9
call void @_Z5fatalPKc(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str.4, i64 0, i64 0))
br label %if.end13
if.end13: ; preds = %if.then12, %for.body9
; If the stream's EOF indicator is set after the read, report "not enough lines".
%8 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8
%call14 = call i32 @feof(%struct._IO_FILE* %8) #8
%tobool = icmp ne i32 %call14, 0
br i1 %tobool, label %if.then15, label %if.end16
if.then15: ; preds = %if.end13
call void @_Z5fatalPKc(i8* getelementptr inbounds ([25 x i8], [25 x i8]* @.str.5, i64 0, i64 0))
br label %if.end16
if.end16: ; preds = %if.then15, %if.end13
; Parse one float from the line; anything other than exactly one conversion is
; reported as an invalid format.
%arraydecay17 = getelementptr inbounds [256 x i8], [256 x i8]* %str, i64 0, i64 0
%call18 = call i32 (i8*, i8*, ...) @sscanf(i8* %arraydecay17, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.6, i64 0, i64 0), float* %val) #8
%cmp19 = icmp ne i32 %call18, 1
br i1 %cmp19, label %if.then20, label %if.end21
if.then20: ; preds = %if.end16
call void @_Z5fatalPKc(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str.7, i64 0, i64 0))
br label %if.end21
if.end21: ; preds = %if.then20, %if.end16
; vect[i*grid_cols + j + k*grid_rows*grid_cols] = val (index computed in i32).
%9 = load float, float* %val, align 4
%10 = load float*, float** %vect.addr, align 8
%11 = load i32, i32* %i, align 4
%12 = load i32, i32* %grid_cols.addr, align 4
%mul = mul nsw i32 %11, %12
%13 = load i32, i32* %j, align 4
%add = add nsw i32 %mul, %13
%14 = load i32, i32* %k, align 4
%15 = load i32, i32* %grid_rows.addr, align 4
%mul22 = mul nsw i32 %14, %15
%16 = load i32, i32* %grid_cols.addr, align 4
%mul23 = mul nsw i32 %mul22, %16
%add24 = add nsw i32 %add, %mul23
%idxprom = sext i32 %add24 to i64
%arrayidx = getelementptr inbounds float, float* %10, i64 %idxprom
store float %9, float* %arrayidx, align 4
br label %for.inc
for.inc: ; preds = %if.end21
%17 = load i32, i32* %k, align 4
%inc = add nsw i32 %17, 1
store i32 %inc, i32* %k, align 4
br label %for.cond6
for.end: ; preds = %for.cond6
br label %for.inc25
for.inc25: ; preds = %for.end
%18 = load i32, i32* %j, align 4
%inc26 = add nsw i32 %18, 1
store i32 %inc26, i32* %j, align 4
br label %for.cond2
for.end27: ; preds = %for.cond2
br label %for.inc28
for.inc28: ; preds = %for.end27
%19 = load i32, i32* %i, align 4
%inc29 = add nsw i32 %19, 1
store i32 %inc29, i32* %i, align 4
br label %for.cond
for.end30: ; preds = %for.cond
; Close the stream; the fclose result is not inspected.
%20 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8
%call31 = call i32 @fclose(%struct._IO_FILE* %20)
ret void
}
declare dso_local %struct._IO_FILE* @fopen(i8*, i8*) #4
declare dso_local i8* @fgets(i8*, i32, %struct._IO_FILE*) #4
; Function Attrs: nounwind
declare dso_local i32 @feof(%struct._IO_FILE*) #1
; Function Attrs: nounwind
declare dso_local i32 @sscanf(i8*, i8*, ...) #1
declare dso_local i32 @fclose(%struct._IO_FILE*) #4
; writeoutput(float *vect, int grid_rows, int grid_cols, int layers, char *file)
;
; Opens `file` for writing and, for every (i, j, k) with i in [0, grid_rows),
; j in [0, grid_cols), k in [0, layers) (k innermost), formats one line
; "<index>\t<value>\n" where <index> is a running counter starting at 0 and
; <value> is vect[i*grid_cols + j + k*grid_rows*grid_cols], and writes it with
; fputs. Closes the file at the end.
;
; If fopen fails, prints "The file was not opened" to stdout and returns
; immediately. The earlier code fell through after the message and passed the
; null FILE* to fputs and fclose.
; Function Attrs: noinline optnone uwtable
define dso_local void @_Z11writeoutputPfiiiPc(float* %vect, i32 %grid_rows, i32 %grid_cols, i32 %layers, i8* %file) #2 {
entry:
  %vect.addr = alloca float*, align 8
  %grid_rows.addr = alloca i32, align 4
  %grid_cols.addr = alloca i32, align 4
  %layers.addr = alloca i32, align 4
  %file.addr = alloca i8*, align 8
  %i = alloca i32, align 4
  %j = alloca i32, align 4
  %k = alloca i32, align 4
  %index = alloca i32, align 4
  %fp = alloca %struct._IO_FILE*, align 8
  ; 256-byte buffer for one formatted output line.
  %str = alloca [256 x i8], align 16
  store float* %vect, float** %vect.addr, align 8
  store i32 %grid_rows, i32* %grid_rows.addr, align 4
  store i32 %grid_cols, i32* %grid_cols.addr, align 4
  store i32 %layers, i32* %layers.addr, align 4
  store i8* %file, i8** %file.addr, align 8
  ; Running line counter printed in the first column.
  store i32 0, i32* %index, align 4
  ; fp = fopen(file, "w")
  %0 = load i8*, i8** %file.addr, align 8
  %call = call %struct._IO_FILE* @fopen(i8* %0, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.8, i64 0, i64 0))
  store %struct._IO_FILE* %call, %struct._IO_FILE** %fp, align 8
  %cmp = icmp eq %struct._IO_FILE* %call, null
  br i1 %cmp, label %if.then, label %if.end

if.then: ; preds = %entry
  ; Open failed: report it and bail out instead of using the null stream.
  %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([25 x i8], [25 x i8]* @.str.9, i64 0, i64 0))
  ret void

if.end: ; preds = %entry
  store i32 0, i32* %i, align 4
  br label %for.cond

; Outer loop: i < grid_rows.
for.cond: ; preds = %for.inc19, %if.end
  %1 = load i32, i32* %i, align 4
  %2 = load i32, i32* %grid_rows.addr, align 4
  %cmp2 = icmp slt i32 %1, %2
  br i1 %cmp2, label %for.body, label %for.end21

for.body: ; preds = %for.cond
  store i32 0, i32* %j, align 4
  br label %for.cond3

; Middle loop: j < grid_cols.
for.cond3: ; preds = %for.inc16, %for.body
  %3 = load i32, i32* %j, align 4
  %4 = load i32, i32* %grid_cols.addr, align 4
  %cmp4 = icmp slt i32 %3, %4
  br i1 %cmp4, label %for.body5, label %for.end18

for.body5: ; preds = %for.cond3
  store i32 0, i32* %k, align 4
  br label %for.cond6

; Inner loop: k < layers.
for.cond6: ; preds = %for.inc, %for.body5
  %5 = load i32, i32* %k, align 4
  %6 = load i32, i32* %layers.addr, align 4
  %cmp7 = icmp slt i32 %5, %6
  br i1 %cmp7, label %for.body8, label %for.end

for.body8: ; preds = %for.cond6
  %arraydecay = getelementptr inbounds [256 x i8], [256 x i8]* %str, i64 0, i64 0
  %7 = load i32, i32* %index, align 4
  ; Element offset = i*grid_cols + j + k*grid_rows*grid_cols (computed in i32).
  %8 = load float*, float** %vect.addr, align 8
  %9 = load i32, i32* %i, align 4
  %10 = load i32, i32* %grid_cols.addr, align 4
  %mul = mul nsw i32 %9, %10
  %11 = load i32, i32* %j, align 4
  %add = add nsw i32 %mul, %11
  %12 = load i32, i32* %k, align 4
  %13 = load i32, i32* %grid_rows.addr, align 4
  %mul9 = mul nsw i32 %12, %13
  %14 = load i32, i32* %grid_cols.addr, align 4
  %mul10 = mul nsw i32 %mul9, %14
  %add11 = add nsw i32 %add, %mul10
  %idxprom = sext i32 %add11 to i64
  %arrayidx = getelementptr inbounds float, float* %8, i64 %idxprom
  %15 = load float, float* %arrayidx, align 4
  ; Varargs pass the float as double; format "%d\t%g\n" into the line buffer.
  %conv = fpext float %15 to double
  %call12 = call i32 (i8*, i8*, ...) @sprintf(i8* %arraydecay, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.10, i64 0, i64 0), i32 %7, double %conv) #8
  %arraydecay13 = getelementptr inbounds [256 x i8], [256 x i8]* %str, i64 0, i64 0
  %16 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8
  %call14 = call i32 @fputs(i8* %arraydecay13, %struct._IO_FILE* %16)
  ; index++
  %17 = load i32, i32* %index, align 4
  %inc = add nsw i32 %17, 1
  store i32 %inc, i32* %index, align 4
  br label %for.inc

for.inc: ; preds = %for.body8
  %18 = load i32, i32* %k, align 4
  %inc15 = add nsw i32 %18, 1
  store i32 %inc15, i32* %k, align 4
  br label %for.cond6

for.end: ; preds = %for.cond6
  br label %for.inc16

for.inc16: ; preds = %for.end
  %19 = load i32, i32* %j, align 4
  %inc17 = add nsw i32 %19, 1
  store i32 %inc17, i32* %j, align 4
  br label %for.cond3

for.end18: ; preds = %for.cond3
  br label %for.inc19

for.inc19: ; preds = %for.end18
  %20 = load i32, i32* %i, align 4
  %inc20 = add nsw i32 %20, 1
  store i32 %inc20, i32* %i, align 4
  br label %for.cond

for.end21: ; preds = %for.cond
  ; Close the stream; the fclose result is not inspected.
  %21 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8
  %call22 = call i32 @fclose(%struct._IO_FILE* %21)
  ret void
}
; Function Attrs: nounwind
; External C library routine: variadic formatted print into a caller-supplied
; character buffer (first operand), driven by the format string (second operand).
declare dso_local i32 @sprintf(i8*, i8*, ...) #1
; External C library routine: writes a NUL-terminated string to a FILE stream.
declare dso_local i32 @fputs(i8*, %struct._IO_FILE*) #4
; Function Attrs: noinline nounwind optnone uwtable
; computeTempCPU(float* pIn, float* tIn, float* tOut, int nx, int ny, int nz,
;                float Cap, float Rx, float Ry, float Rz, float dt, int numiter)
;
; Host-side stencil sweep over an nx * ny * nz grid stored as a flat float
; array with linear index c = x + y*nx + z*nx*ny.
; For every cell the routine writes
;   tOut[c] = tIn[c]*cc + tIn[n]*cn + tIn[s]*cs + tIn[e]*ce + tIn[w]*cw
;           + tIn[t]*ct + tIn[b]*cb + (dt/Cap)*pIn[c] + ct*amb_temp
; where w/e/n/s/b/t are the six axis neighbours of c; a neighbour that would
; fall outside the grid is replaced by c itself.
; After each full sweep the function's *local* tIn/tOut pointer copies are
; swapped (the caller's pointers are not modified). The sweep is a do-while:
; it always runs once and repeats while the incremented counter is < numiter.
define dso_local void @_Z14computeTempCPUPfS_S_iiifffffi(float* %pIn, float* %tIn, float* %tOut, i32 %nx, i32 %ny, i32 %nz, float %Cap, float %Rx, float %Ry, float %Rz, float %dt, i32 %numiter) #0 {
entry:
; Stack slots for the incoming arguments.
%pIn.addr = alloca float*, align 8
%tIn.addr = alloca float*, align 8
%tOut.addr = alloca float*, align 8
%nx.addr = alloca i32, align 4
%ny.addr = alloca i32, align 4
%nz.addr = alloca i32, align 4
%Cap.addr = alloca float, align 4
%Rx.addr = alloca float, align 4
%Ry.addr = alloca float, align 4
%Rz.addr = alloca float, align 4
%dt.addr = alloca float, align 4
%numiter.addr = alloca i32, align 4
; Stencil coefficients (east, west, north, south, top, bottom, centre).
%ce = alloca float, align 4
%cw = alloca float, align 4
%cn = alloca float, align 4
%cs = alloca float, align 4
%ct = alloca float, align 4
%cb = alloca float, align 4
%cc = alloca float, align 4
%stepDivCap = alloca float, align 4
; Linear indices of the centre cell and its six neighbours.
%c = alloca i32, align 4
%w = alloca i32, align 4
%e = alloca i32, align 4
%n = alloca i32, align 4
%s = alloca i32, align 4
%b = alloca i32, align 4
%t = alloca i32, align 4
; Loop counters: x/y/z grid coordinates and i, the sweep counter.
%x = alloca i32, align 4
%y = alloca i32, align 4
%z = alloca i32, align 4
%i = alloca i32, align 4
; Scratch pointer used for the tIn/tOut swap.
%temp = alloca float*, align 8
store float* %pIn, float** %pIn.addr, align 8
store float* %tIn, float** %tIn.addr, align 8
store float* %tOut, float** %tOut.addr, align 8
store i32 %nx, i32* %nx.addr, align 4
store i32 %ny, i32* %ny.addr, align 4
store i32 %nz, i32* %nz.addr, align 4
store float %Cap, float* %Cap.addr, align 4
store float %Rx, float* %Rx.addr, align 4
store float %Ry, float* %Ry.addr, align 4
store float %Rz, float* %Rz.addr, align 4
store float %dt, float* %dt.addr, align 4
store i32 %numiter, i32* %numiter.addr, align 4
; stepDivCap = dt / Cap
%0 = load float, float* %dt.addr, align 4
%1 = load float, float* %Cap.addr, align 4
%div = fdiv float %0, %1
store float %div, float* %stepDivCap, align 4
; ce = cw = stepDivCap / Rx
%2 = load float, float* %stepDivCap, align 4
%3 = load float, float* %Rx.addr, align 4
%div1 = fdiv float %2, %3
store float %div1, float* %cw, align 4
store float %div1, float* %ce, align 4
; cn = cs = stepDivCap / Ry
%4 = load float, float* %stepDivCap, align 4
%5 = load float, float* %Ry.addr, align 4
%div2 = fdiv float %4, %5
store float %div2, float* %cs, align 4
store float %div2, float* %cn, align 4
; ct = cb = stepDivCap / Rz
%6 = load float, float* %stepDivCap, align 4
%7 = load float, float* %Rz.addr, align 4
%div3 = fdiv float %6, %7
store float %div3, float* %cb, align 4
store float %div3, float* %ct, align 4
; cc = 1.0 - (2.0*ce + 2.0*cn + 3.0*ct), evaluated in double and truncated
; back to float. The factor on ct is 3 (not 2): the extra ct matches the
; ct*amb_temp term added per cell below.
%8 = load float, float* %ce, align 4
%conv = fpext float %8 to double
%mul = fmul contract double 2.000000e+00, %conv
%9 = load float, float* %cn, align 4
%conv4 = fpext float %9 to double
%mul5 = fmul contract double 2.000000e+00, %conv4
%add = fadd contract double %mul, %mul5
%10 = load float, float* %ct, align 4
%conv6 = fpext float %10 to double
%mul7 = fmul contract double 3.000000e+00, %conv6
%add8 = fadd contract double %add, %mul7
%sub = fsub contract double 1.000000e+00, %add8
%conv9 = fptrunc double %sub to float
store float %conv9, float* %cc, align 4
; i = 0, then enter the do-while sweep loop.
store i32 0, i32* %i, align 4
br label %do.body
do.body: ; preds = %do.cond, %entry
; for (z = 0; z < nz; z++)
store i32 0, i32* %z, align 4
br label %for.cond
for.cond: ; preds = %for.inc95, %do.body
%11 = load i32, i32* %z, align 4
%12 = load i32, i32* %nz.addr, align 4
%cmp = icmp slt i32 %11, %12
br i1 %cmp, label %for.body, label %for.end97
for.body: ; preds = %for.cond
; for (y = 0; y < ny; y++)
store i32 0, i32* %y, align 4
br label %for.cond10
for.cond10: ; preds = %for.inc92, %for.body
%13 = load i32, i32* %y, align 4
%14 = load i32, i32* %ny.addr, align 4
%cmp11 = icmp slt i32 %13, %14
br i1 %cmp11, label %for.body12, label %for.end94
for.body12: ; preds = %for.cond10
; for (x = 0; x < nx; x++)
store i32 0, i32* %x, align 4
br label %for.cond13
for.cond13: ; preds = %for.inc, %for.body12
%15 = load i32, i32* %x, align 4
%16 = load i32, i32* %nx.addr, align 4
%cmp14 = icmp slt i32 %15, %16
br i1 %cmp14, label %for.body15, label %for.end
for.body15: ; preds = %for.cond13
; c = x + y*nx + z*nx*ny
%17 = load i32, i32* %x, align 4
%18 = load i32, i32* %y, align 4
%19 = load i32, i32* %nx.addr, align 4
%mul16 = mul nsw i32 %18, %19
%add17 = add nsw i32 %17, %mul16
%20 = load i32, i32* %z, align 4
%21 = load i32, i32* %nx.addr, align 4
%mul18 = mul nsw i32 %20, %21
%22 = load i32, i32* %ny.addr, align 4
%mul19 = mul nsw i32 %mul18, %22
%add20 = add nsw i32 %add17, %mul19
store i32 %add20, i32* %c, align 4
; w = (x == 0) ? c : c - 1
%23 = load i32, i32* %x, align 4
%cmp21 = icmp eq i32 %23, 0
br i1 %cmp21, label %cond.true, label %cond.false
cond.true: ; preds = %for.body15
%24 = load i32, i32* %c, align 4
br label %cond.end
cond.false: ; preds = %for.body15
%25 = load i32, i32* %c, align 4
%sub22 = sub nsw i32 %25, 1
br label %cond.end
cond.end: ; preds = %cond.false, %cond.true
%cond = phi i32 [ %24, %cond.true ], [ %sub22, %cond.false ]
store i32 %cond, i32* %w, align 4
; e = (x == nx - 1) ? c : c + 1
%26 = load i32, i32* %x, align 4
%27 = load i32, i32* %nx.addr, align 4
%sub23 = sub nsw i32 %27, 1
%cmp24 = icmp eq i32 %26, %sub23
br i1 %cmp24, label %cond.true25, label %cond.false26
cond.true25: ; preds = %cond.end
%28 = load i32, i32* %c, align 4
br label %cond.end28
cond.false26: ; preds = %cond.end
%29 = load i32, i32* %c, align 4
%add27 = add nsw i32 %29, 1
br label %cond.end28
cond.end28: ; preds = %cond.false26, %cond.true25
%cond29 = phi i32 [ %28, %cond.true25 ], [ %add27, %cond.false26 ]
store i32 %cond29, i32* %e, align 4
; n = (y == 0) ? c : c - nx
%30 = load i32, i32* %y, align 4
%cmp30 = icmp eq i32 %30, 0
br i1 %cmp30, label %cond.true31, label %cond.false32
cond.true31: ; preds = %cond.end28
%31 = load i32, i32* %c, align 4
br label %cond.end34
cond.false32: ; preds = %cond.end28
%32 = load i32, i32* %c, align 4
%33 = load i32, i32* %nx.addr, align 4
%sub33 = sub nsw i32 %32, %33
br label %cond.end34
cond.end34: ; preds = %cond.false32, %cond.true31
%cond35 = phi i32 [ %31, %cond.true31 ], [ %sub33, %cond.false32 ]
store i32 %cond35, i32* %n, align 4
; s = (y == ny - 1) ? c : c + nx
%34 = load i32, i32* %y, align 4
%35 = load i32, i32* %ny.addr, align 4
%sub36 = sub nsw i32 %35, 1
%cmp37 = icmp eq i32 %34, %sub36
br i1 %cmp37, label %cond.true38, label %cond.false39
cond.true38: ; preds = %cond.end34
%36 = load i32, i32* %c, align 4
br label %cond.end41
cond.false39: ; preds = %cond.end34
%37 = load i32, i32* %c, align 4
%38 = load i32, i32* %nx.addr, align 4
%add40 = add nsw i32 %37, %38
br label %cond.end41
cond.end41: ; preds = %cond.false39, %cond.true38
%cond42 = phi i32 [ %36, %cond.true38 ], [ %add40, %cond.false39 ]
store i32 %cond42, i32* %s, align 4
; b = (z == 0) ? c : c - nx*ny
%39 = load i32, i32* %z, align 4
%cmp43 = icmp eq i32 %39, 0
br i1 %cmp43, label %cond.true44, label %cond.false45
cond.true44: ; preds = %cond.end41
%40 = load i32, i32* %c, align 4
br label %cond.end48
cond.false45: ; preds = %cond.end41
%41 = load i32, i32* %c, align 4
%42 = load i32, i32* %nx.addr, align 4
%43 = load i32, i32* %ny.addr, align 4
%mul46 = mul nsw i32 %42, %43
%sub47 = sub nsw i32 %41, %mul46
br label %cond.end48
cond.end48: ; preds = %cond.false45, %cond.true44
%cond49 = phi i32 [ %40, %cond.true44 ], [ %sub47, %cond.false45 ]
store i32 %cond49, i32* %b, align 4
; t = (z == nz - 1) ? c : c + nx*ny
%44 = load i32, i32* %z, align 4
%45 = load i32, i32* %nz.addr, align 4
%sub50 = sub nsw i32 %45, 1
%cmp51 = icmp eq i32 %44, %sub50
br i1 %cmp51, label %cond.true52, label %cond.false53
cond.true52: ; preds = %cond.end48
%46 = load i32, i32* %c, align 4
br label %cond.end56
cond.false53: ; preds = %cond.end48
%47 = load i32, i32* %c, align 4
%48 = load i32, i32* %nx.addr, align 4
%49 = load i32, i32* %ny.addr, align 4
%mul54 = mul nsw i32 %48, %49
%add55 = add nsw i32 %47, %mul54
br label %cond.end56
cond.end56: ; preds = %cond.false53, %cond.true52
%cond57 = phi i32 [ %46, %cond.true52 ], [ %add55, %cond.false53 ]
store i32 %cond57, i32* %t, align 4
; Accumulate the weighted sum, starting with tIn[c] * cc.
%50 = load float*, float** %tIn.addr, align 8
%51 = load i32, i32* %c, align 4
%idxprom = sext i32 %51 to i64
%arrayidx = getelementptr inbounds float, float* %50, i64 %idxprom
%52 = load float, float* %arrayidx, align 4
%53 = load float, float* %cc, align 4
%mul58 = fmul contract float %52, %53
; + tIn[n] * cn
%54 = load float*, float** %tIn.addr, align 8
%55 = load i32, i32* %n, align 4
%idxprom59 = sext i32 %55 to i64
%arrayidx60 = getelementptr inbounds float, float* %54, i64 %idxprom59
%56 = load float, float* %arrayidx60, align 4
%57 = load float, float* %cn, align 4
%mul61 = fmul contract float %56, %57
%add62 = fadd contract float %mul58, %mul61
; + tIn[s] * cs
%58 = load float*, float** %tIn.addr, align 8
%59 = load i32, i32* %s, align 4
%idxprom63 = sext i32 %59 to i64
%arrayidx64 = getelementptr inbounds float, float* %58, i64 %idxprom63
%60 = load float, float* %arrayidx64, align 4
%61 = load float, float* %cs, align 4
%mul65 = fmul contract float %60, %61
%add66 = fadd contract float %add62, %mul65
; + tIn[e] * ce
%62 = load float*, float** %tIn.addr, align 8
%63 = load i32, i32* %e, align 4
%idxprom67 = sext i32 %63 to i64
%arrayidx68 = getelementptr inbounds float, float* %62, i64 %idxprom67
%64 = load float, float* %arrayidx68, align 4
%65 = load float, float* %ce, align 4
%mul69 = fmul contract float %64, %65
%add70 = fadd contract float %add66, %mul69
; + tIn[w] * cw
%66 = load float*, float** %tIn.addr, align 8
%67 = load i32, i32* %w, align 4
%idxprom71 = sext i32 %67 to i64
%arrayidx72 = getelementptr inbounds float, float* %66, i64 %idxprom71
%68 = load float, float* %arrayidx72, align 4
%69 = load float, float* %cw, align 4
%mul73 = fmul contract float %68, %69
%add74 = fadd contract float %add70, %mul73
; + tIn[t] * ct
%70 = load float*, float** %tIn.addr, align 8
%71 = load i32, i32* %t, align 4
%idxprom75 = sext i32 %71 to i64
%arrayidx76 = getelementptr inbounds float, float* %70, i64 %idxprom75
%72 = load float, float* %arrayidx76, align 4
%73 = load float, float* %ct, align 4
%mul77 = fmul contract float %72, %73
%add78 = fadd contract float %add74, %mul77
; + tIn[b] * cb
%74 = load float*, float** %tIn.addr, align 8
%75 = load i32, i32* %b, align 4
%idxprom79 = sext i32 %75 to i64
%arrayidx80 = getelementptr inbounds float, float* %74, i64 %idxprom79
%76 = load float, float* %arrayidx80, align 4
%77 = load float, float* %cb, align 4
%mul81 = fmul contract float %76, %77
%add82 = fadd contract float %add78, %mul81
; + (dt / Cap) * pIn[c]   (dt/Cap is recomputed here rather than reusing stepDivCap)
%78 = load float, float* %dt.addr, align 4
%79 = load float, float* %Cap.addr, align 4
%div83 = fdiv float %78, %79
%80 = load float*, float** %pIn.addr, align 8
%81 = load i32, i32* %c, align 4
%idxprom84 = sext i32 %81 to i64
%arrayidx85 = getelementptr inbounds float, float* %80, i64 %idxprom84
%82 = load float, float* %arrayidx85, align 4
%mul86 = fmul contract float %div83, %82
%add87 = fadd contract float %add82, %mul86
; + ct * amb_temp   (amb_temp is a module-level global)
%83 = load float, float* %ct, align 4
%84 = load float, float* @amb_temp, align 4
%mul88 = fmul contract float %83, %84
%add89 = fadd contract float %add87, %mul88
; tOut[c] = accumulated value
%85 = load float*, float** %tOut.addr, align 8
%86 = load i32, i32* %c, align 4
%idxprom90 = sext i32 %86 to i64
%arrayidx91 = getelementptr inbounds float, float* %85, i64 %idxprom90
store float %add89, float* %arrayidx91, align 4
br label %for.inc
for.inc: ; preds = %cond.end56
; x++
%87 = load i32, i32* %x, align 4
%inc = add nsw i32 %87, 1
store i32 %inc, i32* %x, align 4
br label %for.cond13
for.end: ; preds = %for.cond13
br label %for.inc92
for.inc92: ; preds = %for.end
; y++
%88 = load i32, i32* %y, align 4
%inc93 = add nsw i32 %88, 1
store i32 %inc93, i32* %y, align 4
br label %for.cond10
for.end94: ; preds = %for.cond10
br label %for.inc95
for.inc95: ; preds = %for.end94
; z++
%89 = load i32, i32* %z, align 4
%inc96 = add nsw i32 %89, 1
store i32 %inc96, i32* %z, align 4
br label %for.cond
for.end97: ; preds = %for.cond
; Swap the local tIn / tOut pointers via %temp so the next sweep reads what
; this sweep just wrote. Only the stack copies change.
%90 = load float*, float** %tIn.addr, align 8
store float* %90, float** %temp, align 8
%91 = load float*, float** %tOut.addr, align 8
store float* %91, float** %tIn.addr, align 8
%92 = load float*, float** %temp, align 8
store float* %92, float** %tOut.addr, align 8
; i++
%93 = load i32, i32* %i, align 4
%inc98 = add nsw i32 %93, 1
store i32 %inc98, i32* %i, align 4
br label %do.cond
do.cond: ; preds = %for.end97
; Repeat while i < numiter (condition is tested after the first sweep).
%94 = load i32, i32* %i, align 4
%95 = load i32, i32* %numiter.addr, align 4
%cmp99 = icmp slt i32 %94, %95
br i1 %cmp99, label %do.body, label %do.end
do.end: ; preds = %do.cond
ret void
}
; Function Attrs: noinline optnone uwtable
; accuracy(float* arr1, float* arr2, int len)
; Returns sqrt( sum_i (arr1[i] - arr2[i])^2 / len ), with the sum accumulated
; in single precision over i = 0 .. len-1. No guard is applied for len == 0.
define dso_local float @_Z8accuracyPfS_i(float* %arr1, float* %arr2, i32 %len) #2 {
entry:
  ; Spill the arguments and zero the accumulator and the index.
  %lhs.slot = alloca float*, align 8
  %rhs.slot = alloca float*, align 8
  %count.slot = alloca i32, align 4
  %sumsq.slot = alloca float, align 4
  %idx.slot = alloca i32, align 4
  store float* %arr1, float** %lhs.slot, align 8
  store float* %arr2, float** %rhs.slot, align 8
  store i32 %len, i32* %count.slot, align 4
  store float 0.000000e+00, float* %sumsq.slot, align 4
  store i32 0, i32* %idx.slot, align 4
  br label %loop.test

loop.test:                                        ; preds = %loop.body, %entry
  %idx = load i32, i32* %idx.slot, align 4
  %count = load i32, i32* %count.slot, align 4
  %more = icmp slt i32 %idx, %count
  br i1 %more, label %loop.body, label %loop.done

loop.body:                                        ; preds = %loop.test
  ; delta = arr1[i] - arr2[i]
  %pos = load i32, i32* %idx.slot, align 4
  %pos64 = sext i32 %pos to i64
  %lhs = load float*, float** %lhs.slot, align 8
  %lhs.elem = getelementptr inbounds float, float* %lhs, i64 %pos64
  %a = load float, float* %lhs.elem, align 4
  %rhs = load float*, float** %rhs.slot, align 8
  %rhs.elem = getelementptr inbounds float, float* %rhs, i64 %pos64
  %b = load float, float* %rhs.elem, align 4
  %delta = fsub contract float %a, %b
  ; sum += delta * delta
  %delta.sq = fmul contract float %delta, %delta
  %acc = load float, float* %sumsq.slot, align 4
  %acc.next = fadd contract float %acc, %delta.sq
  store float %acc.next, float* %sumsq.slot, align 4
  ; i++
  %idx.next = add nsw i32 %pos, 1
  store i32 %idx.next, i32* %idx.slot, align 4
  br label %loop.test

loop.done:                                        ; preds = %loop.test
  ; result = sqrt(sum / (float)len)
  %total = load float, float* %sumsq.slot, align 4
  %n = load i32, i32* %count.slot, align 4
  %n.f = sitofp i32 %n to float
  %mean = fdiv float %total, %n.f
  %rms = call float @_ZSt4sqrtf(float %mean)
  ret float %rms
}
; Function Attrs: noinline nounwind optnone uwtable
; std::sqrt(float): thin linkonce_odr wrapper that forwards its argument to
; the C library sqrtf and returns the result unchanged.
define linkonce_odr dso_local float @_ZSt4sqrtf(float %__x) #0 comdat {
entry:
  %arg.slot = alloca float, align 4
  store float %__x, float* %arg.slot, align 4
  %arg = load float, float* %arg.slot, align 4
  %root = call float @sqrtf(float %arg) #8
  ret float %root
}
; Function Attrs: noinline optnone uwtable
; usage(int argc, char** argv)
; Prints seven help lines (@.str.11 .. @.str.17) to stderr, the first one
; parameterised with argv[0], then terminates the process via exit(1).
; This function never returns.
define dso_local void @_Z5usageiPPc(i32 %argc, i8** %argv) #2 {
entry:
  %argc.slot = alloca i32, align 4
  %argv.slot = alloca i8**, align 8
  store i32 %argc, i32* %argc.slot, align 4
  store i8** %argv, i8*** %argv.slot, align 8
  ; Line 1: format string takes the program name argv[0].
  %err0 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
  %args = load i8**, i8*** %argv.slot, align 8
  %prog.ptr = getelementptr inbounds i8*, i8** %args, i64 0
  %prog = load i8*, i8** %prog.ptr, align 8
  %r0 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %err0, i8* getelementptr inbounds ([81 x i8], [81 x i8]* @.str.11, i64 0, i64 0), i8* %prog)
  ; Lines 2-7: fixed text; stderr is reloaded before every call.
  %err1 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
  %r1 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %err1, i8* getelementptr inbounds ([68 x i8], [68 x i8]* @.str.12, i64 0, i64 0))
  %err2 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
  %r2 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %err2, i8* getelementptr inbounds ([62 x i8], [62 x i8]* @.str.13, i64 0, i64 0))
  %err3 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
  %r3 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %err3, i8* getelementptr inbounds ([37 x i8], [37 x i8]* @.str.14, i64 0, i64 0))
  %err4 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
  %r4 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %err4, i8* getelementptr inbounds ([83 x i8], [83 x i8]* @.str.15, i64 0, i64 0))
  %err5 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
  %r5 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %err5, i8* getelementptr inbounds ([88 x i8], [88 x i8]* @.str.16, i64 0, i64 0))
  %err6 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
  %r6 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %err6, i8* getelementptr inbounds ([28 x i8], [28 x i8]* @.str.17, i64 0, i64 0))
  ; Abort with status 1.
  call void @exit(i32 1) #9
  unreachable
}
; Function Attrs: noreturn nounwind
; External C library routine: terminates the process with the given status.
; Declared noreturn (attribute group #5).
declare dso_local void @exit(i32) #5
; Function Attrs: noinline norecurse optnone uwtable
; main(int argc, char** argv)
;
; Program driver. Steps, in order:
;   1. cudaSetDevice(0).
;   2. If argc != 7, call usage(), which prints help and exits with status 1.
;   3. Parse arguments: iterations = atoi(argv[3]); pfile = argv[4];
;      tfile = argv[5]; ofile = argv[6]; numCols = atoi(argv[1]);
;      numRows = atoi(argv[1]) (both dimensions come from argv[1]);
;      layers = atoi(argv[2]).
;   4. Derive dx, dy, dz, Cap, Rx, Ry, Rz, max_slope and dt from the chip
;      globals (@chip_height, @chip_width, @t_chip) and numeric constants.
;   5. Allocate five float buffers of size = numCols*numRows*layers elements:
;      powerIn, tempIn, tempOut, answer (calloc) and tempCopy (malloc).
;   6. Read powerIn from pfile and tempIn from tfile; copy tempIn to tempCopy.
;   7. Run hotspot_opt1 on (powerIn, tempIn, tempOut) and computeTempCPU on
;      (powerIn, tempCopy, answer) with identical parameters.
;   8. Print accuracy(tempOut, answer, numRows*numCols*layers) using @.str.18.
;   9. Write tempOut to ofile.
;  10. Free all five heap buffers and return 0.
;
; Change from the previous revision: tempCopy and answer were never released;
; step 10 now frees them alongside tempIn, tempOut and powerIn.
define dso_local i32 @main(i32 %argc, i8** %argv) #6 {
entry:
%retval = alloca i32, align 4
%argc.addr = alloca i32, align 4
%argv.addr = alloca i8**, align 8
%pfile = alloca i8*, align 8
%tfile = alloca i8*, align 8
%ofile = alloca i8*, align 8
%iterations = alloca i32, align 4
%numCols = alloca i32, align 4
%numRows = alloca i32, align 4
%layers = alloca i32, align 4
%dx = alloca float, align 4
%dy = alloca float, align 4
%dz = alloca float, align 4
%Cap = alloca float, align 4
%Rx = alloca float, align 4
%Ry = alloca float, align 4
%Rz = alloca float, align 4
%max_slope = alloca float, align 4
%dt = alloca float, align 4
%powerIn = alloca float*, align 8
%tempOut = alloca float*, align 8
%tempIn = alloca float*, align 8
%tempCopy = alloca float*, align 8
%size = alloca i32, align 4
%answer = alloca float*, align 8
%acc = alloca float, align 4
store i32 0, i32* %retval, align 4
store i32 %argc, i32* %argc.addr, align 4
store i8** %argv, i8*** %argv.addr, align 8
; Select device 0; the returned status is not inspected.
%call = call i32 @cudaSetDevice(i32 0)
; Require exactly seven argv entries (program name plus six arguments).
%0 = load i32, i32* %argc.addr, align 4
%cmp = icmp ne i32 %0, 7
br i1 %cmp, label %if.then, label %if.end
if.then: ; preds = %entry
%1 = load i32, i32* %argc.addr, align 4
%2 = load i8**, i8*** %argv.addr, align 8
call void @_Z5usageiPPc(i32 %1, i8** %2)
br label %if.end
if.end: ; preds = %if.then, %entry
; iterations = atoi(argv[3])
%3 = load i8**, i8*** %argv.addr, align 8
%arrayidx = getelementptr inbounds i8*, i8** %3, i64 3
%4 = load i8*, i8** %arrayidx, align 8
%call1 = call i32 @atoi(i8* %4) #10
store i32 %call1, i32* %iterations, align 4
; pfile = argv[4]
%5 = load i8**, i8*** %argv.addr, align 8
%arrayidx2 = getelementptr inbounds i8*, i8** %5, i64 4
%6 = load i8*, i8** %arrayidx2, align 8
store i8* %6, i8** %pfile, align 8
; tfile = argv[5]
%7 = load i8**, i8*** %argv.addr, align 8
%arrayidx3 = getelementptr inbounds i8*, i8** %7, i64 5
%8 = load i8*, i8** %arrayidx3, align 8
store i8* %8, i8** %tfile, align 8
; ofile = argv[6]
%9 = load i8**, i8*** %argv.addr, align 8
%arrayidx4 = getelementptr inbounds i8*, i8** %9, i64 6
%10 = load i8*, i8** %arrayidx4, align 8
store i8* %10, i8** %ofile, align 8
; numCols = atoi(argv[1])
%11 = load i8**, i8*** %argv.addr, align 8
%arrayidx5 = getelementptr inbounds i8*, i8** %11, i64 1
%12 = load i8*, i8** %arrayidx5, align 8
%call6 = call i32 @atoi(i8* %12) #10
store i32 %call6, i32* %numCols, align 4
; numRows = atoi(argv[1])  (same argument as numCols)
%13 = load i8**, i8*** %argv.addr, align 8
%arrayidx7 = getelementptr inbounds i8*, i8** %13, i64 1
%14 = load i8*, i8** %arrayidx7, align 8
%call8 = call i32 @atoi(i8* %14) #10
store i32 %call8, i32* %numRows, align 4
; layers = atoi(argv[2])
%15 = load i8**, i8*** %argv.addr, align 8
%arrayidx9 = getelementptr inbounds i8*, i8** %15, i64 2
%16 = load i8*, i8** %arrayidx9, align 8
%call10 = call i32 @atoi(i8* %16) #10
store i32 %call10, i32* %layers, align 4
; dx = chip_height / numRows
%17 = load float, float* @chip_height, align 4
%18 = load i32, i32* %numRows, align 4
%conv = sitofp i32 %18 to float
%div = fdiv float %17, %conv
store float %div, float* %dx, align 4
; dy = chip_width / numCols
%19 = load float, float* @chip_width, align 4
%20 = load i32, i32* %numCols, align 4
%conv11 = sitofp i32 %20 to float
%div12 = fdiv float %19, %conv11
store float %div12, float* %dy, align 4
; dz = t_chip / layers
%21 = load float, float* @t_chip, align 4
%22 = load i32, i32* %layers, align 4
%conv13 = sitofp i32 %22 to float
%div14 = fdiv float %21, %conv13
store float %div14, float* %dz, align 4
; Cap = (float)(8.75e5 * t_chip * dx * dy), evaluated in double
%23 = load float, float* @t_chip, align 4
%conv15 = fpext float %23 to double
%mul = fmul contract double 8.750000e+05, %conv15
%24 = load float, float* %dx, align 4
%conv16 = fpext float %24 to double
%mul17 = fmul contract double %mul, %conv16
%25 = load float, float* %dy, align 4
%conv18 = fpext float %25 to double
%mul19 = fmul contract double %mul17, %conv18
%conv20 = fptrunc double %mul19 to float
store float %conv20, float* %Cap, align 4
; Rx = (float)(dy / (200.0 * t_chip * dx))
%26 = load float, float* %dy, align 4
%conv21 = fpext float %26 to double
%27 = load float, float* @t_chip, align 4
%conv22 = fpext float %27 to double
%mul23 = fmul contract double 2.000000e+02, %conv22
%28 = load float, float* %dx, align 4
%conv24 = fpext float %28 to double
%mul25 = fmul contract double %mul23, %conv24
%div26 = fdiv double %conv21, %mul25
%conv27 = fptrunc double %div26 to float
store float %conv27, float* %Rx, align 4
; Ry = (float)(dx / (200.0 * t_chip * dy))
%29 = load float, float* %dx, align 4
%conv28 = fpext float %29 to double
%30 = load float, float* @t_chip, align 4
%conv29 = fpext float %30 to double
%mul30 = fmul contract double 2.000000e+02, %conv29
%31 = load float, float* %dy, align 4
%conv31 = fpext float %31 to double
%mul32 = fmul contract double %mul30, %conv31
%div33 = fdiv double %conv28, %mul32
%conv34 = fptrunc double %div33 to float
store float %conv34, float* %Ry, align 4
; Rz = dz / (100.0f * dx * dy), evaluated in float
%32 = load float, float* %dz, align 4
%33 = load float, float* %dx, align 4
%mul35 = fmul contract float 1.000000e+02, %33
%34 = load float, float* %dy, align 4
%mul36 = fmul contract float %mul35, %34
%div37 = fdiv float %32, %mul36
store float %div37, float* %Rz, align 4
; max_slope = (float)(3.0e6 / (0.5 * t_chip * 1.75e6))
%35 = load float, float* @t_chip, align 4
%conv38 = fpext float %35 to double
%mul39 = fmul contract double 5.000000e-01, %conv38
%mul40 = fmul contract double %mul39, 1.750000e+06
%div41 = fdiv double 3.000000e+06, %mul40
%conv42 = fptrunc double %div41 to float
store float %conv42, float* %max_slope, align 4
; dt = (float)(0.001 / max_slope)
%36 = load float, float* %max_slope, align 4
%conv43 = fpext float %36 to double
%div44 = fdiv double 1.000000e-03, %conv43
%conv45 = fptrunc double %div44 to float
store float %conv45, float* %dt, align 4
; size = numCols * numRows * layers (element count of every buffer)
%37 = load i32, i32* %numCols, align 4
%38 = load i32, i32* %numRows, align 4
%mul46 = mul nsw i32 %37, %38
%39 = load i32, i32* %layers, align 4
%mul47 = mul nsw i32 %mul46, %39
store i32 %mul47, i32* %size, align 4
; powerIn = calloc(size, 4)
%40 = load i32, i32* %size, align 4
%conv48 = sext i32 %40 to i64
%call49 = call noalias i8* @calloc(i64 %conv48, i64 4) #8
%41 = bitcast i8* %call49 to float*
store float* %41, float** %powerIn, align 8
; tempCopy = malloc(size * 4)
%42 = load i32, i32* %size, align 4
%conv50 = sext i32 %42 to i64
%mul51 = mul i64 %conv50, 4
%call52 = call noalias i8* @malloc(i64 %mul51) #8
%43 = bitcast i8* %call52 to float*
store float* %43, float** %tempCopy, align 8
; tempIn = calloc(size, 4)
%44 = load i32, i32* %size, align 4
%conv53 = sext i32 %44 to i64
%call54 = call noalias i8* @calloc(i64 %conv53, i64 4) #8
%45 = bitcast i8* %call54 to float*
store float* %45, float** %tempIn, align 8
; tempOut = calloc(size, 4)
%46 = load i32, i32* %size, align 4
%conv55 = sext i32 %46 to i64
%call56 = call noalias i8* @calloc(i64 %conv55, i64 4) #8
%47 = bitcast i8* %call56 to float*
store float* %47, float** %tempOut, align 8
; answer = calloc(size, 4)
%48 = load i32, i32* %size, align 4
%conv57 = sext i32 %48 to i64
%call58 = call noalias i8* @calloc(i64 %conv57, i64 4) #8
%49 = bitcast i8* %call58 to float*
store float* %49, float** %answer, align 8
; readinput(powerIn, numRows, numCols, layers, pfile)
%50 = load float*, float** %powerIn, align 8
%51 = load i32, i32* %numRows, align 4
%52 = load i32, i32* %numCols, align 4
%53 = load i32, i32* %layers, align 4
%54 = load i8*, i8** %pfile, align 8
call void @_Z9readinputPfiiiPc(float* %50, i32 %51, i32 %52, i32 %53, i8* %54)
; readinput(tempIn, numRows, numCols, layers, tfile)
%55 = load float*, float** %tempIn, align 8
%56 = load i32, i32* %numRows, align 4
%57 = load i32, i32* %numCols, align 4
%58 = load i32, i32* %layers, align 4
%59 = load i8*, i8** %tfile, align 8
call void @_Z9readinputPfiiiPc(float* %55, i32 %56, i32 %57, i32 %58, i8* %59)
; memcpy(tempCopy, tempIn, size * 4): private input copy for the CPU run
%60 = load float*, float** %tempCopy, align 8
%61 = bitcast float* %60 to i8*
%62 = load float*, float** %tempIn, align 8
%63 = bitcast float* %62 to i8*
%64 = load i32, i32* %size, align 4
%conv59 = sext i32 %64 to i64
%mul60 = mul i64 %conv59, 4
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %61, i8* align 4 %63, i64 %mul60, i1 false)
; hotspot_opt1(powerIn, tempIn, tempOut, numCols, numRows, layers,
;              Cap, Rx, Ry, Rz, dt, iterations)
%65 = load float*, float** %powerIn, align 8
%66 = load float*, float** %tempIn, align 8
%67 = load float*, float** %tempOut, align 8
%68 = load i32, i32* %numCols, align 4
%69 = load i32, i32* %numRows, align 4
%70 = load i32, i32* %layers, align 4
%71 = load float, float* %Cap, align 4
%72 = load float, float* %Rx, align 4
%73 = load float, float* %Ry, align 4
%74 = load float, float* %Rz, align 4
%75 = load float, float* %dt, align 4
%76 = load i32, i32* %iterations, align 4
call void @_Z12hotspot_opt1PfS_S_iiifffffi(float* %65, float* %66, float* %67, i32 %68, i32 %69, i32 %70, float %71, float %72, float %73, float %74, float %75, i32 %76)
; computeTempCPU(powerIn, tempCopy, answer, numCols, numRows, layers,
;                Cap, Rx, Ry, Rz, dt, iterations)
%77 = load float*, float** %powerIn, align 8
%78 = load float*, float** %tempCopy, align 8
%79 = load float*, float** %answer, align 8
%80 = load i32, i32* %numCols, align 4
%81 = load i32, i32* %numRows, align 4
%82 = load i32, i32* %layers, align 4
%83 = load float, float* %Cap, align 4
%84 = load float, float* %Rx, align 4
%85 = load float, float* %Ry, align 4
%86 = load float, float* %Rz, align 4
%87 = load float, float* %dt, align 4
%88 = load i32, i32* %iterations, align 4
call void @_Z14computeTempCPUPfS_S_iiifffffi(float* %77, float* %78, float* %79, i32 %80, i32 %81, i32 %82, float %83, float %84, float %85, float %86, float %87, i32 %88)
; acc = accuracy(tempOut, answer, numRows * numCols * layers)
%89 = load float*, float** %tempOut, align 8
%90 = load float*, float** %answer, align 8
%91 = load i32, i32* %numRows, align 4
%92 = load i32, i32* %numCols, align 4
%mul61 = mul nsw i32 %91, %92
%93 = load i32, i32* %layers, align 4
%mul62 = mul nsw i32 %mul61, %93
%call63 = call float @_Z8accuracyPfS_i(float* %89, float* %90, i32 %mul62)
store float %call63, float* %acc, align 4
; printf(@.str.18, (double)acc)
%94 = load float, float* %acc, align 4
%conv64 = fpext float %94 to double
%call65 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.18, i64 0, i64 0), double %conv64)
; writeoutput(tempOut, numRows, numCols, layers, ofile)
%95 = load float*, float** %tempOut, align 8
%96 = load i32, i32* %numRows, align 4
%97 = load i32, i32* %numCols, align 4
%98 = load i32, i32* %layers, align 4
%99 = load i8*, i8** %ofile, align 8
call void @_Z11writeoutputPfiiiPc(float* %95, i32 %96, i32 %97, i32 %98, i8* %99)
; Release every heap buffer allocated above.
%100 = load float*, float** %tempIn, align 8
%101 = bitcast float* %100 to i8*
call void @free(i8* %101) #8
%102 = load float*, float** %tempOut, align 8
%103 = bitcast float* %102 to i8*
call void @free(i8* %103) #8
%104 = load float*, float** %powerIn, align 8
%105 = bitcast float* %104 to i8*
call void @free(i8* %105) #8
; tempCopy and answer are distinct allocations that no callee takes ownership
; of (computeTempCPU only swaps its own local pointer copies), so they are
; freed here too. free(NULL) is a no-op if an allocation failed.
%106 = load float*, float** %tempCopy, align 8
%107 = bitcast float* %106 to i8*
call void @free(i8* %107) #8
%108 = load float*, float** %answer, align 8
%109 = bitcast float* %108 to i8*
call void @free(i8* %109) #8
ret i32 0
}
; External runtime entry point; main calls it once with device index 0.
declare dso_local i32 @cudaSetDevice(i32) #4
; Function Attrs: nounwind readonly
; External C library routine: parses a decimal integer from a string.
declare dso_local i32 @atoi(i8*) #7
; Function Attrs: nounwind
; External C library routine: zero-initialised allocation of (count, element size).
declare dso_local noalias i8* @calloc(i64, i64) #1
; Function Attrs: nounwind
; External C library routine: uninitialised allocation of the given byte count.
declare dso_local noalias i8* @malloc(i64) #1
; Function Attrs: nounwind
; External C library routine: releases memory obtained from malloc/calloc.
declare dso_local void @free(i8*) #1
; Function Attrs: nounwind
; External C library routine: single-precision square root; called by the
; std::sqrt(float) wrapper @_ZSt4sqrtf.
declare dso_local float @sqrtf(float) #1
; Registers the host-side stub @_Z11hotspotOpt1PfS_S_fiiifffffff with the
; runtime under the name held in the unnamed string global @0 (passed for both
; name operands), using the fat-binary handle received as the only argument.
; The status returned by __cudaRegisterFunction is discarded.
define internal void @__cuda_register_globals(i8** %0) {
entry:
  %status = call i32 @__cudaRegisterFunction(i8** %0, i8* bitcast (void (float*, float*, float*, float, i32, i32, i32, float, float, float, float, float, float, float)* @_Z11hotspotOpt1PfS_S_fiiifffffff to i8*), i8* getelementptr inbounds ([33 x i8], [33 x i8]* @0, i64 0, i64 0), i8* getelementptr inbounds ([33 x i8], [33 x i8]* @0, i64 0, i64 0), i32 -1, i8* null, i8* null, i8* null, i8* null, i32* null)
  ret void
}
; External registration hook; called from @__cuda_register_globals with the
; fat-binary handle, the host stub address and the kernel name string.
declare dso_local i32 @__cudaRegisterFunction(i8**, i8*, i8*, i8*, i32, i8*, i8*, i8*, i8*, i32*)
; External registration hook for device variables; declared but not called in
; the visible part of this module.
declare dso_local i32 @__cudaRegisterVar(i8**, i8*, i8*, i8*, i32, i32, i32, i32)
; External hook that takes the fat-binary wrapper and returns the handle that
; @__cuda_module_ctor stores in @__cuda_gpubin_handle.
declare dso_local i8** @__cudaRegisterFatBinary(i8*)
; Module constructor: registers the embedded fat binary, remembers the
; returned handle in @__cuda_gpubin_handle, registers the module's kernels,
; signals the end of registration, and schedules @__cuda_module_dtor via
; atexit. The incoming argument is unused and atexit's result is ignored.
define internal void @__cuda_module_ctor(i8* %0) {
entry:
  %handle = call i8** @__cudaRegisterFatBinary(i8* bitcast ({ i32, i32, i8*, i8* }* @__cuda_fatbin_wrapper to i8*))
  store i8** %handle, i8*** @__cuda_gpubin_handle, align 8
  call void @__cuda_register_globals(i8** %handle)
  call void @__cudaRegisterFatBinaryEnd(i8** %handle)
  %rc = call i32 @atexit(void (i8*)* @__cuda_module_dtor)
  ret void
}
; External hook called by @__cuda_module_ctor after all kernels are registered.
declare dso_local void @__cudaRegisterFatBinaryEnd(i8**)
; External hook called by @__cuda_module_dtor with the saved fat-binary handle.
declare dso_local void @__cudaUnregisterFatBinary(i8**)
; Module destructor (installed through atexit by @__cuda_module_ctor):
; reloads the handle saved in @__cuda_gpubin_handle and unregisters the fat
; binary with it. The incoming argument is unused.
define internal void @__cuda_module_dtor(i8* %0) {
entry:
  %handle = load i8**, i8*** @__cuda_gpubin_handle, align 8
  call void @__cudaUnregisterFatBinary(i8** %handle)
  ret void
}
; External C library routine, declared here with a callback type of
; void (i8*)* so that @__cuda_module_dtor can be passed directly.
declare dso_local i32 @atexit(void (i8*)*)
; Attribute groups referenced by the `#N` suffixes on definitions,
; declarations and call sites in this module.
; #0: definitions that are noinline/nounwind/optnone (e.g. computeTempCPU, std::sqrt wrapper).
attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
; #1: nounwind external declarations (sprintf, calloc, malloc, free, sqrtf).
attributes #1 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
; #2: definitions that are noinline/optnone but may unwind (accuracy, usage).
attributes #2 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
; #3: not referenced in this part of the file; presumably attached to the
; llvm.memcpy intrinsic declaration - TODO confirm.
attributes #3 = { argmemonly nounwind willreturn }
; #4: plain external declarations (fputs, cudaSetDevice).
attributes #4 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
; #5: noreturn external declaration (exit).
attributes #5 = { noreturn nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
; #6: main (adds norecurse).
attributes #6 = { noinline norecurse optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
; #7: readonly external declaration (atoi).
attributes #7 = { nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
; #8: call-site attribute for nounwind calls (sprintf, calloc, malloc, free, sqrtf).
attributes #8 = { nounwind }
; #9: call-site attribute for the exit(1) call in usage.
attributes #9 = { noreturn nounwind }
; #10: call-site attribute for the atoi calls in main.
attributes #10 = { nounwind readonly }
; Module-level metadata: two module flags and the producer identification.
!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}
; !0: "SDK Version" flag carrying the pair [10, 1] (presumably the CUDA SDK
; version used at compile time - TODO confirm).
!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]}
; !1: "wchar_size" flag with value 4.
!1 = !{i32 1, !"wchar_size", i32 4}
; !2: identification string of the compiler that emitted this module.
!2 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"}