; LLVM IR listing (source-viewer header: 1508 lines, 137 KiB)
|
; ModuleID = '3D-host-x86_64-unknown-linux-gnu.bc'
|
||
|
source_filename = "3D.cu"
|
||
|
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
|
||
|
target triple = "x86_64-unknown-linux-gnu"
|
||
|
|
||
|
%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
|
||
|
%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
|
||
|
%struct.timeval = type { i64, i64 }
|
||
|
%struct.timezone = type { i32, i32 }
|
||
|
%struct.dim3 = type { i32, i32, i32 }
|
||
|
%struct.CUstream_st = type opaque
|
||
|
|
||
|
$_ZN4dim3C2Ejjj = comdat any
|
||
|
|
||
|
$_ZSt4sqrtf = comdat any
|
||
|
|
||
|
; ---------------------------------------------------------------------------
; Module-level constants.
; The @.str* globals are NUL-terminated C strings; "\0A" is a newline and
; "\09" a tab. Where the user of a string is visible in this file it is named
; in the comment; otherwise the user lies outside the visible part.
; ---------------------------------------------------------------------------
; printf format for the elapsed time printed by hotspot_opt1.
@.str = private unnamed_addr constant [16 x i8] c"Time: %.3f (s)\0A\00", align 1
|
||
|
; Chip parameters. The hexadecimal float literals decode to roughly 0.0005
; (t_chip) and 0.016 (chip_height, chip_width). Units are not stated here.
@t_chip = dso_local global float 0x3F40624DE0000000, align 4
|
||
|
@chip_height = dso_local global float 0x3F90624DE0000000, align 4
|
||
|
@chip_width = dso_local global float 0x3F90624DE0000000, align 4
|
||
|
; Ambient temperature, 80.0 (units not stated here).
@amb_temp = dso_local global float 8.000000e+01, align 4
|
||
|
; The C library's stderr stream object (defined outside this module).
@stderr = external dso_local global %struct._IO_FILE*, align 8
|
||
|
; fprintf format used by fatal().
@.str.1 = private unnamed_addr constant [11 x i8] c"Error: %s\0A\00", align 1
|
||
|
; fopen mode used by readinput().
@.str.2 = private unnamed_addr constant [2 x i8] c"r\00", align 1
|
||
|
; readinput(): message when fopen returns null.
@.str.3 = private unnamed_addr constant [24 x i8] c"The file was not opened\00", align 1
|
||
|
; readinput(): message when fgets returns null.
@.str.4 = private unnamed_addr constant [20 x i8] c"Error reading file\0A\00", align 1
|
||
|
; readinput(): message when feof reports end of file.
@.str.5 = private unnamed_addr constant [25 x i8] c"not enough lines in file\00", align 1
|
||
|
; readinput(): sscanf format, one float per line.
@.str.6 = private unnamed_addr constant [3 x i8] c"%f\00", align 1
|
||
|
; readinput(): message when sscanf does not convert exactly one value.
@.str.7 = private unnamed_addr constant [20 x i8] c"invalid file format\00", align 1
|
||
|
; The users of @.str.8 .. @.str.18 are outside the visible part of the file.
; presumably the fopen mode and messages of the output writer and the usage
; text of main -- TODO confirm against their users.
@.str.8 = private unnamed_addr constant [2 x i8] c"w\00", align 1
|
||
|
@.str.9 = private unnamed_addr constant [25 x i8] c"The file was not opened\0A\00", align 1
|
||
|
@.str.10 = private unnamed_addr constant [7 x i8] c"%d\09%g\0A\00", align 1
|
||
|
@.str.11 = private unnamed_addr constant [81 x i8] c"Usage: %s <rows/cols> <layers> <iterations> <powerFile> <tempFile> <outputFile>\0A\00", align 1
|
||
|
@.str.12 = private unnamed_addr constant [68 x i8] c"\09<rows/cols> - number of rows/cols in the grid (positive integer)\0A\00", align 1
|
||
|
@.str.13 = private unnamed_addr constant [62 x i8] c"\09<layers> - number of layers in the grid (positive integer)\0A\00", align 1
|
||
|
@.str.14 = private unnamed_addr constant [37 x i8] c"\09<iteration> - number of iterations\0A\00", align 1
|
||
|
@.str.15 = private unnamed_addr constant [83 x i8] c"\09<powerFile> - name of the file containing the initial power values of each cell\0A\00", align 1
|
||
|
@.str.16 = private unnamed_addr constant [88 x i8] c"\09<tempFile> - name of the file containing the initial temperature values of each cell\0A\00", align 1
|
||
|
; NOTE(review): the text lacks the closing '>' after "<outputFile". Adding it
; changes the array type from [28 x i8] to [29 x i8], so every reference to
; @.str.17 (outside this view) would have to change with it.
@.str.17 = private unnamed_addr constant [28 x i8] c"\09<outputFile - output file\0A\00", align 1
|
||
|
@.str.18 = private unnamed_addr constant [14 x i8] c"Accuracy: %e\0A\00", align 1
|
||
|
; Mangled name of the hotspotOpt1 kernel as a string. Its user is not visible
; here; presumably the kernel registration code -- TODO confirm.
@0 = private unnamed_addr constant [33 x i8] c"_Z11hotspotOpt1PfS_S_fiiifffffff\00", align 1
|
||
|
@1 = private constant [27433 x i8] c"P\EDU\BA\01\00\10\00\18k\00\00\00\00\00\00\02\00\01\01@\00\00\00([\00\00\00\00\00\00\00\00\00\00\00\00\00\00\07\00\01\00=\00\00\00\00\00\00\00\00\00\00\00\11\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\7FELF\02\01\013\07\00\00\00\00\00\00\00\02\00\BE\00e\00\00\00\00\00\00\00\00\00\00\00\80Z\00\00\00\00\00\00@X\00\00\00\00\00\00=\05=\00@\008\00\03\00@\00\09\00\01\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00.text._Z11hotspotOpt1PfS_S_fiiifffffff\00.nv.info._Z11hotspotOpt1PfS_S_fiiifffffff\00.nv.shared._Z11hotspotOpt1PfS_S_fiiifffffff\00.nv.global\00.nv.constant0._Z11hotspotOpt1PfS_S_fiiifffffff\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00_Z11hotspotOpt1PfS_S_fiiifffffff\00.text._Z11hotspotOpt1PfS_S_fiiifffffff\00.nv.info._Z11hotspotOpt1PfS_S_fiiifffffff\00.nv.shared._Z11hotspotOpt1PfS_S_fiiifffffff\00.nv.global\00blockDim\00blockIdx\00threadIdx\00.nv.constant0._Z11hotspotOpt1PfS_S_fiiifffffff\00_param\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00S\00\00\00\03\00\07\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\D0\00\00\00\03\00\08\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\DB\00\00\00\01\00\08\00\01\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\E4\00\00\00\01\00\08\00\02\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\ED\00\00\00\01\00\08\00\00\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\F7\00\00\00\03\00\06\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\002\00\00\00\12\10\07\00\00\00\00\00\00\00\00\00@R\00\00\00\00\00\00\04/\08\00\07\00\00\00\17\00\00\00\04#\08\00\07\00\00\00\00\00\00\00\04\12\08\00\07\00\00\00x\00\00\00\04\11\08\00\07\00\00\00x\00\00\00\010\00\00\01*\00\00\04\0A\08\00\06\00\00\00@\01D\00\03\19D\00\04\17\0C\00\00\00\00\00\0D\00@\00\00\F0\11\00\04\17\0C\00\00\00\00\00\0C\00<\00\00\F0\11\00\04\17\0C\00\00\00\00\00\0B\008\00\00\F0\11\00\04\17\0C\00\00\00\00\00\0A\004\00\00\F0\11\00\04\17\0C\00\00\00\00\00\09
\000\00\00\F0\11\00\04\17\0C\00\00\00\00\00\08\00,\00\00\F0\11\00\04\17\0C\00\00\00\00\00\07\00(\00\00\F0\11\00\04\17\0C\00\00\00\00\00\06\00$\00\00\F0\11\00\04\17\0C\00\00\00\00\00\05\00 \00\00\F0\11\00\04\17\0C\00\00\00\00\00\04\00\1C\00\00\F0\11\00\04\17\0C\00\00\00\00\00\03\00\18\00\00\F0\11\00\04\17\0C\00\00\00\00\00\02\00\10\00\00\F0!\00\04\17\0C\00\00\00\00\00\01\00\08\00\00\F0!\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0!\00\03\1B\FF\00\04\1D\08\00X\09\00\00\08\0A\00\00\04\1C\04\00\18R\00\00\04\1E\04\00 \00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\EF\1F\E0\FD\03<d\00\01\00\87\00\80\07\98L\01\01\87\F8\FF\FF\0F\1C\00\00w\03\00\00\C8\F0\EF\1F\E0\FD\03\BC\7F\00\07\01\07\00\80\03l[\0F\00\80\00\00\00@\E2\C0\00\10\00\00\00\A0\E3\EF\1F\E0!\03\BC\7F\00\00\01\F7\0F\00\00\10\\\0
0\0A\07\00\00\00\E0\\\02\00\07\00\80\07\98\\\EF\1F\E0\FD\03\BC\7F\00\03\00\F7\0F\80\07\98\\\00\
|
||
|
; Descriptor for the embedded device binary @1, placed in the
; ".nvFatBinSegment" section. Fields in order: i32 1180844977 (0x466243B1),
; i32 1, a pointer to the first byte of @1, and a null pointer.
; presumably magic number, version, data pointer and an unused slot -- TODO
; confirm against the CUDA fat-binary wrapper layout.
@__cuda_fatbin_wrapper = internal constant { i32, i32, i8*, i8* } { i32 1180844977, i32 1, i8* getelementptr inbounds ([27433 x i8], [27433 x i8]* @1, i64 0, i64 0), i8* null }, section ".nvFatBinSegment", align 8
|
||
|
; Module-private i8** handle, initially null. Its writers are outside this
; view (presumably @__cuda_module_ctor -- TODO confirm).
@__cuda_gpubin_handle = internal global i8** null, align 8
|
||
|
; Static constructor table: one entry with priority 65535 that runs
; @__cuda_module_ctor (defined outside this view; bitcast to void ()* here).
@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* bitcast (void (i8*)* @__cuda_module_ctor to void ()*), i8* null }]
|
||
|
|
||
|
; Function Attrs: noinline nounwind optnone uwtable
;
; long long get_time()
;
; Reads the current time with gettimeofday() and returns it as a single
; microsecond count: tv_sec * 1000000 + tv_usec.
; The return code of gettimeofday() is not inspected.
define dso_local i64 @_Z8get_timev() #0 {
entry:
  %now = alloca %struct.timeval, align 8
  %rc = call i32 @gettimeofday(%struct.timeval* %now, %struct.timezone* null) #8

  ; Field 0 is tv_sec, field 1 is tv_usec.
  %sec.ptr = getelementptr inbounds %struct.timeval, %struct.timeval* %now, i32 0, i32 0
  %usec.ptr = getelementptr inbounds %struct.timeval, %struct.timeval* %now, i32 0, i32 1
  %sec = load i64, i64* %sec.ptr, align 8
  %usec = load i64, i64* %usec.ptr, align 8

  ; seconds -> microseconds, then add the sub-second part.
  %sec.us = mul nsw i64 %sec, 1000000
  %total.us = add nsw i64 %sec.us, %usec
  ret i64 %total.us
}
|
||
|
|
||
|
; Function Attrs: nounwind
|
||
|
declare dso_local i32 @gettimeofday(%struct.timeval*, %struct.timezone*) #1
|
||
|
|
||
|
; Function Attrs: noinline optnone uwtable
;
; Host-side launch stub for the kernel
;   hotspotOpt1(float *p, float *tIn, float *tOut, float sdc,
;               int nx, int ny, int nz,
;               float ce, float cw, float cn, float cs,
;               float ct, float cb, float cc)
;
; The stub
;   1. spills every argument to its own stack slot,
;   2. fills a 14-entry array with the addresses of those slots,
;   3. pops the launch configuration pushed by the caller through
;      __cudaPushCallConfiguration, and
;   4. hands everything to cudaLaunchKernel, using this function's own
;      address as the kernel identifier.
; The return codes of __cudaPopCallConfiguration and cudaLaunchKernel are not
; inspected.
define dso_local void @_Z11hotspotOpt1PfS_S_fiiifffffff(float* %p, float* %tIn, float* %tOut, float %sdc, i32 %nx, i32 %ny, i32 %nz, float %ce, float %cw, float %cn, float %cs, float %ct, float %cb, float %cc) #2 {
entry:
  ; One stack slot per kernel argument. The argument array below stores the
  ; ADDRESSES of these slots, so they must stay in memory.
  %p.slot = alloca float*, align 8
  %tIn.slot = alloca float*, align 8
  %tOut.slot = alloca float*, align 8
  %sdc.slot = alloca float, align 4
  %nx.slot = alloca i32, align 4
  %ny.slot = alloca i32, align 4
  %nz.slot = alloca i32, align 4
  %ce.slot = alloca float, align 4
  %cw.slot = alloca float, align 4
  %cn.slot = alloca float, align 4
  %cs.slot = alloca float, align 4
  %ct.slot = alloca float, align 4
  %cb.slot = alloca float, align 4
  %cc.slot = alloca float, align 4

  ; Launch configuration as filled in by __cudaPopCallConfiguration.
  %grid = alloca %struct.dim3, align 8
  %block = alloca %struct.dim3, align 8
  %shmem.slot = alloca i64, align 8
  %stream.slot = alloca i8*, align 8

  ; { i64, i32 } views of the two dim3 values, used to pass them by value.
  %grid.abi = alloca { i64, i32 }, align 8
  %block.abi = alloca { i64, i32 }, align 8

  ; Array of 14 pointers, one per kernel argument.
  %argv = alloca i8*, i64 14, align 16

  store float* %p, float** %p.slot, align 8
  store float* %tIn, float** %tIn.slot, align 8
  store float* %tOut, float** %tOut.slot, align 8
  store float %sdc, float* %sdc.slot, align 4
  store i32 %nx, i32* %nx.slot, align 4
  store i32 %ny, i32* %ny.slot, align 4
  store i32 %nz, i32* %nz.slot, align 4
  store float %ce, float* %ce.slot, align 4
  store float %cw, float* %cw.slot, align 4
  store float %cn, float* %cn.slot, align 4
  store float %cs, float* %cs.slot, align 4
  store float %ct, float* %ct.slot, align 4
  store float %cb, float* %cb.slot, align 4
  store float %cc, float* %cc.slot, align 4

  ; argv[n] = address of the n-th argument slot, in declaration order.
  %argv.0 = getelementptr i8*, i8** %argv, i32 0
  %p.raw = bitcast float** %p.slot to i8*
  store i8* %p.raw, i8** %argv.0
  %argv.1 = getelementptr i8*, i8** %argv, i32 1
  %tIn.raw = bitcast float** %tIn.slot to i8*
  store i8* %tIn.raw, i8** %argv.1
  %argv.2 = getelementptr i8*, i8** %argv, i32 2
  %tOut.raw = bitcast float** %tOut.slot to i8*
  store i8* %tOut.raw, i8** %argv.2
  %argv.3 = getelementptr i8*, i8** %argv, i32 3
  %sdc.raw = bitcast float* %sdc.slot to i8*
  store i8* %sdc.raw, i8** %argv.3
  %argv.4 = getelementptr i8*, i8** %argv, i32 4
  %nx.raw = bitcast i32* %nx.slot to i8*
  store i8* %nx.raw, i8** %argv.4
  %argv.5 = getelementptr i8*, i8** %argv, i32 5
  %ny.raw = bitcast i32* %ny.slot to i8*
  store i8* %ny.raw, i8** %argv.5
  %argv.6 = getelementptr i8*, i8** %argv, i32 6
  %nz.raw = bitcast i32* %nz.slot to i8*
  store i8* %nz.raw, i8** %argv.6
  %argv.7 = getelementptr i8*, i8** %argv, i32 7
  %ce.raw = bitcast float* %ce.slot to i8*
  store i8* %ce.raw, i8** %argv.7
  %argv.8 = getelementptr i8*, i8** %argv, i32 8
  %cw.raw = bitcast float* %cw.slot to i8*
  store i8* %cw.raw, i8** %argv.8
  %argv.9 = getelementptr i8*, i8** %argv, i32 9
  %cn.raw = bitcast float* %cn.slot to i8*
  store i8* %cn.raw, i8** %argv.9
  %argv.10 = getelementptr i8*, i8** %argv, i32 10
  %cs.raw = bitcast float* %cs.slot to i8*
  store i8* %cs.raw, i8** %argv.10
  %argv.11 = getelementptr i8*, i8** %argv, i32 11
  %ct.raw = bitcast float* %ct.slot to i8*
  store i8* %ct.raw, i8** %argv.11
  %argv.12 = getelementptr i8*, i8** %argv, i32 12
  %cb.raw = bitcast float* %cb.slot to i8*
  store i8* %cb.raw, i8** %argv.12
  %argv.13 = getelementptr i8*, i8** %argv, i32 13
  %cc.raw = bitcast float* %cc.slot to i8*
  store i8* %cc.raw, i8** %argv.13

  ; Retrieve grid size, block size, shared-memory size and stream.
  %pop.rc = call i32 @__cudaPopCallConfiguration(%struct.dim3* %grid, %struct.dim3* %block, i64* %shmem.slot, i8** %stream.slot)
  %shmem.bytes = load i64, i64* %shmem.slot, align 8
  %stream.raw = load i8*, i8** %stream.slot, align 8

  ; Copy the 12 bytes of each dim3 into its { i64, i32 } view and read it back
  ; as one i64 (the first two i32 fields) plus one i32 (the third field).
  %grid.abi.raw = bitcast { i64, i32 }* %grid.abi to i8*
  %grid.raw = bitcast %struct.dim3* %grid to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %grid.abi.raw, i8* align 8 %grid.raw, i64 12, i1 false)
  %grid.xy.ptr = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid.abi, i32 0, i32 0
  %grid.xy = load i64, i64* %grid.xy.ptr, align 8
  %grid.z.ptr = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid.abi, i32 0, i32 1
  %grid.z = load i32, i32* %grid.z.ptr, align 8

  %block.abi.raw = bitcast { i64, i32 }* %block.abi to i8*
  %block.raw = bitcast %struct.dim3* %block to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %block.abi.raw, i8* align 8 %block.raw, i64 12, i1 false)
  %block.xy.ptr = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block.abi, i32 0, i32 0
  %block.xy = load i64, i64* %block.xy.ptr, align 8
  %block.z.ptr = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block.abi, i32 0, i32 1
  %block.z = load i32, i32* %block.z.ptr, align 8

  %stream = bitcast i8* %stream.raw to %struct.CUstream_st*

  ; The stub's own address identifies the kernel to the runtime.
  %launch.rc = call i32 @cudaLaunchKernel(i8* bitcast (void (float*, float*, float*, float, i32, i32, i32, float, float, float, float, float, float, float)* @_Z11hotspotOpt1PfS_S_fiiifffffff to i8*), i64 %grid.xy, i32 %grid.z, i64 %block.xy, i32 %block.z, i8** %argv, i64 %shmem.bytes, %struct.CUstream_st* %stream)
  ret void
}
|
||
|
|
||
|
declare dso_local i32 @__cudaPopCallConfiguration(%struct.dim3*, %struct.dim3*, i64*, i8**)
|
||
|
|
||
|
declare dso_local i32 @cudaLaunchKernel(i8*, i64, i32, i64, i32, i8**, i64, %struct.CUstream_st*)
|
||
|
|
||
|
; Function Attrs: argmemonly nounwind willreturn
|
||
|
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #3
|
||
|
|
||
|
; Function Attrs: noinline optnone uwtable
;
; void hotspot_opt1(float *p, float *tIn, float *tOut,
;                   int nx, int ny, int nz,
;                   float Cap, float Rx, float Ry, float Rz,
;                   float dt, int numiter)
;
; Host driver for the hotspotOpt1 kernel:
;   * derives the stencil coefficients from Cap, Rx, Ry, Rz and dt,
;   * allocates three device buffers of 4 * nx * ny * nz bytes and uploads
;     tIn and p,
;   * launches the kernel numiter times with a 64 x 4 x 1 block and a
;     (nx / 64) x (ny / 4) x 1 grid, swapping the two temperature buffers
;     after every iteration,
;   * prints the elapsed wall-clock time, copies one device buffer into tOut
;     and frees the device buffers.
; No return code of any CUDA runtime call is inspected.
;
; NOTE(review): the grid size uses truncating division, so when nx is not a
; multiple of 64 or ny is not a multiple of 4 the grid covers fewer than
; nx x ny threads -- confirm callers guarantee the multiples.
define dso_local void @_Z12hotspot_opt1PfS_S_iiifffffi(float* %p, float* %tIn, float* %tOut, i32 %nx, i32 %ny, i32 %nz, float %Cap, float %Rx, float %Ry, float %Rz, float %dt, i32 %numiter) #2 {
entry:
  ; Device buffer pointers. They live in memory because cudaMalloc writes
  ; them through a pointer and the loop swaps two of them.
  %dev.tIn = alloca float*, align 8
  %dev.tOut = alloca float*, align 8
  %dev.p = alloca float*, align 8

  ; Launch geometry and its { i64, i32 } by-value views.
  %block = alloca %struct.dim3, align 4
  %grid = alloca %struct.dim3, align 4
  %grid.abi = alloca { i64, i32 }, align 4
  %block.abi = alloca { i64, i32 }, align 4
  %grid.raw = bitcast %struct.dim3* %grid to i8*
  %block.raw = bitcast %struct.dim3* %block to i8*
  %grid.abi.raw = bitcast { i64, i32 }* %grid.abi to i8*
  %block.abi.raw = bitcast { i64, i32 }* %block.abi to i8*
  %grid.xy.ptr = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid.abi, i32 0, i32 0
  %grid.z.ptr = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid.abi, i32 0, i32 1
  %block.xy.ptr = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block.abi, i32 0, i32 0
  %block.z.ptr = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block.abi, i32 0, i32 1

  ; ---- Coefficients ------------------------------------------------------
  ; stepDivCap = dt / Cap
  %step.div.cap = fdiv float %dt, %Cap
  ; ce = cw = stepDivCap / Rx
  %c.x = fdiv float %step.div.cap, %Rx
  ; cn = cs = stepDivCap / Ry
  %c.y = fdiv float %step.div.cap, %Ry
  ; ct = cb = stepDivCap / Rz
  %c.z = fdiv float %step.div.cap, %Rz
  ; cc = 1.0 - (2.0 * ce + 2.0 * cn + 3.0 * ct), evaluated in double
  ; precision and then narrowed to float.
  %c.x.d = fpext float %c.x to double
  %two.cx = fmul contract double 2.000000e+00, %c.x.d
  %c.y.d = fpext float %c.y to double
  %two.cy = fmul contract double 2.000000e+00, %c.y.d
  %sum.xy = fadd contract double %two.cx, %two.cy
  %c.z.d = fpext float %c.z to double
  %three.cz = fmul contract double 3.000000e+00, %c.z.d
  %sum.xyz = fadd contract double %sum.xy, %three.cz
  %c.c.d = fsub contract double 1.000000e+00, %sum.xyz
  %c.c = fptrunc double %c.c.d to float

  ; ---- Buffer size: 4 * nx * ny * nz bytes, computed in 64 bits -----------
  %nx.64 = sext i32 %nx to i64
  %row.bytes = mul i64 4, %nx.64
  %ny.64 = sext i32 %ny to i64
  %layer.bytes = mul i64 %row.bytes, %ny.64
  %nz.64 = sext i32 %nz to i64
  %bytes = mul i64 %layer.bytes, %nz.64

  ; ---- Device allocation (order: p, tIn, tOut) -----------------------------
  %dev.p.out = bitcast float** %dev.p to i8**
  %rc.alloc.p = call i32 @cudaMalloc(i8** %dev.p.out, i64 %bytes)
  %dev.tIn.out = bitcast float** %dev.tIn to i8**
  %rc.alloc.tIn = call i32 @cudaMalloc(i8** %dev.tIn.out, i64 %bytes)
  %dev.tOut.out = bitcast float** %dev.tOut to i8**
  %rc.alloc.tOut = call i32 @cudaMalloc(i8** %dev.tOut.out, i64 %bytes)

  ; ---- Upload tIn and p (copy kind 1: destination is the device buffer) ----
  %up.tIn.dst.f = load float*, float** %dev.tIn, align 8
  %up.tIn.dst = bitcast float* %up.tIn.dst.f to i8*
  %up.tIn.src = bitcast float* %tIn to i8*
  %rc.up.tIn = call i32 @cudaMemcpy(i8* %up.tIn.dst, i8* %up.tIn.src, i64 %bytes, i32 1)
  %up.p.dst.f = load float*, float** %dev.p, align 8
  %up.p.dst = bitcast float* %up.p.dst.f to i8*
  %up.p.src = bitcast float* %p to i8*
  %rc.up.p = call i32 @cudaMemcpy(i8* %up.p.dst, i8* %up.p.src, i64 %bytes, i32 1)

  ; ---- Launch geometry -----------------------------------------------------
  call void @_ZN4dim3C2Ejjj(%struct.dim3* %block, i32 64, i32 4, i32 1)
  %grid.x = sdiv i32 %nx, 64
  %grid.y = sdiv i32 %ny, 4
  call void @_ZN4dim3C2Ejjj(%struct.dim3* %grid, i32 %grid.x, i32 %grid.y, i32 1)

  %start.us = call i64 @_Z8get_timev()
  br label %loop.head

loop.head:
  ; iter counts completed iterations; the loop runs while iter < numiter.
  %iter = phi i32 [ 0, %entry ], [ %iter.next, %loop.latch ]
  %more = icmp slt i32 %iter, %numiter
  br i1 %more, label %loop.body, label %loop.exit

loop.body:
  ; Pass grid and block by value: copy each 12-byte dim3 into its
  ; { i64, i32 } view and read it back as i64 + i32.
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %grid.abi.raw, i8* align 4 %grid.raw, i64 12, i1 false)
  %grid.xy = load i64, i64* %grid.xy.ptr, align 4
  %grid.z = load i32, i32* %grid.z.ptr, align 4
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %block.abi.raw, i8* align 4 %block.raw, i64 12, i1 false)
  %block.xy = load i64, i64* %block.xy.ptr, align 4
  %block.z = load i32, i32* %block.z.ptr, align 4

  ; Shared-memory size 0, null stream. A non-zero result skips the launch.
  %push.rc = call i32 @__cudaPushCallConfiguration(i64 %grid.xy, i32 %grid.z, i64 %block.xy, i32 %block.z, i64 0, i8* null)
  %push.failed = icmp ne i32 %push.rc, 0
  br i1 %push.failed, label %loop.latch, label %launch

launch:
  %arg.p = load float*, float** %dev.p, align 8
  %arg.tIn = load float*, float** %dev.tIn, align 8
  %arg.tOut = load float*, float** %dev.tOut, align 8
  ; Argument order: p, tIn, tOut, sdc, nx, ny, nz, ce, cw, cn, cs, ct, cb, cc.
  call void @_Z11hotspotOpt1PfS_S_fiiifffffff(float* %arg.p, float* %arg.tIn, float* %arg.tOut, float %step.div.cap, i32 %nx, i32 %ny, i32 %nz, float %c.x, float %c.x, float %c.y, float %c.y, float %c.z, float %c.z, float %c.c)
  br label %loop.latch

loop.latch:
  ; Swap the two temperature buffers. This also happens when the launch was
  ; skipped because the configuration push failed.
  %old.tIn = load float*, float** %dev.tIn, align 8
  %old.tOut = load float*, float** %dev.tOut, align 8
  store float* %old.tOut, float** %dev.tIn, align 8
  store float* %old.tIn, float** %dev.tOut, align 8
  %iter.next = add nsw i32 %iter, 1
  br label %loop.head

loop.exit:
  %sync.rc = call i32 @cudaDeviceSynchronize()
  %stop.us = call i64 @_Z8get_timev()

  ; Elapsed seconds = (stop - start) / 1e6, narrowed to float and widened
  ; again for the variadic printf call.
  %elapsed.us = sub nsw i64 %stop.us, %start.us
  %elapsed.us.d = sitofp i64 %elapsed.us to double
  %elapsed.s.d = fdiv double %elapsed.us.d, 1.000000e+06
  %elapsed.s = fptrunc double %elapsed.s.d to float
  %elapsed.arg = fpext float %elapsed.s to double
  %printed = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str, i64 0, i64 0), double %elapsed.arg)

  ; Download into the caller's tOut (copy kind 2: source is a device buffer).
  ; NOTE(review): the buffers were swapped after the last launch, so the
  ; pointer read here was the kernel's tIn argument in the final iteration.
  ; Assuming the kernel writes its third argument, the newest result is in
  ; dev.tIn at this point -- confirm that copying dev.tOut is intended.
  %down.dst = bitcast float* %tOut to i8*
  %down.src.f = load float*, float** %dev.tOut, align 8
  %down.src = bitcast float* %down.src.f to i8*
  %rc.down = call i32 @cudaMemcpy(i8* %down.dst, i8* %down.src, i64 %bytes, i32 2)

  ; Release the device buffers (order: p, tIn, tOut).
  %free.p.f = load float*, float** %dev.p, align 8
  %free.p = bitcast float* %free.p.f to i8*
  %rc.free.p = call i32 @cudaFree(i8* %free.p)
  %free.tIn.f = load float*, float** %dev.tIn, align 8
  %free.tIn = bitcast float* %free.tIn.f to i8*
  %rc.free.tIn = call i32 @cudaFree(i8* %free.tIn)
  %free.tOut.f = load float*, float** %dev.tOut, align 8
  %free.tOut = bitcast float* %free.tOut.f to i8*
  %rc.free.tOut = call i32 @cudaFree(i8* %free.tOut)
  ret void
}
|
||
|
|
||
|
declare dso_local i32 @cudaMalloc(i8**, i64) #4
|
||
|
|
||
|
declare dso_local i32 @cudaMemcpy(i8*, i8*, i64, i32) #4
|
||
|
|
||
|
; Function Attrs: noinline nounwind optnone uwtable
;
; dim3::dim3(unsigned vx, unsigned vy, unsigned vz)
;
; Stores the three arguments into fields 0, 1 and 2 of the dim3 object that
; %this points to (named x, y and z below).
define linkonce_odr dso_local void @_ZN4dim3C2Ejjj(%struct.dim3* %this, i32 %vx, i32 %vy, i32 %vz) unnamed_addr #0 comdat align 2 {
entry:
  %x.ptr = getelementptr inbounds %struct.dim3, %struct.dim3* %this, i32 0, i32 0
  store i32 %vx, i32* %x.ptr, align 4
  %y.ptr = getelementptr inbounds %struct.dim3, %struct.dim3* %this, i32 0, i32 1
  store i32 %vy, i32* %y.ptr, align 4
  %z.ptr = getelementptr inbounds %struct.dim3, %struct.dim3* %this, i32 0, i32 2
  store i32 %vz, i32* %z.ptr, align 4
  ret void
}
|
||
|
|
||
|
declare dso_local i32 @__cudaPushCallConfiguration(i64, i32, i64, i32, i64, i8*) #4
|
||
|
|
||
|
declare dso_local i32 @cudaDeviceSynchronize() #4
|
||
|
|
||
|
declare dso_local i32 @printf(i8*, ...) #4
|
||
|
|
||
|
declare dso_local i32 @cudaFree(i8*) #4
|
||
|
|
||
|
; Function Attrs: noinline optnone uwtable
;
; void fatal(const char *s)
;
; Writes "Error: <s>\n" to stderr.
; Despite its name the function returns normally; it does not terminate the
; process, so callers continue executing after reporting an error.
define dso_local void @_Z5fatalPKc(i8* %s) #2 {
entry:
  %err.stream = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
  %written = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %err.stream, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.1, i64 0, i64 0), i8* %s)
  ret void
}
|
||
|
|
||
|
declare dso_local i32 @fprintf(%struct._IO_FILE*, i8*, ...) #4
|
||
|
|
||
|
; Function Attrs: noinline optnone uwtable
;
; void readinput(float *vect, int grid_rows, int grid_cols, int layers,
;                char *file)
;
; Opens `file` for reading and consumes one line per grid cell. The loops run
; rows (i) outermost, then columns (j), then layers (k) innermost. Each line
; is parsed with "%f" and stored at
;     vect[i * grid_cols + j + k * grid_rows * grid_cols]
;
; Parameters:
;   vect       destination array, written at the index above
;   grid_rows  number of rows    (i runs from 0 to grid_rows - 1)
;   grid_cols  number of columns (j runs from 0 to grid_cols - 1)
;   layers     number of layers  (k runs from 0 to layers - 1)
;   file       path handed to fopen with mode "r"
;
; Errors are reported through fatal(), which only prints and returns.
;
; Fix over the previous version: when fopen fails, the function now returns
; right after reporting the error. Previously it fell through into the read
; loops and passed the null FILE* to fgets, feof and fclose. The function
; returns void, so the caller still cannot observe the failure except through
; the message on stderr.
;
; NOTE(review): the per-line error paths keep their previous behaviour. After
; a failed fgets the line buffer is still parsed, and after a failed sscanf
; the previous contents of the value slot are still stored into vect.
define dso_local void @_Z9readinputPfiiiPc(float* %vect, i32 %grid_rows, i32 %grid_cols, i32 %layers, i8* %file) #2 {
entry:
  ; 256-byte line buffer and the slot sscanf writes the parsed float into.
  %line = alloca [256 x i8], align 16
  %val = alloca float, align 4
  %line.ptr = getelementptr inbounds [256 x i8], [256 x i8]* %line, i64 0, i64 0

  ; Inclusive upper bounds of the three loops.
  %last.row = sub nsw i32 %grid_rows, 1
  %last.col = sub nsw i32 %grid_cols, 1
  %last.layer = sub nsw i32 %layers, 1

  %fp = call %struct._IO_FILE* @fopen(i8* %file, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.2, i64 0, i64 0))
  %open.failed = icmp eq %struct._IO_FILE* %fp, null
  br i1 %open.failed, label %open.error, label %rows.head

open.error:
  ; Nothing was opened, so there is nothing to read or close.
  call void @_Z5fatalPKc(i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.3, i64 0, i64 0))
  ret void

rows.head:
  %i = phi i32 [ 0, %entry ], [ %i.next, %rows.latch ]
  %row.ok = icmp sle i32 %i, %last.row
  br i1 %row.ok, label %cols.head, label %done

cols.head:
  %j = phi i32 [ 0, %rows.head ], [ %j.next, %cols.latch ]
  %col.ok = icmp sle i32 %j, %last.col
  br i1 %col.ok, label %layers.head, label %rows.latch

layers.head:
  %k = phi i32 [ 0, %cols.head ], [ %k.next, %store.val ]
  %layer.ok = icmp sle i32 %k, %last.layer
  br i1 %layer.ok, label %read.line, label %cols.latch

read.line:
  %got = call i8* @fgets(i8* %line.ptr, i32 256, %struct._IO_FILE* %fp)
  %read.failed = icmp eq i8* %got, null
  br i1 %read.failed, label %read.error, label %check.eof

read.error:
  call void @_Z5fatalPKc(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str.4, i64 0, i64 0))
  br label %check.eof

check.eof:
  %eof.rc = call i32 @feof(%struct._IO_FILE* %fp) #8
  %at.eof = icmp ne i32 %eof.rc, 0
  br i1 %at.eof, label %eof.error, label %parse.line

eof.error:
  call void @_Z5fatalPKc(i8* getelementptr inbounds ([25 x i8], [25 x i8]* @.str.5, i64 0, i64 0))
  br label %parse.line

parse.line:
  ; Exactly one converted field is expected.
  %fields = call i32 (i8*, i8*, ...) @sscanf(i8* %line.ptr, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.6, i64 0, i64 0), float* %val) #8
  %parse.failed = icmp ne i32 %fields, 1
  br i1 %parse.failed, label %parse.error, label %store.val

parse.error:
  call void @_Z5fatalPKc(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str.7, i64 0, i64 0))
  br label %store.val

store.val:
  ; index = (i * grid_cols + j) + ((k * grid_rows) * grid_cols)
  ; The multiplication order is kept as written; it matters for the nsw flags.
  %parsed = load float, float* %val, align 4
  %row.off = mul nsw i32 %i, %grid_cols
  %cell = add nsw i32 %row.off, %j
  %k.rows = mul nsw i32 %k, %grid_rows
  %layer.off = mul nsw i32 %k.rows, %grid_cols
  %index = add nsw i32 %cell, %layer.off
  %index.64 = sext i32 %index to i64
  %dst = getelementptr inbounds float, float* %vect, i64 %index.64
  store float %parsed, float* %dst, align 4
  %k.next = add nsw i32 %k, 1
  br label %layers.head

cols.latch:
  %j.next = add nsw i32 %j, 1
  br label %cols.head

rows.latch:
  %i.next = add nsw i32 %i, 1
  br label %rows.head

done:
  %close.rc = call i32 @fclose(%struct._IO_FILE* %fp)
  ret void
}
|
||
|
|
||
|
declare dso_local %struct._IO_FILE* @fopen(i8*, i8*) #4
|
||
|
|
||
|
declare dso_local i8* @fgets(i8*, i32, %struct._IO_FILE*) #4
|
||
|
|
||
|
; Function Attrs: nounwind
|
||
|
declare dso_local i32 @feof(%struct._IO_FILE*) #1
|
||
|
|
||
|
; Function Attrs: nounwind
|
||
|
declare dso_local i32 @sscanf(i8*, i8*, ...) #1
|
||
|
|
||
|
declare dso_local i32 @fclose(%struct._IO_FILE*) #4
|
||
|
|
||
|
; Function Attrs: noinline optnone uwtable
|
||
|
; writeoutput(float* vect, i32 grid_rows, i32 grid_cols, i32 layers, i8* file)
;
; Opens `file` with mode @.str.8 ("w") and, for every (i, j, k) with
; i < grid_rows, j < grid_cols, k < layers (k innermost), formats one record
; with sprintf/@.str.10 from a running i32 counter and the element
;   vect[i*grid_cols + j + k*grid_rows*grid_cols]   (widened to double),
; then writes the record with fputs. The stream is closed before returning.
;
; If fopen fails, the @.str.9 message is printed and the function returns
; immediately; no null FILE* is ever handed to fputs or fclose.
define dso_local void @_Z11writeoutputPfiiiPc(float* %vect, i32 %grid_rows, i32 %grid_cols, i32 %layers, i8* %file) #2 {
entry:
  %vect.addr = alloca float*, align 8
  %grid_rows.addr = alloca i32, align 4
  %grid_cols.addr = alloca i32, align 4
  %layers.addr = alloca i32, align 4
  %file.addr = alloca i8*, align 8
  %i = alloca i32, align 4
  %j = alloca i32, align 4
  %k = alloca i32, align 4
  %index = alloca i32, align 4
  %fp = alloca %struct._IO_FILE*, align 8
  %str = alloca [256 x i8], align 16
  store float* %vect, float** %vect.addr, align 8
  store i32 %grid_rows, i32* %grid_rows.addr, align 4
  store i32 %grid_cols, i32* %grid_cols.addr, align 4
  store i32 %layers, i32* %layers.addr, align 4
  store i8* %file, i8** %file.addr, align 8
  ; Running record counter, emitted as the first sprintf argument.
  store i32 0, i32* %index, align 4
  %0 = load i8*, i8** %file.addr, align 8
  %call = call %struct._IO_FILE* @fopen(i8* %0, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.8, i64 0, i64 0))
  store %struct._IO_FILE* %call, %struct._IO_FILE** %fp, align 8
  %cmp = icmp eq %struct._IO_FILE* %call, null
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([25 x i8], [25 x i8]* @.str.9, i64 0, i64 0))
  ; Nothing was opened, so there is nothing to write or close: bail out
  ; instead of falling through to fputs/fclose with a null stream.
  ret void

if.end:                                           ; preds = %entry
  store i32 0, i32* %i, align 4
  br label %for.cond

for.cond:                                         ; preds = %for.inc19, %if.end
  %1 = load i32, i32* %i, align 4
  %2 = load i32, i32* %grid_rows.addr, align 4
  %cmp2 = icmp slt i32 %1, %2
  br i1 %cmp2, label %for.body, label %for.end21

for.body:                                         ; preds = %for.cond
  store i32 0, i32* %j, align 4
  br label %for.cond3

for.cond3:                                        ; preds = %for.inc16, %for.body
  %3 = load i32, i32* %j, align 4
  %4 = load i32, i32* %grid_cols.addr, align 4
  %cmp4 = icmp slt i32 %3, %4
  br i1 %cmp4, label %for.body5, label %for.end18

for.body5:                                        ; preds = %for.cond3
  store i32 0, i32* %k, align 4
  br label %for.cond6

for.cond6:                                        ; preds = %for.inc, %for.body5
  %5 = load i32, i32* %k, align 4
  %6 = load i32, i32* %layers.addr, align 4
  %cmp7 = icmp slt i32 %5, %6
  br i1 %cmp7, label %for.body8, label %for.end

for.body8:                                        ; preds = %for.cond6
  %arraydecay = getelementptr inbounds [256 x i8], [256 x i8]* %str, i64 0, i64 0
  %7 = load i32, i32* %index, align 4
  %8 = load float*, float** %vect.addr, align 8
  ; Flat element index: i*grid_cols + j + (k*grid_rows)*grid_cols.
  %9 = load i32, i32* %i, align 4
  %10 = load i32, i32* %grid_cols.addr, align 4
  %mul = mul nsw i32 %9, %10
  %11 = load i32, i32* %j, align 4
  %add = add nsw i32 %mul, %11
  %12 = load i32, i32* %k, align 4
  %13 = load i32, i32* %grid_rows.addr, align 4
  %mul9 = mul nsw i32 %12, %13
  %14 = load i32, i32* %grid_cols.addr, align 4
  %mul10 = mul nsw i32 %mul9, %14
  %add11 = add nsw i32 %add, %mul10
  %idxprom = sext i32 %add11 to i64
  %arrayidx = getelementptr inbounds float, float* %8, i64 %idxprom
  %15 = load float, float* %arrayidx, align 4
  ; Variadic call: the float is widened to double before being passed.
  %conv = fpext float %15 to double
  %call12 = call i32 (i8*, i8*, ...) @sprintf(i8* %arraydecay, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.10, i64 0, i64 0), i32 %7, double %conv) #8
  %arraydecay13 = getelementptr inbounds [256 x i8], [256 x i8]* %str, i64 0, i64 0
  %16 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8
  %call14 = call i32 @fputs(i8* %arraydecay13, %struct._IO_FILE* %16)
  %17 = load i32, i32* %index, align 4
  %inc = add nsw i32 %17, 1
  store i32 %inc, i32* %index, align 4
  br label %for.inc

for.inc:                                          ; preds = %for.body8
  %18 = load i32, i32* %k, align 4
  %inc15 = add nsw i32 %18, 1
  store i32 %inc15, i32* %k, align 4
  br label %for.cond6

for.end:                                          ; preds = %for.cond6
  br label %for.inc16

for.inc16:                                        ; preds = %for.end
  %19 = load i32, i32* %j, align 4
  %inc17 = add nsw i32 %19, 1
  store i32 %inc17, i32* %j, align 4
  br label %for.cond3

for.end18:                                        ; preds = %for.cond3
  br label %for.inc19

for.inc19:                                        ; preds = %for.end18
  %20 = load i32, i32* %i, align 4
  %inc20 = add nsw i32 %20, 1
  store i32 %inc20, i32* %i, align 4
  br label %for.cond

for.end21:                                        ; preds = %for.cond
  %21 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8
  %call22 = call i32 @fclose(%struct._IO_FILE* %21)
  ret void
}
|
||
|
|
||
|
; Function Attrs: nounwind
|
||
|
declare dso_local i32 @sprintf(i8*, i8*, ...) #1
|
||
|
|
||
|
declare dso_local i32 @fputs(i8*, %struct._IO_FILE*) #4
|
||
|
|
||
|
; Function Attrs: noinline nounwind optnone uwtable
|
||
|
; computeTempCPU(pIn, tIn, tOut, nx, ny, nz, Cap, Rx, Ry, Rz, dt, numiter)
;
; Runs a do/while of full-grid passes over an nx*ny*nz grid stored flat with
; index c = x + y*nx + (z*nx)*ny. The body executes before `i < numiter` is
; tested, so at least one pass always runs. For every cell a pass stores
;
;   dst[c] = src[c]*cc + src[n]*cn + src[s]*cs + src[e]*ce + src[w]*cw
;          + src[t]*ct + src[b]*cb + (dt/Cap)*pIn[c] + ct*amb_temp
;
; where ce = cw = (dt/Cap)/Rx, cn = cs = (dt/Cap)/Ry, ct = cb = (dt/Cap)/Rz
; and cc = 1 - (2*ce + 2*cn + 3*ct), evaluated in double and truncated to
; float. A neighbour index that would step off the grid is replaced by c.
;
; src/dst start as tIn/tOut and the two local pointers are exchanged after
; every pass, so even-numbered passes (0-based) write tOut and odd ones tIn.
define dso_local void @_Z14computeTempCPUPfS_S_iiifffffi(float* %pIn, float* %tIn, float* %tOut, i32 %nx, i32 %ny, i32 %nz, float %Cap, float %Rx, float %Ry, float %Rz, float %dt, i32 %numiter) #0 {
entry:
  ; Only state that changes between iterations is kept in memory.
  %src.slot = alloca float*, align 8
  %dst.slot = alloca float*, align 8
  %iter.slot = alloca i32, align 4
  %z.slot = alloca i32, align 4
  %y.slot = alloca i32, align 4
  %x.slot = alloca i32, align 4
  store float* %tIn, float** %src.slot, align 8
  store float* %tOut, float** %dst.slot, align 8

  ; Per-axis coupling coefficients (each shared by the two opposite faces).
  %step_div_cap = fdiv float %dt, %Cap
  %c_ew = fdiv float %step_div_cap, %Rx
  %c_ns = fdiv float %step_div_cap, %Ry
  %c_tb = fdiv float %step_div_cap, %Rz

  ; Centre coefficient, computed in double precision.
  %ew.d = fpext float %c_ew to double
  %ew.x2 = fmul contract double 2.000000e+00, %ew.d
  %ns.d = fpext float %c_ns to double
  %ns.x2 = fmul contract double 2.000000e+00, %ns.d
  %horiz = fadd contract double %ew.x2, %ns.x2
  %tb.d = fpext float %c_tb to double
  %tb.x3 = fmul contract double 3.000000e+00, %tb.d
  %neigh = fadd contract double %horiz, %tb.x3
  %cc.d = fsub contract double 1.000000e+00, %neigh
  %c_center = fptrunc double %cc.d to float

  store i32 0, i32* %iter.slot, align 4
  br label %sweep

sweep:                                            ; preds = %swap, %entry
  store i32 0, i32* %z.slot, align 4
  br label %z.head

z.head:                                           ; preds = %z.latch, %sweep
  %z = load i32, i32* %z.slot, align 4
  %z.in = icmp slt i32 %z, %nz
  br i1 %z.in, label %z.body, label %swap

z.body:                                           ; preds = %z.head
  store i32 0, i32* %y.slot, align 4
  br label %y.head

y.head:                                           ; preds = %y.latch, %z.body
  %y = load i32, i32* %y.slot, align 4
  %y.in = icmp slt i32 %y, %ny
  br i1 %y.in, label %y.body, label %z.latch

y.body:                                           ; preds = %y.head
  store i32 0, i32* %x.slot, align 4
  br label %x.head

x.head:                                           ; preds = %cell, %y.body
  %x = load i32, i32* %x.slot, align 4
  %x.in = icmp slt i32 %x, %nx
  br i1 %x.in, label %cell, label %y.latch

cell:                                             ; preds = %x.head
  ; c = x + y*nx + (z*nx)*ny
  %row.off = mul nsw i32 %y, %nx
  %in.plane = add nsw i32 %x, %row.off
  %z.nx = mul nsw i32 %z, %nx
  %plane.off = mul nsw i32 %z.nx, %ny
  %c = add nsw i32 %in.plane, %plane.off

  ; Neighbour indices; on a boundary face the neighbour is the cell itself.
  %at.west = icmp eq i32 %x, 0
  %c.m1 = sub nsw i32 %c, 1
  %w = select i1 %at.west, i32 %c, i32 %c.m1

  %last.x = sub nsw i32 %nx, 1
  %at.east = icmp eq i32 %x, %last.x
  %c.p1 = add nsw i32 %c, 1
  %e = select i1 %at.east, i32 %c, i32 %c.p1

  %at.north = icmp eq i32 %y, 0
  %c.mrow = sub nsw i32 %c, %nx
  %n = select i1 %at.north, i32 %c, i32 %c.mrow

  %last.y = sub nsw i32 %ny, 1
  %at.south = icmp eq i32 %y, %last.y
  %c.prow = add nsw i32 %c, %nx
  %s = select i1 %at.south, i32 %c, i32 %c.prow

  %plane = mul nsw i32 %nx, %ny
  %at.bottom = icmp eq i32 %z, 0
  %c.mplane = sub nsw i32 %c, %plane
  %b = select i1 %at.bottom, i32 %c, i32 %c.mplane

  %last.z = sub nsw i32 %nz, 1
  %at.top = icmp eq i32 %z, %last.z
  %c.pplane = add nsw i32 %c, %plane
  %t = select i1 %at.top, i32 %c, i32 %c.pplane

  ; Weighted sum, accumulated in the order c, n, s, e, w, t, b.
  %src = load float*, float** %src.slot, align 8
  %c.i64 = sext i32 %c to i64
  %p.c = getelementptr inbounds float, float* %src, i64 %c.i64
  %t.c = load float, float* %p.c, align 4
  %acc0 = fmul contract float %t.c, %c_center

  %n.i64 = sext i32 %n to i64
  %p.n = getelementptr inbounds float, float* %src, i64 %n.i64
  %t.n = load float, float* %p.n, align 4
  %term.n = fmul contract float %t.n, %c_ns
  %acc1 = fadd contract float %acc0, %term.n

  %s.i64 = sext i32 %s to i64
  %p.s = getelementptr inbounds float, float* %src, i64 %s.i64
  %t.s = load float, float* %p.s, align 4
  %term.s = fmul contract float %t.s, %c_ns
  %acc2 = fadd contract float %acc1, %term.s

  %e.i64 = sext i32 %e to i64
  %p.e = getelementptr inbounds float, float* %src, i64 %e.i64
  %t.e = load float, float* %p.e, align 4
  %term.e = fmul contract float %t.e, %c_ew
  %acc3 = fadd contract float %acc2, %term.e

  %w.i64 = sext i32 %w to i64
  %p.w = getelementptr inbounds float, float* %src, i64 %w.i64
  %t.w = load float, float* %p.w, align 4
  %term.w = fmul contract float %t.w, %c_ew
  %acc4 = fadd contract float %acc3, %term.w

  %t.i64 = sext i32 %t to i64
  %p.t = getelementptr inbounds float, float* %src, i64 %t.i64
  %t.t = load float, float* %p.t, align 4
  %term.t = fmul contract float %t.t, %c_tb
  %acc5 = fadd contract float %acc4, %term.t

  %b.i64 = sext i32 %b to i64
  %p.b = getelementptr inbounds float, float* %src, i64 %b.i64
  %t.b = load float, float* %p.b, align 4
  %term.b = fmul contract float %t.b, %c_tb
  %acc6 = fadd contract float %acc5, %term.b

  ; Source term: (dt/Cap) * pIn[c].
  %p.pw = getelementptr inbounds float, float* %pIn, i64 %c.i64
  %pw = load float, float* %p.pw, align 4
  %term.pw = fmul contract float %step_div_cap, %pw
  %acc7 = fadd contract float %acc6, %term.pw

  ; Ambient term: ct * amb_temp.
  %amb = load float, float* @amb_temp, align 4
  %term.amb = fmul contract float %c_tb, %amb
  %out = fadd contract float %acc7, %term.amb

  %dst = load float*, float** %dst.slot, align 8
  %p.out = getelementptr inbounds float, float* %dst, i64 %c.i64
  store float %out, float* %p.out, align 4

  %x.next = add nsw i32 %x, 1
  store i32 %x.next, i32* %x.slot, align 4
  br label %x.head

y.latch:                                          ; preds = %x.head
  %y.next = add nsw i32 %y, 1
  store i32 %y.next, i32* %y.slot, align 4
  br label %y.head

z.latch:                                          ; preds = %y.head
  %z.next = add nsw i32 %z, 1
  store i32 %z.next, i32* %z.slot, align 4
  br label %z.head

swap:                                             ; preds = %z.head
  ; Exchange the local source/destination pointers for the next pass.
  %old.src = load float*, float** %src.slot, align 8
  %old.dst = load float*, float** %dst.slot, align 8
  store float* %old.dst, float** %src.slot, align 8
  store float* %old.src, float** %dst.slot, align 8

  ; do { ... } while (++i < numiter)
  %done = load i32, i32* %iter.slot, align 4
  %done.next = add nsw i32 %done, 1
  store i32 %done.next, i32* %iter.slot, align 4
  %again = icmp slt i32 %done.next, %numiter
  br i1 %again, label %sweep, label %exit

exit:                                             ; preds = %swap
  ret void
}
|
||
|
|
||
|
; Function Attrs: noinline optnone uwtable
|
||
|
; accuracy(float* arr1, float* arr2, i32 len) -> float
;
; Accumulates (arr1[k] - arr2[k])^2 in single precision for k in [0, len),
; divides the sum by len converted to float, and returns the result of
; @_ZSt4sqrtf applied to that quotient.
define dso_local float @_Z8accuracyPfS_i(float* %arr1, float* %arr2, i32 %len) #2 {
entry:
  %sum.slot = alloca float, align 4
  %idx.slot = alloca i32, align 4
  store float 0.000000e+00, float* %sum.slot, align 4
  store i32 0, i32* %idx.slot, align 4
  br label %loop.head

loop.head:                                        ; preds = %loop.body, %entry
  %idx = load i32, i32* %idx.slot, align 4
  %more = icmp slt i32 %idx, %len
  br i1 %more, label %loop.body, label %finish

loop.body:                                        ; preds = %loop.head
  %off = sext i32 %idx to i64
  %lhs.ptr = getelementptr inbounds float, float* %arr1, i64 %off
  %lhs = load float, float* %lhs.ptr, align 4
  %rhs.ptr = getelementptr inbounds float, float* %arr2, i64 %off
  %rhs = load float, float* %rhs.ptr, align 4
  ; Squared difference of the element pair.
  %delta = fsub contract float %lhs, %rhs
  %delta.sq = fmul contract float %delta, %delta
  %sum = load float, float* %sum.slot, align 4
  %sum.next = fadd contract float %sum, %delta.sq
  store float %sum.next, float* %sum.slot, align 4
  %idx.next = add nsw i32 %idx, 1
  store i32 %idx.next, i32* %idx.slot, align 4
  br label %loop.head

finish:                                           ; preds = %loop.head
  %total = load float, float* %sum.slot, align 4
  %count = sitofp i32 %len to float
  %mean = fdiv float %total, %count
  %rms = call float @_ZSt4sqrtf(float %mean)
  ret float %rms
}
|
||
|
|
||
|
; Function Attrs: noinline nounwind optnone uwtable
|
||
|
; std::sqrt(float): forwards the argument to @sqrtf and returns its result.
define linkonce_odr dso_local float @_ZSt4sqrtf(float %__x) #0 comdat {
entry:
  %root = call float @sqrtf(float %__x) #8
  ret float %root
}
|
||
|
|
||
|
; Function Attrs: noinline optnone uwtable
|
||
|
; usage(i32 argc, i8** argv)
;
; Writes seven messages to stderr with fprintf (format strings @.str.11
; through @.str.17; the first one receives argv[0] as its argument) and then
; calls exit(1). Control never returns to the caller. %argc is not read.
define dso_local void @_Z5usageiPPc(i32 %argc, i8** %argv) #2 {
entry:
  %err0 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
  ; argv[0]
  %prog = load i8*, i8** %argv, align 8
  %r0 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %err0, i8* getelementptr inbounds ([81 x i8], [81 x i8]* @.str.11, i64 0, i64 0), i8* %prog)
  %err1 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
  %r1 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %err1, i8* getelementptr inbounds ([68 x i8], [68 x i8]* @.str.12, i64 0, i64 0))
  %err2 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
  %r2 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %err2, i8* getelementptr inbounds ([62 x i8], [62 x i8]* @.str.13, i64 0, i64 0))
  %err3 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
  %r3 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %err3, i8* getelementptr inbounds ([37 x i8], [37 x i8]* @.str.14, i64 0, i64 0))
  %err4 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
  %r4 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %err4, i8* getelementptr inbounds ([83 x i8], [83 x i8]* @.str.15, i64 0, i64 0))
  %err5 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
  %r5 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %err5, i8* getelementptr inbounds ([88 x i8], [88 x i8]* @.str.16, i64 0, i64 0))
  %err6 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
  %r6 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %err6, i8* getelementptr inbounds ([28 x i8], [28 x i8]* @.str.17, i64 0, i64 0))
  call void @exit(i32 1) #9
  unreachable
}
|
||
|
|
||
|
; Function Attrs: noreturn nounwind
|
||
|
declare dso_local void @exit(i32) #5
|
||
|
|
||
|
; Function Attrs: noinline norecurse optnone uwtable
|
||
|
define dso_local i32 @main(i32 %argc, i8** %argv) #6 {
|
||
|
entry:
|
||
|
%retval = alloca i32, align 4
|
||
|
%argc.addr = alloca i32, align 4
|
||
|
%argv.addr = alloca i8**, align 8
|
||
|
%pfile = alloca i8*, align 8
|
||
|
%tfile = alloca i8*, align 8
|
||
|
%ofile = alloca i8*, align 8
|
||
|
%iterations = alloca i32, align 4
|
||
|
%numCols = alloca i32, align 4
|
||
|
%numRows = alloca i32, align 4
|
||
|
%layers = alloca i32, align 4
|
||
|
%dx = alloca float, align 4
|
||
|
%dy = alloca float, align 4
|
||
|
%dz = alloca float, align 4
|
||
|
%Cap = alloca float, align 4
|
||
|
%Rx = alloca float, align 4
|
||
|
%Ry = alloca float, align 4
|
||
|
%Rz = alloca float, align 4
|
||
|
%max_slope = alloca float, align 4
|
||
|
%dt = alloca float, align 4
|
||
|
%powerIn = alloca float*, align 8
|
||
|
%tempOut = alloca float*, align 8
|
||
|
%tempIn = alloca float*, align 8
|
||
|
%tempCopy = alloca float*, align 8
|
||
|
%size = alloca i32, align 4
|
||
|
%answer = alloca float*, align 8
|
||
|
%acc = alloca float, align 4
|
||
|
store i32 0, i32* %retval, align 4
|
||
|
store i32 %argc, i32* %argc.addr, align 4
|
||
|
store i8** %argv, i8*** %argv.addr, align 8
|
||
|
%call = call i32 @cudaSetDevice(i32 0)
|
||
|
%0 = load i32, i32* %argc.addr, align 4
|
||
|
%cmp = icmp ne i32 %0, 7
|
||
|
br i1 %cmp, label %if.then, label %if.end
|
||
|
|
||
|
if.then: ; preds = %entry
|
||
|
%1 = load i32, i32* %argc.addr, align 4
|
||
|
%2 = load i8**, i8*** %argv.addr, align 8
|
||
|
call void @_Z5usageiPPc(i32 %1, i8** %2)
|
||
|
br label %if.end
|
||
|
|
||
|
if.end: ; preds = %if.then, %entry
|
||
|
%3 = load i8**, i8*** %argv.addr, align 8
|
||
|
%arrayidx = getelementptr inbounds i8*, i8** %3, i64 3
|
||
|
%4 = load i8*, i8** %arrayidx, align 8
|
||
|
%call1 = call i32 @atoi(i8* %4) #10
|
||
|
store i32 %call1, i32* %iterations, align 4
|
||
|
%5 = load i8**, i8*** %argv.addr, align 8
|
||
|
%arrayidx2 = getelementptr inbounds i8*, i8** %5, i64 4
|
||
|
%6 = load i8*, i8** %arrayidx2, align 8
|
||
|
store i8* %6, i8** %pfile, align 8
|
||
|
%7 = load i8**, i8*** %argv.addr, align 8
|
||
|
%arrayidx3 = getelementptr inbounds i8*, i8** %7, i64 5
|
||
|
%8 = load i8*, i8** %arrayidx3, align 8
|
||
|
store i8* %8, i8** %tfile, align 8
|
||
|
%9 = load i8**, i8*** %argv.addr, align 8
|
||
|
%arrayidx4 = getelementptr inbounds i8*, i8** %9, i64 6
|
||
|
%10 = load i8*, i8** %arrayidx4, align 8
|
||
|
store i8* %10, i8** %ofile, align 8
|
||
|
%11 = load i8**, i8*** %argv.addr, align 8
|
||
|
%arrayidx5 = getelementptr inbounds i8*, i8** %11, i64 1
|
||
|
%12 = load i8*, i8** %arrayidx5, align 8
|
||
|
%call6 = call i32 @atoi(i8* %12) #10
|
||
|
store i32 %call6, i32* %numCols, align 4
|
||
|
%13 = load i8**, i8*** %argv.addr, align 8
|
||
|
%arrayidx7 = getelementptr inbounds i8*, i8** %13, i64 1
|
||
|
%14 = load i8*, i8** %arrayidx7, align 8
|
||
|
%call8 = call i32 @atoi(i8* %14) #10
|
||
|
store i32 %call8, i32* %numRows, align 4
|
||
|
%15 = load i8**, i8*** %argv.addr, align 8
|
||
|
%arrayidx9 = getelementptr inbounds i8*, i8** %15, i64 2
|
||
|
%16 = load i8*, i8** %arrayidx9, align 8
|
||
|
%call10 = call i32 @atoi(i8* %16) #10
|
||
|
store i32 %call10, i32* %layers, align 4
|
||
|
%17 = load float, float* @chip_height, align 4
|
||
|
%18 = load i32, i32* %numRows, align 4
|
||
|
%conv = sitofp i32 %18 to float
|
||
|
%div = fdiv float %17, %conv
|
||
|
store float %div, float* %dx, align 4
|
||
|
%19 = load float, float* @chip_width, align 4
|
||
|
%20 = load i32, i32* %numCols, align 4
|
||
|
%conv11 = sitofp i32 %20 to float
|
||
|
%div12 = fdiv float %19, %conv11
|
||
|
store float %div12, float* %dy, align 4
|
||
|
%21 = load float, float* @t_chip, align 4
|
||
|
%22 = load i32, i32* %layers, align 4
|
||
|
%conv13 = sitofp i32 %22 to float
|
||
|
%div14 = fdiv float %21, %conv13
|
||
|
store float %div14, float* %dz, align 4
|
||
|
%23 = load float, float* @t_chip, align 4
|
||
|
%conv15 = fpext float %23 to double
|
||
|
%mul = fmul contract double 8.750000e+05, %conv15
|
||
|
%24 = load float, float* %dx, align 4
|
||
|
%conv16 = fpext float %24 to double
|
||
|
%mul17 = fmul contract double %mul, %conv16
|
||
|
%25 = load float, float* %dy, align 4
|
||
|
%conv18 = fpext float %25 to double
|
||
|
%mul19 = fmul contract double %mul17, %conv18
|
||
|
%conv20 = fptrunc double %mul19 to float
|
||
|
store float %conv20, float* %Cap, align 4
|
||
|
%26 = load float, float* %dy, align 4
|
||
|
%conv21 = fpext float %26 to double
|
||
|
%27 = load float, float* @t_chip, align 4
|
||
|
%conv22 = fpext float %27 to double
|
||
|
%mul23 = fmul contract double 2.000000e+02, %conv22
|
||
|
%28 = load float, float* %dx, align 4
|
||
|
%conv24 = fpext float %28 to double
|
||
|
%mul25 = fmul contract double %mul23, %conv24
|
||
|
%div26 = fdiv double %conv21, %mul25
|
||
|
%conv27 = fptrunc double %div26 to float
|
||
|
store float %conv27, float* %Rx, align 4
|
||
|
%29 = load float, float* %dx, align 4
|
||
|
%conv28 = fpext float %29 to double
|
||
|
%30 = load float, float* @t_chip, align 4
|
||
|
%conv29 = fpext float %30 to double
|
||
|
%mul30 = fmul contract double 2.000000e+02, %conv29
|
||
|
%31 = load float, float* %dy, align 4
|
||
|
%conv31 = fpext float %31 to double
|
||
|
%mul32 = fmul contract double %mul30, %conv31
|
||
|
%div33 = fdiv double %conv28, %mul32
|
||
|
%conv34 = fptrunc double %div33 to float
|
||
|
store float %conv34, float* %Ry, align 4
|
||
|
%32 = load float, float* %dz, align 4
|
||
|
%33 = load float, float* %dx, align 4
|
||
|
%mul35 = fmul contract float 1.000000e+02, %33
|
||
|
%34 = load float, float* %dy, align 4
|
||
|
%mul36 = fmul contract float %mul35, %34
|
||
|
%div37 = fdiv float %32, %mul36
|
||
|
store float %div37, float* %Rz, align 4
|
||
|
%35 = load float, float* @t_chip, align 4
|
||
|
%conv38 = fpext float %35 to double
|
||
|
%mul39 = fmul contract double 5.000000e-01, %conv38
|
||
|
%mul40 = fmul contract double %mul39, 1.750000e+06
|
||
|
%div41 = fdiv double 3.000000e+06, %mul40
|
||
|
%conv42 = fptrunc double %div41 to float
|
||
|
store float %conv42, float* %max_slope, align 4
|
||
|
%36 = load float, float* %max_slope, align 4
|
||
|
%conv43 = fpext float %36 to double
|
||
|
%div44 = fdiv double 1.000000e-03, %conv43
|
||
|
%conv45 = fptrunc double %div44 to float
|
||
|
store float %conv45, float* %dt, align 4
|
||
|
%37 = load i32, i32* %numCols, align 4
|
||
|
%38 = load i32, i32* %numRows, align 4
|
||
|
%mul46 = mul nsw i32 %37, %38
|
||
|
%39 = load i32, i32* %layers, align 4
|
||
|
%mul47 = mul nsw i32 %mul46, %39
|
||
|
store i32 %mul47, i32* %size, align 4
|
||
|
%40 = load i32, i32* %size, align 4
|
||
|
%conv48 = sext i32 %40 to i64
|
||
|
%call49 = call noalias i8* @calloc(i64 %conv48, i64 4) #8
|
||
|
%41 = bitcast i8* %call49 to float*
|
||
|
store float* %41, float** %powerIn, align 8
|
||
|
%42 = load i32, i32* %size, align 4
|
||
|
%conv50 = sext i32 %42 to i64
|
||
|
%mul51 = mul i64 %conv50, 4
|
||
|
%call52 = call noalias i8* @malloc(i64 %mul51) #8
|
||
|
%43 = bitcast i8* %call52 to float*
|
||
|
store float* %43, float** %tempCopy, align 8
|
||
|
%44 = load i32, i32* %size, align 4
|
||
|
%conv53 = sext i32 %44 to i64
|
||
|
%call54 = call noalias i8* @calloc(i64 %conv53, i64 4) #8
|
||
|
%45 = bitcast i8* %call54 to float*
|
||
|
store float* %45, float** %tempIn, align 8
|
||
|
%46 = load i32, i32* %size, align 4
|
||
|
%conv55 = sext i32 %46 to i64
|
||
|
%call56 = call noalias i8* @calloc(i64 %conv55, i64 4) #8
|
||
|
%47 = bitcast i8* %call56 to float*
|
||
|
store float* %47, float** %tempOut, align 8
|
||
|
%48 = load i32, i32* %size, align 4
|
||
|
%conv57 = sext i32 %48 to i64
|
||
|
%call58 = call noalias i8* @calloc(i64 %conv57, i64 4) #8
|
||
|
%49 = bitcast i8* %call58 to float*
|
||
|
store float* %49, float** %answer, align 8
|
||
|
%50 = load float*, float** %powerIn, align 8
|
||
|
%51 = load i32, i32* %numRows, align 4
|
||
|
%52 = load i32, i32* %numCols, align 4
|
||
|
%53 = load i32, i32* %layers, align 4
|
||
|
%54 = load i8*, i8** %pfile, align 8
|
||
|
call void @_Z9readinputPfiiiPc(float* %50, i32 %51, i32 %52, i32 %53, i8* %54)
|
||
|
%55 = load float*, float** %tempIn, align 8
|
||
|
%56 = load i32, i32* %numRows, align 4
|
||
|
%57 = load i32, i32* %numCols, align 4
|
||
|
%58 = load i32, i32* %layers, align 4
|
||
|
%59 = load i8*, i8** %tfile, align 8
|
||
|
call void @_Z9readinputPfiiiPc(float* %55, i32 %56, i32 %57, i32 %58, i8* %59)
|
||
|
%60 = load float*, float** %tempCopy, align 8
|
||
|
%61 = bitcast float* %60 to i8*
|
||
|
%62 = load float*, float** %tempIn, align 8
|
||
|
%63 = bitcast float* %62 to i8*
|
||
|
%64 = load i32, i32* %size, align 4
|
||
|
%conv59 = sext i32 %64 to i64
|
||
|
%mul60 = mul i64 %conv59, 4
|
||
|
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %61, i8* align 4 %63, i64 %mul60, i1 false)
|
||
|
%65 = load float*, float** %powerIn, align 8
|
||
|
%66 = load float*, float** %tempIn, align 8
|
||
|
%67 = load float*, float** %tempOut, align 8
|
||
|
%68 = load i32, i32* %numCols, align 4
|
||
|
%69 = load i32, i32* %numRows, align 4
|
||
|
%70 = load i32, i32* %layers, align 4
|
||
|
%71 = load float, float* %Cap, align 4
|
||
|
%72 = load float, float* %Rx, align 4
|
||
|
%73 = load float, float* %Ry, align 4
|
||
|
%74 = load float, float* %Rz, align 4
|
||
|
%75 = load float, float* %dt, align 4
|
||
|
%76 = load i32, i32* %iterations, align 4
|
||
|
call void @_Z12hotspot_opt1PfS_S_iiifffffi(float* %65, float* %66, float* %67, i32 %68, i32 %69, i32 %70, float %71, float %72, float %73, float %74, float %75, i32 %76)
|
||
|
%77 = load float*, float** %powerIn, align 8
|
||
|
%78 = load float*, float** %tempCopy, align 8
|
||
|
%79 = load float*, float** %answer, align 8
|
||
|
%80 = load i32, i32* %numCols, align 4
|
||
|
%81 = load i32, i32* %numRows, align 4
|
||
|
%82 = load i32, i32* %layers, align 4
|
||
|
%83 = load float, float* %Cap, align 4
|
||
|
%84 = load float, float* %Rx, align 4
|
||
|
%85 = load float, float* %Ry, align 4
|
||
|
%86 = load float, float* %Rz, align 4
|
||
|
%87 = load float, float* %dt, align 4
|
||
|
%88 = load i32, i32* %iterations, align 4
|
||
|
call void @_Z14computeTempCPUPfS_S_iiifffffi(float* %77, float* %78, float* %79, i32 %80, i32 %81, i32 %82, float %83, float %84, float %85, float %86, float %87, i32 %88)
|
||
|
%89 = load float*, float** %tempOut, align 8
|
||
|
%90 = load float*, float** %answer, align 8
|
||
|
%91 = load i32, i32* %numRows, align 4
|
||
|
%92 = load i32, i32* %numCols, align 4
|
||
|
%mul61 = mul nsw i32 %91, %92
|
||
|
%93 = load i32, i32* %layers, align 4
|
||
|
%mul62 = mul nsw i32 %mul61, %93
|
||
|
%call63 = call float @_Z8accuracyPfS_i(float* %89, float* %90, i32 %mul62)
|
||
|
store float %call63, float* %acc, align 4
|
||
|
%94 = load float, float* %acc, align 4
|
||
|
%conv64 = fpext float %94 to double
|
||
|
%call65 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.18, i64 0, i64 0), double %conv64)
|
||
|
%95 = load float*, float** %tempOut, align 8
|
||
|
%96 = load i32, i32* %numRows, align 4
|
||
|
%97 = load i32, i32* %numCols, align 4
|
||
|
%98 = load i32, i32* %layers, align 4
|
||
|
%99 = load i8*, i8** %ofile, align 8
|
||
|
call void @_Z11writeoutputPfiiiPc(float* %95, i32 %96, i32 %97, i32 %98, i8* %99)
|
||
|
%100 = load float*, float** %tempIn, align 8
|
||
|
%101 = bitcast float* %100 to i8*
|
||
|
call void @free(i8* %101) #8
|
||
|
%102 = load float*, float** %tempOut, align 8
|
||
|
%103 = bitcast float* %102 to i8*
|
||
|
call void @free(i8* %103) #8
|
||
|
%104 = load float*, float** %powerIn, align 8
|
||
|
%105 = bitcast float* %104 to i8*
|
||
|
call void @free(i8* %105) #8
|
||
|
ret i32 0
|
||
|
}
|
||
|
|
||
|
; External declarations: each symbol below is only declared here, so its body
; must be supplied by another object or library at link time.
; @cudaSetDevice takes one i32, returns i32, and uses attribute group #4.
declare dso_local i32 @cudaSetDevice(i32) #4
|
||
|
|
||
|
; Function Attrs: nounwind readonly
|
||
|
; @atoi takes an i8* and returns i32; attribute group #7 (nounwind readonly).
declare dso_local i32 @atoi(i8*) #7
|
||
|
|
||
|
; Function Attrs: nounwind
|
||
|
; @calloc takes two i64 values and returns a noalias i8*; attribute group #1.
declare dso_local noalias i8* @calloc(i64, i64) #1
|
||
|
|
||
|
; Function Attrs: nounwind
|
||
|
; @malloc takes one i64 and returns a noalias i8*; attribute group #1.
declare dso_local noalias i8* @malloc(i64) #1
|
||
|
|
||
|
; Function Attrs: nounwind
|
||
|
; @free takes an i8* and returns nothing; attribute group #1.
declare dso_local void @free(i8*) #1
|
||
|
|
||
|
; Function Attrs: nounwind
|
||
|
; @sqrtf takes a float and returns a float; attribute group #1.
declare dso_local float @sqrtf(float) #1
|
||
|
|
||
|
; Registers one host symbol with the CUDA runtime.
;
; Makes a single call to @__cudaRegisterFunction for
; @_Z11hotspotOpt1PfS_S_fiiifffffff (cast to i8*), passing the start of the
; 33-byte array @0 for both i8* name arguments, -1 for the i32 argument, and
; null for every trailing pointer argument. The i32 result is not inspected.
; NOTE(review): @0 is presumably the NUL-terminated mangled kernel name and -1
; presumably means "no thread limit" - confirm against the CUDA runtime.
;
;   %fatbin_handle - i8** forwarded unchanged as the first call argument.
define internal void @__cuda_register_globals(i8** %fatbin_handle) {
entry:
  ; Address of the host-side symbol, viewed as an untyped pointer.
  %host_stub = bitcast void (float*, float*, float*, float, i32, i32, i32, float, float, float, float, float, float, float)* @_Z11hotspotOpt1PfS_S_fiiifffffff to i8*
  ; Pointer to the first byte of @0; used for both name arguments below.
  %name_ptr = getelementptr inbounds [33 x i8], [33 x i8]* @0, i64 0, i64 0
  %reg_status = call i32 @__cudaRegisterFunction(i8** %fatbin_handle, i8* %host_stub, i8* %name_ptr, i8* %name_ptr, i32 -1, i8* null, i8* null, i8* null, i8* null, i32* null)
  ret void
}
|
||
|
|
||
|
; External CUDA registration entry points; declared only, defined outside this
; module.
; @__cudaRegisterFunction: called once from @__cuda_register_globals.
declare dso_local i32 @__cudaRegisterFunction(i8**, i8*, i8*, i8*, i32, i8*, i8*, i8*, i8*, i32*)
|
||
|
|
||
|
; @__cudaRegisterVar: declared here; no call to it appears in the functions
; that follow in this part of the module.
declare dso_local i32 @__cudaRegisterVar(i8**, i8*, i8*, i8*, i32, i32, i32, i32)
|
||
|
|
||
|
; @__cudaRegisterFatBinary: takes an i8* and returns the i8** handle that
; @__cuda_module_ctor stores in @__cuda_gpubin_handle.
declare dso_local i8** @__cudaRegisterFatBinary(i8*)
|
||
|
|
||
|
; Module start-up hook for the embedded GPU binary.
;
; Steps, in order:
;   1. Pass @__cuda_fatbin_wrapper (cast to i8*) to @__cudaRegisterFatBinary.
;   2. Save the returned i8** handle in the global @__cuda_gpubin_handle.
;   3. Call @__cuda_register_globals with that handle.
;   4. Call @__cudaRegisterFatBinaryEnd with that handle.
;   5. Hand @__cuda_module_dtor to @atexit; the i32 result is ignored.
; NOTE(review): presumably invoked through @llvm.global_ctors, which is not
; visible in this part of the file - confirm.
;
;   %unused - i8* parameter that the body never reads.
define internal void @__cuda_module_ctor(i8* %unused) {
entry:
  %wrapper_ptr = bitcast { i32, i32, i8*, i8* }* @__cuda_fatbin_wrapper to i8*
  %fatbin_handle = call i8** @__cudaRegisterFatBinary(i8* %wrapper_ptr)
  ; Keep the handle so @__cuda_module_dtor can load it later.
  store i8** %fatbin_handle, i8*** @__cuda_gpubin_handle, align 8
  call void @__cuda_register_globals(i8** %fatbin_handle)
  call void @__cudaRegisterFatBinaryEnd(i8** %fatbin_handle)
  %atexit_status = call i32 @atexit(void (i8*)* @__cuda_module_dtor)
  ret void
}
|
||
|
|
||
|
; External declarations; both take the i8** handle and return nothing.
; @__cudaRegisterFatBinaryEnd is called from @__cuda_module_ctor after
; @__cuda_register_globals.
declare dso_local void @__cudaRegisterFatBinaryEnd(i8**)
|
||
|
|
||
|
; @__cudaUnregisterFatBinary is called from @__cuda_module_dtor.
declare dso_local void @__cudaUnregisterFatBinary(i8**)
|
||
|
|
||
|
; Module tear-down hook; @__cuda_module_ctor passes it to @atexit.
;
; Loads the i8** handle from the global @__cuda_gpubin_handle and passes it to
; @__cudaUnregisterFatBinary.
;
;   %unused - i8* parameter that the body never reads.
define internal void @__cuda_module_dtor(i8* %unused) {
entry:
  %fatbin_handle = load i8**, i8*** @__cuda_gpubin_handle, align 8
  call void @__cudaUnregisterFatBinary(i8** %fatbin_handle)
  ret void
}
|
||
|
|
||
|
; External declaration of @atexit as used by @__cuda_module_ctor: it takes a
; pointer to a void(i8*) function and returns i32.
declare dso_local i32 @atexit(void (i8*)*)
|
||
|
|
||
|
; Attribute groups referenced as #N by functions and call sites in this module.
; Groups #0, #1, #2, #4, #5, #6 and #7 share the same string attributes
; (target-cpu "x86-64", frame-pointer "all", ...) and differ in their leading
; enum attributes.
; #0: noinline nounwind optnone uwtable.
attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||
|
; #1: nounwind; used by the @calloc, @malloc, @free and @sqrtf declarations.
attributes #1 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||
|
; #2: same as #0 but without nounwind.
attributes #2 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||
|
; #3: argmemonly nounwind willreturn, with no string attributes.
attributes #3 = { argmemonly nounwind willreturn }
|
||
|
; #4: string attributes only; used by the @cudaSetDevice declaration.
attributes #4 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||
|
; #5: noreturn nounwind.
attributes #5 = { noreturn nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||
|
; #6: noinline norecurse optnone uwtable.
attributes #6 = { noinline norecurse optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||
|
; #7: nounwind readonly; used by the @atoi declaration.
attributes #7 = { nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||
|
; #8: nounwind only; attached to the @calloc, @malloc and @free call sites.
attributes #8 = { nounwind }
|
||
|
; #9: noreturn nounwind, with no string attributes.
attributes #9 = { noreturn nounwind }
|
||
|
; #10: nounwind readonly, with no string attributes.
attributes #10 = { nounwind readonly }
|
||
|
|
||
|
; Module-level named metadata: the module flags list refers to !0 and !1.
!llvm.module.flags = !{!0, !1}
|
||
|
; Producer identification refers to !2.
!llvm.ident = !{!2}
|
||
|
|
||
|
; Module flag "SDK Version" with value [10, 1]; the leading i32 2 is the flag's
; merge-behavior code.
!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]}
|
||
|
; Module flag "wchar_size" with value 4; the leading i32 1 is its
; merge-behavior code.
!1 = !{i32 1, !"wchar_size", i32 4}
|
||
|
; Producer string: clang 10.0.1 at the quoted llvm-project commit.
!2 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"}
|