; ModuleID = '3D-host-x86_64-unknown-linux-gnu.bc' source_filename = "3D.cu" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" %struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } %struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } %struct.timeval = type { i64, i64 } %struct.timezone = type { i32, i32 } %struct.dim3 = type { i32, i32, i32 } %struct.CUstream_st = type opaque $_ZN4dim3C2Ejjj = comdat any $_ZSt4sqrtf = comdat any @.str = private unnamed_addr constant [16 x i8] c"Time: %.3f (s)\0A\00", align 1 @t_chip = dso_local global float 0x3F40624DE0000000, align 4 @chip_height = dso_local global float 0x3F90624DE0000000, align 4 @chip_width = dso_local global float 0x3F90624DE0000000, align 4 @amb_temp = dso_local global float 8.000000e+01, align 4 @stderr = external dso_local global %struct._IO_FILE*, align 8 @.str.1 = private unnamed_addr constant [11 x i8] c"Error: %s\0A\00", align 1 @.str.2 = private unnamed_addr constant [2 x i8] c"r\00", align 1 @.str.3 = private unnamed_addr constant [24 x i8] c"The file was not opened\00", align 1 @.str.4 = private unnamed_addr constant [20 x i8] c"Error reading file\0A\00", align 1 @.str.5 = private unnamed_addr constant [25 x i8] c"not enough lines in file\00", align 1 @.str.6 = private unnamed_addr constant [3 x i8] c"%f\00", align 1 @.str.7 = private unnamed_addr constant [20 x i8] c"invalid file format\00", align 1 @.str.8 = private unnamed_addr constant [2 x i8] c"w\00", align 1 @.str.9 = private unnamed_addr constant [25 x i8] c"The file was not opened\0A\00", align 1 @.str.10 = private unnamed_addr constant [7 x i8] c"%d\09%g\0A\00", align 1 @.str.11 = private unnamed_addr constant [81 x i8] c"Usage: %s \0A\00", align 1 @.str.12 = private unnamed_addr 
constant [68 x i8] c"\09 - number of rows/cols in the grid (positive integer)\0A\00", align 1 @.str.13 = private unnamed_addr constant [62 x i8] c"\09 - number of layers in the grid (positive integer)\0A\00", align 1 @.str.14 = private unnamed_addr constant [37 x i8] c"\09 - number of iterations\0A\00", align 1 @.str.15 = private unnamed_addr constant [83 x i8] c"\09 - name of the file containing the initial power values of each cell\0A\00", align 1 @.str.16 = private unnamed_addr constant [88 x i8] c"\09 - name of the file containing the initial temperature values of each cell\0A\00", align 1 @.str.17 = private unnamed_addr constant [28 x i8] c"\09!\00\D3rd<3>;\0A\0Amov.u2\00\1B,e\00b;\0Acvta\8D\00\04%\00\13,\\\00\22ld\C8\00\02\18\00nrd2, [\CE\00\1E])\00\1F1)\00\01a0];\0Ast#\00\81[%SP+0],,\00\0A\16\00\128\16\00\222;\B6\00\01\D8\00a1, 999(\00\02g\00\00\EF\00\18[\9E\01\03M\00\AF1;\0Aret;\0A\0A}\D6\01\1A\FE\02FuncGetAttributes\E1\01\0D#\00\0E\EC\01\0F+\00\06\0F\F7\01\1B\1F1\F7\01Q\1F1\F7\01!\0E\D9\00\0F\02\02\0F\0E8\01\0F\0D\02\8DhDevice\B4\00\0E\0E\02\0E$\00\0F\0F\02\00/32,\00\0B\1F1,\00\18\1F2<\02\13\1F2<\02\1F\1D4<\02\1F2<\02\0C\1F2<\02\13\01_\00\04;\02\0F\D9\00\07\1D]4\00\1F14\00\06\0Fp\02\10\0E\9A\01\0Fq\02\12(32q\02\0B\15\00!12\16\00\09\86\02\1F3\86\02\15\1F3\86\02#2Get\CB\00\0E}\02\05\1B\00\04\DA\00\0F\1C\02\13?3[8W\04.\0F\1B\02\0D\1F3W\04\19\04\B3\01\0D\D0\00\0F\AA\01\06\0F\05\04W\F0\04OccupancyMaxActiveBV\08\FE\03sPerMultiprocessor\9F\01\0F;\00\16\0EB\06\0FC\00%\0EJ\04\0FC\00\1E\1F2\86\00/\1F3\88\02\13O4[32\89\02\1C\1D3\89\02\1F4\89\02\0C\1F4\89\02\19\133\89\02\0F\F1\00\1E\0F\BC\04\00\0FK\00$/2]w\07\00\0FL\00$\0F\1F\05\01\0F\98\00%\0F\A7\07\1D\097\05\186M\05\04,\00\2224-\00\183\CF\03\1F2\CF\03\15\1F2\CF\03L\9FWithFlags\D8\03(\05D\00\0E\E1\03\0FL\00'\0F\EA\030\0CL\00\1F2\98\008\1F3\98\008\1F4H\04\13O5[40\EC\08.\0FH\04\0D\1F5\EC\08\1C\0F\F9\00+\1F]\9C\040\0D\9A\01\0F\A5\040\0D:\02\0F\AE\041\0D\DB\02\0F\B7\041\0D|\03\0F\C0\04I\08-\00\1F3$\0A6\F2\1Avisible 
.entry _Z11hotspotOpt1PfS_S_fiiif\01\00\06\A9\04\00\A3\00\0F.\00\0D\0E\93\04\0F6\00\18\1F16\00\22\07g\04?f326\00\15\07Q\04\1Fu6\00\17\1F46\00\22\1F56\00\22\1F6\D8\00\22\1F76\00\22\1F86\00\22\1F96\00\22/107\00#\1F17\00#\0F\1F\02#\1F1m\0A\14O6[12&\06\16\95pred %p<6'\06\00\92\00k%f<94>J\06-95K\06/79L\06\0C\1F6L\06\12\02s\00O8, [\0B\01\16\1D]?\00\1F7?\00\18\1E2?\00\1F6?\00\18\1E1?\00\1F5?\00\18\1E0?\00\1F4?\00\17\1E9>\00\1F3>\00\17\1E8>\00\1F2>\00\17\1F7Y\07\00\0F\FA\00\18\1F6?\00\00\0F\FA\00\18\1F5?\00\00\0F\FB\00\18\1E4\FB\00\1F1>\00\17\1F3\A9\08\00\0F}\00\18\0F?\08\01\0Fy\01\18\0F)\08\01\0F\BD\00\18#0]\88\03#to\1D\18\04E\00\144\D9\07\01\1F\00\0A\1C\00\115\1C\00\1F4;\00\05\146A\08\0F;\00\00\117\1C\00\1F6;\00\05\148\92\08\0F;\00\00\119\1C\00\1F8\C4\08\02\1F9\C4\08\02\1A7\16\00\03\C4\08!d5\17\00\00\E9\01\07\C5\08\1Bf\E8\12\132\FE\12\0F\DB\08\03+14\17\00\03\1F\09\0B[\00\114\9F\00+f2\16\00\02q\00\1B3\16\00\01q\00*f4\16\00\115p\00\1BfX\00\115o\00*f6\16\00\126n\00\1B7\16\00\02n\00\1982\12\DA6, 1117782016\CB\00\136\F9\00\1961\00\00\B8\01intid.x\17\00\00\B0\013cta\18\00rul.lo.s\1B\00$9,8\00)r1}\00S20, %K\003add0\00521,6\00*20\93\00\137P\0A\09\F6\12$22\93\00\19y\17\00\153\93\00\1By\93\00424,8\00*r2s\0F\135\93\00\19y\93\00&6,6\00\1B5\93\00\02\F1\01!26\7F\03\03F\00%7,\B0\00\08\17\00%8,4\00\08\17\00%9,j\02\0B\A9\00330,8\00\00'\00\08\93\00331,i\00\00&\00\0B\AF\15\03@\0B\171e\00(32e\00\06\17\00%3,\CF\02\0C|\00#4,8\00\00'\00\0Bb\00\02\EF\0B(34K\00\185\F5\00rsetp.neL\003p1,!\00\F2\0C0;\0A@%p1 bra 
LBB6_2;\0Abra.uni\10\0021;\0A\08\00\16:Z\00%1,\D8\00\08\AC\01\129)\02\0BA\00\133A\00\172A\00(36B\00\07\D8\01\13,\1D\00.-1Y\00\1F2Y\00\04*3:\CD\0C\000\00\0B\1E\01\148\C0\0C\06\1D\01\0F\12\02\03(38\96\01\07\DE\01#9,\1E\00,-1L\01#2,P\00\00'\00\01O\01\162O\01\1B5\B5\00\134\B5\00\174\0E\01\1F4O\01\04\01Y\00\1B4A\00\136A\00\185A\00\1F0O\01\03#5,\1D\00\1E1X\00\1F5X\00\04\186N\01\01g\03\1B9q\0E\129\F4\03\08`\03(41I\03\0Ak\02#3,!\00\02k\02\163\1C\01\1B8\83\00\137\83\00\177\DB\00\1F7\1C\01\04\01V\00\1B7A\00\139A\00\188\1C\01\182B\00\06\17\00\183\E9\013sub3\01#8,4\00\00#\00\0Er\00\1F8r\00\04\1896\01\01,\02\1B9\A2\03\129\97\04\189\84\00\1F4\7F\04\02(45\03\04\06\CE\01346,\1E\00\0E\84\02#4,P\00\00'\00\01h\01\164h\01\1C1\93\03$10\D5\03\180\D6\03\1F0l\01\04\01W\00,10E\00\142E\00\08\1B\04/47o\01\03\0FX\03\04311,5\00\00$\00\0Fv\00\00\1F1v\00\06\182s\01\111\B2\01\1B9\FC\07#10^\19\122\8B\00\03\85\09\05\E8\00\02\A2\00\02\8A\00)d1\D6\044shl3\0D\02\\\00\01 \00\132\BC\00\03\19\00$3,P\00\01'\00\01e\00\02\A6\0A\01B\06\00\22\00\1A]\F3\07\2210L\08\1C9\17\00\144\17\00\07\A8\00\1A4\A8\00\03J\01\1F9a\01\02\155\D5\00\1A4\03\04#1,5\00\00$\00\01\BE\09\00\BF\00\05\F5\00\01#\04*51\F1\00$6,\1C\00\0B\F1\00$7,\99\00\01'\00\07\F1\00#10\F2\00\1C7\F2\00\02\97\1C8f10.\00%1,\03\09\08\17\00&2,'\01\08\18\00%3,\A0\09\09\22\01\1F8\CA\01\03\05#\01\1A8\CA\01\01\E0\08\02 \00\0A\D9\00\02\E3\08\22d1\E5\0A\192\AB\00\124\D9\00#21\90\07$rn\1A\00#5,\9C\00\00&\00W;\0Afma\1D\00#6,\E8\00\02\D7\00\00/\00\08V\00%7,\88\0A\08\BB\00\00\03\09\04e\05\0A\BB\00$3, \00\0B\BB\00\194\BB\00\183e\00\138\BB\00,4]\9E\00#9,\85\00\02&\00)%f\CD\01\00(\01\04\E4\0A\09\9E\00&5,Y\03\0A\9F\00\03Z\0C\1D5\9F\00\197\9F\00\09f\00\131\9F\00\1C7\9F\00\01\09\0A\01\86\00\02&\00\00\B1\00\089\00\05\0F\02\0A\F8\01\05\CF\09+96\9E\00$9, 
\00\0A\9E\00)30\9E\00\09e\00\03\F8\01-30\9E\00#5,\85\00\02&\00\00\B0\00\089\00%6,\F5\0B\08\17\00&7,\EC\03\0CR\00#8,9\00\02(\00\00d\00\08;\00%9,]\0C\07\17\00\00\BD\00\05c\03\0BR\00\01\8E\0A\019\00\02(\00\00d\00\08;\00\05}\0A\0AQ\03\143\98\03\0A\96\02/32\1B\05\04\00\AE\0A\03 \00\0BY\01$4,P\00\01'\00\08|\00\133Y\01\0D\07\01\015\00\01\9C\00\02&\00\00\C7\00\089\00%5,\08\0D\0C:\00\176\EF\00\01\9F\01)34\D8\00\05\B9\02*6]\A8\00$6, \00\03\A8\00\05\F9\0D\22rd;\0E(f3\EA\08(52h\05\06\7F\05\1F3\82\09\04#4,\1E\00\00;\00\0F\16\0C\02\185\B4\0B/55_\00\03\186\D2\04\07_\00#7,\1E\00\00;\00\0F\F5\0A\02(57H\00\1F8_\00\03\189v\04\06r\08360,\1E\00\00;\00\0F\06\0A\03\170H\00/61_\00\02(62\98\03\07_\00#3,\1E\00\00;\00\0F/\09\02\186\B4\0B/64_\00\03\0A\96\04\07`\00#6,\1F\00\00<\00\0F\1B\08\03\196M\0FL67, V#\03\9C\10\1C6m\0A$13\8A\08\173\00\09668,8\00\08\A5\00\04c\01\1A3\04\01370,\1E\00\05\D5\09\14g\D5\09#5,Q\00\00'\00\01\D5\09\175\D5\09\1C6\90\00\04[\0C614:5\03)65S\07\0Fu\08\01\186l\04)66U\04\0F\BC\08\01(66[\03/56\A6\08\01/71\A6\08\02/72\A6\08\03373,5\00\00$\00\0D\A6\08\02\E7\02)73|\04\00\CC\02\03\1C\00\0A|\04\02\B0\02\12d\16\038d58\DE\00\127|\04/59\A6\08\04(67.\00\1F8\A6\08\02/69\A6\08\03/70\A6\08\03\1F6p\0A\03/61\A6\08\04\02\11\03-d6p\0A\020\03\12d\96\03(d6u\06\137L\07,63\A6\08\00s\01\02\9C\00\00&\00\0B8\05\01x\01\01\E8\00\02\D7\00\00/\00\08V\00\1F4\A6\08\03/64\A6\08\04\02l\03\01 \00\0B\BB\00\196\BB\00\08r\02#75\BB\00,6]\9E\00#6,\85\00\02&\00\00\B0\00\089\00\1F7\A6\08\03/67\A6\08\05\02L\03-d63\02)69\9F\00\083\02\137E\09-69\9F\00#9,\86\00\02&\00\00\B1\00\079\00/80\A6\08\03/70\A6\08\04\02\FF\02\01 \00\0A\9E\00(72\9E\00(71e\00\03\F8\01\1D7\02\08382,\85\00\02&\00\00\B0\00\089\00\1F3\A6\08\02/84\A6\08\07385,9\00\02(\00\00d\00\08;\00\1F6\A6\08\02/87\A6\08\07388,9\00\02(\00\00d\00\08;\00\1F9\A6\08\03/73\A6\08\02/74\A6\08\04\01<\02\02 \00\0BY\01$6,P\00\01'\00\08\C1\0D\03\D0\0C\1D7\96\02\01\8D\12\01\9C\00\02&\00\00\C7\00\089\00\1F2\A6\08\06&93\EF\00\01\85\119f91\D8\00\1F7\A6\08\04\01^\02\02 
\00\03\A8\00\07\A6\08\137R\0E\08*\07\1F7*\07\03/75\A6\08\03\02\08\01\02'\01/74\A6\08\02(76H\00\1F7_\00\03\1F8\A6\08\03\01)\03\12r\C5\00/77\A6\08\02\187\18\11/80_\00\02/81\A6\08\03\01\EA\02\01\1E\00\00;\00\0F\A6\08\02(82H\00\1F3_\00\03\1F4\A6\08\03\01\F7\02\01\1E\00\00;\00\0F\A6\08\02(85H\00\1F6_\00\03\1F7\A6\08\04\01\05\03\01\1F\00\00<\00\0F\A6\08\03,88\ED\07\145\ED\07\08\08\14)89}\08\06\C4\00\01\A8\02\01\1F\00\0F\DE\08\04/90\DE\08\06\176N\08/37N\08\0E\183B\07/38N\08\0E\193\98\0C\1F9r\07\02/40r\07\03/41r\07\03/37r\07\02/38r\07\04\02\DE\15-d3 \0E\00y\00\03P\00\01'\00\08|\00\122!\04,40r\07\00[\14\02\9C\00\00&\00\0B\04\04\01\A4\13\01\E8\00\02\D7\00\00/\00\08V\00\1F5r\07\03/41r\07\04\02\AB\14-d4-\08(43\BB\00(42e\00\136\BB\00,3]\9E\00#7,\85\00\02&\00\00\B0\00\089\00\1F8r\07\03/44r\07\05\01K\00\02!\00\0B\9F\00\196\9F\00\08{\05#49\9F\00\0D{\05\00\E5\12\02\86\00\02&\00\00\B1\00\079\00/51r\07\03/47r\07\04\01J\00\02 \00\0B\9E\00\199\9E\00\08\A3\02\145\F8\01\0D\10\08\01\E6\0D\01\85\00\02&\00\00\B0\00\089\00\1F4r\07\02/55r\07\07\01\C3\0A\019\00\02(\00\00d\00\08;\00\1F7r\07\02/58r\07\07\01\1C\0B\019\00\02(\00\00d\00\08\DB\0A\1F0r\07\03/50r\07\02\1F53\15\05\01\FF\00\02 \00\0B\9C\0B$3,P\00\01'\00\08|\00\03\CB\08\1D5\96\02\01\11\0B\01\9C\00\02&\00\00\C7\00\089\00\1F3r\07\06&64\EF\00\126\EC\0A\09\0B\16/54r\07\04\01n\01\02 \00\03\A8\00\07r\07\2255,\0C\B04;\0Aret;\0A\0A}\0A\00\00\00\00\00\00\00", section ".nv_fatbin", align 8 @__cuda_fatbin_wrapper = internal constant { i32, i32, i8*, i8* } { i32 1180844977, i32 1, i8* getelementptr inbounds ([27433 x i8], [27433 x i8]* @1, i64 0, i64 0), i8* null }, section ".nvFatBinSegment", align 8 @__cuda_gpubin_handle = internal global i8** null, align 8 @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* bitcast (void (i8*)* @__cuda_module_ctor to void ()*), i8* null }] ; Function Attrs: noinline nounwind optnone uwtable define dso_local i64 @_Z8get_timev() #0 { entry: %tv = alloca 
%struct.timeval, align 8 %call = call i32 @gettimeofday(%struct.timeval* %tv, %struct.timezone* null) #8 %tv_sec = getelementptr inbounds %struct.timeval, %struct.timeval* %tv, i32 0, i32 0 %0 = load i64, i64* %tv_sec, align 8 %mul = mul nsw i64 %0, 1000000 %tv_usec = getelementptr inbounds %struct.timeval, %struct.timeval* %tv, i32 0, i32 1 %1 = load i64, i64* %tv_usec, align 8 %add = add nsw i64 %mul, %1 ret i64 %add } ; Function Attrs: nounwind declare dso_local i32 @gettimeofday(%struct.timeval*, %struct.timezone*) #1 ; Function Attrs: noinline optnone uwtable define dso_local void @_Z11hotspotOpt1PfS_S_fiiifffffff(float* %p, float* %tIn, float* %tOut, float %sdc, i32 %nx, i32 %ny, i32 %nz, float %ce, float %cw, float %cn, float %cs, float %ct, float %cb, float %cc) #2 { entry: %p.addr = alloca float*, align 8 %tIn.addr = alloca float*, align 8 %tOut.addr = alloca float*, align 8 %sdc.addr = alloca float, align 4 %nx.addr = alloca i32, align 4 %ny.addr = alloca i32, align 4 %nz.addr = alloca i32, align 4 %ce.addr = alloca float, align 4 %cw.addr = alloca float, align 4 %cn.addr = alloca float, align 4 %cs.addr = alloca float, align 4 %ct.addr = alloca float, align 4 %cb.addr = alloca float, align 4 %cc.addr = alloca float, align 4 %grid_dim = alloca %struct.dim3, align 8 %block_dim = alloca %struct.dim3, align 8 %shmem_size = alloca i64, align 8 %stream = alloca i8*, align 8 %grid_dim.coerce = alloca { i64, i32 }, align 8 %block_dim.coerce = alloca { i64, i32 }, align 8 store float* %p, float** %p.addr, align 8 store float* %tIn, float** %tIn.addr, align 8 store float* %tOut, float** %tOut.addr, align 8 store float %sdc, float* %sdc.addr, align 4 store i32 %nx, i32* %nx.addr, align 4 store i32 %ny, i32* %ny.addr, align 4 store i32 %nz, i32* %nz.addr, align 4 store float %ce, float* %ce.addr, align 4 store float %cw, float* %cw.addr, align 4 store float %cn, float* %cn.addr, align 4 store float %cs, float* %cs.addr, align 4 store float %ct, float* %ct.addr, 
align 4 store float %cb, float* %cb.addr, align 4 store float %cc, float* %cc.addr, align 4 %kernel_args = alloca i8*, i64 14, align 16 %0 = bitcast float** %p.addr to i8* %1 = getelementptr i8*, i8** %kernel_args, i32 0 store i8* %0, i8** %1 %2 = bitcast float** %tIn.addr to i8* %3 = getelementptr i8*, i8** %kernel_args, i32 1 store i8* %2, i8** %3 %4 = bitcast float** %tOut.addr to i8* %5 = getelementptr i8*, i8** %kernel_args, i32 2 store i8* %4, i8** %5 %6 = bitcast float* %sdc.addr to i8* %7 = getelementptr i8*, i8** %kernel_args, i32 3 store i8* %6, i8** %7 %8 = bitcast i32* %nx.addr to i8* %9 = getelementptr i8*, i8** %kernel_args, i32 4 store i8* %8, i8** %9 %10 = bitcast i32* %ny.addr to i8* %11 = getelementptr i8*, i8** %kernel_args, i32 5 store i8* %10, i8** %11 %12 = bitcast i32* %nz.addr to i8* %13 = getelementptr i8*, i8** %kernel_args, i32 6 store i8* %12, i8** %13 %14 = bitcast float* %ce.addr to i8* %15 = getelementptr i8*, i8** %kernel_args, i32 7 store i8* %14, i8** %15 %16 = bitcast float* %cw.addr to i8* %17 = getelementptr i8*, i8** %kernel_args, i32 8 store i8* %16, i8** %17 %18 = bitcast float* %cn.addr to i8* %19 = getelementptr i8*, i8** %kernel_args, i32 9 store i8* %18, i8** %19 %20 = bitcast float* %cs.addr to i8* %21 = getelementptr i8*, i8** %kernel_args, i32 10 store i8* %20, i8** %21 %22 = bitcast float* %ct.addr to i8* %23 = getelementptr i8*, i8** %kernel_args, i32 11 store i8* %22, i8** %23 %24 = bitcast float* %cb.addr to i8* %25 = getelementptr i8*, i8** %kernel_args, i32 12 store i8* %24, i8** %25 %26 = bitcast float* %cc.addr to i8* %27 = getelementptr i8*, i8** %kernel_args, i32 13 store i8* %26, i8** %27 %28 = call i32 @__cudaPopCallConfiguration(%struct.dim3* %grid_dim, %struct.dim3* %block_dim, i64* %shmem_size, i8** %stream) %29 = load i64, i64* %shmem_size, align 8 %30 = load i8*, i8** %stream, align 8 %31 = bitcast { i64, i32 }* %grid_dim.coerce to i8* %32 = bitcast %struct.dim3* %grid_dim to i8* call void 
@llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %31, i8* align 8 %32, i64 12, i1 false) %33 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 0 %34 = load i64, i64* %33, align 8 %35 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 1 %36 = load i32, i32* %35, align 8 %37 = bitcast { i64, i32 }* %block_dim.coerce to i8* %38 = bitcast %struct.dim3* %block_dim to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %37, i8* align 8 %38, i64 12, i1 false) %39 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 0 %40 = load i64, i64* %39, align 8 %41 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 1 %42 = load i32, i32* %41, align 8 %43 = bitcast i8* %30 to %struct.CUstream_st* %call = call i32 @cudaLaunchKernel(i8* bitcast (void (float*, float*, float*, float, i32, i32, i32, float, float, float, float, float, float, float)* @_Z11hotspotOpt1PfS_S_fiiifffffff to i8*), i64 %34, i32 %36, i64 %40, i32 %42, i8** %kernel_args, i64 %29, %struct.CUstream_st* %43) br label %setup.end setup.end: ; preds = %entry ret void } declare dso_local i32 @__cudaPopCallConfiguration(%struct.dim3*, %struct.dim3*, i64*, i8**) declare dso_local i32 @cudaLaunchKernel(i8*, i64, i32, i64, i32, i8**, i64, %struct.CUstream_st*) ; Function Attrs: argmemonly nounwind willreturn declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #3 ; Function Attrs: noinline optnone uwtable define dso_local void @_Z12hotspot_opt1PfS_S_iiifffffi(float* %p, float* %tIn, float* %tOut, i32 %nx, i32 %ny, i32 %nz, float %Cap, float %Rx, float %Ry, float %Rz, float %dt, i32 %numiter) #2 { entry: %p.addr = alloca float*, align 8 %tIn.addr = alloca float*, align 8 %tOut.addr = alloca float*, align 8 %nx.addr = alloca i32, align 4 %ny.addr = alloca i32, align 4 %nz.addr = alloca i32, align 4 %Cap.addr = alloca float, align 4 %Rx.addr = 
alloca float, align 4 %Ry.addr = alloca float, align 4 %Rz.addr = alloca float, align 4 %dt.addr = alloca float, align 4 %numiter.addr = alloca i32, align 4 %ce = alloca float, align 4 %cw = alloca float, align 4 %cn = alloca float, align 4 %cs = alloca float, align 4 %ct = alloca float, align 4 %cb = alloca float, align 4 %cc = alloca float, align 4 %stepDivCap = alloca float, align 4 %s = alloca i64, align 8 %tIn_d = alloca float*, align 8 %tOut_d = alloca float*, align 8 %p_d = alloca float*, align 8 %block_dim = alloca %struct.dim3, align 4 %grid_dim = alloca %struct.dim3, align 4 %start = alloca i64, align 8 %i = alloca i32, align 4 %agg.tmp = alloca %struct.dim3, align 4 %agg.tmp23 = alloca %struct.dim3, align 4 %agg.tmp.coerce = alloca { i64, i32 }, align 4 %agg.tmp23.coerce = alloca { i64, i32 }, align 4 %t = alloca float*, align 8 %stop = alloca i64, align 8 %time = alloca float, align 4 store float* %p, float** %p.addr, align 8 store float* %tIn, float** %tIn.addr, align 8 store float* %tOut, float** %tOut.addr, align 8 store i32 %nx, i32* %nx.addr, align 4 store i32 %ny, i32* %ny.addr, align 4 store i32 %nz, i32* %nz.addr, align 4 store float %Cap, float* %Cap.addr, align 4 store float %Rx, float* %Rx.addr, align 4 store float %Ry, float* %Ry.addr, align 4 store float %Rz, float* %Rz.addr, align 4 store float %dt, float* %dt.addr, align 4 store i32 %numiter, i32* %numiter.addr, align 4 %0 = load float, float* %dt.addr, align 4 %1 = load float, float* %Cap.addr, align 4 %div = fdiv float %0, %1 store float %div, float* %stepDivCap, align 4 %2 = load float, float* %stepDivCap, align 4 %3 = load float, float* %Rx.addr, align 4 %div1 = fdiv float %2, %3 store float %div1, float* %cw, align 4 store float %div1, float* %ce, align 4 %4 = load float, float* %stepDivCap, align 4 %5 = load float, float* %Ry.addr, align 4 %div2 = fdiv float %4, %5 store float %div2, float* %cs, align 4 store float %div2, float* %cn, align 4 %6 = load float, float* %stepDivCap, 
align 4 %7 = load float, float* %Rz.addr, align 4 %div3 = fdiv float %6, %7 store float %div3, float* %cb, align 4 store float %div3, float* %ct, align 4 %8 = load float, float* %ce, align 4 %conv = fpext float %8 to double %mul = fmul contract double 2.000000e+00, %conv %9 = load float, float* %cn, align 4 %conv4 = fpext float %9 to double %mul5 = fmul contract double 2.000000e+00, %conv4 %add = fadd contract double %mul, %mul5 %10 = load float, float* %ct, align 4 %conv6 = fpext float %10 to double %mul7 = fmul contract double 3.000000e+00, %conv6 %add8 = fadd contract double %add, %mul7 %sub = fsub contract double 1.000000e+00, %add8 %conv9 = fptrunc double %sub to float store float %conv9, float* %cc, align 4 %11 = load i32, i32* %nx.addr, align 4 %conv10 = sext i32 %11 to i64 %mul11 = mul i64 4, %conv10 %12 = load i32, i32* %ny.addr, align 4 %conv12 = sext i32 %12 to i64 %mul13 = mul i64 %mul11, %conv12 %13 = load i32, i32* %nz.addr, align 4 %conv14 = sext i32 %13 to i64 %mul15 = mul i64 %mul13, %conv14 store i64 %mul15, i64* %s, align 8 %14 = bitcast float** %p_d to i8** %15 = load i64, i64* %s, align 8 %call = call i32 @cudaMalloc(i8** %14, i64 %15) %16 = bitcast float** %tIn_d to i8** %17 = load i64, i64* %s, align 8 %call16 = call i32 @cudaMalloc(i8** %16, i64 %17) %18 = bitcast float** %tOut_d to i8** %19 = load i64, i64* %s, align 8 %call17 = call i32 @cudaMalloc(i8** %18, i64 %19) %20 = load float*, float** %tIn_d, align 8 %21 = bitcast float* %20 to i8* %22 = load float*, float** %tIn.addr, align 8 %23 = bitcast float* %22 to i8* %24 = load i64, i64* %s, align 8 %call18 = call i32 @cudaMemcpy(i8* %21, i8* %23, i64 %24, i32 1) %25 = load float*, float** %p_d, align 8 %26 = bitcast float* %25 to i8* %27 = load float*, float** %p.addr, align 8 %28 = bitcast float* %27 to i8* %29 = load i64, i64* %s, align 8 %call19 = call i32 @cudaMemcpy(i8* %26, i8* %28, i64 %29, i32 1) call void @_ZN4dim3C2Ejjj(%struct.dim3* %block_dim, i32 64, i32 4, i32 1) %30 = load 
i32, i32* %nx.addr, align 4 %div20 = sdiv i32 %30, 64 %31 = load i32, i32* %ny.addr, align 4 %div21 = sdiv i32 %31, 4 call void @_ZN4dim3C2Ejjj(%struct.dim3* %grid_dim, i32 %div20, i32 %div21, i32 1) %call22 = call i64 @_Z8get_timev() store i64 %call22, i64* %start, align 8 store i32 0, i32* %i, align 4 br label %for.cond for.cond: ; preds = %for.inc, %entry %32 = load i32, i32* %i, align 4 %33 = load i32, i32* %numiter.addr, align 4 %cmp = icmp slt i32 %32, %33 br i1 %cmp, label %for.body, label %for.end for.body: ; preds = %for.cond %34 = bitcast %struct.dim3* %agg.tmp to i8* %35 = bitcast %struct.dim3* %grid_dim to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %34, i8* align 4 %35, i64 12, i1 false) %36 = bitcast %struct.dim3* %agg.tmp23 to i8* %37 = bitcast %struct.dim3* %block_dim to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %36, i8* align 4 %37, i64 12, i1 false) %38 = bitcast { i64, i32 }* %agg.tmp.coerce to i8* %39 = bitcast %struct.dim3* %agg.tmp to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %38, i8* align 4 %39, i64 12, i1 false) %40 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 0 %41 = load i64, i64* %40, align 4 %42 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 1 %43 = load i32, i32* %42, align 4 %44 = bitcast { i64, i32 }* %agg.tmp23.coerce to i8* %45 = bitcast %struct.dim3* %agg.tmp23 to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %44, i8* align 4 %45, i64 12, i1 false) %46 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp23.coerce, i32 0, i32 0 %47 = load i64, i64* %46, align 4 %48 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp23.coerce, i32 0, i32 1 %49 = load i32, i32* %48, align 4 %call24 = call i32 @__cudaPushCallConfiguration(i64 %41, i32 %43, i64 %47, i32 %49, i64 0, i8* null) %tobool = icmp ne i32 %call24, 0 br i1 %tobool, label %kcall.end, label %kcall.configok kcall.configok: ; preds = %for.body %50 = load 
float*, float** %p_d, align 8 %51 = load float*, float** %tIn_d, align 8 %52 = load float*, float** %tOut_d, align 8 %53 = load float, float* %stepDivCap, align 4 %54 = load i32, i32* %nx.addr, align 4 %55 = load i32, i32* %ny.addr, align 4 %56 = load i32, i32* %nz.addr, align 4 %57 = load float, float* %ce, align 4 %58 = load float, float* %cw, align 4 %59 = load float, float* %cn, align 4 %60 = load float, float* %cs, align 4 %61 = load float, float* %ct, align 4 %62 = load float, float* %cb, align 4 %63 = load float, float* %cc, align 4 call void @_Z11hotspotOpt1PfS_S_fiiifffffff(float* %50, float* %51, float* %52, float %53, i32 %54, i32 %55, i32 %56, float %57, float %58, float %59, float %60, float %61, float %62, float %63) br label %kcall.end kcall.end: ; preds = %kcall.configok, %for.body %64 = load float*, float** %tIn_d, align 8 store float* %64, float** %t, align 8 %65 = load float*, float** %tOut_d, align 8 store float* %65, float** %tIn_d, align 8 %66 = load float*, float** %t, align 8 store float* %66, float** %tOut_d, align 8 br label %for.inc for.inc: ; preds = %kcall.end %67 = load i32, i32* %i, align 4 %inc = add nsw i32 %67, 1 store i32 %inc, i32* %i, align 4 br label %for.cond for.end: ; preds = %for.cond %call25 = call i32 @cudaDeviceSynchronize() %call26 = call i64 @_Z8get_timev() store i64 %call26, i64* %stop, align 8 %68 = load i64, i64* %stop, align 8 %69 = load i64, i64* %start, align 8 %sub27 = sub nsw i64 %68, %69 %conv28 = sitofp i64 %sub27 to double %div29 = fdiv double %conv28, 1.000000e+06 %conv30 = fptrunc double %div29 to float store float %conv30, float* %time, align 4 %70 = load float, float* %time, align 4 %conv31 = fpext float %70 to double %call32 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str, i64 0, i64 0), double %conv31) %71 = load float*, float** %tOut.addr, align 8 %72 = bitcast float* %71 to i8* %73 = load float*, float** %tOut_d, align 8 %74 = bitcast float* %73 to i8* %75 = load i64, i64* %s, align 8 %call33 = call i32 @cudaMemcpy(i8* %72, i8* %74, i64 %75, i32 2) %76 = load float*, float** %p_d, align 8 %77 = bitcast float* %76 to i8* %call34 = call i32 @cudaFree(i8* %77) %78 = load float*, float** %tIn_d, align 8 %79 = bitcast float* %78 to i8* %call35 = call i32 @cudaFree(i8* %79) %80 = load float*, float** %tOut_d, align 8 %81 = bitcast float* %80 to i8* %call36 = call i32 @cudaFree(i8* %81) ret void } declare dso_local i32 @cudaMalloc(i8**, i64) #4 declare dso_local i32 @cudaMemcpy(i8*, i8*, i64, i32) #4 ; Function Attrs: noinline nounwind optnone uwtable define linkonce_odr dso_local void @_ZN4dim3C2Ejjj(%struct.dim3* %this, i32 %vx, i32 %vy, i32 %vz) unnamed_addr #0 comdat align 2 { entry: %this.addr = alloca %struct.dim3*, align 8 %vx.addr = alloca i32, align 4 %vy.addr = alloca i32, align 4 %vz.addr = alloca i32, align 4 store %struct.dim3* %this, %struct.dim3** %this.addr, align 8 store i32 %vx, i32* %vx.addr, align 4 store i32 %vy, i32* %vy.addr, align 4 store i32 %vz, i32* %vz.addr, align 4 %this1 = load %struct.dim3*, %struct.dim3** %this.addr, align 8 %x = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 0 %0 = load i32, i32* %vx.addr, align 4 store i32 %0, i32* %x, align 4 %y = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 1 %1 = load i32, i32* %vy.addr, align 4 store i32 %1, i32* %y, align 4 %z = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 2 %2 = load i32, i32* %vz.addr, align 4 store i32 %2, i32* %z, align 4 ret void } declare dso_local i32 @__cudaPushCallConfiguration(i64, i32, i64, i32, i64, i8*) #4 declare dso_local i32 @cudaDeviceSynchronize() #4 declare dso_local i32 @printf(i8*, ...) 
#4 declare dso_local i32 @cudaFree(i8*) #4 ; Function Attrs: noinline optnone uwtable define dso_local void @_Z5fatalPKc(i8* %s) #2 { entry: %s.addr = alloca i8*, align 8 store i8* %s, i8** %s.addr, align 8 %0 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 %1 = load i8*, i8** %s.addr, align 8 %call = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %0, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.1, i64 0, i64 0), i8* %1) ret void } declare dso_local i32 @fprintf(%struct._IO_FILE*, i8*, ...) #4 ; Function Attrs: noinline optnone uwtable define dso_local void @_Z9readinputPfiiiPc(float* %vect, i32 %grid_rows, i32 %grid_cols, i32 %layers, i8* %file) #2 { entry: %vect.addr = alloca float*, align 8 %grid_rows.addr = alloca i32, align 4 %grid_cols.addr = alloca i32, align 4 %layers.addr = alloca i32, align 4 %file.addr = alloca i8*, align 8 %i = alloca i32, align 4 %j = alloca i32, align 4 %k = alloca i32, align 4 %fp = alloca %struct._IO_FILE*, align 8 %str = alloca [256 x i8], align 16 %val = alloca float, align 4 store float* %vect, float** %vect.addr, align 8 store i32 %grid_rows, i32* %grid_rows.addr, align 4 store i32 %grid_cols, i32* %grid_cols.addr, align 4 store i32 %layers, i32* %layers.addr, align 4 store i8* %file, i8** %file.addr, align 8 %0 = load i8*, i8** %file.addr, align 8 %call = call %struct._IO_FILE* @fopen(i8* %0, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.2, i64 0, i64 0)) store %struct._IO_FILE* %call, %struct._IO_FILE** %fp, align 8 %cmp = icmp eq %struct._IO_FILE* %call, null br i1 %cmp, label %if.then, label %if.end if.then: ; preds = %entry call void @_Z5fatalPKc(i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.3, i64 0, i64 0)) br label %if.end if.end: ; preds = %if.then, %entry store i32 0, i32* %i, align 4 br label %for.cond for.cond: ; preds = %for.inc28, %if.end %1 = load i32, i32* %i, align 4 %2 = load i32, i32* %grid_rows.addr, align 4 %sub = sub nsw i32 %2, 1 %cmp1 = icmp 
sle i32 %1, %sub br i1 %cmp1, label %for.body, label %for.end30 for.body: ; preds = %for.cond store i32 0, i32* %j, align 4 br label %for.cond2 for.cond2: ; preds = %for.inc25, %for.body %3 = load i32, i32* %j, align 4 %4 = load i32, i32* %grid_cols.addr, align 4 %sub3 = sub nsw i32 %4, 1 %cmp4 = icmp sle i32 %3, %sub3 br i1 %cmp4, label %for.body5, label %for.end27 for.body5: ; preds = %for.cond2 store i32 0, i32* %k, align 4 br label %for.cond6 for.cond6: ; preds = %for.inc, %for.body5 %5 = load i32, i32* %k, align 4 %6 = load i32, i32* %layers.addr, align 4 %sub7 = sub nsw i32 %6, 1 %cmp8 = icmp sle i32 %5, %sub7 br i1 %cmp8, label %for.body9, label %for.end for.body9: ; preds = %for.cond6 %arraydecay = getelementptr inbounds [256 x i8], [256 x i8]* %str, i64 0, i64 0 %7 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8 %call10 = call i8* @fgets(i8* %arraydecay, i32 256, %struct._IO_FILE* %7) %cmp11 = icmp eq i8* %call10, null br i1 %cmp11, label %if.then12, label %if.end13 if.then12: ; preds = %for.body9 call void @_Z5fatalPKc(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str.4, i64 0, i64 0)) br label %if.end13 if.end13: ; preds = %if.then12, %for.body9 %8 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8 %call14 = call i32 @feof(%struct._IO_FILE* %8) #8 %tobool = icmp ne i32 %call14, 0 br i1 %tobool, label %if.then15, label %if.end16 if.then15: ; preds = %if.end13 call void @_Z5fatalPKc(i8* getelementptr inbounds ([25 x i8], [25 x i8]* @.str.5, i64 0, i64 0)) br label %if.end16 if.end16: ; preds = %if.then15, %if.end13 %arraydecay17 = getelementptr inbounds [256 x i8], [256 x i8]* %str, i64 0, i64 0 %call18 = call i32 (i8*, i8*, ...) 
@sscanf(i8* %arraydecay17, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.6, i64 0, i64 0), float* %val) #8 %cmp19 = icmp ne i32 %call18, 1 br i1 %cmp19, label %if.then20, label %if.end21 if.then20: ; preds = %if.end16 call void @_Z5fatalPKc(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str.7, i64 0, i64 0)) br label %if.end21 if.end21: ; preds = %if.then20, %if.end16 %9 = load float, float* %val, align 4 %10 = load float*, float** %vect.addr, align 8 %11 = load i32, i32* %i, align 4 %12 = load i32, i32* %grid_cols.addr, align 4 %mul = mul nsw i32 %11, %12 %13 = load i32, i32* %j, align 4 %add = add nsw i32 %mul, %13 %14 = load i32, i32* %k, align 4 %15 = load i32, i32* %grid_rows.addr, align 4 %mul22 = mul nsw i32 %14, %15 %16 = load i32, i32* %grid_cols.addr, align 4 %mul23 = mul nsw i32 %mul22, %16 %add24 = add nsw i32 %add, %mul23 %idxprom = sext i32 %add24 to i64 %arrayidx = getelementptr inbounds float, float* %10, i64 %idxprom store float %9, float* %arrayidx, align 4 br label %for.inc for.inc: ; preds = %if.end21 %17 = load i32, i32* %k, align 4 %inc = add nsw i32 %17, 1 store i32 %inc, i32* %k, align 4 br label %for.cond6 for.end: ; preds = %for.cond6 br label %for.inc25 for.inc25: ; preds = %for.end %18 = load i32, i32* %j, align 4 %inc26 = add nsw i32 %18, 1 store i32 %inc26, i32* %j, align 4 br label %for.cond2 for.end27: ; preds = %for.cond2 br label %for.inc28 for.inc28: ; preds = %for.end27 %19 = load i32, i32* %i, align 4 %inc29 = add nsw i32 %19, 1 store i32 %inc29, i32* %i, align 4 br label %for.cond for.end30: ; preds = %for.cond %20 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8 %call31 = call i32 @fclose(%struct._IO_FILE* %20) ret void } declare dso_local %struct._IO_FILE* @fopen(i8*, i8*) #4 declare dso_local i8* @fgets(i8*, i32, %struct._IO_FILE*) #4 ; Function Attrs: nounwind declare dso_local i32 @feof(%struct._IO_FILE*) #1 ; Function Attrs: nounwind declare dso_local i32 @sscanf(i8*, i8*, ...) 
#1

declare dso_local i32 @fclose(%struct._IO_FILE*) #4

; writeoutput(float* vect, int grid_rows, int grid_cols, int layers, char* file)
;
; Opens `file` with the mode string @.str.8 and, for every (i, j, k) with
; i < grid_rows, j < grid_cols, k < layers (k innermost), formats one line with
; the format string @.str.10 from a running counter `index` and the element
;     vect[i * grid_cols + j + k * grid_rows * grid_cols]
; into a 256-byte stack buffer, then writes that buffer with fputs.
; The stream is closed before returning.
;
; If fopen fails, the message @.str.9 is printed and the function returns
; immediately. Previously control fell through to the loops, which handed the
; null FILE* to fputs and finally to fclose.
;
; Function Attrs: noinline optnone uwtable
define dso_local void @_Z11writeoutputPfiiiPc(float* %vect, i32 %grid_rows, i32 %grid_cols, i32 %layers, i8* %file) #2 {
entry:
  %vect.addr = alloca float*, align 8
  %grid_rows.addr = alloca i32, align 4
  %grid_cols.addr = alloca i32, align 4
  %layers.addr = alloca i32, align 4
  %file.addr = alloca i8*, align 8
  %i = alloca i32, align 4
  %j = alloca i32, align 4
  %k = alloca i32, align 4
  %index = alloca i32, align 4
  %fp = alloca %struct._IO_FILE*, align 8
  %str = alloca [256 x i8], align 16
  store float* %vect, float** %vect.addr, align 8
  store i32 %grid_rows, i32* %grid_rows.addr, align 4
  store i32 %grid_cols, i32* %grid_cols.addr, align 4
  store i32 %layers, i32* %layers.addr, align 4
  store i8* %file, i8** %file.addr, align 8
  store i32 0, i32* %index, align 4
  %0 = load i8*, i8** %file.addr, align 8
  %call = call %struct._IO_FILE* @fopen(i8* %0, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.8, i64 0, i64 0))
  store %struct._IO_FILE* %call, %struct._IO_FILE** %fp, align 8
  %cmp = icmp eq %struct._IO_FILE* %call, null
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  ; fopen failed: report it and leave without touching the null stream.
  %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([25 x i8], [25 x i8]* @.str.9, i64 0, i64 0))
  br label %return

if.end:                                           ; preds = %entry
  store i32 0, i32* %i, align 4
  br label %for.cond

for.cond:                                         ; preds = %for.inc19, %if.end
  %1 = load i32, i32* %i, align 4
  %2 = load i32, i32* %grid_rows.addr, align 4
  %cmp2 = icmp slt i32 %1, %2
  br i1 %cmp2, label %for.body, label %for.end21

for.body:                                         ; preds = %for.cond
  store i32 0, i32* %j, align 4
  br label %for.cond3

for.cond3:                                        ; preds = %for.inc16, %for.body
  %3 = load i32, i32* %j, align 4
  %4 = load i32, i32* %grid_cols.addr, align 4
  %cmp4 = icmp slt i32 %3, %4
  br i1 %cmp4, label %for.body5, label %for.end18

for.body5:                                        ; preds = %for.cond3
  store i32 0, i32* %k, align 4
  br label %for.cond6

for.cond6:                                        ; preds = %for.inc, %for.body5
  %5 = load i32, i32* %k, align 4
  %6 = load i32, i32* %layers.addr, align 4
  %cmp7 = icmp slt i32 %5, %6
  br i1 %cmp7, label %for.body8, label %for.end

for.body8:                                        ; preds = %for.cond6
  %arraydecay = getelementptr inbounds [256 x i8], [256 x i8]* %str, i64 0, i64 0
  %7 = load i32, i32* %index, align 4
  %8 = load float*, float** %vect.addr, align 8
  ; element offset = i * grid_cols + j + k * grid_rows * grid_cols
  %9 = load i32, i32* %i, align 4
  %10 = load i32, i32* %grid_cols.addr, align 4
  %mul = mul nsw i32 %9, %10
  %11 = load i32, i32* %j, align 4
  %add = add nsw i32 %mul, %11
  %12 = load i32, i32* %k, align 4
  %13 = load i32, i32* %grid_rows.addr, align 4
  %mul9 = mul nsw i32 %12, %13
  %14 = load i32, i32* %grid_cols.addr, align 4
  %mul10 = mul nsw i32 %mul9, %14
  %add11 = add nsw i32 %add, %mul10
  %idxprom = sext i32 %add11 to i64
  %arrayidx = getelementptr inbounds float, float* %8, i64 %idxprom
  %15 = load float, float* %arrayidx, align 4
  ; float is promoted to double for the variadic sprintf call
  %conv = fpext float %15 to double
  %call12 = call i32 (i8*, i8*, ...) @sprintf(i8* %arraydecay, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.10, i64 0, i64 0), i32 %7, double %conv) #8
  %arraydecay13 = getelementptr inbounds [256 x i8], [256 x i8]* %str, i64 0, i64 0
  %16 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8
  %call14 = call i32 @fputs(i8* %arraydecay13, %struct._IO_FILE* %16)
  %17 = load i32, i32* %index, align 4
  %inc = add nsw i32 %17, 1
  store i32 %inc, i32* %index, align 4
  br label %for.inc

for.inc:                                          ; preds = %for.body8
  %18 = load i32, i32* %k, align 4
  %inc15 = add nsw i32 %18, 1
  store i32 %inc15, i32* %k, align 4
  br label %for.cond6

for.end:                                          ; preds = %for.cond6
  br label %for.inc16

for.inc16:                                        ; preds = %for.end
  %19 = load i32, i32* %j, align 4
  %inc17 = add nsw i32 %19, 1
  store i32 %inc17, i32* %j, align 4
  br label %for.cond3

for.end18:                                        ; preds = %for.cond3
  br label %for.inc19

for.inc19:                                        ; preds = %for.end18
  %20 = load i32, i32* %i, align 4
  %inc20 = add nsw i32 %20, 1
  store i32 %inc20, i32* %i, align 4
  br label %for.cond

for.end21:                                        ; preds = %for.cond
  %21 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8
  %call22 = call i32 @fclose(%struct._IO_FILE* %21)
  br label %return

return:                                           ; preds = %for.end21, %if.then
  ret void
}

; Function Attrs: nounwind
declare dso_local i32 @sprintf(i8*, i8*, ...)
#1

declare dso_local i32 @fputs(i8*, %struct._IO_FILE*) #4

; computeTempCPU(pIn, tIn, tOut, nx, ny, nz, Cap, Rx, Ry, Rz, dt, numiter)
;
; Host-side 7-point stencil over an nx * ny * nz grid stored as
;     c = x + y * nx + z * nx * ny.
; Coefficients computed once up front:
;     stepDivCap = dt / Cap
;     cw = ce = stepDivCap / Rx
;     cs = cn = stepDivCap / Ry
;     cb = ct = stepDivCap / Rz
;     cc = 1.0 - (2.0 * ce + 2.0 * cn + 3.0 * ct)      (evaluated in double)
; For every cell, each neighbour index (w, e, n, s, b, t) falls back to c
; itself when the cell sits on the corresponding boundary, and
;     tOut[c] = tIn[c]*cc + tIn[n]*cn + tIn[s]*cs + tIn[e]*ce + tIn[w]*cw
;             + tIn[t]*ct + tIn[b]*cb + (dt / Cap) * pIn[c] + ct * amb_temp
; After each full sweep the local tIn/tOut pointer copies are swapped; the
; caller's pointers are not modified. The sweep is a do/while loop, so it runs
; at least once even when numiter <= 0.
;
; Function Attrs: noinline nounwind optnone uwtable
define dso_local void @_Z14computeTempCPUPfS_S_iiifffffi(float* %pIn, float* %tIn, float* %tOut, i32 %nx, i32 %ny, i32 %nz, float %Cap, float %Rx, float %Ry, float %Rz, float %dt, i32 %numiter) #0 {
entry:
  %pIn.addr = alloca float*, align 8
  %tIn.addr = alloca float*, align 8
  %tOut.addr = alloca float*, align 8
  %nx.addr = alloca i32, align 4
  %ny.addr = alloca i32, align 4
  %nz.addr = alloca i32, align 4
  %Cap.addr = alloca float, align 4
  %Rx.addr = alloca float, align 4
  %Ry.addr = alloca float, align 4
  %Rz.addr = alloca float, align 4
  %dt.addr = alloca float, align 4
  %numiter.addr = alloca i32, align 4
  %ce = alloca float, align 4
  %cw = alloca float, align 4
  %cn = alloca float, align 4
  %cs = alloca float, align 4
  %ct = alloca float, align 4
  %cb = alloca float, align 4
  %cc = alloca float, align 4
  %stepDivCap = alloca float, align 4
  %c = alloca i32, align 4
  %w = alloca i32, align 4
  %e = alloca i32, align 4
  %n = alloca i32, align 4
  %s = alloca i32, align 4
  %b = alloca i32, align 4
  %t = alloca i32, align 4
  %x = alloca i32, align 4
  %y = alloca i32, align 4
  %z = alloca i32, align 4
  %i = alloca i32, align 4
  %temp = alloca float*, align 8
  store float* %pIn, float** %pIn.addr, align 8
  store float* %tIn, float** %tIn.addr, align 8
  store float* %tOut, float** %tOut.addr, align 8
  store i32 %nx, i32* %nx.addr, align 4
  store i32 %ny, i32* %ny.addr, align 4
  store i32 %nz, i32* %nz.addr, align 4
  store float %Cap, float* %Cap.addr, align 4
  store float %Rx, float* %Rx.addr, align 4
  store float %Ry, float* %Ry.addr, align 4
  store float %Rz, float* %Rz.addr, align 4
  store float %dt, float* %dt.addr, align 4
  store i32 %numiter, i32* %numiter.addr, align 4
  ; stepDivCap = dt / Cap
  %0 = load float, float* %dt.addr, align 4
  %1 = load float, float* %Cap.addr, align 4
  %div = fdiv float %0, %1
  store float %div, float* %stepDivCap, align 4
  ; cw = ce = stepDivCap / Rx
  %2 = load float, float* %stepDivCap, align 4
  %3 = load float, float* %Rx.addr, align 4
  %div1 = fdiv float %2, %3
  store float %div1, float* %cw, align 4
  store float %div1, float* %ce, align 4
  ; cs = cn = stepDivCap / Ry
  %4 = load float, float* %stepDivCap, align 4
  %5 = load float, float* %Ry.addr, align 4
  %div2 = fdiv float %4, %5
  store float %div2, float* %cs, align 4
  store float %div2, float* %cn, align 4
  ; cb = ct = stepDivCap / Rz
  %6 = load float, float* %stepDivCap, align 4
  %7 = load float, float* %Rz.addr, align 4
  %div3 = fdiv float %6, %7
  store float %div3, float* %cb, align 4
  store float %div3, float* %ct, align 4
  ; cc = 1.0 - (2.0*ce + 2.0*cn + 3.0*ct), computed in double then truncated
  %8 = load float, float* %ce, align 4
  %conv = fpext float %8 to double
  %mul = fmul contract double 2.000000e+00, %conv
  %9 = load float, float* %cn, align 4
  %conv4 = fpext float %9 to double
  %mul5 = fmul contract double 2.000000e+00, %conv4
  %add = fadd contract double %mul, %mul5
  %10 = load float, float* %ct, align 4
  %conv6 = fpext float %10 to double
  %mul7 = fmul contract double 3.000000e+00, %conv6
  %add8 = fadd contract double %add, %mul7
  %sub = fsub contract double 1.000000e+00, %add8
  %conv9 = fptrunc double %sub to float
  store float %conv9, float* %cc, align 4
  store i32 0, i32* %i, align 4
  br label %do.body

do.body:                                          ; preds = %do.cond, %entry
  store i32 0, i32* %z, align 4
  br label %for.cond

for.cond:                                         ; preds = %for.inc95, %do.body
  %11 = load i32, i32* %z, align 4
  %12 = load i32, i32* %nz.addr, align 4
  %cmp = icmp slt i32 %11, %12
  br i1 %cmp, label %for.body, label %for.end97

for.body:                                         ; preds = %for.cond
  store i32 0, i32* %y, align 4
  br label %for.cond10

for.cond10:                                       ; preds = %for.inc92, %for.body
  %13 = load i32, i32* %y, align 4
  %14 = load i32, i32* %ny.addr, align 4
  %cmp11 = icmp slt i32 %13, %14
  br i1 %cmp11, label %for.body12, label %for.end94

for.body12:                                       ; preds = %for.cond10
  store i32 0, i32* %x, align 4
  br label %for.cond13

for.cond13:                                       ; preds = %for.inc, %for.body12
  %15 = load i32, i32* %x, align 4
  %16 = load i32, i32* %nx.addr, align 4
  %cmp14 = icmp slt i32 %15, %16
  br i1 %cmp14, label %for.body15, label %for.end

for.body15:                                       ; preds = %for.cond13
  ; c = x + y * nx + z * nx * ny
  %17 = load i32, i32* %x, align 4
  %18 = load i32, i32* %y, align 4
  %19 = load i32, i32* %nx.addr, align 4
  %mul16 = mul nsw i32 %18, %19
  %add17 = add nsw i32 %17, %mul16
  %20 = load i32, i32* %z, align 4
  %21 = load i32, i32* %nx.addr, align 4
  %mul18 = mul nsw i32 %20, %21
  %22 = load i32, i32* %ny.addr, align 4
  %mul19 = mul nsw i32 %mul18, %22
  %add20 = add nsw i32 %add17, %mul19
  store i32 %add20, i32* %c, align 4
  ; w = (x == 0) ? c : c - 1
  %23 = load i32, i32* %x, align 4
  %cmp21 = icmp eq i32 %23, 0
  br i1 %cmp21, label %cond.true, label %cond.false

cond.true:                                        ; preds = %for.body15
  %24 = load i32, i32* %c, align 4
  br label %cond.end

cond.false:                                       ; preds = %for.body15
  %25 = load i32, i32* %c, align 4
  %sub22 = sub nsw i32 %25, 1
  br label %cond.end

cond.end:                                         ; preds = %cond.false, %cond.true
  %cond = phi i32 [ %24, %cond.true ], [ %sub22, %cond.false ]
  store i32 %cond, i32* %w, align 4
  ; e = (x == nx - 1) ? c : c + 1
  %26 = load i32, i32* %x, align 4
  %27 = load i32, i32* %nx.addr, align 4
  %sub23 = sub nsw i32 %27, 1
  %cmp24 = icmp eq i32 %26, %sub23
  br i1 %cmp24, label %cond.true25, label %cond.false26

cond.true25:                                      ; preds = %cond.end
  %28 = load i32, i32* %c, align 4
  br label %cond.end28

cond.false26:                                     ; preds = %cond.end
  %29 = load i32, i32* %c, align 4
  %add27 = add nsw i32 %29, 1
  br label %cond.end28

cond.end28:                                       ; preds = %cond.false26, %cond.true25
  %cond29 = phi i32 [ %28, %cond.true25 ], [ %add27, %cond.false26 ]
  store i32 %cond29, i32* %e, align 4
  ; n = (y == 0) ? c : c - nx
  %30 = load i32, i32* %y, align 4
  %cmp30 = icmp eq i32 %30, 0
  br i1 %cmp30, label %cond.true31, label %cond.false32

cond.true31:                                      ; preds = %cond.end28
  %31 = load i32, i32* %c, align 4
  br label %cond.end34

cond.false32:                                     ; preds = %cond.end28
  %32 = load i32, i32* %c, align 4
  %33 = load i32, i32* %nx.addr, align 4
  %sub33 = sub nsw i32 %32, %33
  br label %cond.end34

cond.end34:                                       ; preds = %cond.false32, %cond.true31
  %cond35 = phi i32 [ %31, %cond.true31 ], [ %sub33, %cond.false32 ]
  store i32 %cond35, i32* %n, align 4
  ; s = (y == ny - 1) ? c : c + nx
  %34 = load i32, i32* %y, align 4
  %35 = load i32, i32* %ny.addr, align 4
  %sub36 = sub nsw i32 %35, 1
  %cmp37 = icmp eq i32 %34, %sub36
  br i1 %cmp37, label %cond.true38, label %cond.false39

cond.true38:                                      ; preds = %cond.end34
  %36 = load i32, i32* %c, align 4
  br label %cond.end41

cond.false39:                                     ; preds = %cond.end34
  %37 = load i32, i32* %c, align 4
  %38 = load i32, i32* %nx.addr, align 4
  %add40 = add nsw i32 %37, %38
  br label %cond.end41

cond.end41:                                       ; preds = %cond.false39, %cond.true38
  %cond42 = phi i32 [ %36, %cond.true38 ], [ %add40, %cond.false39 ]
  store i32 %cond42, i32* %s, align 4
  ; b = (z == 0) ? c : c - nx * ny
  %39 = load i32, i32* %z, align 4
  %cmp43 = icmp eq i32 %39, 0
  br i1 %cmp43, label %cond.true44, label %cond.false45

cond.true44:                                      ; preds = %cond.end41
  %40 = load i32, i32* %c, align 4
  br label %cond.end48

cond.false45:                                     ; preds = %cond.end41
  %41 = load i32, i32* %c, align 4
  %42 = load i32, i32* %nx.addr, align 4
  %43 = load i32, i32* %ny.addr, align 4
  %mul46 = mul nsw i32 %42, %43
  %sub47 = sub nsw i32 %41, %mul46
  br label %cond.end48

cond.end48:                                       ; preds = %cond.false45, %cond.true44
  %cond49 = phi i32 [ %40, %cond.true44 ], [ %sub47, %cond.false45 ]
  store i32 %cond49, i32* %b, align 4
  ; t = (z == nz - 1) ? c : c + nx * ny
  %44 = load i32, i32* %z, align 4
  %45 = load i32, i32* %nz.addr, align 4
  %sub50 = sub nsw i32 %45, 1
  %cmp51 = icmp eq i32 %44, %sub50
  br i1 %cmp51, label %cond.true52, label %cond.false53

cond.true52:                                      ; preds = %cond.end48
  %46 = load i32, i32* %c, align 4
  br label %cond.end56

cond.false53:                                     ; preds = %cond.end48
  %47 = load i32, i32* %c, align 4
  %48 = load i32, i32* %nx.addr, align 4
  %49 = load i32, i32* %ny.addr, align 4
  %mul54 = mul nsw i32 %48, %49
  %add55 = add nsw i32 %47, %mul54
  br label %cond.end56

cond.end56:                                       ; preds = %cond.false53, %cond.true52
  %cond57 = phi i32 [ %46, %cond.true52 ], [ %add55, %cond.false53 ]
  store i32 %cond57, i32* %t, align 4
  ; tIn[c] * cc
  %50 = load float*, float** %tIn.addr, align 8
  %51 = load i32, i32* %c, align 4
  %idxprom = sext i32 %51 to i64
  %arrayidx = getelementptr inbounds float, float* %50, i64 %idxprom
  %52 = load float, float* %arrayidx, align 4
  %53 = load float, float* %cc, align 4
  %mul58 = fmul contract float %52, %53
  ; + tIn[n] * cn
  %54 = load float*, float** %tIn.addr, align 8
  %55 = load i32, i32* %n, align 4
  %idxprom59 = sext i32 %55 to i64
  %arrayidx60 = getelementptr inbounds float, float* %54, i64 %idxprom59
  %56 = load float, float* %arrayidx60, align 4
  %57 = load float, float* %cn, align 4
  %mul61 = fmul contract float %56, %57
  %add62 = fadd contract float %mul58, %mul61
  ; + tIn[s] * cs
  %58 = load float*, float** %tIn.addr, align 8
  %59 = load i32, i32* %s, align 4
  %idxprom63 = sext i32 %59 to i64
  %arrayidx64 = getelementptr inbounds float, float* %58, i64 %idxprom63
  %60 = load float, float* %arrayidx64, align 4
  %61 = load float, float* %cs, align 4
  %mul65 = fmul contract float %60, %61
  %add66 = fadd contract float %add62, %mul65
  ; + tIn[e] * ce
  %62 = load float*, float** %tIn.addr, align 8
  %63 = load i32, i32* %e, align 4
  %idxprom67 = sext i32 %63 to i64
  %arrayidx68 = getelementptr inbounds float, float* %62, i64 %idxprom67
  %64 = load float, float* %arrayidx68, align 4
  %65 = load float, float* %ce, align 4
  %mul69 = fmul contract float %64, %65
  %add70 = fadd contract float %add66, %mul69
  ; + tIn[w] * cw
  %66 = load float*, float** %tIn.addr, align 8
  %67 = load i32, i32* %w, align 4
  %idxprom71 = sext i32 %67 to i64
  %arrayidx72 = getelementptr inbounds float, float* %66, i64 %idxprom71
  %68 = load float, float* %arrayidx72, align 4
  %69 = load float, float* %cw, align 4
  %mul73 = fmul contract float %68, %69
  %add74 = fadd contract float %add70, %mul73
  ; + tIn[t] * ct
  %70 = load float*, float** %tIn.addr, align 8
  %71 = load i32, i32* %t, align 4
  %idxprom75 = sext i32 %71 to i64
  %arrayidx76 = getelementptr inbounds float, float* %70, i64 %idxprom75
  %72 = load float, float* %arrayidx76, align 4
  %73 = load float, float* %ct, align 4
  %mul77 = fmul contract float %72, %73
  %add78 = fadd contract float %add74, %mul77
  ; + tIn[b] * cb
  %74 = load float*, float** %tIn.addr, align 8
  %75 = load i32, i32* %b, align 4
  %idxprom79 = sext i32 %75 to i64
  %arrayidx80 = getelementptr inbounds float, float* %74, i64 %idxprom79
  %76 = load float, float* %arrayidx80, align 4
  %77 = load float, float* %cb, align 4
  %mul81 = fmul contract float %76, %77
  %add82 = fadd contract float %add78, %mul81
  ; + (dt / Cap) * pIn[c]
  %78 = load float, float* %dt.addr, align 4
  %79 = load float, float* %Cap.addr, align 4
  %div83 = fdiv float %78, %79
  %80 = load float*, float** %pIn.addr, align 8
  %81 = load i32, i32* %c, align 4
  %idxprom84 = sext i32 %81 to i64
  %arrayidx85 = getelementptr inbounds float, float* %80, i64 %idxprom84
  %82 = load float, float* %arrayidx85, align 4
  %mul86 = fmul contract float %div83, %82
  %add87 = fadd contract float %add82, %mul86
  ; + ct * amb_temp
  %83 = load float, float* %ct, align 4
  %84 = load float, float* @amb_temp, align 4
  %mul88 = fmul contract float %83, %84
  %add89 = fadd contract float %add87, %mul88
  ; tOut[c] = accumulated value
  %85 = load float*, float** %tOut.addr, align 8
  %86 = load i32, i32* %c, align 4
  %idxprom90 = sext i32 %86 to i64
  %arrayidx91 = getelementptr inbounds float, float* %85, i64 %idxprom90
  store float %add89, float* %arrayidx91, align 4
  br label %for.inc

for.inc:                                          ; preds = %cond.end56
  %87 = load i32, i32* %x, align 4
  %inc = add nsw i32 %87, 1
  store i32 %inc, i32* %x, align 4
  br label %for.cond13

for.end:                                          ; preds = %for.cond13
  br label %for.inc92

for.inc92:                                        ; preds = %for.end
  %88 = load i32, i32* %y, align 4
  %inc93 = add nsw i32 %88, 1
  store i32 %inc93, i32* %y, align 4
  br label %for.cond10

for.end94:                                        ; preds = %for.cond10
  br label %for.inc95

for.inc95:                                        ; preds = %for.end94
  %89 = load i32, i32* %z, align 4
  %inc96 = add nsw i32 %89, 1
  store i32 %inc96, i32* %z, align 4
  br label %for.cond

for.end97:                                        ; preds = %for.cond
  ; swap the local tIn / tOut pointers through %temp, then ++i
  %90 = load float*, float** %tIn.addr, align 8
  store float* %90, float** %temp, align 8
  %91 = load float*, float** %tOut.addr, align 8
  store float* %91, float** %tIn.addr, align 8
  %92 = load float*, float** %temp, align 8
  store float* %92, float** %tOut.addr, align 8
  %93 = load i32, i32* %i, align 4
  %inc98 = add nsw i32 %93, 1
  store i32 %inc98, i32* %i, align 4
  br label %do.cond

do.cond:                                          ; preds = %for.end97
  %94 = load i32, i32* %i, align 4
  %95 = load i32, i32* %numiter.addr, align 4
  %cmp99 = icmp slt i32 %94, %95
  br i1 %cmp99, label %do.body, label %do.end

do.end:                                           ; preds = %do.cond
  ret void
}

; accuracy(arr1, arr2, len)
;
; Returns sqrt( sum_{i < len} (arr1[i] - arr2[i])^2 / (float)len ).
; The difference is loaded and computed twice per element and multiplied.
; `len` is used as the divisor without a zero check.
;
; Function Attrs: noinline optnone uwtable
define dso_local float @_Z8accuracyPfS_i(float* %arr1, float* %arr2, i32 %len) #2 {
entry:
  %arr1.addr = alloca float*, align 8
  %arr2.addr = alloca float*, align 8
  %len.addr = alloca i32, align 4
  %err = alloca float, align 4
  %i = alloca i32, align 4
  store float* %arr1, float** %arr1.addr, align 8
  store float* %arr2, float** %arr2.addr, align 8
  store i32 %len, i32* %len.addr, align 4
  store float 0.000000e+00, float* %err, align 4
  store i32 0, i32* %i, align 4
  br label %for.cond

for.cond:                                         ; preds = %for.inc, %entry
  %0 = load i32, i32* %i, align 4
  %1 = load i32, i32* %len.addr, align 4
  %cmp = icmp slt i32 %0, %1
  br i1 %cmp, label %for.body, label %for.end

for.body:                                         ; preds = %for.cond
  ; first copy of (arr1[i] - arr2[i])
  %2 = load float*, float** %arr1.addr, align 8
  %3 = load i32, i32* %i, align 4
  %idxprom = sext i32 %3 to i64
  %arrayidx = getelementptr inbounds float, float* %2, i64 %idxprom
  %4 = load float, float* %arrayidx, align 4
  %5 = load float*, float** %arr2.addr, align 8
  %6 = load i32, i32* %i, align 4
  %idxprom1 = sext i32 %6 to i64
  %arrayidx2 = getelementptr inbounds float, float* %5, i64 %idxprom1
  %7 = load float, float* %arrayidx2, align 4
  %sub = fsub contract float %4, %7
  ; second copy of (arr1[i] - arr2[i])
  %8 = load float*, float** %arr1.addr, align 8
  %9 = load i32, i32* %i, align 4
  %idxprom3 = sext i32 %9 to i64
  %arrayidx4 = getelementptr inbounds float, float* %8, i64 %idxprom3
  %10 = load float, float* %arrayidx4, align 4
  %11 = load float*, float** %arr2.addr, align 8
  %12 = load i32, i32* %i, align 4
  %idxprom5 = sext i32 %12 to i64
  %arrayidx6 = getelementptr inbounds float, float* %11, i64 %idxprom5
  %13 = load float, float* %arrayidx6, align 4
  %sub7 = fsub contract float %10, %13
  ; err += diff * diff
  %mul = fmul contract float %sub, %sub7
  %14 = load float, float* %err, align 4
  %add = fadd contract float %14, %mul
  store float %add, float* %err, align 4
  br label %for.inc

for.inc:                                          ; preds = %for.body
  %15 = load i32, i32* %i, align 4
  %inc = add nsw i32 %15, 1
  store i32 %inc, i32* %i, align 4
  br label %for.cond

for.end:                                          ; preds = %for.cond
  %16 = load float, float* %err, align 4
  %17 = load i32, i32* %len.addr, align 4
  %conv = sitofp i32 %17 to float
  %div = fdiv float %16, %conv
  %call = call float @_ZSt4sqrtf(float %div)
  ret float %call
}

; std::sqrt(float): forwards its argument to the C library sqrtf.
;
; Function Attrs: noinline nounwind optnone uwtable
define linkonce_odr dso_local float @_ZSt4sqrtf(float %__x) #0 comdat {
entry:
  %__x.addr = alloca float, align 4
  store float %__x, float* %__x.addr, align 4
  %0 = load float, float* %__x.addr, align 4
  %call = call float @sqrtf(float %0) #8
  ret float %call
}

; usage(argc, argv)
;
; Writes seven messages (@.str.11 .. @.str.17) to stderr, the first one
; formatted with argv[0], then calls exit(1). It never returns.
;
; Function Attrs: noinline optnone uwtable
define dso_local void @_Z5usageiPPc(i32 %argc, i8** %argv) #2 {
entry:
  %argc.addr = alloca i32, align 4
  %argv.addr = alloca i8**, align 8
  store i32 %argc, i32* %argc.addr, align 4
  store i8** %argv, i8*** %argv.addr, align 8
  %0 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
  %1 = load i8**, i8*** %argv.addr, align 8
  %arrayidx = getelementptr inbounds i8*, i8** %1, i64 0
  %2 = load i8*, i8** %arrayidx, align 8
  %call = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %0, i8* getelementptr inbounds ([81 x i8], [81 x i8]* @.str.11, i64 0, i64 0), i8* %2)
  %3 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
  %call1 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %3, i8* getelementptr inbounds ([68 x i8], [68 x i8]* @.str.12, i64 0, i64 0))
  %4 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
  %call2 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %4, i8* getelementptr inbounds ([62 x i8], [62 x i8]* @.str.13, i64 0, i64 0))
  %5 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
  %call3 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %5, i8* getelementptr inbounds ([37 x i8], [37 x i8]* @.str.14, i64 0, i64 0))
  %6 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
  %call4 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %6, i8* getelementptr inbounds ([83 x i8], [83 x i8]* @.str.15, i64 0, i64 0))
  %7 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
  %call5 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %7, i8* getelementptr inbounds ([88 x i8], [88 x i8]* @.str.16, i64 0, i64 0))
  %8 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
  %call6 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %8, i8* getelementptr inbounds ([28 x i8], [28 x i8]* @.str.17, i64 0, i64 0))
  call void @exit(i32 1) #9
  unreachable
}

; Function Attrs: noreturn nounwind
declare dso_local void @exit(i32) #5

; main(argc, argv)
;
; Argument layout as read below:
;   argv[1] -> numCols AND numRows (both atoi(argv[1]))
;   argv[2] -> layers
;   argv[3] -> iterations
;   argv[4] -> pfile, argv[5] -> tfile, argv[6] -> ofile
; If argc != 7, usage() is called (it exits the process).
;
; Steps: select CUDA device 0; derive dx, dy, dz, Cap, Rx, Ry, Rz, max_slope
; and dt from the globals @chip_height, @chip_width, @t_chip and the grid
; sizes; allocate powerIn, tempCopy, tempIn, tempOut and answer
; (numCols * numRows * layers floats each); read pfile into powerIn and tfile
; into tempIn; copy tempIn into tempCopy; run hotspot_opt1 on
; (powerIn, tempIn, tempOut) and computeTempCPU on (powerIn, tempCopy, answer);
; print accuracy(tempOut, answer, size) with @.str.18; write tempOut to ofile.
;
; All five heap buffers are released before returning. tempCopy and answer
; were previously never freed.
; NOTE(review): the calloc/malloc results are used without a null check.
;
; Function Attrs: noinline norecurse optnone uwtable
define dso_local i32 @main(i32 %argc, i8** %argv) #6 {
entry:
  %retval = alloca i32, align 4
  %argc.addr = alloca i32, align 4
  %argv.addr = alloca i8**, align 8
  %pfile = alloca i8*, align 8
  %tfile = alloca i8*, align 8
  %ofile = alloca i8*, align 8
  %iterations = alloca i32, align 4
  %numCols = alloca i32, align 4
  %numRows = alloca i32, align 4
  %layers = alloca i32, align 4
  %dx = alloca float, align 4
  %dy = alloca float, align 4
  %dz = alloca float, align 4
  %Cap = alloca float, align 4
  %Rx = alloca float, align 4
  %Ry = alloca float, align 4
  %Rz = alloca float, align 4
  %max_slope = alloca float, align 4
  %dt = alloca float, align 4
  %powerIn = alloca float*, align 8
  %tempOut = alloca float*, align 8
  %tempIn = alloca float*, align 8
  %tempCopy = alloca float*, align 8
  %size = alloca i32, align 4
  %answer = alloca float*, align 8
  %acc = alloca float, align 4
  store i32 0, i32* %retval, align 4
  store i32 %argc, i32* %argc.addr, align 4
  store i8** %argv, i8*** %argv.addr, align 8
  %call = call i32 @cudaSetDevice(i32 0)
  %0 = load i32, i32* %argc.addr, align 4
  %cmp = icmp ne i32 %0, 7
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  %1 = load i32, i32* %argc.addr, align 4
  %2 = load i8**, i8*** %argv.addr, align 8
  call void @_Z5usageiPPc(i32 %1, i8** %2)
  br label %if.end

if.end:                                           ; preds = %if.then, %entry
  ; iterations = atoi(argv[3])
  %3 = load i8**, i8*** %argv.addr, align 8
  %arrayidx = getelementptr inbounds i8*, i8** %3, i64 3
  %4 = load i8*, i8** %arrayidx, align 8
  %call1 = call i32 @atoi(i8* %4) #10
  store i32 %call1, i32* %iterations, align 4
  ; pfile = argv[4]
  %5 = load i8**, i8*** %argv.addr, align 8
  %arrayidx2 = getelementptr inbounds i8*, i8** %5, i64 4
  %6 = load i8*, i8** %arrayidx2, align 8
  store i8* %6, i8** %pfile, align 8
  ; tfile = argv[5]
  %7 = load i8**, i8*** %argv.addr, align 8
  %arrayidx3 = getelementptr inbounds i8*, i8** %7, i64 5
  %8 = load i8*, i8** %arrayidx3, align 8
  store i8* %8, i8** %tfile, align 8
  ; ofile = argv[6]
  %9 = load i8**, i8*** %argv.addr, align 8
  %arrayidx4 = getelementptr inbounds i8*, i8** %9, i64 6
  %10 = load i8*, i8** %arrayidx4, align 8
  store i8* %10, i8** %ofile, align 8
  ; numCols = atoi(argv[1])
  %11 = load i8**, i8*** %argv.addr, align 8
  %arrayidx5 = getelementptr inbounds i8*, i8** %11, i64 1
  %12 = load i8*, i8** %arrayidx5, align 8
  %call6 = call i32 @atoi(i8* %12) #10
  store i32 %call6, i32* %numCols, align 4
  ; numRows = atoi(argv[1])  (same argument as numCols)
  %13 = load i8**, i8*** %argv.addr, align 8
  %arrayidx7 = getelementptr inbounds i8*, i8** %13, i64 1
  %14 = load i8*, i8** %arrayidx7, align 8
  %call8 = call i32 @atoi(i8* %14) #10
  store i32 %call8, i32* %numRows, align 4
  ; layers = atoi(argv[2])
  %15 = load i8**, i8*** %argv.addr, align 8
  %arrayidx9 = getelementptr inbounds i8*, i8** %15, i64 2
  %16 = load i8*, i8** %arrayidx9, align 8
  %call10 = call i32 @atoi(i8* %16) #10
  store i32 %call10, i32* %layers, align 4
  ; dx = chip_height / numRows
  %17 = load float, float* @chip_height, align 4
  %18 = load i32, i32* %numRows, align 4
  %conv = sitofp i32 %18 to float
  %div = fdiv float %17, %conv
  store float %div, float* %dx, align 4
  ; dy = chip_width / numCols
  %19 = load float, float* @chip_width, align 4
  %20 = load i32, i32* %numCols, align 4
  %conv11 = sitofp i32 %20 to float
  %div12 = fdiv float %19, %conv11
  store float %div12, float* %dy, align 4
  ; dz = t_chip / layers
  %21 = load float, float* @t_chip, align 4
  %22 = load i32, i32* %layers, align 4
  %conv13 = sitofp i32 %22 to float
  %div14 = fdiv float %21, %conv13
  store float %div14, float* %dz, align 4
  ; Cap = 8.75e5 * t_chip * dx * dy
  %23 = load float, float* @t_chip, align 4
  %conv15 = fpext float %23 to double
  %mul = fmul contract double 8.750000e+05, %conv15
  %24 = load float, float* %dx, align 4
  %conv16 = fpext float %24 to double
  %mul17 = fmul contract double %mul, %conv16
  %25 = load float, float* %dy, align 4
  %conv18 = fpext float %25 to double
  %mul19 = fmul contract double %mul17, %conv18
  %conv20 = fptrunc double %mul19 to float
  store float %conv20, float* %Cap, align 4
  ; Rx = dy / (2.0e2 * t_chip * dx)
  %26 = load float, float* %dy, align 4
  %conv21 = fpext float %26 to double
  %27 = load float, float* @t_chip, align 4
  %conv22 = fpext float %27 to double
  %mul23 = fmul contract double 2.000000e+02, %conv22
  %28 = load float, float* %dx, align 4
  %conv24 = fpext float %28 to double
  %mul25 = fmul contract double %mul23, %conv24
  %div26 = fdiv double %conv21, %mul25
  %conv27 = fptrunc double %div26 to float
  store float %conv27, float* %Rx, align 4
  ; Ry = dx / (2.0e2 * t_chip * dy)
  %29 = load float, float* %dx, align 4
  %conv28 = fpext float %29 to double
  %30 = load float, float* @t_chip, align 4
  %conv29 = fpext float %30 to double
  %mul30 = fmul contract double 2.000000e+02, %conv29
  %31 = load float, float* %dy, align 4
  %conv31 = fpext float %31 to double
  %mul32 = fmul contract double %mul30, %conv31
  %div33 = fdiv double %conv28, %mul32
  %conv34 = fptrunc double %div33 to float
  store float %conv34, float* %Ry, align 4
  ; Rz = dz / (1.0e2 * dx * dy)
  %32 = load float, float* %dz, align 4
  %33 = load float, float* %dx, align 4
  %mul35 = fmul contract float 1.000000e+02, %33
  %34 = load float, float* %dy, align 4
  %mul36 = fmul contract float %mul35, %34
  %div37 = fdiv float %32, %mul36
  store float %div37, float* %Rz, align 4
  ; max_slope = 3.0e6 / (0.5 * t_chip * 1.75e6)
  %35 = load float, float* @t_chip, align 4
  %conv38 = fpext float %35 to double
  %mul39 = fmul contract double 5.000000e-01, %conv38
  %mul40 = fmul contract double %mul39, 1.750000e+06
  %div41 = fdiv double 3.000000e+06, %mul40
  %conv42 = fptrunc double %div41 to float
  store float %conv42, float* %max_slope, align 4
  ; dt = 1.0e-3 / max_slope
  %36 = load float, float* %max_slope, align 4
  %conv43 = fpext float %36 to double
  %div44 = fdiv double 1.000000e-03, %conv43
  %conv45 = fptrunc double %div44 to float
  store float %conv45, float* %dt, align 4
  ; size = numCols * numRows * layers
  %37 = load i32, i32* %numCols, align 4
  %38 = load i32, i32* %numRows, align 4
  %mul46 = mul nsw i32 %37, %38
  %39 = load i32, i32* %layers, align 4
  %mul47 = mul nsw i32 %mul46, %39
  store i32 %mul47, i32* %size, align 4
  ; powerIn = calloc(size, 4)
  %40 = load i32, i32* %size, align 4
  %conv48 = sext i32 %40 to i64
  %call49 = call noalias i8* @calloc(i64 %conv48, i64 4) #8
  %41 = bitcast i8* %call49 to float*
  store float* %41, float** %powerIn, align 8
  ; tempCopy = malloc(size * 4)
  %42 = load i32, i32* %size, align 4
  %conv50 = sext i32 %42 to i64
  %mul51 = mul i64 %conv50, 4
  %call52 = call noalias i8* @malloc(i64 %mul51) #8
  %43 = bitcast i8* %call52 to float*
  store float* %43, float** %tempCopy, align 8
  ; tempIn = calloc(size, 4)
  %44 = load i32, i32* %size, align 4
  %conv53 = sext i32 %44 to i64
  %call54 = call noalias i8* @calloc(i64 %conv53, i64 4) #8
  %45 = bitcast i8* %call54 to float*
  store float* %45, float** %tempIn, align 8
  ; tempOut = calloc(size, 4)
  %46 = load i32, i32* %size, align 4
  %conv55 = sext i32 %46 to i64
  %call56 = call noalias i8* @calloc(i64 %conv55, i64 4) #8
  %47 = bitcast i8* %call56 to float*
  store float* %47, float** %tempOut, align 8
  ; answer = calloc(size, 4)
  %48 = load i32, i32* %size, align 4
  %conv57 = sext i32 %48 to i64
  %call58 = call noalias i8* @calloc(i64 %conv57, i64 4) #8
  %49 = bitcast i8* %call58 to float*
  store float* %49, float** %answer, align 8
  ; readinput(powerIn, numRows, numCols, layers, pfile)
  %50 = load float*, float** %powerIn, align 8
  %51 = load i32, i32* %numRows, align 4
  %52 = load i32, i32* %numCols, align 4
  %53 = load i32, i32* %layers, align 4
  %54 = load i8*, i8** %pfile, align 8
  call void @_Z9readinputPfiiiPc(float* %50, i32 %51, i32 %52, i32 %53, i8* %54)
  ; readinput(tempIn, numRows, numCols, layers, tfile)
  %55 = load float*, float** %tempIn, align 8
  %56 = load i32, i32* %numRows, align 4
  %57 = load i32, i32* %numCols, align 4
  %58 = load i32, i32* %layers, align 4
  %59 = load i8*, i8** %tfile, align 8
  call void @_Z9readinputPfiiiPc(float* %55, i32 %56, i32 %57, i32 %58, i8* %59)
  ; memcpy(tempCopy, tempIn, size * 4)
  %60 = load float*, float** %tempCopy, align 8
  %61 = bitcast float* %60 to i8*
  %62 = load float*, float** %tempIn, align 8
  %63 = bitcast float* %62 to i8*
  %64 = load i32, i32* %size, align 4
  %conv59 = sext i32 %64 to i64
  %mul60 = mul i64 %conv59, 4
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %61, i8* align 4 %63, i64 %mul60, i1 false)
  ; hotspot_opt1(powerIn, tempIn, tempOut, numCols, numRows, layers, Cap, Rx, Ry, Rz, dt, iterations)
  %65 = load float*, float** %powerIn, align 8
  %66 = load float*, float** %tempIn, align 8
  %67 = load float*, float** %tempOut, align 8
  %68 = load i32, i32* %numCols, align 4
  %69 = load i32, i32* %numRows, align 4
  %70 = load i32, i32* %layers, align 4
  %71 = load float, float* %Cap, align 4
  %72 = load float, float* %Rx, align 4
  %73 = load float, float* %Ry, align 4
  %74 = load float, float* %Rz, align 4
  %75 = load float, float* %dt, align 4
  %76 = load i32, i32* %iterations, align 4
  call void @_Z12hotspot_opt1PfS_S_iiifffffi(float* %65, float* %66, float* %67, i32 %68, i32 %69, i32 %70, float %71, float %72, float %73, float %74, float %75, i32 %76)
  ; computeTempCPU(powerIn, tempCopy, answer, numCols, numRows, layers, Cap, Rx, Ry, Rz, dt, iterations)
  %77 = load float*, float** %powerIn, align 8
  %78 = load float*, float** %tempCopy, align 8
  %79 = load float*, float** %answer, align 8
  %80 = load i32, i32* %numCols, align 4
  %81 = load i32, i32* %numRows, align 4
  %82 = load i32, i32* %layers, align 4
  %83 = load float, float* %Cap, align 4
  %84 = load float, float* %Rx, align 4
  %85 = load float, float* %Ry, align 4
  %86 = load float, float* %Rz, align 4
  %87 = load float, float* %dt, align 4
  %88 = load i32, i32* %iterations, align 4
  call void @_Z14computeTempCPUPfS_S_iiifffffi(float* %77, float* %78, float* %79, i32 %80, i32 %81, i32 %82, float %83, float %84, float %85, float %86, float %87, i32 %88)
  ; acc = accuracy(tempOut, answer, numRows * numCols * layers)
  %89 = load float*, float** %tempOut, align 8
  %90 = load float*, float** %answer, align 8
  %91 = load i32, i32* %numRows, align 4
  %92 = load i32, i32* %numCols, align 4
  %mul61 = mul nsw i32 %91, %92
  %93 = load i32, i32* %layers, align 4
  %mul62 = mul nsw i32 %mul61, %93
  %call63 = call float @_Z8accuracyPfS_i(float* %89, float* %90, i32 %mul62)
  store float %call63, float* %acc, align 4
  %94 = load float, float* %acc, align 4
  %conv64 = fpext float %94 to double
  %call65 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.18, i64 0, i64 0), double %conv64)
  ; writeoutput(tempOut, numRows, numCols, layers, ofile)
  %95 = load float*, float** %tempOut, align 8
  %96 = load i32, i32* %numRows, align 4
  %97 = load i32, i32* %numCols, align 4
  %98 = load i32, i32* %layers, align 4
  %99 = load i8*, i8** %ofile, align 8
  call void @_Z11writeoutputPfiiiPc(float* %95, i32 %96, i32 %97, i32 %98, i8* %99)
  ; release every heap buffer allocated above
  %100 = load float*, float** %tempIn, align 8
  %101 = bitcast float* %100 to i8*
  call void @free(i8* %101) #8
  %102 = load float*, float** %tempOut, align 8
  %103 = bitcast float* %102 to i8*
  call void @free(i8* %103) #8
  %104 = load float*, float** %powerIn, align 8
  %105 = bitcast float* %104 to i8*
  call void @free(i8* %105) #8
  ; tempCopy and answer were leaked before this fix
  %106 = load float*, float** %tempCopy, align 8
  %107 = bitcast float* %106 to i8*
  call void @free(i8* %107) #8
  %108 = load float*, float** %answer, align 8
  %109 = bitcast float* %108 to i8*
  call void @free(i8* %109) #8
  ret i32 0
}

declare dso_local i32 @cudaSetDevice(i32) #4

; Function Attrs: nounwind readonly
declare dso_local i32 @atoi(i8*) #7

; Function Attrs: nounwind
declare dso_local noalias i8* @calloc(i64, i64) #1

; Function Attrs: nounwind
declare dso_local noalias i8* @malloc(i64) #1

; Function Attrs: nounwind
declare dso_local void @free(i8*) #1

; Function Attrs: nounwind
declare dso_local float @sqrtf(float) #1

; Registers the host stub @_Z11hotspotOpt1PfS_S_fiiifffffff with the handle
; passed in %0, using the string @0 for both name arguments. The i32 result of
; __cudaRegisterFunction is not used.
define internal void @__cuda_register_globals(i8** %0) {
entry:
  %1 = call i32 @__cudaRegisterFunction(i8** %0, i8* bitcast (void (float*, float*, float*, float, i32, i32, i32, float, float, float, float, float, float, float)* @_Z11hotspotOpt1PfS_S_fiiifffffff to i8*), i8* getelementptr inbounds ([33 x i8], [33 x i8]* @0, i64 0, i64 0), i8* getelementptr inbounds ([33 x i8], [33 x i8]* @0, i64 0, i64 0), i32 -1, i8* null, i8* null, i8* null, i8* null, i32* null)
  ret void
}

declare dso_local i32 @__cudaRegisterFunction(i8**, i8*, i8*, i8*, i32, i8*, i8*, i8*, i8*, i32*)
; ---------------------------------------------------------------------------
; CUDA runtime registration entry points (defined outside this module).
; ---------------------------------------------------------------------------
declare dso_local i32 @__cudaRegisterVar(i8**, i8*, i8*, i8*, i32, i32, i32, i32)

declare dso_local i8** @__cudaRegisterFatBinary(i8*)

; Registration routine for this module's embedded GPU binary.
;   1. Passes @__cuda_fatbin_wrapper to __cudaRegisterFatBinary.
;   2. Stores the returned handle in @__cuda_gpubin_handle.
;   3. Calls @__cuda_register_globals with that handle.
;   4. Calls __cudaRegisterFatBinaryEnd with that handle.
;   5. Registers @__cuda_module_dtor through atexit.
; The i8* parameter is never read, and the atexit return value is ignored.
; NOTE(review): presumably referenced from @llvm.global_ctors, which is not
; visible in this part of the file -- confirm.
define internal void @__cuda_module_ctor(i8* %0) {
entry:
  %gpubin.handle = call i8** @__cudaRegisterFatBinary(i8* bitcast ({ i32, i32, i8*, i8* }* @__cuda_fatbin_wrapper to i8*))
  store i8** %gpubin.handle, i8*** @__cuda_gpubin_handle, align 8
  call void @__cuda_register_globals(i8** %gpubin.handle)
  call void @__cudaRegisterFatBinaryEnd(i8** %gpubin.handle)
  %atexit.status = call i32 @atexit(void (i8*)* @__cuda_module_dtor)
  ret void
}

declare dso_local void @__cudaRegisterFatBinaryEnd(i8**)

declare dso_local void @__cudaUnregisterFatBinary(i8**)

; Counterpart of @__cuda_module_ctor: reloads the handle saved in
; @__cuda_gpubin_handle and passes it to __cudaUnregisterFatBinary.
; The i8* parameter is never read.
define internal void @__cuda_module_dtor(i8* %0) {
entry:
  %gpubin.handle = load i8**, i8*** @__cuda_gpubin_handle, align 8
  call void @__cudaUnregisterFatBinary(i8** %gpubin.handle)
  ret void
}

; Declared with the callback type this module passes to it (void (i8*)*).
declare dso_local i32 @atexit(void (i8*)*)

; ---------------------------------------------------------------------------
; Attribute groups. The quoted key/value pairs repeat the same x86-64 target
; and floating-point settings; the groups differ mainly in the leading
; keyword attributes.
; ---------------------------------------------------------------------------
attributes #0 = {
  noinline nounwind optnone uwtable
  "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false"
  "frame-pointer"="all" "less-precise-fpmad"="false"
  "min-legal-vector-width"="0" "no-infs-fp-math"="false"
  "no-jump-tables"="false" "no-nans-fp-math"="false"
  "no-signed-zeros-fp-math"="false" "no-trapping-math"="false"
  "stack-protector-buffer-size"="8" "target-cpu"="x86-64"
  "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87"
  "unsafe-fp-math"="false" "use-soft-float"="false"
}

attributes #1 = {
  nounwind
  "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false"
  "frame-pointer"="all" "less-precise-fpmad"="false"
  "no-infs-fp-math"="false" "no-nans-fp-math"="false"
  "no-signed-zeros-fp-math"="false" "no-trapping-math"="false"
  "stack-protector-buffer-size"="8" "target-cpu"="x86-64"
  "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87"
  "unsafe-fp-math"="false" "use-soft-float"="false"
}

attributes #2 = {
  noinline optnone uwtable
  "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false"
  "frame-pointer"="all" "less-precise-fpmad"="false"
  "min-legal-vector-width"="0" "no-infs-fp-math"="false"
  "no-jump-tables"="false" "no-nans-fp-math"="false"
  "no-signed-zeros-fp-math"="false" "no-trapping-math"="false"
  "stack-protector-buffer-size"="8" "target-cpu"="x86-64"
  "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87"
  "unsafe-fp-math"="false" "use-soft-float"="false"
}

attributes #3 = { argmemonly nounwind willreturn }

attributes #4 = {
  "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false"
  "frame-pointer"="all" "less-precise-fpmad"="false"
  "no-infs-fp-math"="false" "no-nans-fp-math"="false"
  "no-signed-zeros-fp-math"="false" "no-trapping-math"="false"
  "stack-protector-buffer-size"="8" "target-cpu"="x86-64"
  "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87"
  "unsafe-fp-math"="false" "use-soft-float"="false"
}

attributes #5 = {
  noreturn nounwind
  "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false"
  "frame-pointer"="all" "less-precise-fpmad"="false"
  "no-infs-fp-math"="false" "no-nans-fp-math"="false"
  "no-signed-zeros-fp-math"="false" "no-trapping-math"="false"
  "stack-protector-buffer-size"="8" "target-cpu"="x86-64"
  "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87"
  "unsafe-fp-math"="false" "use-soft-float"="false"
}

attributes #6 = {
  noinline norecurse optnone uwtable
  "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false"
  "frame-pointer"="all" "less-precise-fpmad"="false"
  "min-legal-vector-width"="0" "no-infs-fp-math"="false"
  "no-jump-tables"="false" "no-nans-fp-math"="false"
  "no-signed-zeros-fp-math"="false" "no-trapping-math"="false"
  "stack-protector-buffer-size"="8" "target-cpu"="x86-64"
  "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87"
  "unsafe-fp-math"="false" "use-soft-float"="false"
}

attributes #7 = {
  nounwind readonly
  "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false"
  "frame-pointer"="all" "less-precise-fpmad"="false"
  "no-infs-fp-math"="false" "no-nans-fp-math"="false"
  "no-signed-zeros-fp-math"="false" "no-trapping-math"="false"
  "stack-protector-buffer-size"="8" "target-cpu"="x86-64"
  "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87"
  "unsafe-fp-math"="false" "use-soft-float"="false"
}

; Short keyword-only groups.
attributes #8 = { nounwind }
attributes #9 = { noreturn nounwind }
attributes #10 = { nounwind readonly }

; ---------------------------------------------------------------------------
; Module-level metadata: module flags ("SDK Version" = [10, 1] and
; "wchar_size" = 4) and the producer identification string.
; ---------------------------------------------------------------------------
!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}

!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]}
!1 = !{i32 1, !"wchar_size", i32 4}
!2 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"}