; ModuleID = 'hotspot-host-x86_64-unknown-linux-gnu.bc' source_filename = "hotspot.cu" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" %struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } %struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } %struct.dim3 = type { i32, i32, i32 } %struct.CUstream_st = type opaque $_ZN4dim3C2Ejjj = comdat any @t_chip = dso_local global float 0x3F40624DE0000000, align 4 @chip_height = dso_local global float 0x3F90624DE0000000, align 4 @chip_width = dso_local global float 0x3F90624DE0000000, align 4 @amb_temp = dso_local global float 8.000000e+01, align 4 @stderr = external dso_local global %struct._IO_FILE*, align 8 @.str = private unnamed_addr constant [11 x i8] c"error: %s\0A\00", align 1 @.str.1 = private unnamed_addr constant [2 x i8] c"w\00", align 1 @.str.2 = private unnamed_addr constant [25 x i8] c"The file was not opened\0A\00", align 1 @.str.3 = private unnamed_addr constant [7 x i8] c"%d\09%g\0A\00", align 1 @.str.4 = private unnamed_addr constant [2 x i8] c"r\00", align 1 @.str.5 = private unnamed_addr constant [25 x i8] c"not enough lines in file\00", align 1 @.str.6 = private unnamed_addr constant [3 x i8] c"%f\00", align 1 @.str.7 = private unnamed_addr constant [20 x i8] c"invalid file format\00", align 1 @.str.8 = private unnamed_addr constant [100 x i8] c"Usage: %s \0A\00", align 1 @.str.9 = private unnamed_addr constant [78 x i8] c"\09 - number of rows/cols in the grid (positive integer)\0A\00", align 1 @.str.10 = private unnamed_addr constant [53 x i8] c"\09 - pyramid heigh(positive integer)\0A\00", align 1 @.str.11 = private unnamed_addr constant [38 x i8] c"\09 - number of iterations\0A\00", align 1 @.str.12 = private unnamed_addr constant [89 x i8] 
c"\09 - name of the file containing the initial temperature values of each cell\0A\00", align 1 @.str.13 = private unnamed_addr constant [86 x i8] c"\09 - name of the file containing the dissipated power values of each cell\0A\00", align 1 @.str.14 = private unnamed_addr constant [42 x i8] c"\09 - name of the output file\0A\00", align 1 @.str.15 = private unnamed_addr constant [29 x i8] c"WG size of kernel = %d X %d\0A\00", align 1 @.str.16 = private unnamed_addr constant [26 x i8] c"unable to allocate memory\00", align 1 @.str.17 = private unnamed_addr constant [94 x i8] c"pyramidHeight: %d\0AgridSize: [%d, %d]\0Aborder:[%d, %d]\0AblockGrid:[%d, %d]\0AtargetBlock:[%d, %d]\0A\00", align 1 @.str.18 = private unnamed_addr constant [43 x i8] c"Start computing the transient temperature\0A\00", align 1 @.str.19 = private unnamed_addr constant [19 x i8] c"Ending simulation\0A\00", align 1 @0 = private unnamed_addr constant [36 x i8] c"_Z14calculate_tempiPfS_S_iiiiffffff\00", align 1 @1 = private constant [35409 x i8] 
c"P\EDU\BA\01\00\10\00@\8A\00\00\00\00\00\00\02\00\01\01@\00\00\00\A8v\00\00\00\00\00\00\00\00\00\00\00\00\00\00\07\00\01\00=\00\00\00\00\00\00\00\00\00\00\00\11\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\7FELF\02\01\013\07\00\00\00\00\00\00\00\02\00\BE\00e\00\00\00\00\00\00\00\00\00\00\00\00v\00\00\00\00\00\00\80s\00\00\00\00\00\00=\05=\00@\008\00\03\00@\00\0A\00\01\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00.text._Z14calculate_tempiPfS_S_iiiiffffff\00.nv.info._Z14calculate_tempiPfS_S_iiiiffffff\00.nv.shared._Z14calculate_tempiPfS_S_iiiiffffff\00.nv.global\00.nv.constant0._Z14calculate_tempiPfS_S_iiiiffffff\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00_Z14calculate_tempiPfS_S_iiiiffffff\00.text._Z14calculate_tempiPfS_S_iiiiffffff\00.nv.info._Z14calculate_tempiPfS_S_iiiiffffff\00.nv.shared._Z14calculate_tempiPfS_S_iiiiffffff\00.nv.global\00blockIdx\00threadIdx\00$_Z14calculate_tempiPfS_S_iiiiffffff$__cuda_sm20_rcp_rn_f32_slowpath\00$_Z14calculate_tempiPfS_S_iiiiffffff$__cuda_sm3x_div_rn_noftz_f32\00$_Z14calculate_tempiPfS_S_iiiiffffff$__cuda_sm3x_div_rn_noftz_f32_slowpath\00$___ZZ14calculate_tempiPfS_S_iiiiffffffE12temp_on_cuda__196\00$___ZZ14calculate_tempiPfS_S_iiiiffffffE13power_on_cuda__198\00$___ZZ14calculate_tempiPfS_S_iiiiffffffE6temp_t__200\00.nv.constant0._Z14calculate_tempiPfS_S_iiiiffffff\00_param\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00V\00\00\00\03\00\07\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\AD\00\00\00\03\00\08\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\DC\00\00\00\03\00\09\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\E7\00\00\00\01\00\09\00\00\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\F0\00\00\00\01\00\09\00\01\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\FA\00\00\00\22\00\07\00`]\00\00\00\00\00\00 
\04\00\00\00\00\00\00?\01\00\00\22\00\07\00\80a\00\00\00\00\00\00`\01\00\00\00\00\00\00\81\01\00\00\22\00\07\00\E0b\00\00\00\00\00\00`\08\00\00\00\00\00\00z\02\00\00\03\00\06\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\002\00\00\00\12\10\07\00\00\00\00\00\00\00\00\00@k\00\00\00\00\00\00\04/\08\00\0A\00\00\00\17\00\00\00\04#\08\00\08\00\00\00\00\00\00\00\04\12\08\00\08\00\00\00\00\00\00\00\04\11\08\00\08\00\00\00\00\00\00\00\04#\08\00\07\00\00\00\00\00\00\00\04\12\08\00\07\00\00\00\00\00\00\00\04\11\08\00\07\00\00\00\00\00\00\00\04#\08\00\06\00\00\00\00\00\00\00\04\12\08\00\06\00\00\00\00\00\00\00\04\11\08\00\06\00\00\00\00\00\00\00\04#\08\00\0A\00\00\00\00\00\00\00\04\12\08\00\0A\00\00\00\C0\00\00\00\04\11\08\00\0A\00\00\00\C0\00\00\00\010\00\00\01*\00\00\04\0A\08\00\09\00\00\00@\01H\00\03\19H\00\04\17\0C\00\00\00\00\00\0D\00D\00\00\F0\11\00\04\17\0C\00\00\00\00\00\0C\00@\00\00\F0\11\00\04\17\0C\00\00\00\00\00\0B\00<\00\00\F0\11\00\04\17\0C\00\00\00\00\00\0A\008\00\00\F0\11\00\04\17\0C\00\00\00\00\00\09\004\00\00\F0\11\00\04\17\0C\00\00\00\00\00\08\000\00\00\F0\11\00\04\17\0C\00\00\00\00\00\07\00,\00\00\F0\11\00\04\17\0C\00\00\00\00\00\06\00(\00\00\F0\11\00\04\17\0C\00\00\00\00\00\05\00$\00\00\F0\11\00\04\17\0C\00\00\00\00\00\04\00 
\00\00\F0\11\00\04\17\0C\00\00\00\00\00\03\00\18\00\00\F0!\00\04\17\0C\00\00\00\00\00\02\00\10\00\00\F0!\00\04\17\0C\00\00\00\00\00\01\00\08\00\00\F0!\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0\11\00\03\1B\FF\00\04\1D\08\00h\09\00\00\D8\09\00\00\04\1C\04\00X]\00\00\04\1E\04\00\B0\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\EF\1F\E0\FD\03!\00\D3rd<3>;\0A\0Amov.u2\00\1B,e\00b;\0Acvta\8D\00\04%\00\13,\\\00\22ld\C8\00\02\18\00nrd2, [\CE\00\1E])\00\1F1)\00\01a0];\0Ast#\00\81[%SP+0],,\00\0A\16\00\128\16\00\222;\B6\00\01\D8\00a1, 
999(\00\02g\00\00\EF\00\18[\9E\01\03M\00\AF1;\0Aret;\0A\0A}\D6\01\1A\FE\02FuncGetAttributes\E1\01\0D#\00\0E\EC\01\0F+\00\06\0F\F7\01\1B\1F1\F7\01Q\1F1\F7\01!\0E\D9\00\0F\02\02\0F\0E8\01\0F\0D\02\8DhDevice\B4\00\0E\0E\02\0E$\00\0F\0F\02\00/32,\00\0B\1F1,\00\18\1F2<\02\13\1F2<\02\1F\1D4<\02\1F2<\02\0C\1F2<\02\13\01_\00\04;\02\0F\D9\00\07\1D]4\00\1F14\00\06\0Fp\02\10\0E\9A\01\0Fq\02\12(32q\02\0B\15\00!12\16\00\09\86\02\1F3\86\02\15\1F3\86\02#2Get\CB\00\0E}\02\05\1B\00\04\DA\00\0F\1C\02\13?3[8W\04.\0F\1B\02\0D\1F3W\04\19\04\B3\01\0D\D0\00\0F\AA\01\06\0F\05\04W\F0\04OccupancyMaxActiveBV\08\FE\03sPerMultiprocessor\9F\01\0F;\00\16\0EB\06\0FC\00%\0EJ\04\0FC\00\1E\1F2\86\00/\1F3\88\02\13O4[32\89\02\1C\1D3\89\02\1F4\89\02\0C\1F4\89\02\19\133\89\02\0F\F1\00\1E\0F\BC\04\00\0FK\00$/2]w\07\00\0FL\00$\0F\1F\05\01\0F\98\00%\0F\A7\07\1D\097\05\186M\05\04,\00\2224-\00\183\CF\03\1F2\CF\03\15\1F2\CF\03L\9FWithFlags\D8\03(\05D\00\0E\E1\03\0FL\00'\0F\EA\030\0CL\00\1F2\98\008\1F3\98\008\1F4H\04\13O5[40\EC\08.\0FH\04\0D\1F5\EC\08\1C\0F\F9\00+\1F]\9C\040\0D\9A\01\0F\A5\040\0D:\02\0F\AE\041\0D\DB\02\0F\B7\041\0D|\03\0F\C0\04I\08-\00\1F3$\0A6\F1\1Evisible .entry _Z14calculate_tempiPfS_S_iiiif\01\00\06\AC\04\00x\00\0F1\00\10\0E\99\04\00\DF\00\0F9\00\17\1F19\00%\1F29\00%/3,\E4\00$\1F49\00%\1F59\00%\1F69\00%\1679\00\1Ff9\00\1A\1F89\00%\1F99\00%/10:\00&\1F1:\00&\1F2:\00&\0F\9A\0A\14O6[19\9B\0A\16\A6pred %p<25U\06\8516 %rs<7\12\00\00\A8\00k%f<36>\8A\06'14%\00\00Z\00]fd<15\9F\06 88\A0\06P\09.shao\00\03\BB\00\124\BB\00\1FZ\01\01\0F0E12\18\00@_on_G\05o[1024]R\00,o3powerS\009\116\A4\00\14tK\00\0F\8F\07\08\1F6\8F\07\12\02v\01O6, 
[$\02\19\1D]B\00\1F5B\00\1B\1E2B\00\1F4B\00\1B\1E1B\00\1F3B\00\1B\1E0B\00\1F2B\00\1A\1E9A\00\1F1A\00\1A\1E8p\08\1F2I\01\1B\1F7B\00\00\0FI\01\1B\1F6B\00\00\0FI\01\1B\1F5B\00\00\0FI\01\1B\0F\CD\09\01\0F\84\00\1B\1F3f\09\00\0F\84\00\1B\0F\A8\09\01\0F\CE\01\1B\1F1\08\01\00\0FB\00\1B#0]\B4\03#to\8C\19\04\8A\00\144H\09\01\1F\00\0A\1C\00\115\1C\00\1F4;\00\05\146\B0\09\0F;\00\00\117\1C\00\1F6;\00\05\148\01\0A\0F;\00\00\119\1C\00\1A8\DA\09\03y\0E\0F3\0A\03\1A9\16\00\033\0A/d74\0A\03\1F54\0A\03\0Bx\0A\123E\00\1B2b\0A\134\89\00\1B4\17\00\02\\\00\122\\\00\15f\17\00\01\A1\00*f1\16\00\115r\00+f2\16\00\01q\00*f3\16\00\116p\00+f4\16\00\01o\00\1Bfn\00\126n\00\196\D4\0F\DA6, 1117782016\B5\00\137\FA\00\0A1\00\00\BA\01zctaid.x/\00\139/\00\197/\00\158/\00\1By/\00\03A\01\198/\00\00\DD\01\1Et\\\00#10X\01\199\BB\0B\130.\00\1Cy.\00\02o\01!30\18\03\02!\05%7,\1C\01\07\16\00%8,\8A\01\83;\0Adiv.rn\1A\00\229,5\00:%f8G\01\127\89\01\179F\00510,\BB\01V;\0ArcpG\00!11\E2\01\1A0D\00\128\B7\01(11E\00%2,\EA\01\0CE\00\113.\00\0B\12\02\128\E6\01(13E\00%4,\19\02\0CE\00!5,!\00\0BE\00\03\83\02\115E\00\03X\01$1,V\03S;\0Ashl\10\09332,\1D\00\09\E6\15A33, ;\02Bub.s\12\00#4,\18\00\005\00\0C\9C\01\02\95\03(34q\00\1F5q\00\03#6,\1D\00\191_\00\177_\00\0B\B4\02\131\07\18(37_\00&8,}\00\08\18\00%9,\8C\02\83;\0Amul.lod\00340,9\00\00'\00\074\00541,\D3\03\08\95\00542,7\00\1C4\9B\18\03r\04(42I\00&3,\B3\00\08\18\00%4,P\03\0C\95\00#5,9\00\00'\00\084\00%6,\7F\04\09\95\00&7,7\00\0C*\01\122S\03\184*\01648,\B3\00T;\0AaddJ\00#9,\1F\00,15H\00\02m\03\184&\01650,f\00\08H\00351,\1F\00\0EH\00\02\19\02(51H\00\192\90\00\06\18\00&3,\EB\03\09`\00#4,7\00\00%\00\0Bb\00\03\CB\05\185;\01)55\AA\00\06\18\00&6,{\04\09\C2\00#7,7\00\00%\00\0Cb\00\02\E9\01\185T\01658,\80\00\0B0\00\03/\06(580\00&9,N\00\0C0\00\02l\01\185l\01\146l\01\193H\02661,e\00\0BI\02362,9\00\00(\00\085\00\05X\01\1A4X\01664,8\00\0CX\01\02\C7\06(r6X\01)65\7F\00rsetp.ltN\003p1,\22\00\F2\0C0;\0A@%p1 bra 
LBB6_5;\0Abra.uni\10\0021;\0A\08\00\17:[\00\196[\00\06\18\00\04\E3\05\1A3\BC\02368,\1E\00#-1\8A\00\14g\8A\00#2,Q\00\00'\00\01\8D\00\1F2\8D\00\07\132\8D\00\182\8D\00\1992\01\0A\E8\00#3,\22\00\02\E8\00\1F3[\00\07\133[\00\173[\00)70[\00\06\18\00\181\F1\01\06\E8\00372,\1E\00\0E\E8\00#4,Q\00\00'\00\01\8D\00\1F4\8D\00\07\134\8D\00\124\8D\00\03\93\0A\05\8E\00\03\8A\03\02u\00&d1h\02\148\BE\05\032\00$2,!\00\132\A8\00\03\19\00$3,R\00\01'\00\08\C0\06\01\DC\01\00#\00\0Ae\00\194\F0\03\08e\00$5,!\00\176\96\0E rdJ\00\0F~\0F \03s\0A\02'\0F\05L\00\02\AF\0A*16\C7\00(8,\1D\00\195\B1\00\199?\04\07\B1\00\132\F6\00\1C9\16\01\00\B3\0B\06V\00\1822\08\00\1D\00\02\A7\07'6;\93\01\142\1E\05\198-\01/23\92\01\05\00\A2\0D\03!\00\0B|\00$5,Q\00\01'\00\08\92\01\227,\82\00\1A5e\00\196\92\01\07y\01\00\22\0A\0F\A5\10!\0Fz\01\02\132\0E\01\1A7\C8\00$9,\84\00\0A\93\01\01\12\0A\066\00\189\B2\00/31\93\01\05\02\D9\08-d3\A9\02\02d\08\04V\00)32\93\01\2233\93\01\1B7>\03\135>\03\D85:\0Abar.sync 0\BF\03\193\D4\06\0A\A8\03#5,\22\00!-1\A6\03\165\A6\03\0Ch\00\136h\00\1863\04\195\\\00Tneg.sN\18\00\1E\00\09\9E\1F#39:\17\09W\00\138W\00'7:,\00!74\C4\04\0D=\00/74>\00\04*8:9\1C\012\00\0B7\06\155\8E\17\06\1C\01\05V\02\1A2\F3\04\0F\DB\05\04\117\E9\01\187\DB\05#leK\01#6,Q\00\00'\00\01M\01\166M\01+10\B9\00\139\B9\00\179N\01/80\8E\00\03(81\8E\00\06o\09382,\1E\00\00<\00\08\A8\00#3,\1F\00)14F\01\03g\0A\0C\F5\06\05\F6\06\190\85\01\1296\09\0D@\00/79A\00\06\181A\00\224,4\00\0C\88\01\03\9E\0A\08\E2\00\193\16\09\0A\A4\02\01\F0\04\168\A4\02\177W\01\0D\CB\00\044\07\1815\07\1A8u\09\06\A7\02\01\FF\02\1B8\E4\00\023\0A\0CY\00\04\A5\06(13\E3\00\1D8\A9\02\02?\00/84@\00\06\184@\00\01/\02\1D1\C1\0B\02g\0F(r6#\01\06\AB\02\09U\0C/87\9E\07\03\118\AB\02\1F8\AB\02\02#8,Q\00\00'\00\01T\01\178T\01\1C6\BB\00\04b\04'15T\01/90\90\00\03\199.\08\06\AD\02392,\1E\00\00<\00\09U\03\01\13\00\0F\AD\02\00\02`\08\0C\8E\00\147\8E\00\196\89\01\0F\AD\02\00\02\1F\03\1F9A\00\06\09s\04\228,4\00\0C\8A\01\02\02\0B\081\0B\1F9\0D\0C\05394,\1F\00\1D-\D1\01\02\B3\0A\189\F5\0
2/95H\00\05#6,\1F\00\0CG\00\03#\11\189\19\02/97:\0C\04398,\1F\00\0D\8F\00\02T\10)r9\D7\00\1F9H\00\04C100, \00\0C\90\00\138P%\180q\06%10\FA\06\196\1B\02F102,\8C\05\04\E7\03\04\95\02$9,<\00\01*\00\01\97\02\179\97\02\0D\C8\01\04\FC\05(18\97\02\09b\00\08\0E\02\01\14\05\0CE\00$20E\00\08\89\05\07|\0A\1F8F\00\01/10G\00\05\192B\05\01\8D\11\005\00\0F\0E\02\03)11\1D\01\05W\02*726\01&4,:\05\0C\CB\03\02\D5\0A#10\98\00\124\E1\0C\1608\01,22\AC\00\04\AD\05\182\E5\0C)12e\00\08\F3\00\02\C7\05\0DG\00\04C\0C\182j\05\1A1\C5\00\0EG\00\1F3G\00\06\09X\05\131<\06\1F4\01\03\03)14!\01\05I\03*76:\01&6,Q\05\0Cp\02\03\9D\01\2205\07\00\126:\01\07\1C\0E\1D2\90\0D\04\C4\05(24\F3\00\195e\00\08\F3\00\02[\00\0DG\00\04J\09\182P\05)16\C5\00\0EG\00\1F6G\00\06\09\09\05\02\1F\0C?145\F3\03\03)17!\01\05<\04*80:\01&8,\01\05\0Dt\02$2,=\00\01+\00\02:\01\07\C9\0E,28\AC\00\04t\05(27\F3\00\0Ae\00\08\F3\00\02\18\07\0DG\00\046\09\182\F3\03)19\C5\00\0EG\00\1F9G\00\06\1997\0A\02K\03/46\E5\04\03\1A2\E8\07\120g\0F\0A*\00\03\D8\03\1C0r\00\143 
\04(30\B9\00\06h\04\1A8)\05\05\E5\0E\0F\B7\02\00$3,;\00\01)\00\02}\01\07\EB\0F,46}\00\04\F1\03\143\9E\09\02\93\1E\151\C1\00\148\86\00\02\ED\06\19s\98\04)12\18\06\07\A5\00\193\BE\00\071\06\03\BE\03\00!\00\1D1\8C\11$4,V\00\01*\00\02\C0\00\07\1E\10,40\C0\00\04\D4\09\193j\04/15\98\00\05\196\98\00\08\8F\05\127e\08\06\96\08\121\FF\0E\1316\03,16}\0A\02\F0\0F\141\07\00\03\EC\02\1F5\AF\00\09\05\\\11\193\AF\00\199\EE\07\07y\05\1F0G\01\05\02\A2\0F\01!\00\0EG\01$6,V\00\01*\00\02\98\00\1F6\98\00\09\04\BE\04\193\BE\04/22\98\00\05\1F3G\01\05-24G\01\02\D1\0F#12,\06\1D3G\01$7,m\00\01.\00\02\AF\00\1F7\AF\00\09\04v\0A\193&\05/26\F6\01\04.27\EC\05\06\01\14$8,=\00\01+\00\02\7F\00\1F8\7F\00\09\04^\05)36\7F\00\1F8\7F\00\05\1E91\05\06\FE\00$9,=\00\01+\00\02\7F\00\1F9\7F\00\09\041\05\1931\05/30\AD\01\04.31Z\09\05\FE\00\03\D9\04\133\07\00\02,\02/20\7F\00\09\04\\\09\193i\05/32\7F\00\05\1E3\A3\08\06\F4\14$1,=\00\01+\00\02\7F\00\1F1\7F\00\09\04\A1\05)39\B2\04\00$\0C\0F\B2\04\03\192z\11\1F4,\12\05/35\A5\137\02.\1A:d35\DC\11$7,\83\00\0B+\12(8,6\00\197\B1\00\0F\BE\13\06\02q\0F-d3\BE\13\02g\0E\04V\00(40B\13\128B\13\2241f\1F\00L$\03\1B\00\12d\1E\1C\188.\00%9,h\1C\0D/\00!2,\22\00\09D\01/42p\138\02P\0B+d4\DD\00'4,\1D\00*37\1D\00\03\07 \1E4\FA\00#20\FA\00\1E5\CB\00\01\D3\1C(20w\01)46\AD\0A\08w\01\02\C6\01\1D4\F1\13\194\C6\01\1B4\9A\00'9,$\00\0A\9A\00\131\9A\00\199\D6\14\1A5i\0C\07;\15\02\B9\1A\01!\00\0A\82\00(52\82\00*51\1D\00'3,$\00\0A\82\00\122\82\00#53e\06\05\A9\1D\00\E9\05\02\A2\00\00&\00\0D9\01\114\18\00a3;\0Afma5\00\02\9A,P\02J0dC0\01\00\01\1A\00\08\D4\15\01\AA#\03C\1E\0D`\00!6,\22\00\0C`\00&7,f\00\01#\00)d3:\01\194\F5\09\08:\01$5,!\00\0A3\03(563\03(55\A1\00\135\1D\01\0Al\18)57\94\0B\08e\00\03E\00\1D7e\00\199e\00\08j\03#26e\00\1D9\82\01#7,\85\00\00&\00\0D\22\01\118\18\00\1C7\22\01\1F9\82\01\0C\09|\00%8,\0A \0E\15\04\010\00,28a\00\02L 
\04h\00\02U\00\187U\00\05;\04\04\EB\11\05\D2\00\01l\06\01!\00\0Bo\04\05\A9\06#88&\1C\064\00%2,:\00.31\A6\00\02\1F\00\03o\01\07\A6\00'3,\AD\00-12\C6\00%4,\CB\04\05-\00\02^\00\02{\00\03)\00\133\1B\00\194\EE\04\00\B5\1C\0F\B0(\19\0F\E6\04\02\126\CD\03)60+\02\02\DE\1C\04\1D\00\0B\E6\04\00\1A\1D\06$\00\1A4\9E\19\126\0B\18\1C3M\0F\144g\0C/40\0D\18\04?134s\0C\04\09\14!\07\FD\11\02i\04\01\1F\00\05I\14\14n\8D\0C\03\D0\09\133\DC\0F\02D\0F\172\0A\0E,42\A3\00\04\8D\0C;41:\1A\00\04r\09\134\E7\0B\108\D9\07\06\85\0A\01$\13$ndQ+\01s\00\17sb\0C\22eq\1B\00\11p\D0\04\01!\00\00\EA\08\10!\11\00\07\91\00\0C\99\15$43w\00'3:\FB\03\1F6\02\1C\06\00H\1E\03!\00\0A\02\1C/66+\021\03\AA&\0A\F5\05\02S\1E\05\1D\00\09\FB\1B\1F6=\08\06\02\80\1D-d6=\08\00\B4\1D\06V\00\187'\06\02 \02\00#\00\0A:\09/72:\097\2273x\00\0B\95\00'4,\1D\00*65\1D\00(5,$\00\0A\0F\03\2275\0F\03\0D\DB\01\05Y\1E\1A4\0F\03\0A&\00\04i\0C\194i\0C/37\97\0D\05\02\DC\05\01!\00\1F1\01\10\04/38\01\10\05'46\DD\02\1F5\DD\02\05#6,\1F\00\0E\DD\02\02\F8\02\166\DD\02\08;\0F\0Cw\00$47w\00\187\DD\02/76\DD\02\05\00\DB\1A\03!\00\0A\DD\02/78\DD\020\2279^\00\198\DC\01\00\F7\1A\07\1D\00\09\1A\0B\1F8E\1D\06\02\FD\1A-d8E\1D\00*\1A\06V\00\188\EE\1F\133\E7\07883]\D8\1E\00\C5\19\04\93)\09}\00\1F5\D9\1E\05\02<\19\01!\00\0B}\00$7,R\00\01'\00\09\A8\02\2287\A8\02\0D\00!\04n\0D\C048:\0Aret;\0A\0A}\0A\00\00\00", section ".nv_fatbin", align 8 @__cuda_fatbin_wrapper = internal constant { i32, i32, i8*, i8* } { i32 1180844977, i32 1, i8* getelementptr inbounds ([35409 x i8], [35409 x i8]* @1, i64 0, i64 0), i8* null }, section ".nvFatBinSegment", align 8 @__cuda_gpubin_handle = internal global i8** null, align 8 @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* bitcast (void (i8*)* @__cuda_module_ctor to void ()*), i8* null }] ; Function Attrs: noinline optnone uwtable define dso_local void @_Z5fatalPc(i8* %s) #0 { entry: %s.addr = alloca i8*, align 8 store i8* %s, i8** %s.addr, align 8 %0 = load 
%struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 ; operand list of the "%0 = load" opened at the end of the previous physical line
  %1 = load i8*, i8** %s.addr, align 8
  ; fprintf(stderr, "error: %s\n", s); the function then simply returns.
  %call = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %0, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), i8* %1)
  ret void
}

declare dso_local i32 @fprintf(%struct._IO_FILE*, i8*, ...) #1

; writeoutput(float* vect, i32 grid_rows, i32 grid_cols, i8* file)
;
; Opens `file` with mode "w" and writes one text line per grid cell, visiting
; rows in the outer loop and columns in the inner loop. Each line is produced
; by sprintf into a 256-byte stack buffer using the format "%d\t%g\n", where
; the integer is a running counter (%index, starting at 0) and the float is
; vect[i * grid_cols + j] widened to double. The file is closed at the end.
;
; Fix relative to the previous revision: when fopen returns null, the function
; now returns immediately after printing "The file was not opened\n". It used
; to fall through into the loops and hand the null FILE* to fputs and fclose.
;
; Function Attrs: noinline optnone uwtable
define dso_local void @_Z11writeoutputPfiiPc(float* %vect, i32 %grid_rows, i32 %grid_cols, i8* %file) #0 {
entry:
  %vect.addr = alloca float*, align 8
  %grid_rows.addr = alloca i32, align 4
  %grid_cols.addr = alloca i32, align 4
  %file.addr = alloca i8*, align 8
  %i = alloca i32, align 4
  %j = alloca i32, align 4
  %index = alloca i32, align 4
  %fp = alloca %struct._IO_FILE*, align 8
  %str = alloca [256 x i8], align 16
  store float* %vect, float** %vect.addr, align 8
  store i32 %grid_rows, i32* %grid_rows.addr, align 4
  store i32 %grid_cols, i32* %grid_cols.addr, align 4
  store i8* %file, i8** %file.addr, align 8
  store i32 0, i32* %index, align 4
  %0 = load i8*, i8** %file.addr, align 8
  %call = call %struct._IO_FILE* @fopen(i8* %0, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i64 0, i64 0))
  store %struct._IO_FILE* %call, %struct._IO_FILE** %fp, align 8
  %cmp = icmp eq %struct._IO_FILE* %call, null
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([25 x i8], [25 x i8]* @.str.2, i64 0, i64 0))
  ; Nothing was opened, so there is nothing to write or close: bail out
  ; instead of continuing with a null stream.
  ret void

if.end:                                           ; preds = %entry
  store i32 0, i32* %i, align 4
  br label %for.cond

for.cond:                                         ; preds = %for.inc10, %if.end
  ; Outer loop: i < grid_rows
  %1 = load i32, i32* %i, align 4
  %2 = load i32, i32* %grid_rows.addr, align 4
  %cmp2 = icmp slt i32 %1, %2
  br i1 %cmp2, label %for.body, label %for.end12

for.body:                                         ; preds = %for.cond
  store i32 0, i32* %j, align 4
  br label %for.cond3

for.cond3:                                        ; preds = %for.inc, %for.body
  ; Inner loop: j < grid_cols
  %3 = load i32, i32* %j, align 4
  %4 = load i32, i32* %grid_cols.addr, align 4
  %cmp4 = icmp slt i32 %3, %4
  br i1 %cmp4, label %for.body5, label %for.end

for.body5:                                        ; preds = %for.cond3
  %arraydecay = getelementptr inbounds [256 x i8], [256 x i8]* %str, i64 0, i64 0
  %5 = load i32, i32* %index, align 4
  %6 = load float*, float** %vect.addr, align 8
  %7 = load i32, i32* %i, align 4
  %8 = load i32, i32* %grid_cols.addr, align 4
  %mul = mul nsw i32 %7, %8
  %9 = load i32, i32* %j, align 4
  %add = add nsw i32 %mul, %9
  %idxprom = sext i32 %add to i64
  %arrayidx = getelementptr inbounds float, float* %6, i64 %idxprom
  %10 = load float, float* %arrayidx, align 4
  ; Varargs call: the float must be passed as double.
  %conv = fpext float %10 to double
  %call6 = call i32 (i8*, i8*, ...) @sprintf(i8* %arraydecay, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.3, i64 0, i64 0), i32 %5, double %conv) #8
  %arraydecay7 = getelementptr inbounds [256 x i8], [256 x i8]* %str, i64 0, i64 0
  %11 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8
  %call8 = call i32 @fputs(i8* %arraydecay7, %struct._IO_FILE* %11)
  %12 = load i32, i32* %index, align 4
  %inc = add nsw i32 %12, 1
  store i32 %inc, i32* %index, align 4
  br label %for.inc

for.inc:                                          ; preds = %for.body5
  %13 = load i32, i32* %j, align 4
  %inc9 = add nsw i32 %13, 1
  store i32 %inc9, i32* %j, align 4
  br label %for.cond3

for.end:                                          ; preds = %for.cond3
  br label %for.inc10

for.inc10:                                        ; preds = %for.end
  %14 = load i32, i32* %i, align 4
  %inc11 = add nsw i32 %14, 1
  store i32 %inc11, i32* %i, align 4
  br label %for.cond

for.end12:                                        ; preds = %for.cond
  %15 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8
  %call13 = call i32 @fclose(%struct._IO_FILE* %15)
  ret void
}

declare dso_local %struct._IO_FILE* @fopen(i8*, i8*) #1

declare dso_local i32 @printf(i8*, ...) #1

; Function Attrs: nounwind
declare dso_local i32 @sprintf(i8*, i8*, ...)
#2 ; attribute-group reference that closes the declaration opened at the end of the previous physical line

declare dso_local i32 @fputs(i8*, %struct._IO_FILE*) #1

declare dso_local i32 @fclose(%struct._IO_FILE*) #1

; readinput(float* vect, i32 grid_rows, i32 grid_cols, i8* file)
;
; Opens `file` with mode "r" and fills vect[i * grid_cols + j] for every
; i in [0, grid_rows) and j in [0, grid_cols). For each cell it reads one line
; (fgets, at most 256 bytes) and parses a float from it with sscanf("%f").
; After each fgets it calls feof; if that is non-zero it reports
; "not enough lines in file" through fatal. If sscanf does not return 1 it
; reports "invalid file format" through fatal. In both cases control then
; continues with the store for that cell, because these branches rejoin the
; normal path after the call. The file is closed at the end.
;
; Fix relative to the previous revision: when fopen returns null, the function
; now returns immediately after printing "The file was not opened\n". It used
; to fall through and hand the null FILE* to fgets, feof and fclose.
;
; NOTE(review): the result of fgets (%call7) is never inspected; only feof is
; checked. Left as-is here to keep this change limited to the null-stream fix.
;
; Function Attrs: noinline optnone uwtable
define dso_local void @_Z9readinputPfiiPc(float* %vect, i32 %grid_rows, i32 %grid_cols, i8* %file) #0 {
entry:
  %vect.addr = alloca float*, align 8
  %grid_rows.addr = alloca i32, align 4
  %grid_cols.addr = alloca i32, align 4
  %file.addr = alloca i8*, align 8
  %i = alloca i32, align 4
  %j = alloca i32, align 4
  %fp = alloca %struct._IO_FILE*, align 8
  %str = alloca [256 x i8], align 16
  %val = alloca float, align 4
  store float* %vect, float** %vect.addr, align 8
  store i32 %grid_rows, i32* %grid_rows.addr, align 4
  store i32 %grid_cols, i32* %grid_cols.addr, align 4
  store i8* %file, i8** %file.addr, align 8
  %0 = load i8*, i8** %file.addr, align 8
  %call = call %struct._IO_FILE* @fopen(i8* %0, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.4, i64 0, i64 0))
  store %struct._IO_FILE* %call, %struct._IO_FILE** %fp, align 8
  %cmp = icmp eq %struct._IO_FILE* %call, null
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([25 x i8], [25 x i8]* @.str.2, i64 0, i64 0))
  ; No stream to read from or close: bail out instead of continuing with a
  ; null FILE*.
  ret void

if.end:                                           ; preds = %entry
  store i32 0, i32* %i, align 4
  br label %for.cond

for.cond:                                         ; preds = %for.inc16, %if.end
  ; Outer loop: i <= grid_rows - 1
  %1 = load i32, i32* %i, align 4
  %2 = load i32, i32* %grid_rows.addr, align 4
  %sub = sub nsw i32 %2, 1
  %cmp2 = icmp sle i32 %1, %sub
  br i1 %cmp2, label %for.body, label %for.end18

for.body:                                         ; preds = %for.cond
  store i32 0, i32* %j, align 4
  br label %for.cond3

for.cond3:                                        ; preds = %for.inc, %for.body
  ; Inner loop: j <= grid_cols - 1
  %3 = load i32, i32* %j, align 4
  %4 = load i32, i32* %grid_cols.addr, align 4
  %sub4 = sub nsw i32 %4, 1
  %cmp5 = icmp sle i32 %3, %sub4
  br i1 %cmp5, label %for.body6, label %for.end

for.body6:                                        ; preds = %for.cond3
  %arraydecay = getelementptr inbounds [256 x i8], [256 x i8]* %str, i64 0, i64 0
  %5 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8
  %call7 = call i8* @fgets(i8* %arraydecay, i32 256, %struct._IO_FILE* %5)
  %6 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8
  %call8 = call i32 @feof(%struct._IO_FILE* %6) #8
  %tobool = icmp ne i32 %call8, 0
  br i1 %tobool, label %if.then9, label %if.end10

if.then9:                                         ; preds = %for.body6
  ; Reports "not enough lines in file", then rejoins the normal path.
  call void @_Z5fatalPc(i8* getelementptr inbounds ([25 x i8], [25 x i8]* @.str.5, i64 0, i64 0))
  br label %if.end10

if.end10:                                         ; preds = %if.then9, %for.body6
  %arraydecay11 = getelementptr inbounds [256 x i8], [256 x i8]* %str, i64 0, i64 0
  %call12 = call i32 (i8*, i8*, ...) @sscanf(i8* %arraydecay11, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.6, i64 0, i64 0), float* %val) #8
  %cmp13 = icmp ne i32 %call12, 1
  br i1 %cmp13, label %if.then14, label %if.end15

if.then14:                                        ; preds = %if.end10
  ; Reports "invalid file format", then rejoins the normal path.
  call void @_Z5fatalPc(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str.7, i64 0, i64 0))
  br label %if.end15

if.end15:                                         ; preds = %if.then14, %if.end10
  ; vect[i * grid_cols + j] = val
  %7 = load float, float* %val, align 4
  %8 = load float*, float** %vect.addr, align 8
  %9 = load i32, i32* %i, align 4
  %10 = load i32, i32* %grid_cols.addr, align 4
  %mul = mul nsw i32 %9, %10
  %11 = load i32, i32* %j, align 4
  %add = add nsw i32 %mul, %11
  %idxprom = sext i32 %add to i64
  %arrayidx = getelementptr inbounds float, float* %8, i64 %idxprom
  store float %7, float* %arrayidx, align 4
  br label %for.inc

for.inc:                                          ; preds = %if.end15
  %12 = load i32, i32* %j, align 4
  %inc = add nsw i32 %12, 1
  store i32 %inc, i32* %j, align 4
  br label %for.cond3

for.end:                                          ; preds = %for.cond3
  br label %for.inc16

for.inc16:                                        ; preds = %for.end
  %13 = load i32, i32* %i, align 4
  %inc17 = add nsw i32 %13, 1
  store i32 %inc17, i32* %i, align 4
  br label %for.cond

for.end18:                                        ; preds = %for.cond
  %14 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8
  %call19 = call i32 @fclose(%struct._IO_FILE* %14)
  ret void
}

declare dso_local i8* @fgets(i8*, i32, %struct._IO_FILE*) #1

; Function Attrs: nounwind
declare dso_local i32 @feof(%struct._IO_FILE*) #2

; Function Attrs: nounwind
declare dso_local i32 @sscanf(i8*, i8*, ...)
#2
; Host-side launch stub for the calculate_temp kernel.
;
; The function does no computation itself. It:
;   1. spills each of its 14 parameters to its own stack slot,
;   2. fills %kernel_args, an array of 14 i8* entries, with the addresses of
;      those slots in parameter order,
;   3. calls __cudaPopCallConfiguration to obtain the grid dim3, block dim3,
;      shared-memory size and stream,
;   4. copies each 12-byte dim3 into an { i64, i32 } temporary so it can be
;      passed as an (i64, i32) pair,
;   5. calls cudaLaunchKernel, passing this function's own address as the
;      first argument.
; The i32 results of __cudaPopCallConfiguration (%28) and cudaLaunchKernel
; (%call) are not used.
;
; Function Attrs: noinline optnone uwtable
define dso_local void @_Z14calculate_tempiPfS_S_iiiiffffff(i32 %iteration, float* %power, float* %temp_src, float* %temp_dst, i32 %grid_cols, i32 %grid_rows, i32 %border_cols, i32 %border_rows, float %Cap, float %Rx, float %Ry, float %Rz, float %step, float %time_elapsed) #0 {
entry:
  ; One stack slot per parameter.
  %iteration.addr = alloca i32, align 4
  %power.addr = alloca float*, align 8
  %temp_src.addr = alloca float*, align 8
  %temp_dst.addr = alloca float*, align 8
  %grid_cols.addr = alloca i32, align 4
  %grid_rows.addr = alloca i32, align 4
  %border_cols.addr = alloca i32, align 4
  %border_rows.addr = alloca i32, align 4
  %Cap.addr = alloca float, align 4
  %Rx.addr = alloca float, align 4
  %Ry.addr = alloca float, align 4
  %Rz.addr = alloca float, align 4
  %step.addr = alloca float, align 4
  %time_elapsed.addr = alloca float, align 4
  ; Outputs of __cudaPopCallConfiguration.
  %grid_dim = alloca %struct.dim3, align 8
  %block_dim = alloca %struct.dim3, align 8
  %shmem_size = alloca i64, align 8
  %stream = alloca i8*, align 8
  ; { i64, i32 } views of the two dim3 values.
  %grid_dim.coerce = alloca { i64, i32 }, align 8
  %block_dim.coerce = alloca { i64, i32 }, align 8
  store i32 %iteration, i32* %iteration.addr, align 4
  store float* %power, float** %power.addr, align 8
  store float* %temp_src, float** %temp_src.addr, align 8
  store float* %temp_dst, float** %temp_dst.addr, align 8
  store i32 %grid_cols, i32* %grid_cols.addr, align 4
  store i32 %grid_rows, i32* %grid_rows.addr, align 4
  store i32 %border_cols, i32* %border_cols.addr, align 4
  store i32 %border_rows, i32* %border_rows.addr, align 4
  store float %Cap, float* %Cap.addr, align 4
  store float %Rx, float* %Rx.addr, align 4
  store float %Ry, float* %Ry.addr, align 4
  store float %Rz, float* %Rz.addr, align 4
  store float %step, float* %step.addr, align 4
  store float %time_elapsed, float* %time_elapsed.addr, align 4
  ; kernel_args[k] = address of the k-th parameter's slot, k = 0..13.
  %kernel_args = alloca i8*, i64 14, align 16
  %0 = bitcast i32* %iteration.addr to i8*
  %1 = getelementptr i8*, i8** %kernel_args, i32 0
  store i8* %0, i8** %1
  %2 = bitcast float** %power.addr to i8*
  %3 = getelementptr i8*, i8** %kernel_args, i32 1
  store i8* %2, i8** %3
  %4 = bitcast float** %temp_src.addr to i8*
  %5 = getelementptr i8*, i8** %kernel_args, i32 2
  store i8* %4, i8** %5
  %6 = bitcast float** %temp_dst.addr to i8*
  %7 = getelementptr i8*, i8** %kernel_args, i32 3
  store i8* %6, i8** %7
  %8 = bitcast i32* %grid_cols.addr to i8*
  %9 = getelementptr i8*, i8** %kernel_args, i32 4
  store i8* %8, i8** %9
  %10 = bitcast i32* %grid_rows.addr to i8*
  %11 = getelementptr i8*, i8** %kernel_args, i32 5
  store i8* %10, i8** %11
  %12 = bitcast i32* %border_cols.addr to i8*
  %13 = getelementptr i8*, i8** %kernel_args, i32 6
  store i8* %12, i8** %13
  %14 = bitcast i32* %border_rows.addr to i8*
  %15 = getelementptr i8*, i8** %kernel_args, i32 7
  store i8* %14, i8** %15
  %16 = bitcast float* %Cap.addr to i8*
  %17 = getelementptr i8*, i8** %kernel_args, i32 8
  store i8* %16, i8** %17
  %18 = bitcast float* %Rx.addr to i8*
  %19 = getelementptr i8*, i8** %kernel_args, i32 9
  store i8* %18, i8** %19
  %20 = bitcast float* %Ry.addr to i8*
  %21 = getelementptr i8*, i8** %kernel_args, i32 10
  store i8* %20, i8** %21
  %22 = bitcast float* %Rz.addr to i8*
  %23 = getelementptr i8*, i8** %kernel_args, i32 11
  store i8* %22, i8** %23
  %24 = bitcast float* %step.addr to i8*
  %25 = getelementptr i8*, i8** %kernel_args, i32 12
  store i8* %24, i8** %25
  %26 = bitcast float* %time_elapsed.addr to i8*
  %27 = getelementptr i8*, i8** %kernel_args, i32 13
  store i8* %26, i8** %27
  ; Fetch the launch configuration; the i32 result %28 is not checked.
  %28 = call i32 @__cudaPopCallConfiguration(%struct.dim3* %grid_dim, %struct.dim3* %block_dim, i64* %shmem_size, i8** %stream)
  %29 = load i64, i64* %shmem_size, align 8
  %30 = load i8*, i8** %stream, align 8
  ; grid dim3 (12 bytes) -> { i64, i32 } -> (%34, %36)
  %31 = bitcast { i64, i32 }* %grid_dim.coerce to i8*
  %32 = bitcast %struct.dim3* %grid_dim to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %31, i8* align 8 %32, i64 12, i1 false)
  %33 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 0
  %34 = load i64, i64* %33, align 8
  %35 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 1
  %36 = load i32, i32* %35, align 8
  ; block dim3 (12 bytes) -> { i64, i32 } -> (%40, %42)
  %37 = bitcast { i64, i32 }* %block_dim.coerce to i8*
  %38 = bitcast %struct.dim3* %block_dim to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %37, i8* align 8 %38, i64 12, i1 false)
  %39 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 0
  %40 = load i64, i64* %39, align 8
  %41 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 1
  %42 = load i32, i32* %41, align 8
  %43 = bitcast i8* %30 to %struct.CUstream_st*
  ; The first argument is this stub's own address; the i32 result %call is
  ; not checked.
  %call = call i32 @cudaLaunchKernel(i8* bitcast (void (i32, float*, float*, float*, i32, i32, i32, i32, float, float, float, float, float, float)* @_Z14calculate_tempiPfS_S_iiiiffffff to i8*), i64 %34, i32 %36, i64 %40, i32 %42, i8** %kernel_args, i64 %29, %struct.CUstream_st* %43)
  br label %setup.end

setup.end:                                        ; preds = %entry
  ret void
}

declare dso_local i32 @__cudaPopCallConfiguration(%struct.dim3*, %struct.dim3*, i64*, i8**)

declare dso_local i32 @cudaLaunchKernel(i8*, i64, i32, i64, i32, i8**, i64, %struct.CUstream_st*)

; Function Attrs: argmemonly nounwind willreturn
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #3

; Function Attrs: noinline optnone uwtable
define dso_local i32 @_Z17compute_tran_tempPfPS_iiiiiiii(float* %MatrixPower, float** %MatrixTemp, i32 %col, i32 %row, i32 %total_iterations, i32 %num_iterations, i32 %blockCols, i32 %blockRows, i32 %borderCols, i32 %borderRows) #0 {
entry:
  ; One stack slot per parameter, followed by the function's locals.
  %MatrixPower.addr = alloca float*, align 8
  %MatrixTemp.addr = alloca float**, align 8
  %col.addr = alloca i32, align 4
  %row.addr = alloca i32, align 4
  %total_iterations.addr = alloca i32, align 4
  %num_iterations.addr = alloca i32, align 4
  %blockCols.addr = alloca i32, align 4
  %blockRows.addr = alloca i32, align 4
  %borderCols.addr = alloca i32, align 4
  %borderRows.addr = alloca i32, align 4
  %dimBlock = alloca %struct.dim3, align 4
  %dimGrid =
alloca %struct.dim3, align 4 %grid_height = alloca float, align 4 %grid_width = alloca float, align 4 %Cap = alloca float, align 4 %Rx = alloca float, align 4 %Ry = alloca float, align 4 %Rz = alloca float, align 4 %max_slope = alloca float, align 4 %step = alloca float, align 4 %t = alloca float, align 4 %time_elapsed = alloca float, align 4 %src = alloca i32, align 4 %dst = alloca i32, align 4 %temp = alloca i32, align 4 %agg.tmp = alloca %struct.dim3, align 4 %agg.tmp35 = alloca %struct.dim3, align 4 %agg.tmp.coerce = alloca { i64, i32 }, align 4 %agg.tmp35.coerce = alloca { i64, i32 }, align 4 store float* %MatrixPower, float** %MatrixPower.addr, align 8 store float** %MatrixTemp, float*** %MatrixTemp.addr, align 8 store i32 %col, i32* %col.addr, align 4 store i32 %row, i32* %row.addr, align 4 store i32 %total_iterations, i32* %total_iterations.addr, align 4 store i32 %num_iterations, i32* %num_iterations.addr, align 4 store i32 %blockCols, i32* %blockCols.addr, align 4 store i32 %blockRows, i32* %blockRows.addr, align 4 store i32 %borderCols, i32* %borderCols.addr, align 4 store i32 %borderRows, i32* %borderRows.addr, align 4 call void @_ZN4dim3C2Ejjj(%struct.dim3* %dimBlock, i32 16, i32 16, i32 1) %0 = load i32, i32* %blockCols.addr, align 4 %1 = load i32, i32* %blockRows.addr, align 4 call void @_ZN4dim3C2Ejjj(%struct.dim3* %dimGrid, i32 %0, i32 %1, i32 1) %2 = load float, float* @chip_height, align 4 %3 = load i32, i32* %row.addr, align 4 %conv = sitofp i32 %3 to float %div = fdiv float %2, %conv store float %div, float* %grid_height, align 4 %4 = load float, float* @chip_width, align 4 %5 = load i32, i32* %col.addr, align 4 %conv1 = sitofp i32 %5 to float %div2 = fdiv float %4, %conv1 store float %div2, float* %grid_width, align 4 %6 = load float, float* @t_chip, align 4 %conv3 = fpext float %6 to double %mul = fmul contract double 8.750000e+05, %conv3 %7 = load float, float* %grid_width, align 4 %conv4 = fpext float %7 to double %mul5 = fmul contract 
double %mul, %conv4 %8 = load float, float* %grid_height, align 4 %conv6 = fpext float %8 to double %mul7 = fmul contract double %mul5, %conv6 %conv8 = fptrunc double %mul7 to float store float %conv8, float* %Cap, align 4 %9 = load float, float* %grid_width, align 4 %conv9 = fpext float %9 to double %10 = load float, float* @t_chip, align 4 %conv10 = fpext float %10 to double %mul11 = fmul contract double 2.000000e+02, %conv10 %11 = load float, float* %grid_height, align 4 %conv12 = fpext float %11 to double %mul13 = fmul contract double %mul11, %conv12 %div14 = fdiv double %conv9, %mul13 %conv15 = fptrunc double %div14 to float store float %conv15, float* %Rx, align 4 %12 = load float, float* %grid_height, align 4 %conv16 = fpext float %12 to double %13 = load float, float* @t_chip, align 4 %conv17 = fpext float %13 to double %mul18 = fmul contract double 2.000000e+02, %conv17 %14 = load float, float* %grid_width, align 4 %conv19 = fpext float %14 to double %mul20 = fmul contract double %mul18, %conv19 %div21 = fdiv double %conv16, %mul20 %conv22 = fptrunc double %div21 to float store float %conv22, float* %Ry, align 4 %15 = load float, float* @t_chip, align 4 %16 = load float, float* %grid_height, align 4 %mul23 = fmul contract float 1.000000e+02, %16 %17 = load float, float* %grid_width, align 4 %mul24 = fmul contract float %mul23, %17 %div25 = fdiv float %15, %mul24 store float %div25, float* %Rz, align 4 %18 = load float, float* @t_chip, align 4 %conv26 = fpext float %18 to double %mul27 = fmul contract double 5.000000e-01, %conv26 %mul28 = fmul contract double %mul27, 1.750000e+06 %div29 = fdiv double 3.000000e+06, %mul28 %conv30 = fptrunc double %div29 to float store float %conv30, float* %max_slope, align 4 %19 = load float, float* %max_slope, align 4 %conv31 = fpext float %19 to double %div32 = fdiv double 1.000000e-03, %conv31 %conv33 = fptrunc double %div32 to float store float %conv33, float* %step, align 4 store float 0x3F50624DE0000000, float* 
%time_elapsed, align 4 store i32 1, i32* %src, align 4 store i32 0, i32* %dst, align 4 store float 0.000000e+00, float* %t, align 4 br label %for.cond for.cond: ; preds = %for.inc, %entry %20 = load float, float* %t, align 4 %21 = load i32, i32* %total_iterations.addr, align 4 %conv34 = sitofp i32 %21 to float %cmp = fcmp olt float %20, %conv34 br i1 %cmp, label %for.body, label %for.end for.body: ; preds = %for.cond %22 = load i32, i32* %src, align 4 store i32 %22, i32* %temp, align 4 %23 = load i32, i32* %dst, align 4 store i32 %23, i32* %src, align 4 %24 = load i32, i32* %temp, align 4 store i32 %24, i32* %dst, align 4 %25 = bitcast %struct.dim3* %agg.tmp to i8* %26 = bitcast %struct.dim3* %dimGrid to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %25, i8* align 4 %26, i64 12, i1 false) %27 = bitcast %struct.dim3* %agg.tmp35 to i8* %28 = bitcast %struct.dim3* %dimBlock to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %27, i8* align 4 %28, i64 12, i1 false) %29 = bitcast { i64, i32 }* %agg.tmp.coerce to i8* %30 = bitcast %struct.dim3* %agg.tmp to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %29, i8* align 4 %30, i64 12, i1 false) %31 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 0 %32 = load i64, i64* %31, align 4 %33 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 1 %34 = load i32, i32* %33, align 4 %35 = bitcast { i64, i32 }* %agg.tmp35.coerce to i8* %36 = bitcast %struct.dim3* %agg.tmp35 to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %35, i8* align 4 %36, i64 12, i1 false) %37 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp35.coerce, i32 0, i32 0 %38 = load i64, i64* %37, align 4 %39 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp35.coerce, i32 0, i32 1 %40 = load i32, i32* %39, align 4 %call = call i32 @__cudaPushCallConfiguration(i64 %32, i32 %34, i64 %38, i32 %40, i64 0, i8* null) %tobool = icmp ne i32 %call, 0 br i1 %tobool, label 
%kcall.end, label %kcall.configok kcall.configok: ; preds = %for.body %41 = load i32, i32* %num_iterations.addr, align 4 %conv36 = sitofp i32 %41 to float %42 = load i32, i32* %total_iterations.addr, align 4 %conv37 = sitofp i32 %42 to float %43 = load float, float* %t, align 4 %sub = fsub contract float %conv37, %43 %cmp38 = fcmp ole float %conv36, %sub br i1 %cmp38, label %cond.true, label %cond.false cond.true: ; preds = %kcall.configok %44 = load i32, i32* %num_iterations.addr, align 4 %conv39 = sitofp i32 %44 to float br label %cond.end cond.false: ; preds = %kcall.configok %45 = load i32, i32* %total_iterations.addr, align 4 %conv40 = sitofp i32 %45 to float %46 = load float, float* %t, align 4 %sub41 = fsub contract float %conv40, %46 br label %cond.end cond.end: ; preds = %cond.false, %cond.true %cond = phi float [ %conv39, %cond.true ], [ %sub41, %cond.false ] %conv42 = fptosi float %cond to i32 %47 = load float*, float** %MatrixPower.addr, align 8 %48 = load float**, float*** %MatrixTemp.addr, align 8 %49 = load i32, i32* %src, align 4 %idxprom = sext i32 %49 to i64 %arrayidx = getelementptr inbounds float*, float** %48, i64 %idxprom %50 = load float*, float** %arrayidx, align 8 %51 = load float**, float*** %MatrixTemp.addr, align 8 %52 = load i32, i32* %dst, align 4 %idxprom43 = sext i32 %52 to i64 %arrayidx44 = getelementptr inbounds float*, float** %51, i64 %idxprom43 %53 = load float*, float** %arrayidx44, align 8 %54 = load i32, i32* %col.addr, align 4 %55 = load i32, i32* %row.addr, align 4 %56 = load i32, i32* %borderCols.addr, align 4 %57 = load i32, i32* %borderRows.addr, align 4 %58 = load float, float* %Cap, align 4 %59 = load float, float* %Rx, align 4 %60 = load float, float* %Ry, align 4 %61 = load float, float* %Rz, align 4 %62 = load float, float* %step, align 4 %63 = load float, float* %time_elapsed, align 4 call void @_Z14calculate_tempiPfS_S_iiiiffffff(i32 %conv42, float* %47, float* %50, float* %53, i32 %54, i32 %55, i32 %56, i32 %57, 
float %58, float %59, float %60, float %61, float %62, float %63) br label %kcall.end kcall.end: ; preds = %cond.end, %for.body %call45 = call i32 @cudaDeviceSynchronize() br label %for.inc for.inc: ; preds = %kcall.end %64 = load i32, i32* %num_iterations.addr, align 4 %conv46 = sitofp i32 %64 to float %65 = load float, float* %t, align 4 %add = fadd contract float %65, %conv46 store float %add, float* %t, align 4 br label %for.cond for.end: ; preds = %for.cond %66 = load i32, i32* %dst, align 4 ret i32 %66 } ; Function Attrs: noinline nounwind optnone uwtable define linkonce_odr dso_local void @_ZN4dim3C2Ejjj(%struct.dim3* %this, i32 %vx, i32 %vy, i32 %vz) unnamed_addr #4 comdat align 2 { entry: %this.addr = alloca %struct.dim3*, align 8 %vx.addr = alloca i32, align 4 %vy.addr = alloca i32, align 4 %vz.addr = alloca i32, align 4 store %struct.dim3* %this, %struct.dim3** %this.addr, align 8 store i32 %vx, i32* %vx.addr, align 4 store i32 %vy, i32* %vy.addr, align 4 store i32 %vz, i32* %vz.addr, align 4 %this1 = load %struct.dim3*, %struct.dim3** %this.addr, align 8 %x = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 0 %0 = load i32, i32* %vx.addr, align 4 store i32 %0, i32* %x, align 4 %y = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 1 %1 = load i32, i32* %vy.addr, align 4 store i32 %1, i32* %y, align 4 %z = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 2 %2 = load i32, i32* %vz.addr, align 4 store i32 %2, i32* %z, align 4 ret void } declare dso_local i32 @__cudaPushCallConfiguration(i64, i32, i64, i32, i64, i8*) #1 declare dso_local i32 @cudaDeviceSynchronize() #1 ; Function Attrs: noinline optnone uwtable define dso_local void @_Z5usageiPPc(i32 %argc, i8** %argv) #0 { entry: %argc.addr = alloca i32, align 4 %argv.addr = alloca i8**, align 8 store i32 %argc, i32* %argc.addr, align 4 store i8** %argv, i8*** %argv.addr, align 8 %0 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 
%1 = load i8**, i8*** %argv.addr, align 8 %arrayidx = getelementptr inbounds i8*, i8** %1, i64 0 %2 = load i8*, i8** %arrayidx, align 8 %call = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %0, i8* getelementptr inbounds ([100 x i8], [100 x i8]* @.str.8, i64 0, i64 0), i8* %2) %3 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 %call1 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %3, i8* getelementptr inbounds ([78 x i8], [78 x i8]* @.str.9, i64 0, i64 0)) %4 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 %call2 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %4, i8* getelementptr inbounds ([53 x i8], [53 x i8]* @.str.10, i64 0, i64 0)) %5 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 %call3 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %5, i8* getelementptr inbounds ([38 x i8], [38 x i8]* @.str.11, i64 0, i64 0)) %6 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 %call4 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %6, i8* getelementptr inbounds ([89 x i8], [89 x i8]* @.str.12, i64 0, i64 0)) %7 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 %call5 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %7, i8* getelementptr inbounds ([86 x i8], [86 x i8]* @.str.13, i64 0, i64 0)) %8 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 %call6 = call i32 (%struct._IO_FILE*, i8*, ...) 
@fprintf(%struct._IO_FILE* %8, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str.14, i64 0, i64 0)) call void @exit(i32 1) #9 unreachable } ; Function Attrs: noreturn nounwind declare dso_local void @exit(i32) #5 ; Function Attrs: noinline norecurse optnone uwtable define dso_local i32 @main(i32 %argc, i8** %argv) #6 { entry: %retval = alloca i32, align 4 %argc.addr = alloca i32, align 4 %argv.addr = alloca i8**, align 8 store i32 0, i32* %retval, align 4 store i32 %argc, i32* %argc.addr, align 4 store i8** %argv, i8*** %argv.addr, align 8 %call = call i32 @cudaSetDevice(i32 0) %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([29 x i8], [29 x i8]* @.str.15, i64 0, i64 0), i32 16, i32 16) %0 = load i32, i32* %argc.addr, align 4 %1 = load i8**, i8*** %argv.addr, align 8 call void @_Z3runiPPc(i32 %0, i8** %1) ret i32 0 } declare dso_local i32 @cudaSetDevice(i32) #1 ; Function Attrs: noinline optnone uwtable define dso_local void @_Z3runiPPc(i32 %argc, i8** %argv) #0 { entry: %argc.addr = alloca i32, align 4 %argv.addr = alloca i8**, align 8 %size = alloca i32, align 4 %grid_rows = alloca i32, align 4 %grid_cols = alloca i32, align 4 %FilesavingTemp = alloca float*, align 8 %FilesavingPower = alloca float*, align 8 %MatrixOut = alloca float*, align 8 %tfile = alloca i8*, align 8 %pfile = alloca i8*, align 8 %ofile = alloca i8*, align 8 %total_iterations = alloca i32, align 4 %pyramid_height = alloca i32, align 4 %borderCols = alloca i32, align 4 %borderRows = alloca i32, align 4 %smallBlockCol = alloca i32, align 4 %smallBlockRow = alloca i32, align 4 %blockCols = alloca i32, align 4 %blockRows = alloca i32, align 4 %MatrixTemp = alloca [2 x float*], align 16 %MatrixPower = alloca float*, align 8 %ret = alloca i32, align 4 store i32 %argc, i32* %argc.addr, align 4 store i8** %argv, i8*** %argv.addr, align 8 store i32 60, i32* %total_iterations, align 4 store i32 1, i32* %pyramid_height, align 4 %0 = load i32, i32* %argc.addr, align 4 %cmp = icmp 
ne i32 %0, 7 br i1 %cmp, label %if.then, label %if.end if.then: ; preds = %entry %1 = load i32, i32* %argc.addr, align 4 %2 = load i8**, i8*** %argv.addr, align 8 call void @_Z5usageiPPc(i32 %1, i8** %2) br label %if.end if.end: ; preds = %if.then, %entry %3 = load i8**, i8*** %argv.addr, align 8 %arrayidx = getelementptr inbounds i8*, i8** %3, i64 1 %4 = load i8*, i8** %arrayidx, align 8 %call = call i32 @atoi(i8* %4) #10 store i32 %call, i32* %grid_rows, align 4 %cmp1 = icmp sle i32 %call, 0 br i1 %cmp1, label %if.then13, label %lor.lhs.false lor.lhs.false: ; preds = %if.end %5 = load i8**, i8*** %argv.addr, align 8 %arrayidx2 = getelementptr inbounds i8*, i8** %5, i64 1 %6 = load i8*, i8** %arrayidx2, align 8 %call3 = call i32 @atoi(i8* %6) #10 store i32 %call3, i32* %grid_cols, align 4 %cmp4 = icmp sle i32 %call3, 0 br i1 %cmp4, label %if.then13, label %lor.lhs.false5 lor.lhs.false5: ; preds = %lor.lhs.false %7 = load i8**, i8*** %argv.addr, align 8 %arrayidx6 = getelementptr inbounds i8*, i8** %7, i64 2 %8 = load i8*, i8** %arrayidx6, align 8 %call7 = call i32 @atoi(i8* %8) #10 store i32 %call7, i32* %pyramid_height, align 4 %cmp8 = icmp sle i32 %call7, 0 br i1 %cmp8, label %if.then13, label %lor.lhs.false9 lor.lhs.false9: ; preds = %lor.lhs.false5 %9 = load i8**, i8*** %argv.addr, align 8 %arrayidx10 = getelementptr inbounds i8*, i8** %9, i64 3 %10 = load i8*, i8** %arrayidx10, align 8 %call11 = call i32 @atoi(i8* %10) #10 store i32 %call11, i32* %total_iterations, align 4 %cmp12 = icmp sle i32 %call11, 0 br i1 %cmp12, label %if.then13, label %if.end14 if.then13: ; preds = %lor.lhs.false9, %lor.lhs.false5, %lor.lhs.false, %if.end %11 = load i32, i32* %argc.addr, align 4 %12 = load i8**, i8*** %argv.addr, align 8 call void @_Z5usageiPPc(i32 %11, i8** %12) br label %if.end14 if.end14: ; preds = %if.then13, %lor.lhs.false9 %13 = load i8**, i8*** %argv.addr, align 8 %arrayidx15 = getelementptr inbounds i8*, i8** %13, i64 4 %14 = load i8*, i8** %arrayidx15, align 
8 store i8* %14, i8** %tfile, align 8 %15 = load i8**, i8*** %argv.addr, align 8 %arrayidx16 = getelementptr inbounds i8*, i8** %15, i64 5 %16 = load i8*, i8** %arrayidx16, align 8 store i8* %16, i8** %pfile, align 8 %17 = load i8**, i8*** %argv.addr, align 8 %arrayidx17 = getelementptr inbounds i8*, i8** %17, i64 6 %18 = load i8*, i8** %arrayidx17, align 8 store i8* %18, i8** %ofile, align 8 %19 = load i32, i32* %grid_rows, align 4 %20 = load i32, i32* %grid_cols, align 4 %mul = mul nsw i32 %19, %20 store i32 %mul, i32* %size, align 4 %21 = load i32, i32* %pyramid_height, align 4 %mul18 = mul nsw i32 %21, 2 %div = sdiv i32 %mul18, 2 store i32 %div, i32* %borderCols, align 4 %22 = load i32, i32* %pyramid_height, align 4 %mul19 = mul nsw i32 %22, 2 %div20 = sdiv i32 %mul19, 2 store i32 %div20, i32* %borderRows, align 4 %23 = load i32, i32* %pyramid_height, align 4 %mul21 = mul nsw i32 %23, 2 %sub = sub nsw i32 16, %mul21 store i32 %sub, i32* %smallBlockCol, align 4 %24 = load i32, i32* %pyramid_height, align 4 %mul22 = mul nsw i32 %24, 2 %sub23 = sub nsw i32 16, %mul22 store i32 %sub23, i32* %smallBlockRow, align 4 %25 = load i32, i32* %grid_cols, align 4 %26 = load i32, i32* %smallBlockCol, align 4 %div24 = sdiv i32 %25, %26 %27 = load i32, i32* %grid_cols, align 4 %28 = load i32, i32* %smallBlockCol, align 4 %rem = srem i32 %27, %28 %cmp25 = icmp eq i32 %rem, 0 %29 = zext i1 %cmp25 to i64 %cond = select i1 %cmp25, i32 0, i32 1 %add = add nsw i32 %div24, %cond store i32 %add, i32* %blockCols, align 4 %30 = load i32, i32* %grid_rows, align 4 %31 = load i32, i32* %smallBlockRow, align 4 %div26 = sdiv i32 %30, %31 %32 = load i32, i32* %grid_rows, align 4 %33 = load i32, i32* %smallBlockRow, align 4 %rem27 = srem i32 %32, %33 %cmp28 = icmp eq i32 %rem27, 0 %34 = zext i1 %cmp28 to i64 %cond29 = select i1 %cmp28, i32 0, i32 1 %add30 = add nsw i32 %div26, %cond29 store i32 %add30, i32* %blockRows, align 4 %35 = load i32, i32* %size, align 4 %conv = sext i32 %35 to i64 
%mul31 = mul i64 %conv, 4 %call32 = call noalias i8* @malloc(i64 %mul31) #8 %36 = bitcast i8* %call32 to float* store float* %36, float** %FilesavingTemp, align 8 %37 = load i32, i32* %size, align 4 %conv33 = sext i32 %37 to i64 %mul34 = mul i64 %conv33, 4 %call35 = call noalias i8* @malloc(i64 %mul34) #8 %38 = bitcast i8* %call35 to float* store float* %38, float** %FilesavingPower, align 8 %39 = load i32, i32* %size, align 4 %conv36 = sext i32 %39 to i64 %call37 = call noalias i8* @calloc(i64 %conv36, i64 4) #8 %40 = bitcast i8* %call37 to float* store float* %40, float** %MatrixOut, align 8 %41 = load float*, float** %FilesavingPower, align 8 %tobool = icmp ne float* %41, null br i1 %tobool, label %lor.lhs.false38, label %if.then42 lor.lhs.false38: ; preds = %if.end14 %42 = load float*, float** %FilesavingTemp, align 8 %tobool39 = icmp ne float* %42, null br i1 %tobool39, label %lor.lhs.false40, label %if.then42 lor.lhs.false40: ; preds = %lor.lhs.false38 %43 = load float*, float** %MatrixOut, align 8 %tobool41 = icmp ne float* %43, null br i1 %tobool41, label %if.end43, label %if.then42 if.then42: ; preds = %lor.lhs.false40, %lor.lhs.false38, %if.end14 call void @_Z5fatalPc(i8* getelementptr inbounds ([26 x i8], [26 x i8]* @.str.16, i64 0, i64 0)) br label %if.end43 if.end43: ; preds = %if.then42, %lor.lhs.false40 %44 = load i32, i32* %pyramid_height, align 4 %45 = load i32, i32* %grid_cols, align 4 %46 = load i32, i32* %grid_rows, align 4 %47 = load i32, i32* %borderCols, align 4 %48 = load i32, i32* %borderRows, align 4 %49 = load i32, i32* %blockCols, align 4 %50 = load i32, i32* %blockRows, align 4 %51 = load i32, i32* %smallBlockCol, align 4 %52 = load i32, i32* %smallBlockRow, align 4 %call44 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([94 x i8], [94 x i8]* @.str.17, i64 0, i64 0), i32 %44, i32 %45, i32 %46, i32 %47, i32 %48, i32 %49, i32 %50, i32 %51, i32 %52) %53 = load float*, float** %FilesavingTemp, align 8 %54 = load i32, i32* %grid_rows, align 4 %55 = load i32, i32* %grid_cols, align 4 %56 = load i8*, i8** %tfile, align 8 call void @_Z9readinputPfiiPc(float* %53, i32 %54, i32 %55, i8* %56) %57 = load float*, float** %FilesavingPower, align 8 %58 = load i32, i32* %grid_rows, align 4 %59 = load i32, i32* %grid_cols, align 4 %60 = load i8*, i8** %pfile, align 8 call void @_Z9readinputPfiiPc(float* %57, i32 %58, i32 %59, i8* %60) %arrayidx45 = getelementptr inbounds [2 x float*], [2 x float*]* %MatrixTemp, i64 0, i64 0 %61 = bitcast float** %arrayidx45 to i8** %62 = load i32, i32* %size, align 4 %conv46 = sext i32 %62 to i64 %mul47 = mul i64 4, %conv46 %call48 = call i32 @cudaMalloc(i8** %61, i64 %mul47) %arrayidx49 = getelementptr inbounds [2 x float*], [2 x float*]* %MatrixTemp, i64 0, i64 1 %63 = bitcast float** %arrayidx49 to i8** %64 = load i32, i32* %size, align 4 %conv50 = sext i32 %64 to i64 %mul51 = mul i64 4, %conv50 %call52 = call i32 @cudaMalloc(i8** %63, i64 %mul51) %arrayidx53 = getelementptr inbounds [2 x float*], [2 x float*]* %MatrixTemp, i64 0, i64 0 %65 = load float*, float** %arrayidx53, align 16 %66 = bitcast float* %65 to i8* %67 = load float*, float** %FilesavingTemp, align 8 %68 = bitcast float* %67 to i8* %69 = load i32, i32* %size, align 4 %conv54 = sext i32 %69 to i64 %mul55 = mul i64 4, %conv54 %call56 = call i32 @cudaMemcpy(i8* %66, i8* %68, i64 %mul55, i32 1) %70 = bitcast float** %MatrixPower to i8** %71 = load i32, i32* %size, align 4 %conv57 = sext i32 %71 to i64 %mul58 = mul i64 4, %conv57 %call59 = call i32 @cudaMalloc(i8** %70, i64 %mul58) %72 = load float*, float** %MatrixPower, align 8 %73 = bitcast float* %72 to i8* %74 = load float*, float** %FilesavingPower, align 8 %75 = bitcast float* %74 to i8* %76 = load i32, i32* 
%size, align 4 %conv60 = sext i32 %76 to i64 %mul61 = mul i64 4, %conv60 %call62 = call i32 @cudaMemcpy(i8* %73, i8* %75, i64 %mul61, i32 1) %call63 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([43 x i8], [43 x i8]* @.str.18, i64 0, i64 0)) %77 = load float*, float** %MatrixPower, align 8 %arraydecay = getelementptr inbounds [2 x float*], [2 x float*]* %MatrixTemp, i64 0, i64 0 %78 = load i32, i32* %grid_cols, align 4 %79 = load i32, i32* %grid_rows, align 4 %80 = load i32, i32* %total_iterations, align 4 %81 = load i32, i32* %pyramid_height, align 4 %82 = load i32, i32* %blockCols, align 4 %83 = load i32, i32* %blockRows, align 4 %84 = load i32, i32* %borderCols, align 4 %85 = load i32, i32* %borderRows, align 4 %call64 = call i32 @_Z17compute_tran_tempPfPS_iiiiiiii(float* %77, float** %arraydecay, i32 %78, i32 %79, i32 %80, i32 %81, i32 %82, i32 %83, i32 %84, i32 %85) store i32 %call64, i32* %ret, align 4 %call65 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([19 x i8], [19 x i8]* @.str.19, i64 0, i64 0)) %86 = load float*, float** %MatrixOut, align 8 %87 = bitcast float* %86 to i8* %88 = load i32, i32* %ret, align 4 %idxprom = sext i32 %88 to i64 %arrayidx66 = getelementptr inbounds [2 x float*], [2 x float*]* %MatrixTemp, i64 0, i64 %idxprom %89 = load float*, float** %arrayidx66, align 8 %90 = bitcast float* %89 to i8* %91 = load i32, i32* %size, align 4 %conv67 = sext i32 %91 to i64 %mul68 = mul i64 4, %conv67 %call69 = call i32 @cudaMemcpy(i8* %87, i8* %90, i64 %mul68, i32 2) %92 = load float*, float** %MatrixOut, align 8 %93 = load i32, i32* %grid_rows, align 4 %94 = load i32, i32* %grid_cols, align 4 %95 = load i8*, i8** %ofile, align 8 call void @_Z11writeoutputPfiiPc(float* %92, i32 %93, i32 %94, i8* %95) %96 = load float*, float** %MatrixPower, align 8 %97 = bitcast float* %96 to i8* %call70 = call i32 @cudaFree(i8* %97) %arrayidx71 = getelementptr inbounds [2 x float*], [2 x float*]* %MatrixTemp, i64 0, i64 0 %98 = load float*, 
float** %arrayidx71, align 16 %99 = bitcast float* %98 to i8* %call72 = call i32 @cudaFree(i8* %99) %arrayidx73 = getelementptr inbounds [2 x float*], [2 x float*]* %MatrixTemp, i64 0, i64 1 %100 = load float*, float** %arrayidx73, align 8 %101 = bitcast float* %100 to i8* %call74 = call i32 @cudaFree(i8* %101) %102 = load float*, float** %MatrixOut, align 8 %103 = bitcast float* %102 to i8* call void @free(i8* %103) #8 ret void } ; Function Attrs: nounwind readonly declare dso_local i32 @atoi(i8*) #7 ; Function Attrs: nounwind declare dso_local noalias i8* @malloc(i64) #2 ; Function Attrs: nounwind declare dso_local noalias i8* @calloc(i64, i64) #2 declare dso_local i32 @cudaMalloc(i8**, i64) #1 declare dso_local i32 @cudaMemcpy(i8*, i8*, i64, i32) #1 declare dso_local i32 @cudaFree(i8*) #1 ; Function Attrs: nounwind declare dso_local void @free(i8*) #2 define internal void @__cuda_register_globals(i8** %0) { entry: %1 = call i32 @__cudaRegisterFunction(i8** %0, i8* bitcast (void (i32, float*, float*, float*, i32, i32, i32, i32, float, float, float, float, float, float)* @_Z14calculate_tempiPfS_S_iiiiffffff to i8*), i8* getelementptr inbounds ([36 x i8], [36 x i8]* @0, i64 0, i64 0), i8* getelementptr inbounds ([36 x i8], [36 x i8]* @0, i64 0, i64 0), i32 -1, i8* null, i8* null, i8* null, i8* null, i32* null) ret void } declare dso_local i32 @__cudaRegisterFunction(i8**, i8*, i8*, i8*, i32, i8*, i8*, i8*, i8*, i32*) declare dso_local i32 @__cudaRegisterVar(i8**, i8*, i8*, i8*, i32, i32, i32, i32) declare dso_local i8** @__cudaRegisterFatBinary(i8*) define internal void @__cuda_module_ctor(i8* %0) { entry: %1 = call i8** @__cudaRegisterFatBinary(i8* bitcast ({ i32, i32, i8*, i8* }* @__cuda_fatbin_wrapper to i8*)) store i8** %1, i8*** @__cuda_gpubin_handle, align 8 call void @__cuda_register_globals(i8** %1) call void @__cudaRegisterFatBinaryEnd(i8** %1) %2 = call i32 @atexit(void (i8*)* @__cuda_module_dtor) ret void } declare dso_local void 
@__cudaRegisterFatBinaryEnd(i8**)

declare dso_local void @__cudaUnregisterFatBinary(i8**)

; Module destructor: loads the handle saved in @__cuda_gpubin_handle and passes
; it to __cudaUnregisterFatBinary. The i8* parameter %0 is unused.
define internal void @__cuda_module_dtor(i8* %0) {
entry:
  %1 = load i8**, i8*** @__cuda_gpubin_handle, align 8
  call void @__cudaUnregisterFatBinary(i8** %1)
  ret void
}

declare dso_local i32 @atexit(void (i8*)*)

; Attribute groups referenced by the "#N" suffixes on the definitions,
; declarations and call sites in this module.
attributes #0 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #3 = { argmemonly nounwind willreturn }
attributes #4 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #5 = { noreturn nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #6 = { noinline norecurse optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #7 = { nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #8 = { nounwind }
attributes #9 = { noreturn nounwind }
attributes #10 = { nounwind readonly }

; Module-level metadata: module flags (!0, !1) and the producer string (!2).
!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}

!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]}
!1 = !{i32 1, !"wchar_size", i32 4}
!2 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"}