; ModuleID = 'pathfinder-host-x86_64-unknown-linux-gnu.bc' source_filename = "pathfinder.cu" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" %struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } %struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } %struct.dim3 = type { i32, i32, i32 } %struct.CUstream_st = type opaque $_ZN4dim3C2Ejjj = comdat any @rows = dso_local global i32 0, align 4 @cols = dso_local global i32 0, align 4 @data = dso_local global i32* null, align 8 @wall = dso_local global i32** null, align 8 @result = dso_local global i32* null, align 8 @pyramid_height = dso_local global i32 0, align 4 @.str = private unnamed_addr constant [47 x i8] c"Usage: dynproc row_len col_len pyramid_height\0A\00", align 1 @stderr = external dso_local global %struct._IO_FILE*, align 8 @.str.1 = private unnamed_addr constant [11 x i8] c"error: %s\0A\00", align 1 @.str.2 = private unnamed_addr constant [92 x i8] c"pyramidHeight: %d\0AgridSize: [%d]\0Aborder:[%d]\0AblockSize: %d\0AblockGrid:[%d]\0AtargetBlock:[%d]\0A\00", align 1 @.str.3 = private unnamed_addr constant [4 x i8] c"%d \00", align 1 @.str.4 = private unnamed_addr constant [2 x i8] c"\0A\00", align 1 @0 = private unnamed_addr constant [30 x i8] c"_Z14dynproc_kerneliPiS_S_iiii\00", align 1 @1 = private constant [20737 x i8] 
c"P\EDU\BA\01\00\10\00\F0P\00\00\00\00\00\00\02\00\01\01@\00\00\00\E8B\00\00\00\00\00\00\00\00\00\00\00\00\00\00\07\00\01\00=\00\00\00\00\00\00\00\00\00\00\00\11\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\7FELF\02\01\013\07\00\00\00\00\00\00\00\02\00\BE\00e\00\00\00\00\00\00\00\00\00\00\00@B\00\00\00\00\00\00\C0?\00\00\00\00\00\00=\05=\00@\008\00\03\00@\00\0A\00\01\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00.text._Z14dynproc_kerneliPiS_S_iiii\00.nv.info._Z14dynproc_kerneliPiS_S_iiii\00.nv.shared._Z14dynproc_kerneliPiS_S_iiii\00.nv.global\00.nv.constant0._Z14dynproc_kerneliPiS_S_iiii\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00_Z14dynproc_kerneliPiS_S_iiii\00.text._Z14dynproc_kerneliPiS_S_iiii\00.nv.info._Z14dynproc_kerneliPiS_S_iiii\00.nv.shared._Z14dynproc_kerneliPiS_S_iiii\00.nv.global\00blockIdx\00threadIdx\00$___ZZ14dynproc_kerneliPiS_S_iiiiE4prev__187\00$___ZZ14dynproc_kerneliPiS_S_iiiiE6result__189\00.nv.constant0._Z14dynproc_kerneliPiS_S_iiii\00_param\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00P\00\00\00\03\00\07\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\9B\00\00\00\03\00\08\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\C4\00\00\00\03\00\09\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\CF\00\00\00\01\00\09\00\00\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\D8\00\00\00\01\00\09\00\01\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00>\01\00\00\03\00\06\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\002\00\00\00\12\10\07\00\00\00\00\00\00\00\00\00\00:\00\00\00\00\00\00\04/\08\00\07\00\00\00\11\00\00\00\04#\08\00\07\00\00\00\00\00\00\00\04\12\08\00\07\00\00\00x\00\00\00\04\11\08\00\07\00\00\00x\00\00\00\010\00\00\01*\00\00\04\0A\08\00\06\00\00\00@\010\00\03\190\00\04\17\0C\00\00\00\00\00\07\00,\00\00\F0\11\00\04\17\0C\00\00\00\00\00\06\00(\00\00\F0\11\00\04\17\0C\00\00\00\00\00\05\00$\00\00\F0\11\00\04\17\0C\00\00\00\00\00\04\00 
\00\00\F0\11\00\04\17\0C\00\00\00\00\00\03\00\18\00\00\F0!\00\04\17\0C\00\00\00\00\00\02\00\10\00\00\F0!\00\04\17\0C\00\00\00\00\00\01\00\08\00\00\F0!\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0\11\00\03\1B\FF\00\04\1D\04\00\E8\05\00\00\04\1C\04\00\D89\00\00\04\1E\04\00\80\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\EF\1F\E0\FD\03!\00\D3rd<3>;\0A\0Amov.u2\00\1B,e\00b;\0Acvta\8D\00\04%\00\13,\\\00\22ld\C8\00\02\18\00nrd2, [\CE\00\1E])\00\1F1)\00\01a0];\0Ast#\00\81[%SP+0],,\00\0A\16\00\128\16\00\222;\B6\00\01\D8\00a1, 
999(\00\02g\00\00\EF\00\18[\9E\01\03M\00\AF1;\0Aret;\0A\0A}\D6\01\1A\FE\02FuncGetAttributes\E1\01\0D#\00\0E\EC\01\0F+\00\06\0F\F7\01\1B\1F1\F7\01Q\1F1\F7\01!\0E\D9\00\0F\02\02\0F\0E8\01\0F\0D\02\8DhDevice\B4\00\0E\0E\02\0E$\00\0F\0F\02\00/32,\00\0B\1F1,\00\18\1F2<\02\13\1F2<\02\1F\1D4<\02\1F2<\02\0C\1F2<\02\13\01_\00\04;\02\0F\D9\00\07\1D]4\00\1F14\00\06\0Fp\02\10\0E\9A\01\0Fq\02\12(32q\02\0B\15\00!12\16\00\09\86\02\1F3\86\02\15\1F3\86\02#2Get\CB\00\0E}\02\05\1B\00\04\DA\00\0F\1C\02\13?3[8W\04.\0F\1B\02\0D\1F3W\04\19\04\B3\01\0D\D0\00\0F\AA\01\06\0F\05\04W\F0\04OccupancyMaxActiveBV\08\FE\03sPerMultiprocessor\9F\01\0F;\00\16\0EB\06\0FC\00%\0EJ\04\0FC\00\1E\1F2\86\00/\1F3\88\02\13O4[32\89\02\1C\1D3\89\02\1F4\89\02\0C\1F4\89\02\19\133\89\02\0F\F1\00\1E\0F\BC\04\00\0FK\00$/2]w\07\00\0FL\00$\0F\1F\05\01\0F\98\00%\0F\A7\07\1D\097\05\186M\05\04,\00\2224-\00\183\CF\03\1F2\CF\03\15\1F2\CF\03L\9FWithFlags\D8\03(\05D\00\0E\E1\03\0FL\00'\0F\EA\030\0CL\00\1F2\98\008\1F3\98\008\1F4H\04\13O5[40\EC\08.\0FH\04\0D\1F5\EC\08\1C\0F\F9\00+\1F]\9C\040\0D\9A\01\0F\A5\040\0D:\02\0F\AE\041\0D\DB\02\0F\B7\041\0D|\03\0F\C0\04I\08-\00\1F3$\0A6\F0\07visible .entry _Z14dyn\E2\00\F6\03_kerneliPiS_S_iiii\A6\04\00r\00\0F+\00\0A\0E\8D\04\00\D3\00\0F3\00\11\1F13\00\1F\1F23\00\1F/3,\CC\00\1E\1F43\00\1F\1F53\00\1F\1F63\00\1F\1F7\C2\04\13O6[12\C3\04\16\A6pred %p<21\C5\04\AB16 %rs<10>\E9\04=105\EB\04 56\EC\04P\09.shaK\00\03\97\00\124\97\00\1FZ\D6\00\09\CFE4prev[1024]C\00%t6resultE\00\0Fs\05\08\1F6s\05\19\00!\04\0F\92\01\12\0E\06\05/20<\00\14\1F6B\05\00\1F9<\00\14\1F5<\00\00\1F8<\00\14\0F\0F\06\02\0F<\00\14\1F3\A2\05\01\0F<\00\14\0F\DE\05\01\0Fh\01\15\1F1\F0\00\00\1F7<\00\14#0]\FA\01#to\B6\15\04~\00\144r\05\01\1F\00\0A\1C\00\115\1C\00\1F4;\00\05\146\DA\05\0F;\00\00\117\1C\00\1F6;\00\05\148+\06\0F;\00\00\119\1C\00\1A8\04\06\03r\0E\1F7]\06\02\1A9\16\00\03]\06/d7^\06\03\1F5^\06\02\1B1q\00\133\A2\06\1A9\17\00\134,\0B\1B0\17\00\02\\\00\192I\0F\CB22, %ctaid.x/\00\02\B9\00\192\D3\06n23, 
%t-\00\135\FF\06\113\FF\01\03,\00$4,\18\01S;\0Ashl\9D\04325,\1D\00\0A\89\00\D26, 256;\0Asub.s\13\00#7,\19\00\006\00\0Bq\00\02\FB\00(27q\00%8,\1D\00\08\17\00%9,\D2\00\83;\0Amul.lob\00330,8\00\00'\00\074\00531,5\01\08\93\00532,7\00\1B3'\12\136\CD\07\182H\00%3,\1D\00T;\0AaddH\00#4,\1E\00+25\DA\00\126\A7\01(34G\00\185G\00\06\17\00%6,\7F\01\09^\00#7,5\00\00$\00\0B_\00\02\D7\01\1839\01(38_\00rsetp.gtL\003p3,!\00\F2\0D-1;\0A@%p3 bra LBB6_2;\0Abra.uni\10\0021;\0A\08\00\16:[\00(40[\00Tneg.s\84\0A\00\1D\00\08\02\02#99o\09\09U\00\133U\00(2:Z\09?9, <\00\00/39=\00\04*3:m\0E\001\00\0A4\03\157\C0\09\06\BB\00%1,\92\01\08\17\00%2,\95\03\08u\01343,\1E\00#-1E\01#leE\01#4,P\00\00'\00\01G\01\164G\01\1B5\B5\00\134\B5\00\184G\01\1F5\8C\00\03\186\8C\00\06\A7\02347,\1E\00\00;\00\09\1B\02\13,\1F\009254Q\13\120\D4\02\0B\8C\00\136\8C\00\185A\01#44\B7\02\0D?\00/44@\00\04\196@\00\12,2\00\0B\82\01\02\D2\03\08\F7\02/48\E0\02\03349,\1E\00\1C-\C0\15\02\84\03\184\CC\03/50F\00\03351,\1E\00\0CE\00\02\82\03(51E\00\04\F6\01\198\82\03553,@\02\04:\03\04\F5\01#5,8\00\00'\00\01\F5\01\165\F5\01\1B8)\01\137)\01\177\F5\01\185\\\00\08l\01\02\B9\00\0BB\00\139B\00\178B\00\186\B5\00\0DB\00\1F6B\00\04\189\AD\01\227,1\00\0Fh\01\02\08E\04\145\EF\05\198\A3\02\155\C6\00,6]\17\03#6,8\00\00'\00\01\22\01\166\22\01\1C1\0A\04$10`\04\170\E3\00\188_\00\08\E3\00\01y\01\0Cg\01$12D\00\08\A5\04\189\BA\00\0DD\00\1F9D\00\06\09\94\04210,4\00\0FM\02\02(10\14\01\09\B8\05\06\17\00\1D7M\02\14l\87\05#8,8\00\00'\00\02\A5\00\03\12\0C\157\0B\05\03\11\00\00\D9\0A1%p7P\01\178P\01\0C\9C\03\141\1E\05(13u\02\1F8\9C\00\03\1F9\C7\01\07#1,8\00\00'\00\0F\8B\00\00\0D\C9\01\04\E2\04914:.\00\11,3\00\00_\00Blp.u\DE\0Ca1, 1, H\00\03Z\01\138Y\01\02\C9\06\18s\A6\03\156n\06\1D8B\01#9,!\00\110\1A\01\179\1A\01\1C7\A1\00\145\A1\00\185M\03\181]\00\06\17\00\0F\10\06\04\116k\05\186\10\06\05U\07\02,\02\126\86\03\02\1C\04/10\91\00\09\04H\05\2216\91\00\03\96\0B%0,N\0A\01\92\00\02z\00)d1\AA\00\01{\09\031\00$2, 
\00\132\AC\00\03\19\00$3,Q\00\01'\00\08\E0\00\01\D8\03\00#\00\0Ad\00\184(\02\08d\00$5, \00\172\E9\0D_rd16,i\0E\12\03q\0B\02e\0E\05=\00\02\AD\0B*16\B7\00(8,\1D\00\08J\09\00\1D\00\01!\02\1D6\F8\02\04l\05\CA17:\0Abar.syncO\08K65, \9F\06\129!\08\1C6w\05$18N\00\09y\05\04\E2\03\199F\03(67\13\0B\09.\06\00\94\01\028\00\00'\00\02\13\02\161\13\02\1D3x\00\04\AE\05\141\AF\05\020\03\152\BA\00\04(\03\129(\03\09\D4\01\0F\E5\03\03(69\B6\00\06\CB\02370,\1E\00\1C1V\03\02R\02\116\88\01\127Z\03'12#\0A\0C=\05\142\C5\05\182\C5\05/71\90\00\02(72\90\00\07\94\06\133\C0\08\06\F2\08374,\19\00\007\00\0Dp\03#3,f\00\00*\00\02\A6\00\1F3\A6\00\09\04\CB\0A\132'\06\108B\01%3,z\04T;\0Aand\81\11#4,\1E\00\04\1E\01\22eq\1B\00\10p\05\03\02!\00\00=\0B\10!\11\00\0Fv\00\09\04Y\06)22\D3\01\155\A2\08\0B\D3\01\195\91\03\1D9\ED\07\03\F8\0F\1F0x\03(\2221A\0F\190\E4\03\016\0E\02s\00\0AH\04\01 \0E\066\00\08t\02\2275H\04;23]Z\03\02\F9\09\197\CE\00\1F2_\04\05\02?\0E\01 \00\0B{\00\196{\00\185{\00\136{\00\1B6{\00#10\C8\08\196|\00\187\10\08\08|\00$8, 
\00\0B|\00\199|\00\188|\00\137|\00\1D9|\00\02\1A\08\187E\09\147\99\03\199\C3\0E679,\B1\00\0D\DD\02#5,:\00\00)\00\02\DD\02\07j\0A\1C2\00\05\152S\0D\08\F8\07\054\06\1C9I\09\01\D3\01\0D\8E\09$25F\00\08\A8\0C)12\A9\00\0EG\00\1F2G\00\06\09c\0C\02\B1\03<1034\01\03\A1!\08h\10\158\06\07\190\99\07681,j\01\0D5\01#6,;\00\00)\00\025\01\07}\0A,27\A8\00\04\82\07(26\EF\00\194{\00\08\EF\00\02\E5\0C\0D|\01\04.\06\182\E9\0B)15\AA\00\0EG\00\1F5G\00\06\1986\01\226,5\00\0F6\01\04\186\1E\01\09\B7\08\06\17\00\04\E5\04\194\E8\0C/84\1A\06\03385,5\00\00$\00\0B^\11386,i\00\00)\00\08N\00\187\A0\08\07N\00&8,7\00\1B7\C5\00\03\86#\188]\03-89\FB\01\03(\15\04\14\02\198\B5\08\05\D7\11\00N\00\09\DB\03\02\D9\11-d3\1A\09\00\A4\11\03Q\00\01'\00\07\DD\11\2290\DB\03)33\C6\00391,\9B\00\00#\00\09\7F\00\09\D5\04\07\A3\05\00\AB\11\0FA\17\13\0F\A5\05\02\2236\9D\00\1A5\D3\00$7,u\00\0B\D3\00(8,6\00\1976\09\123,\02\1D9\F0\03\04p\08*296\09\06\0F\01\1F2\F4\08\02'93\F4\08\07&\01#4,\1D\00\05\1F\0B\15n\0C\09#7,P\00\00(\00\02\9D\03\177\0C\09\0D$\12\04U\08;30:\1A\00\04_\04\183\C9\07\05\17\0F\1A9\C9\07#7,\1E\00\0E\C9\07#8,!\00\04\C9\07\07Z\0D\1C36\11$32v\00(2:\FF\01\1F9\D4\06\04\00%\13\03 \00\0A3\0B/41\18\02*\124\D5\02)41\FF\01\02\A9\12\05\1D\00\09\C3\01\03\A4\07:43]\88\00\1F4C\08(\134\A3\17\1A4\86\00(6,\1D\00\190\85\02\124\13\08\1C9\F8\04\153\D1\13\1A3\85\02\0A&\00\04`\0E\183\9B\06/96\C3\04\03\129m\02\1D6\07\12\03\F2\0B/97\F2\0B\05'35T\02\1F8T\02\04#9,\1E\00\0ET\02\02\1B\00\169T\02\07\94\0E\1C3v\00$36v\00\186T\02/47T\02\05$8, \00\0BT\02\1F9T\02*\2250X\00\199\CE\01\02L\13\05\1D\00\08\A4\05\2298T\02)51\A2\05\05Q\13)24\A3\05/53\BC\0E\04\02(\12\01 \00\0A\CF\04\00b\12\03Q\00\01'\00\09J\02\2255\C5\01\0C\09\12$37O\01\B07:\0Aret;\0A\0A}\0A\00\00", section ".nv_fatbin", align 8 @__cuda_fatbin_wrapper = internal constant { i32, i32, i8*, i8* } { i32 1180844977, i32 1, i8* getelementptr inbounds ([20737 x i8], [20737 x i8]* @1, i64 0, i64 0), i8* null }, section ".nvFatBinSegment", align 8 @__cuda_gpubin_handle = internal global 
i8** null, align 8

; Static-constructor table: registers @__cuda_module_ctor (bitcast to
; void ()*) with priority 65535 and no associated data pointer.
@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* bitcast (void (i8*)* @__cuda_module_ctor to void ()*), i8* null }]

; init(int argc, char **argv)
;
; Parses the command line and builds the host-side input:
;   * argc must equal 4; otherwise the usage string @.str is printed with
;     printf and the process calls exit(0).
;   * @cols = atoi(argv[1]), @rows = atoi(argv[2]),
;     @pyramid_height = atoi(argv[3]).
;   * @data   <- _Znam(rows * cols * 4)  (operator new[])
;     @wall   <- _Znam(rows * 8); wall[n] is set to data + cols * n
;     @result <- _Znam(cols * 4)
;   * srand(9), then wall[i][j] = rand() % 10 for every i < rows, j < cols.
; Returns nothing.
; NOTE(review): the usage path exits with status 0 and writes to stdout;
; confirm whether a non-zero status / stderr was intended in pathfinder.cu.
; Function Attrs: noinline optnone uwtable
define dso_local void @_Z4initiPPc(i32 %argc, i8** %argv) #0 {
entry:
  %argc.addr = alloca i32, align 4
  %argv.addr = alloca i8**, align 8
  %n = alloca i32, align 4
  %seed = alloca i32, align 4
  %i = alloca i32, align 4
  %j = alloca i32, align 4
  store i32 %argc, i32* %argc.addr, align 4
  store i8** %argv, i8*** %argv.addr, align 8
  %0 = load i32, i32* %argc.addr, align 4
  ; Exactly three user arguments are required (argc == 4).
  %cmp = icmp eq i32 %0, 4
  br i1 %cmp, label %if.then, label %if.else

if.then:                                          ; preds = %entry
  ; argv[1] -> @cols
  %1 = load i8**, i8*** %argv.addr, align 8
  %arrayidx = getelementptr inbounds i8*, i8** %1, i64 1
  %2 = load i8*, i8** %arrayidx, align 8
  %call = call i32 @atoi(i8* %2) #11
  store i32 %call, i32* @cols, align 4
  ; argv[2] -> @rows
  %3 = load i8**, i8*** %argv.addr, align 8
  %arrayidx1 = getelementptr inbounds i8*, i8** %3, i64 2
  %4 = load i8*, i8** %arrayidx1, align 8
  %call2 = call i32 @atoi(i8* %4) #11
  store i32 %call2, i32* @rows, align 4
  ; argv[3] -> @pyramid_height
  %5 = load i8**, i8*** %argv.addr, align 8
  %arrayidx3 = getelementptr inbounds i8*, i8** %5, i64 3
  %6 = load i8*, i8** %arrayidx3, align 8
  %call4 = call i32 @atoi(i8* %6) #11
  store i32 %call4, i32* @pyramid_height, align 4
  br label %if.end

if.else:                                          ; preds = %entry
  ; Wrong argument count: print usage and terminate (exit never returns).
  %call5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([47 x i8], [47 x i8]* @.str, i64 0, i64 0))
  call void @exit(i32 0) #12
  unreachable

if.end:                                           ; preds = %if.then
  ; data = new int[rows * cols]; the element count is computed in 32 bits
  ; (mul nsw i32), sign-extended, then scaled by 4 with an overflow check.
  ; When the 64-bit multiply overflows, -1 is passed as the size.
  %7 = load i32, i32* @rows, align 4
  %8 = load i32, i32* @cols, align 4
  %mul = mul nsw i32 %7, %8
  %9 = sext i32 %mul to i64
  %10 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %9, i64 4)
  %11 = extractvalue { i64, i1 } %10, 1
  %12 = extractvalue { i64, i1 } %10, 0
  %13 = select i1 %11, i64 -1, i64 %12
  %call6 = call i8* @_Znam(i64 %13) #13
  %14 = bitcast i8* %call6 to i32*
  store i32* %14, i32** @data, align 8
  ; wall = new int*[rows]  (8 bytes per element, same overflow handling)
  %15 = load i32, i32* @rows, align 4
  %16 = sext i32 %15 to i64
  %17 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %16, i64 8)
  %18 = extractvalue { i64, i1 } %17, 1
  %19 = extractvalue { i64, i1 } %17, 0
  %20 = select i1 %18, i64 -1, i64 %19
  %call7 = call i8* @_Znam(i64 %20) #13
  %21 = bitcast i8* %call7 to i32**
  store i32** %21, i32*** @wall, align 8
  store i32 0, i32* %n, align 4
  br label %for.cond

; Loop over n in [0, rows): wall[n] = data + cols * n.
for.cond:                                         ; preds = %for.inc, %if.end
  %22 = load i32, i32* %n, align 4
  %23 = load i32, i32* @rows, align 4
  %cmp8 = icmp slt i32 %22, %23
  br i1 %cmp8, label %for.body, label %for.end

for.body:                                         ; preds = %for.cond
  %24 = load i32*, i32** @data, align 8
  %25 = load i32, i32* @cols, align 4
  %26 = load i32, i32* %n, align 4
  %mul9 = mul nsw i32 %25, %26
  %idx.ext = sext i32 %mul9 to i64
  %add.ptr = getelementptr inbounds i32, i32* %24, i64 %idx.ext
  %27 = load i32**, i32*** @wall, align 8
  %28 = load i32, i32* %n, align 4
  %idxprom = sext i32 %28 to i64
  %arrayidx10 = getelementptr inbounds i32*, i32** %27, i64 %idxprom
  store i32* %add.ptr, i32** %arrayidx10, align 8
  br label %for.inc

for.inc:                                          ; preds = %for.body
  %29 = load i32, i32* %n, align 4
  %inc = add nsw i32 %29, 1
  store i32 %inc, i32* %n, align 4
  br label %for.cond

for.end:                                          ; preds = %for.cond
  ; result = new int[cols]
  %30 = load i32, i32* @cols, align 4
  %31 = sext i32 %30 to i64
  %32 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %31, i64 4)
  %33 = extractvalue { i64, i1 } %32, 1
  %34 = extractvalue { i64, i1 } %32, 0
  %35 = select i1 %33, i64 -1, i64 %34
  %call11 = call i8* @_Znam(i64 %35) #13
  %36 = bitcast i8* %call11 to i32*
  store i32* %36, i32** @result, align 8
  ; Fixed seed (9) is stored to %seed and passed to srand.
  store i32 9, i32* %seed, align 4
  %37 = load i32, i32* %seed, align 4
  call void @srand(i32 %37) #14
  store i32 0, i32* %i, align 4
  br label %for.cond12

; Outer loop over i in [0, rows).
for.cond12:                                       ; preds = %for.inc26, %for.end
  %38 = load i32, i32* %i, align 4
  %39 = load i32, i32* @rows, align 4
  %cmp13 = icmp slt i32 %38, %39
  br i1 %cmp13, label %for.body14, label %for.end28

for.body14:                                       ; preds = %for.cond12
  store i32 0, i32* %j, align 4
  br label %for.cond15

; Inner loop over j in [0, cols).
for.cond15:                                       ; preds = %for.inc23, %for.body14
  %40 = load i32, i32* %j, align 4
  %41 = load i32, i32* @cols, align 4
  %cmp16 = icmp slt i32 %40, %41
  br i1 %cmp16, label %for.body17, label %for.end25

for.body17:                                       ; preds = %for.cond15
  ; wall[i][j] = rand() % 10  (signed remainder)
  %call18 = call i32 @rand() #14
  %rem = srem i32 %call18, 10
  %42 = load i32**, i32*** @wall, align 8
  %43 = load i32, i32* %i, align 4
  %idxprom19 = sext i32 %43 to i64
  %arrayidx20 = getelementptr inbounds i32*, i32** %42, i64 %idxprom19
  %44 = load i32*, i32** %arrayidx20, align 8
  %45 = load i32, i32* %j, align 4
  %idxprom21 = sext i32 %45 to i64
  %arrayidx22 = getelementptr inbounds i32, i32* %44, i64 %idxprom21
  store i32 %rem, i32* %arrayidx22, align 4
  br label %for.inc23

for.inc23:                                        ; preds = %for.body17
  %46 = load i32, i32* %j, align 4
  %inc24 = add nsw i32 %46, 1
  store i32 %inc24, i32* %j, align 4
  br label %for.cond15

for.end25:                                        ; preds = %for.cond15
  br label %for.inc26

for.inc26:                                        ; preds = %for.end25
  %47 = load i32, i32* %i, align 4
  %inc27 = add nsw i32 %47, 1
  store i32 %inc27, i32* %i, align 4
  br label %for.cond12

for.end28:                                        ; preds = %for.cond12
  ret void
}

; Function Attrs: nounwind readonly
declare dso_local i32 @atoi(i8*) #1

declare dso_local i32 @printf(i8*, ...) #2

; Function Attrs: noreturn nounwind
declare dso_local void @exit(i32) #3

; Function Attrs: nounwind readnone speculatable willreturn
declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64) #4

; Function Attrs: nobuiltin
declare dso_local noalias i8* @_Znam(i64) #5

; Function Attrs: nounwind
declare dso_local void @srand(i32) #6

; Function Attrs: nounwind
declare dso_local i32 @rand() #6

; fatal(char *s)
;
; Writes "error: %s\n" (format string @.str.1) with %s = s to @stderr via
; fprintf and returns. The fprintf result is unused, and the function itself
; does not terminate the process.
; Function Attrs: noinline optnone uwtable
define dso_local void @_Z5fatalPc(i8* %s) #0 {
entry:
  %s.addr = alloca i8*, align 8
  store i8* %s, i8** %s.addr, align 8
  %0 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
  %1 = load i8*, i8** %s.addr, align 8
  %call = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %0, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.1, i64 0, i64 0), i8* %1)
  ret void
}

declare dso_local i32 @fprintf(%struct._IO_FILE*, i8*, ...) #2

; dynproc_kernel(int iteration, int *gpuWall, int *gpuSrc, int *gpuResults,
;                int cols, int rows, int startStep, int border)
;
; Host-side function carrying the kernel's mangled name. It performs no
; computation of its own; it:
;   1. spills each of the eight arguments to its own stack slot,
;   2. fills %kernel_args (8 x i8*) with the addresses of those slots,
;   3. retrieves grid/block dimensions, shared-memory size and stream with
;      __cudaPopCallConfiguration,
;   4. calls cudaLaunchKernel, passing this function's own address as the
;      first argument.
; The i32 results of __cudaPopCallConfiguration (%16) and cudaLaunchKernel
; (%call) are not inspected.
; Function Attrs: noinline optnone uwtable
define dso_local void @_Z14dynproc_kerneliPiS_S_iiii(i32 %iteration, i32* %gpuWall, i32* %gpuSrc, i32* %gpuResults, i32 %cols, i32 %rows, i32 %startStep, i32 %border) #0 {
entry:
  %iteration.addr = alloca i32, align 4
  %gpuWall.addr = alloca i32*, align 8
  %gpuSrc.addr = alloca i32*, align 8
  %gpuResults.addr = alloca i32*, align 8
  %cols.addr = alloca i32, align 4
  %rows.addr = alloca i32, align 4
  %startStep.addr = alloca i32, align 4
  %border.addr = alloca i32, align 4
  %grid_dim = alloca %struct.dim3, align 8
  %block_dim = alloca %struct.dim3, align 8
  %shmem_size = alloca i64, align 8
  %stream = alloca i8*, align 8
  %grid_dim.coerce = alloca { i64, i32 }, align 8
  %block_dim.coerce = alloca { i64, i32 }, align 8
  store i32 %iteration, i32* %iteration.addr, align 4
  store i32* %gpuWall, i32** %gpuWall.addr, align 8
  store i32* %gpuSrc, i32** %gpuSrc.addr, align 8
  store i32* %gpuResults, i32** %gpuResults.addr, align 8
  store i32 %cols, i32* %cols.addr, align 4
  store i32 %rows, i32* %rows.addr, align 4
  store i32 %startStep, i32* %startStep.addr, align 4
  store i32 %border, i32* %border.addr, align 4
  ; kernel_args[k] = address of the k-th argument's stack slot, k = 0..7.
  %kernel_args = alloca i8*, i64 8, align 16
  %0 = bitcast i32* %iteration.addr to i8*
  %1 = getelementptr i8*, i8** %kernel_args, i32 0
  store i8* %0, i8** %1
  %2 = bitcast i32** %gpuWall.addr to i8*
  %3 = getelementptr i8*, i8** %kernel_args, i32 1
  store i8* %2, i8** %3
  %4 = bitcast i32** %gpuSrc.addr to i8*
  %5 = getelementptr i8*, i8** %kernel_args, i32 2
  store i8* %4, i8** %5
  %6 = bitcast i32** %gpuResults.addr to i8*
  %7 = getelementptr i8*, i8** %kernel_args, i32 3
  store i8* %6, i8** %7
  %8 = bitcast i32* %cols.addr to i8*
  %9 = getelementptr i8*, i8** %kernel_args, i32 4
  store i8* %8, i8** %9
  %10 = bitcast i32* %rows.addr to i8*
  %11 = getelementptr i8*, i8** %kernel_args, i32 5
  store i8* %10, i8** %11
  %12 = bitcast i32* %startStep.addr to i8*
  %13 = getelementptr i8*, i8** %kernel_args, i32 6
  store i8* %12, i8** %13
  %14 = bitcast i32* %border.addr to i8*
  %15 = getelementptr i8*, i8** %kernel_args, i32 7
  store i8* %14, i8** %15
  ; Fetch the launch configuration into the local out-parameters.
  %16 = call i32 @__cudaPopCallConfiguration(%struct.dim3* %grid_dim, %struct.dim3* %block_dim, i64* %shmem_size, i8** %stream)
  %17 = load i64, i64* %shmem_size, align 8
  %18 = load i8*, i8** %stream, align 8
  ; Each 12-byte dim3 is copied into a { i64, i32 } temporary and then
  ; passed to cudaLaunchKernel as an (i64, i32) scalar pair.
  %19 = bitcast { i64, i32 }* %grid_dim.coerce to i8*
  %20 = bitcast %struct.dim3* %grid_dim to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %19, i8* align 8 %20, i64 12, i1 false)
  %21 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 0
  %22 = load i64, i64* %21, align 8
  %23 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 1
  %24 = load i32, i32* %23, align 8
  %25 = bitcast { i64, i32 }* %block_dim.coerce to i8*
  %26 = bitcast %struct.dim3* %block_dim to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %25, i8* align 8 %26, i64 12, i1 false)
  %27 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 0
  %28 = load i64, i64* %27, align 8
  %29 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 1
  %30 = load i32, i32* %29, align 8
  %31 = bitcast i8* %18 to %struct.CUstream_st*
  %call = call i32 @cudaLaunchKernel(i8* bitcast (void (i32, i32*, i32*, i32*, i32, i32, i32, i32)* @_Z14dynproc_kerneliPiS_S_iiii to i8*), i64 %22, i32 %24, i64 %28, i32 %30, i8** %kernel_args, i64 %17, %struct.CUstream_st* %31)
  br label %setup.end

setup.end:                                        ; preds = %entry
  ret void
}

declare dso_local i32 @__cudaPopCallConfiguration(%struct.dim3*, %struct.dim3*, i64*, i8**)

declare dso_local i32 @cudaLaunchKernel(i8*, i64, i32, i64, i32, i8**, i64, %struct.CUstream_st*)

; Function Attrs: argmemonly nounwind willreturn
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #7

; calc_path(int *gpuWall, int **gpuResult, int rows, int cols,
;           int pyramid_height, int blockCols, int borderCols) -> int
;
; Drives the kernel launches:
;   * dimBlock = dim3(256, 1, 1), dimGrid = dim3(blockCols, 1, 1)
;   * src = 1, dst = 0
;   * for (t = 0; t < rows - 1; t += pyramid_height):
;       - swap src and dst
;       - push the launch configuration (shared-memory size 0, null stream)
;       - if the push returned 0, call the launch stub with
;           iteration = (pyramid_height <= rows - t - 1) ? pyramid_height
;                                                        : rows - t - 1,
;           gpuWall, gpuResult[src], gpuResult[dst], cols, rows, t, borderCols
;       - call cudaDeviceSynchronize (its result is unused)
; Returns the final value of dst, i.e. the index into gpuResult that was the
; destination of the last iteration (0 if the loop body never ran).
; NOTE(review): t advances by pyramid_height only; if that value is <= 0
; while rows > 1 the loop condition stays true — confirm callers validate it.
; Function Attrs: noinline optnone uwtable
define dso_local i32 @_Z9calc_pathPiPS_iiiii(i32* %gpuWall, i32** %gpuResult, i32 %rows, i32 %cols, i32 %pyramid_height, i32 %blockCols, i32 %borderCols) #0 {
entry:
  %gpuWall.addr = alloca i32*, align 8
  %gpuResult.addr = alloca i32**, align 8
  %rows.addr = alloca i32, align 4
  %cols.addr = alloca i32, align 4
  %pyramid_height.addr = alloca i32, align 4
  %blockCols.addr = alloca i32, align 4
  %borderCols.addr = alloca i32, align 4
  %dimBlock = alloca %struct.dim3, align 4
  %dimGrid = alloca %struct.dim3, align 4
  %src = alloca i32, align 4
  %dst = alloca i32, align 4
  %t = alloca i32, align 4
  %temp = alloca i32, align 4
  %agg.tmp = alloca %struct.dim3, align 4
  %agg.tmp1 = alloca %struct.dim3, align 4
  %agg.tmp.coerce = alloca { i64, i32 }, align 4
  %agg.tmp1.coerce = alloca { i64, i32 }, align 4
  store i32* %gpuWall, i32** %gpuWall.addr, align 8
  store i32** %gpuResult, i32*** %gpuResult.addr, align 8
  store i32 %rows, i32* %rows.addr, align 4
  store i32 %cols, i32* %cols.addr, align 4
  store i32 %pyramid_height, i32* %pyramid_height.addr, align 4
  store i32 %blockCols, i32* %blockCols.addr, align 4
  store i32 %borderCols, i32* %borderCols.addr, align 4
  ; dimBlock = (256, 1, 1); dimGrid = (blockCols, 1, 1)
  call void @_ZN4dim3C2Ejjj(%struct.dim3* %dimBlock, i32 256, i32 1, i32 1)
  %0 = load i32, i32* %blockCols.addr, align 4
  call void @_ZN4dim3C2Ejjj(%struct.dim3* %dimGrid, i32 %0, i32 1, i32 1)
  store i32 1, i32* %src, align 4
  store i32 0, i32* %dst, align 4
  store i32 0, i32* %t, align 4
  br label %for.cond

; Loop test: t < rows - 1.
for.cond:                                         ; preds = %for.inc, %entry
  %1 = load i32, i32* %t, align 4
  %2 = load i32, i32* %rows.addr, align 4
  %sub = sub nsw i32 %2, 1
  %cmp = icmp slt i32 %1, %sub
  br i1 %cmp, label %for.body, label %for.end

for.body:                                         ; preds = %for.cond
  ; Swap src and dst through %temp.
  %3 = load i32, i32* %src, align 4
  store i32 %3, i32* %temp, align 4
  %4 = load i32, i32* %dst, align 4
  store i32 %4, i32* %src, align 4
  %5 = load i32, i32* %temp, align 4
  store i32 %5, i32* %dst, align 4
  ; Copy dimGrid/dimBlock into by-value temporaries, then into { i64, i32 }
  ; pairs that are passed as scalars to __cudaPushCallConfiguration.
  %6 = bitcast %struct.dim3* %agg.tmp to i8*
  %7 = bitcast %struct.dim3* %dimGrid to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %6, i8* align 4 %7, i64 12, i1 false)
  %8 = bitcast %struct.dim3* %agg.tmp1 to i8*
  %9 = bitcast %struct.dim3* %dimBlock to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %8, i8* align 4 %9, i64 12, i1 false)
  %10 = bitcast { i64, i32 }* %agg.tmp.coerce to i8*
  %11 = bitcast %struct.dim3* %agg.tmp to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %10, i8* align 4 %11, i64 12, i1 false)
  %12 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 0
  %13 = load i64, i64* %12, align 4
  %14 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 1
  %15 = load i32, i32* %14, align 4
  %16 = bitcast { i64, i32 }* %agg.tmp1.coerce to i8*
  %17 = bitcast %struct.dim3* %agg.tmp1 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %16, i8* align 4 %17, i64 12, i1 false)
  %18 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp1.coerce, i32 0, i32 0
  %19 = load i64, i64* %18, align 4
  %20 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp1.coerce, i32 0, i32 1
  %21 = load i32, i32* %20, align 4
  ; Grid first, block second, then shared-memory size 0 and a null stream.
  %call = call i32 @__cudaPushCallConfiguration(i64 %13, i32 %15, i64 %19, i32 %21, i64 0, i8* null)
  ; A non-zero result skips the launch and goes straight to the synchronize.
  %tobool = icmp ne i32 %call, 0
  br i1 %tobool, label %kcall.end, label %kcall.configok

kcall.configok:                                   ; preds = %for.body
  ; Choose the smaller of pyramid_height and (rows - t - 1).
  %22 = load i32, i32* %pyramid_height.addr, align 4
  %23 = load i32, i32* %rows.addr, align 4
  %24 = load i32, i32* %t, align 4
  %sub2 = sub nsw i32 %23, %24
  %sub3 = sub nsw i32 %sub2, 1
  %cmp4 = icmp sle i32 %22, %sub3
  br i1 %cmp4, label %cond.true, label %cond.false

cond.true:                                        ; preds = %kcall.configok
  %25 = load i32, i32* %pyramid_height.addr, align 4
  br label %cond.end

cond.false:                                       ; preds = %kcall.configok
  %26 = load i32, i32* %rows.addr, align 4
  %27 = load i32, i32* %t, align 4
  %sub5 = sub nsw i32 %26, %27
  %sub6 = sub nsw i32 %sub5, 1
  br label %cond.end

cond.end:                                         ; preds = %cond.false, %cond.true
  %cond = phi i32 [ %25, %cond.true ], [ %sub6, %cond.false ]
  %28 = load i32*, i32** %gpuWall.addr, align 8
  ; gpuResult[src] is passed as the stub's gpuSrc argument.
  %29 = load i32**, i32*** %gpuResult.addr, align 8
  %30 = load i32, i32* %src, align 4
  %idxprom = sext i32 %30 to i64
  %arrayidx = getelementptr inbounds i32*, i32** %29, i64 %idxprom
  %31 = load i32*, i32** %arrayidx, align 8
  ; gpuResult[dst] is passed as the stub's gpuResults argument.
  %32 = load i32**, i32*** %gpuResult.addr, align 8
  %33 = load i32, i32* %dst, align 4
  %idxprom7 = sext i32 %33 to i64
  %arrayidx8 = getelementptr inbounds i32*, i32** %32, i64 %idxprom7
  %34 = load i32*, i32** %arrayidx8, align 8
  %35 = load i32, i32* %cols.addr, align 4
  %36 = load i32, i32* %rows.addr, align 4
  %37 = load i32, i32* %t, align 4
  %38 = load i32, i32* %borderCols.addr, align 4
  call void @_Z14dynproc_kerneliPiS_S_iiii(i32 %cond, i32* %28, i32* %31, i32* %34, i32 %35, i32 %36, i32 %37, i32 %38)
  br label %kcall.end

kcall.end:                                        ; preds = %cond.end, %for.body
  %call9 = call i32 @cudaDeviceSynchronize()
  br label %for.inc

for.inc:                                          ; preds = %kcall.end
  ; t += pyramid_height
  %39 = load i32, i32* %pyramid_height.addr, align 4
  %40 = load i32, i32* %t, align 4
  %add = add nsw i32 %40, %39
  store i32 %add, i32* %t, align 4
  br label %for.cond

for.end:                                          ; preds = %for.cond
  %41 = load i32, i32* %dst, align 4
  ret i32 %41
}

; Function Attrs: noinline nounwind optnone uwtable
; dim3::dim3(unsigned vx, unsigned vy, unsigned vz)
;
; Stores vx, vy and vz into fields 0, 1 and 2 of the %struct.dim3 pointed to
; by %this (named %x, %y, %z below). Returns nothing.
define linkonce_odr dso_local void @_ZN4dim3C2Ejjj(%struct.dim3* %this, i32 %vx, i32 %vy, i32 %vz) unnamed_addr #8 comdat align 2 {
entry:
  %this.addr = alloca %struct.dim3*, align 8
  %vx.addr = alloca i32, align 4
  %vy.addr = alloca i32, align 4
  %vz.addr = alloca i32, align 4
  store %struct.dim3* %this, %struct.dim3** %this.addr, align 8
  store i32 %vx, i32* %vx.addr, align 4
  store i32 %vy, i32* %vy.addr, align 4
  store i32 %vz, i32* %vz.addr, align 4
  %this1 = load %struct.dim3*, %struct.dim3** %this.addr, align 8
  ; this->x = vx
  %x = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 0
  %0 = load i32, i32* %vx.addr, align 4
  store i32 %0, i32* %x, align 4
  ; this->y = vy
  %y = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 1
  %1 = load i32, i32* %vy.addr, align 4
  store i32 %1, i32* %y, align 4
  ; this->z = vz
  %z = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 2
  %2 = load i32, i32* %vz.addr, align 4
  store i32 %2, i32* %z, align 4
  ret void
}

declare dso_local i32 @__cudaPushCallConfiguration(i64, i32, i64, i32, i64, i8*) #2

declare dso_local i32 @cudaDeviceSynchronize() #2

; main(int argc, char **argv) -> int
;
; Calls cudaSetDevice(0) (the result %call is not checked), forwards argc and
; argv to run(int, char**) (@_Z3runiPPc), and always returns 0.
; Function Attrs: noinline norecurse optnone uwtable
define dso_local i32 @main(i32 %argc, i8** %argv) #9 {
entry:
  %retval = alloca i32, align 4
  %argc.addr = alloca i32, align 4
  %argv.addr = alloca i8**, align 8
  store i32 0, i32* %retval, align 4
  store i32 %argc, i32* %argc.addr, align 4
  store i8** %argv, i8*** %argv.addr, align 8
  %call = call i32 @cudaSetDevice(i32 0)
  %0 = load i32, i32* %argc.addr, align 4
  %1 = load i8**, i8*** %argv.addr, align 8
  call void @_Z3runiPPc(i32 %0, i8** %1)
  ret i32 0
}

declare dso_local i32 @cudaSetDevice(i32) #2

; Function Attrs: noinline optnone uwtable
define dso_local void @_Z3runiPPc(i32 %argc, i8** %argv) #0 {
entry:
  %argc.addr = alloca i32, align 4
  %argv.addr = alloca i8**, align 8
  %borderCols = alloca i32, align 4
  %smallBlockCol = alloca i32, align 4
  %blockCols = alloca i32, align 4
  %gpuWall = alloca i32*, align 8
  %gpuResult = alloca [2 x i32*],
align 16 %size = alloca i32, align 4 %final_ret = alloca i32, align 4 %i = alloca i32, align 4 %i32 = alloca i32, align 4 store i32 %argc, i32* %argc.addr, align 4 store i8** %argv, i8*** %argv.addr, align 8 %0 = load i32, i32* %argc.addr, align 4 %1 = load i8**, i8*** %argv.addr, align 8 call void @_Z4initiPPc(i32 %0, i8** %1) %2 = load i32, i32* @pyramid_height, align 4 %mul = mul nsw i32 %2, 1 store i32 %mul, i32* %borderCols, align 4 %3 = load i32, i32* @pyramid_height, align 4 %mul1 = mul nsw i32 %3, 1 %mul2 = mul nsw i32 %mul1, 2 %sub = sub nsw i32 256, %mul2 store i32 %sub, i32* %smallBlockCol, align 4 %4 = load i32, i32* @cols, align 4 %5 = load i32, i32* %smallBlockCol, align 4 %div = sdiv i32 %4, %5 %6 = load i32, i32* @cols, align 4 %7 = load i32, i32* %smallBlockCol, align 4 %rem = srem i32 %6, %7 %cmp = icmp eq i32 %rem, 0 %8 = zext i1 %cmp to i64 %cond = select i1 %cmp, i32 0, i32 1 %add = add nsw i32 %div, %cond store i32 %add, i32* %blockCols, align 4 %9 = load i32, i32* @pyramid_height, align 4 %10 = load i32, i32* @cols, align 4 %11 = load i32, i32* %borderCols, align 4 %12 = load i32, i32* %blockCols, align 4 %13 = load i32, i32* %smallBlockCol, align 4 %call = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([92 x i8], [92 x i8]* @.str.2, i64 0, i64 0), i32 %9, i32 %10, i32 %11, i32 256, i32 %12, i32 %13) %14 = load i32, i32* @rows, align 4 %15 = load i32, i32* @cols, align 4 %mul3 = mul nsw i32 %14, %15 store i32 %mul3, i32* %size, align 4 %arrayidx = getelementptr inbounds [2 x i32*], [2 x i32*]* %gpuResult, i64 0, i64 0 %16 = bitcast i32** %arrayidx to i8** %17 = load i32, i32* @cols, align 4 %conv = sext i32 %17 to i64 %mul4 = mul i64 4, %conv %call5 = call i32 @cudaMalloc(i8** %16, i64 %mul4) %arrayidx6 = getelementptr inbounds [2 x i32*], [2 x i32*]* %gpuResult, i64 0, i64 1 %18 = bitcast i32** %arrayidx6 to i8** %19 = load i32, i32* @cols, align 4 %conv7 = sext i32 %19 to i64 %mul8 = mul i64 4, %conv7 %call9 = call i32 @cudaMalloc(i8** %18, i64 %mul8) %arrayidx10 = getelementptr inbounds [2 x i32*], [2 x i32*]* %gpuResult, i64 0, i64 0 %20 = load i32*, i32** %arrayidx10, align 16 %21 = bitcast i32* %20 to i8* %22 = load i32*, i32** @data, align 8 %23 = bitcast i32* %22 to i8* %24 = load i32, i32* @cols, align 4 %conv11 = sext i32 %24 to i64 %mul12 = mul i64 4, %conv11 %call13 = call i32 @cudaMemcpy(i8* %21, i8* %23, i64 %mul12, i32 1) %25 = bitcast i32** %gpuWall to i8** %26 = load i32, i32* %size, align 4 %27 = load i32, i32* @cols, align 4 %sub14 = sub nsw i32 %26, %27 %conv15 = sext i32 %sub14 to i64 %mul16 = mul i64 4, %conv15 %call17 = call i32 @cudaMalloc(i8** %25, i64 %mul16) %28 = load i32*, i32** %gpuWall, align 8 %29 = bitcast i32* %28 to i8* %30 = load i32*, i32** @data, align 8 %31 = load i32, i32* @cols, align 4 %idx.ext = sext i32 %31 to i64 %add.ptr = getelementptr inbounds i32, i32* %30, i64 %idx.ext %32 = bitcast i32* %add.ptr to i8* %33 = load i32, i32* %size, align 4 %34 = load i32, i32* @cols, align 4 %sub18 = sub nsw i32 %33, %34 %conv19 = sext i32 %sub18 to i64 %mul20 = mul i64 4, %conv19 %call21 = call i32 @cudaMemcpy(i8* %29, i8* %32, i64 %mul20, i32 1) %35 = load i32*, i32** %gpuWall, align 8 
%arraydecay = getelementptr inbounds [2 x i32*], [2 x i32*]* %gpuResult, i64 0, i64 0 %36 = load i32, i32* @rows, align 4 %37 = load i32, i32* @cols, align 4 %38 = load i32, i32* @pyramid_height, align 4 %39 = load i32, i32* %blockCols, align 4 %40 = load i32, i32* %borderCols, align 4 %call22 = call i32 @_Z9calc_pathPiPS_iiiii(i32* %35, i32** %arraydecay, i32 %36, i32 %37, i32 %38, i32 %39, i32 %40) store i32 %call22, i32* %final_ret, align 4 %41 = load i32*, i32** @result, align 8 %42 = bitcast i32* %41 to i8* %43 = load i32, i32* %final_ret, align 4 %idxprom = sext i32 %43 to i64 %arrayidx23 = getelementptr inbounds [2 x i32*], [2 x i32*]* %gpuResult, i64 0, i64 %idxprom %44 = load i32*, i32** %arrayidx23, align 8 %45 = bitcast i32* %44 to i8* %46 = load i32, i32* @cols, align 4 %conv24 = sext i32 %46 to i64 %mul25 = mul i64 4, %conv24 %call26 = call i32 @cudaMemcpy(i8* %42, i8* %45, i64 %mul25, i32 2) store i32 0, i32* %i, align 4 br label %for.cond for.cond: ; preds = %for.inc, %entry %47 = load i32, i32* %i, align 4 %48 = load i32, i32* @cols, align 4 %cmp27 = icmp slt i32 %47, %48 br i1 %cmp27, label %for.body, label %for.end for.body: ; preds = %for.cond %49 = load i32*, i32** @data, align 8 %50 = load i32, i32* %i, align 4 %idxprom28 = sext i32 %50 to i64 %arrayidx29 = getelementptr inbounds i32, i32* %49, i64 %idxprom28 %51 = load i32, i32* %arrayidx29, align 4 %call30 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.3, i64 0, i64 0), i32 %51) br label %for.inc for.inc: ; preds = %for.body %52 = load i32, i32* %i, align 4 %inc = add nsw i32 %52, 1 store i32 %inc, i32* %i, align 4 br label %for.cond for.end: ; preds = %for.cond %call31 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.4, i64 0, i64 0)) store i32 0, i32* %i32, align 4 br label %for.cond33 for.cond33: ; preds = %for.inc39, %for.end %53 = load i32, i32* %i32, align 4 %54 = load i32, i32* @cols, align 4 %cmp34 = icmp slt i32 %53, %54 br i1 %cmp34, label %for.body35, label %for.end41 for.body35: ; preds = %for.cond33 %55 = load i32*, i32** @result, align 8 %56 = load i32, i32* %i32, align 4 %idxprom36 = sext i32 %56 to i64 %arrayidx37 = getelementptr inbounds i32, i32* %55, i64 %idxprom36 %57 = load i32, i32* %arrayidx37, align 4 %call38 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.3, i64 0, i64 0), i32 %57) br label %for.inc39 for.inc39: ; preds = %for.body35 %58 = load i32, i32* %i32, align 4 %inc40 = add nsw i32 %58, 1 store i32 %inc40, i32* %i32, align 4 br label %for.cond33 for.end41: ; preds = %for.cond33 %call42 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.4, i64 0, i64 0)) %59 = load i32*, i32** %gpuWall, align 8 %60 = bitcast i32* %59 to i8* %call43 = call i32 @cudaFree(i8* %60) %arrayidx44 = getelementptr inbounds [2 x i32*], [2 x i32*]* %gpuResult, i64 0, i64 0 %61 = load i32*, i32** %arrayidx44, align 16 %62 = bitcast i32* %61 to i8* %call45 = call i32 @cudaFree(i8* %62) %arrayidx46 = getelementptr inbounds [2 x i32*], [2 x i32*]* %gpuResult, i64 0, i64 1 %63 = load i32*, i32** %arrayidx46, align 8 %64 = bitcast i32* %63 to i8* %call47 = call i32 @cudaFree(i8* %64) %65 = load i32*, i32** @data, align 8 %isnull = icmp eq i32* %65, null br i1 %isnull, label %delete.end, label %delete.notnull delete.notnull: ; preds = %for.end41 %66 = bitcast i32* %65 to i8* call void @_ZdaPv(i8* %66) #15 br label %delete.end delete.end: ; preds = %delete.notnull, %for.end41 %67 = load i32**, i32*** @wall, align 8 %isnull48 = icmp eq i32** %67, null br i1 %isnull48, label %delete.end50, label %delete.notnull49 delete.notnull49: ; preds = %delete.end %68 
= bitcast i32** %67 to i8* call void @_ZdaPv(i8* %68) #15 br label %delete.end50 delete.end50: ; preds = %delete.notnull49, %delete.end %69 = load i32*, i32** @result, align 8 %isnull51 = icmp eq i32* %69, null br i1 %isnull51, label %delete.end53, label %delete.notnull52 delete.notnull52: ; preds = %delete.end50 %70 = bitcast i32* %69 to i8* call void @_ZdaPv(i8* %70) #15 br label %delete.end53 delete.end53: ; preds = %delete.notnull52, %delete.end50 ret void } declare dso_local i32 @cudaMalloc(i8**, i64) #2 declare dso_local i32 @cudaMemcpy(i8*, i8*, i64, i32) #2 declare dso_local i32 @cudaFree(i8*) #2 ; Function Attrs: nobuiltin nounwind declare dso_local void @_ZdaPv(i8*) #10 define internal void @__cuda_register_globals(i8** %0) { entry: %1 = call i32 @__cudaRegisterFunction(i8** %0, i8* bitcast (void (i32, i32*, i32*, i32*, i32, i32, i32, i32)* @_Z14dynproc_kerneliPiS_S_iiii to i8*), i8* getelementptr inbounds ([30 x i8], [30 x i8]* @0, i64 0, i64 0), i8* getelementptr inbounds ([30 x i8], [30 x i8]* @0, i64 0, i64 0), i32 -1, i8* null, i8* null, i8* null, i8* null, i32* null) ret void } declare dso_local i32 @__cudaRegisterFunction(i8**, i8*, i8*, i8*, i32, i8*, i8*, i8*, i8*, i32*) declare dso_local i32 @__cudaRegisterVar(i8**, i8*, i8*, i8*, i32, i32, i32, i32) declare dso_local i8** @__cudaRegisterFatBinary(i8*) define internal void @__cuda_module_ctor(i8* %0) { entry: %1 = call i8** @__cudaRegisterFatBinary(i8* bitcast ({ i32, i32, i8*, i8* }* @__cuda_fatbin_wrapper to i8*)) store i8** %1, i8*** @__cuda_gpubin_handle, align 8 call void @__cuda_register_globals(i8** %1) call void @__cudaRegisterFatBinaryEnd(i8** %1) %2 = call i32 @atexit(void (i8*)* @__cuda_module_dtor) ret void } declare dso_local void @__cudaRegisterFatBinaryEnd(i8**) declare dso_local void @__cudaUnregisterFatBinary(i8**) define internal void @__cuda_module_dtor(i8* %0) { entry: %1 = load i8**, i8*** @__cuda_gpubin_handle, align 8 call void @__cudaUnregisterFatBinary(i8** %1) ret void 
} declare dso_local i32 @atexit(void (i8*)*) attributes #0 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #3 = { noreturn nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #4 = { nounwind readnone speculatable willreturn } attributes #5 = { nobuiltin "correctly-rounded-divide-sqrt-fp-math"="false" 
"disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #6 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #7 = { argmemonly nounwind willreturn } attributes #8 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #9 = { noinline norecurse optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #10 = { nobuiltin nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" 
"less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #11 = { nounwind readonly } attributes #12 = { noreturn nounwind } attributes #13 = { builtin } attributes #14 = { nounwind } attributes #15 = { builtin nounwind } !llvm.module.flags = !{!0, !1} !llvm.ident = !{!2} !0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]} !1 = !{i32 1, !"wchar_size", i32 4} !2 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"}