; ModuleID = 'lud_kernel-host-x86_64-unknown-linux-gnu.bc' source_filename = "cuda/lud_kernel.cu" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" %struct.dim3 = type { i32, i32, i32 } %struct.CUstream_st = type opaque $_ZN4dim3C2Ejjj = comdat any @0 = private unnamed_addr constant [21 x i8] c"_Z12lud_diagonalPfii\00", align 1 @1 = private unnamed_addr constant [22 x i8] c"_Z13lud_perimeterPfii\00", align 1 @2 = private unnamed_addr constant [21 x i8] c"_Z12lud_internalPfii\00", align 1 @3 = private constant [51057 x i8] c"P\EDU\BA\01\00\10\00`\C7\00\00\00\00\00\00\02\00\01\01@\00\00\00\E8\AE\00\00\00\00\00\00\00\00\00\00\00\00\00\00\07\00\01\002\00\00\00\00\00\00\00\00\00\00\00\11\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\7FELF\02\01\013\07\00\00\00\00\00\00\00\02\00\BE\00e\00\00\00\00\00\00\00\00\00\00\00@\AE\00\00\00\00\00\00\C0\A9\00\00\00\00\00\002\052\00@\008\00\03\00@\00\12\00\01\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00.text._Z12lud_internalPfii\00.nv.info._Z12lud_internalPfii\00.nv.shared._Z12lud_internalPfii\00.nv.global\00.nv.constant0._Z12lud_internalPfii\00.text._Z13lud_perimeterPfii\00.nv.info._Z13lud_perimeterPfii\00.nv.shared._Z13lud_perimeterPfii\00.nv.constant0._Z13lud_perimeterPfii\00.text._Z12lud_diagonalPfii\00.nv.info._Z12lud_diagonalPfii\00.nv.shared._Z12lud_diagonalPfii\00.nv.constant0._Z12lud_diagonalPfii\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00_Z12lud_internalPfii\00.text._Z12lud_internalPfii\00.nv.info._Z12lud_internalPfii\00.nv.shared._Z12lud_internalPfii\00.nv.global\00threadIdx\00blockIdx\00$___ZZ12lud_internalPfiiE8peri_row__905\00$___ZZ12lud_internalPfiiE8peri_col__907\00.nv.constant0._Z12lud_internalPfii\00_param\00_Z13lud_perimeterPfii\00.text._Z13lud_perimeterPfii\00.nv.info._Z13lud_perimeterPfii\00.nv.shared._Z13lud_perimeterPfii\00$_Z13lud_perimeterPfii$__cuda_sm3x_div_rn
_noftz_f32\00$_Z13lud_perimeterPfii$__cuda_sm3x_div_rn_noftz_f32_slowpath\00$___ZZ13lud_perimeterPfiiE3dia__430\00$___ZZ13lud_perimeterPfiiE8peri_row__432\00$___ZZ13lud_perimeterPfiiE8peri_col__434\00.nv.constant0._Z13lud_perimeterPfii\00_Z12lud_diagonalPfii\00.text._Z12lud_diagonalPfii\00.nv.info._Z12lud_diagonalPfii\00.nv.shared._Z12lud_diagonalPfii\00$_Z12lud_diagonalPfii$__cuda_sm3x_div_rn_noftz_f32\00$_Z12lud_diagonalPfii$__cuda_sm3x_div_rn_noftz_f32_slowpath\00$___ZZ12lud_diagonalPfiiE6shadow__186\00.nv.constant0._Z12lud_diagonalPfii\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00G\00\00\00\03\00\0B\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\80\00\00\00\03\00\0E\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\A0\00\00\00\03\00\0F\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\AB\00\00\00\01\00\0F\00\00\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\B5\00\00\00\01\00\0F\00\01\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\0E\01\00\00\03\00\08\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00N\01\00\00\03\00\0C\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\89\01\00\00\03\00\10\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\AA\01\00\00\22\00\0C\00PN\00\00\00\00\00\00`\01\00\00\00\00\00\00\DE\01\00\00\22\00\0C\00\B0O\00\00\00\00\00\00P\08\00\00\00\00\00\00\91\02\00\00\03\00\09\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\CA\02\00\00\03\00\0D\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\03\03\00\00\03\00\11\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00#\03\00\00\22\00\0D\00\E0$\00\00\00\00\00\00`\01\00\00\00\00\00\00V\03\00\00\22\00\0D\00@&\00\00\00\00\00\00@\08\00\00\00\00\00\00\B8\03\00\00\03\00\0A\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\002\00\00\00\12\10\0B\00\00\00\00\00\00\00\00\00@\15\00\00\00\00\00\008\01\00\00\12\10\0C\00\00\00\00\00\00\00\00\00\00X\00\00\00\00\00\00\B5\02\00\00\12\10\0D\00\00\00\00\00\00\00\00\00\80.\00\00\00\00\00\00\04/\08\00\13\00\00\00\11\00\00\00\04#\08\00\0F\00
\00\00\00\00\00\00\04\12\08\00\0F\00\00\00\00\00\00\00\04\11\08\00\0F\00\00\00\00\00\00\00\04#\08\00\0E\00\00\00\00\00\00\00\04\12\08\00\0E\00\00\00\00\00\00\00\04\11\08\00\0E\00\00\00\00\00\00\00\04#\08\00\13\00\00\00\00\00\00\00\04\12\08\00\13\00\00\00 \00\00\00\04\11\08\00\13\00\00\00 \00\00\00\04/\08\00\12\00\00\00\11\00\00\00\04#\08\00\0A\00\00\00\00\00\00\00\04\12\08\00\0A\00\00\00\00\00\00\00\04\11\08\00\0A\00\00\00\00\00\00\00\04#\08\00\09\00\00\00\00\00\00\00\04\12\08\00\09\00\00\00\00\00\00\00\04\11\08\00\09\00\00\00\00\00\00\00\04#\08\00\12\00\00\00\00\00\00\00\04\12\08\00\12\00\00\00 \00\00\00\04\11\08\00\12\00\00\00 \00\00\00\04/\08\00\11\00\00\00\0E\00\00\00\04#\08\00\11\00\00\00\00\00\00\00\04\12\08\00\11\00\00\00 \00\00\00\04\11\08\00\11\00\00\00 \00\00\00\010\00\00\01*\00\00\04\0A\08\00\06\00\00\00@\01\10\00\03\19\10\00\04\17\0C\00\00\00\00\00\02\00\0C\00\00\F0\11\00\04\17\0C\00\00\00\00\00\01\00\08\00\00\F0\11\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0!\00\03\1B\FF\00\04\1D\08\00\08\03\00\00\F8\03\00\00\04\1C\04\000\15\00\00\04\1E\04\00 
\00\00\00\010\00\00\01*\00\00\04\0A\08\00\0B\00\00\00@\01\10\00\03\19\10\00\04\17\0C\00\00\00\00\00\02\00\0C\00\00\F0\11\00\04\17\0C\00\00\00\00\00\01\00\08\00\00\F0\11\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0!\00\03\1B\FF\00\04\1D\10\00\B8\0E\00\00(\1D\00\00XB\00\00(F\00\00\04\1C\04\00HN\00\00\04\1E\04\00\90\00\00\00\010\00\00\01*\00\00\04\0A\08\00\10\00\00\00@\01\10\00\03\19\10\00\04\17\0C\00\00\00\00\00\02\00\0C\00\00\F0\11\00\04\17\0C\00\00\00\00\00\01\00\08\00\00\F0\11\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0!\00\03\1B\FF\00\04\1C\04\00\D8$\00\00\04\1E\04\00\90\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\
00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\EF\1F\E0\FD\03!\00\D3rd<3>;\0A\0Amov.u2\00\1B,e\00b;\0Acvta\8D\00\04%\00\13,\\\00\22ld\C8\00\02\18\00nrd2, [\CE\00\1E])\00\1F1)\00\01a0];\0Ast#\00\81[%SP+0],,\00\0A\16\00\128\16\00\222;\B6\00\01\D8\00a1, 
999(\00\02g\00\00\EF\00\18[\9E\01\03M\00\AF1;\0Aret;\0A\0A}\D6\01\1A\FE\02FuncGetAttributes\E1\01\0D#\00\0E\EC\01\0F+\00\06\0F\F7\01\1B\1F1\F7\01Q\1F1\F7\01!\0E\D9\00\0F\02\02\0F\0E8\01\0F\0D\02\8DhDevice\B4\00\0E\0E\02\0E$\00\0F\0F\02\00/32,\00\0B\1F1,\00\18\1F2<\02\13\1F2<\02\1F\1D4<\02\1F2<\02\0C\1F2<\02\13\01_\00\04;\02\0F\D9\00\07\1D]4\00\1F14\00\06\0Fp\02\10\0E\9A\01\0Fq\02\12(32q\02\0B\15\00!12\16\00\09\86\02\1F3\86\02\15\1F3\86\02#2Get\CB\00\0E}\02\05\1B\00\04\DA\00\0F\1C\02\13?3[8W\04.\0F\1B\02\0D\1F3W\04\19\04\B3\01\0D\D0\00\0F\AA\01\06\0F\05\04W\F0\04OccupancyMaxActiveB5\08\FE\03sPerMultiprocessor\9F\01\0F;\00\16\0EB\06\0FC\00%\0EJ\04\0FC\00\1E\1F2\86\00/\1F3\88\02\13O4[32\89\02\1C\1D3\89\02\1F4\89\02\0C\1F4\89\02\19\133\89\02\0F\F1\00\1E\0F\BC\04\00\0FK\00$/2]w\07\00\0FL\00$\0F\1F\05\01\0F\98\00%\0F\A7\07\1D\097\05\186M\05\04,\00\2224-\00\183\CF\03\1F2\CF\03\15\1F2\CF\03L\9FWithFlags\D8\03(\05D\00\0E\E1\03\0FL\00'\0F\EA\030\0CL\00\1F2\98\008\1F3\98\008\1F4H\04\13O5[40\EC\08.\0FH\04\0D\1F5\EC\08\1C\0F\F9\00+\1F]\9C\040\0D\9A\01\0F\A5\040\0D:\02\0F\AE\041\0D\DB\02\0F\B7\041\0D|\03\0F\C0\04I\08-\00\1F3$\0A6\F6\14visible .entry _Z12lud_diagonalPfii\9D\04\00\97\00\0F\22\00\01\0E{\04\00\93\00\0F*\00\08\1F1*\00\0F\0F\8B\0C\1B\1F6\E7\07\18\95pred %p<8\A0\03\10fA\01[f<16>\C3\03-56\C4\03 67\C5\03P\09.shaH\00\03\93\00\124\93\00\1FZ\C9\00\00\C0E6shadow[102t\03\0F\00\04\08\1F6\00\04\1C\0F8\01\01\0F\EA\0C\0B\0F\94\01\01\0F\12\03\0C\0F\F1\01\09\13]\B1\00#to\FB\12\07\A0\03\02\F9\02\01\F8\0F\0A\1C\00\183\FF\02\0B+\03\1F3!\0D\1A&ld\FC\02\04\1B\00\07\16\00$4,F\00\A1;\0Amul.lo.s\19\00\225,4\00\83%r4;\0Aadd\17\00%6,\1C\00\0Ar\00\03\86\03\186o\03O7, 0\C2\03\02\F2\047;\0Abra.uni LBB6_1;\0A\08\00\16:\9A\00%8,3\00\92;\0Asetp.gt\85\002p1, \00\C215;\0A@%p1 braI\00\1B4Y\00\132Y\00\122Y\00\02]\01455,Q\01\08\0A\01\04q\00\06\82\02\02\18\00\989, 
%tid.x\08\01350,4\00\00\22\00\01\C4\01\00_\00\034\00\22d5(\01\00\FE\00$hl\18\03457,\1C\00\132L\00\03\19\00$8,\98\00\01'\00\01\96\00\02q\03\111\AC\00\00#\00\02\AC\00\13s`\00\199\1E\01\06d\00460, \00\176H\03_rd61,|\03\0B\03\88\02\02\B4\03\056\00\02\A5\02)61\B0\00863,\1D\00\1D0\FF\00$64\18\01\08\9B\00$5,\1C\00\0A\FF\00866,V\00\1153\02\11f3\02\00\1D\00!],\08\01\07\AB\01'51\B6\02\06\16\00\182\C1\01\07\AB\01#3,\1E\00\00:\00\0F\B6\02\02+538\02\1338\02\173\91\02(54t\01\07`\00#5,\1E\00\1F1\ED\02\02/55\EE\02\04\D84:\0Abar.sync 0s\02\1F98\03\05\1B9J\00\135J\00\175\A7\00/109\03\07#2,!\00!14:\03\162:\03+20\\\00\136\\\00(6:\17\0C\166\0D\03\05y\01-27r\00\22le\1C\004p4,6\001r27t\00\164t\00+12t\00\137t\00\197t\00\1D8\0F\01\132|\0C\1B8@\00\138@\00\178\10\01\142+\03\192\EF\03\1F3'\01\00\13e'\01#5,8\00\00'\00\01\B5\00\175\B5\00\1B1u\00\139u\00\189\B5\00&45)\01\0B\1D\03\114\BF\04)45\9F\03/42\9F\03!\1243\03)42i\03444,h\00\196i\03\01\95\00\066\00\184R\04(46'\01\08N\00\02\B6\04\1D4\B6\04\02\FD\04\05U\00\09\B6\04\120\B6\04+48L\00\159L\00\0A\9A\00)50\9A\00\199\EC\04\1F1\EC\04\04\02\D3\03\01 \00\0BP\05'3,U\00(52\9A\00\131P\05*3]3\00\184\CD\00\0A3\00\1323\00\00W\044neg\17\00!3,\EA\00\84;\0Afma.rn\17\00&4,\1D\00\111\06\00\192\D1\04\2254\D1\04\0C\AB\06$10\05\07\170\BE\02\0A\97\01\06u\04\02\96\01\00\1E\00\1F1\1B\03\02/47\1B\03\04711:\\\01/16\\\01\04417, \00\0AH\06/18\A9\02!\121\F9\01)18u\01820,\1D\00\197\82\00%21\82\00\0Ax\02722,=\00'21\AB\01\03\AA\01)22\C2\07/31\8C\03\06\112*\02*31z\00$4,\1C\00\0A\A6\02(25\B0\00*24\1D\00'6,$\00\09\97\00\133\97\00\00d\016div-\02\224,\1E\00*%f$\02\122\F5\06\0D#\02\04\CF\08/12;\06\05/32\A1\05\00/33\A1\05\07$6,6\001r33\EC\04\176\EC\04\0Ca\05\141\19\07(13\EE\04/34\A3\05\05\1D3\C4\00\144B\00\174\E7\02/35\A5\05\03\1F6s\07\03337,\1E\00\1C1\BC\05#7,O\00\00&\00\01\D0\00\177\D0\00\0Dy\0A\04B\07\181C\07/40w\00\03\03\BA\05\00@\07\02J\024s64J\03\122\91\03\191-\03\1F2-\03\22\132`\02\198\96\02\00\B1\06\03h\00\0A\96\02\01\DF\02\056\00(30<\05/32\D6\05\04\00\13\02\03 
\00\0A^\03\00\CB\01\06U\00'33\C7\02\03\8B\0A+34K\00\155K\00\0B\99\00\196\99\00\195V\0B\1F2\94\03\06\02\DD\01\0B\D4\06438,\1C\00\0B\B0\00'9,l\00'38\B0\00\136\B0\00\1A9\EB\05)40\E2\00\082\00\137\B7\06\190\EA\05\00{\00+f5\E8\05$9,\1A\00\00z\00)f7\C0\03\114\FD\02\1Cf\AF\09\141T\09(16n\02\1F3\E3\05\04\02E\0B\1F3\E3\05\06\0F[\03\07+7:\1A\00\05\FE\05\1A89\04\0A@\00\04\CA\08(19\9B\03\1F8\84\03\04\02 \09\1F8\F7\0A\05\1F3\AD\0A\05\182\E0\06(11\97\0E\06^\00\00u\07\02\1E\00'1;.\00\1F3\AF\0E\05\01w\07\046\00\193J\00&5, \00\0F\FF\0B\03\191\9B\02/16\D3\00\05,16_\0E\04\CB\0B\182\BA\0E/17\82\0B\07#3,!\00\03\BC\0E\173\82\0B\0D\BD\0E\05\BE\0E\182\B4\07\1F4\0F\09\05\02\C2\08\0D\F9\0D\0F\83\04!\02\FE\07\0A\CF\03&8,\1A\00\0A1\01\1F8\CC\03\06\01$\02\1A1\C9\04\00\1D\0A\02\1B\00\0A\CA\03\01.\09\04g\00(10\82\09\02\97\03\131\7F\11\04T\08\1F2\C7\0F\01/19\06\0E\03\02/\08\151\91\00\0B\A9\00\01\B1\09:r20\AA\00$4,\1C\00\0B\AB\00$5,\82\00\01'\00\09\FD\03\2215\E1\09\08\22\0D\0F\B1\0E\02/22\AB\00\04#3,\1E\00\00:\00\0F\B2\02\02\1C2\B1\0E$23,\02\08\B3\0E\1F2\B3\0E\04\02\A5\08\1F2\B3\0E\06/25\E9\02\06/4:\1A\15\0A\103g\02\9Eperimeter\1B\15\0E#\00\0F\1C\15\05\0F+\00\0A\1F1+\00\0C\0F\1E\15\1F\1F7\1E\15 ,14\1F\15,20\1F\15=135 \15?122!\15\0B\0E\CD\00TE3dia\1F\15\0F:\00\1B\108\0F\00O_row?\00)?col\9D\15\10\1F7\9D\15\1F\0E\B8\01\0F\9E\15\0E\0E\16\02\0F\9F\15\0F\0Eu\02\0F\A0\15t\09\9D\18\04F\05\044\06\16u\EF\14\1E3\EF\14:7_1\C3\07\137[\1297_1\9E\0B\07\C2\14\09L\04\158\A9\07\0E\FB\0A\0B'\16\1F1y\07\05$42\14\0B\02\FE\0A\07\C0\08'3, \00\0F\C7\04\02\194t\1E/44\F8\12\05\0B\EB\08\137\D8\1587_2H\09\1F5w\07\07#4,!\00\0B\84\12;7_5Z\00\133Z\00(3:k\06/10l\06\02(27\C2\05\07\18\00\05L\16\1B8\D6\08$9,8\00\01'\00\0DG\0C\121\06\07*12<\15\141k\06.117\07%3,\A4\00\02+\00\08;\07\149<\07\193A\16/11G\08\05\141\DD\06\1D1K\08\101\1B\09\0F\A0\04\08\0F\FB\0F\03\04K\10+16\B8\00*8, 
\00\1F5\0E\01\00\030\01\0BB\08\049\10\1E1E\08\047\10\04^\00*20\9D\07\2221\9E\07\199W\0A\1F0\A0\07\01?131\A1\07\03\1210\0F\2213\AC\01\1F3\DD\02\03;132\\\02\134\\\02\08\FA\0E?133\A4\07\03\121\8A\0D\01 \00\0F\8F\0A\04\1F3\17\03\05\1E5B\12\0D\D2\03\1F7\D2\03\06\148[\12\02G\12\07\D2\03'9, \00\0F\D2\03\03\189\D2\03/50\D2\03\05\1C5\BA\04\136\BB\00.6:\E8\17\0F\D2\03\01\02u\17\151\16\05\165\D3\03\1B9[\00\137[\00\187\D3\03/98\D2\03\02/15\06\1A\03\01\FB\02C%ctaG\05#hl\E1\07\03\BF\02\01 \00\09*\1B\03\82\02\05 \00\195g\00\1F9!\04\05(0,<\00\1A9\1D\00$1,$\00.16l\0B\03,\03\1B1n\0B\03o\0B\0E+\03401,\0A\01\02)\00\088\04\1388\04*018\04/028\04\05503,\22\00\0B8\04?04,\9E\08\0E\0F=\04\03\130\8D\04+04\BC\00*6, \00\1E3=\04%07E\01\09\A8\00\04\F0\00\1E7\12\01*9,^\00\1A8=\04\2209=\04\198\ED\05\1F2=\04\02/23=\04\04\02_\14\121\E6\0B?122=\04\03\1C2\DC\03\138\C6\02\08\DC\18?125=\04\04\02\8E\14\01 \00\0F=\04\04/26\82\03\04+9:T\08\149U\08\190\E2\18\0F\17\1D\02\12,\1A\00?-16l\08\02\08\05\03\1F6\FA\0F\03\227,\1C\00\08d\01\1F8\AC\04\05$9,2\00\198\FF\00$0,\1D\00\0F\A7\04\03*10\DD\03\01o\00\0F&\01\01\1C1a\09\05b\09\09\F2\0F\1F2t\1B\0D\152\A9\04\172\BF\09\0C\E4\01$12^\00\09\B1\1E/16\AC\04\01/31}\08\02/32[\04\03\02\F2\13\02\B3\06.32.\03\147\03\16\08,\03\03+\03\0E*\03\159\9D\17\03\D7\17\0C\84\17)198\04/207\04\04%21\CE\17\0A5\04/22l\08\1F\132\98\0B\0B1\18(4,\1D\00\0F\BC\15\00\145\15\01\08\99\00\06\D0\17\0BO\00(7,V\00\1A6\BA\17\117\22\04\09y!\1F4 \04\01/35\1F\04\03'36\C1\16\1F4\BD\02\02\1C3\BA\03$137\02\08\FD\0F/37\B9\13\04#8,\1E\00\0F\B9\13\04\1F8\F4\02\06/4:\8C\13\00*12*\07\1F4)\07\04\05\88\13\0A'\07\02\A9\04\1F5\B9\13\00$7,\8E\13\186Y\05\0F\0D\04\06\03-\0A\147\E3\11\0F\FD\11\04\0F\B9\22\03\192\12\04/21\B9\08\05\1D2\12\04\145\1E\01.5:\DC\11\0F\04\14\06.22\04\14=7_1|\01\04\96\05(16\12\04\1F4\11\04\01/23\11\04\02/24\11\04\03\03\05\12\02m\12\1D4\15\03\145\09\12\08\C3!\02\FD\13\0C\12\03\147z\14\024\14\0C\B8\13\197\0A\04\1F8\09\04\04\149\FE\13\0C<\08\1F,\9A\10\0E\0Fx\0C\04\05I\14\09 
\0C\17,\1D\00\1E9\07\05\05\0F\01\0F\08\14B\1F6\08\14\02\1F7\08\14\04#8,\1E\00\00:\00\0F\B6\02\03\0D0\02\04\ED\0A.17\04!\0E\0B\04\140\EA \0F\0B\04\04\1F0\ED\02\06\1F8&\08\06\1F9\9C\1C\05/52\E2\10\05\02.%\06\12\03\166\12\03\1C2\CE\0B$20\FC\0F\09\8F\08/64\E5\10\0A(64\8A\00\1F6\17\09\06,65o\00\04\FD\04\09\E3\17/66\DF\03\07\02w\08\156\CD\00\179\CD\00\0C\AF\01\152\C9\10\182\CD\00/10\F4\22\06-10D\00\04\FE\05\09\FB\15/10\F8\22\03/11\F9\22\08\02\FE\02\03\07\0B2110p\12\07\A0\04\0D5\0A$24}\00(4:\E5\03\0F\B4!\05482, \00\0A\E8\03/83\EF\07\1F\128\98\03*83\A8\18(5,\1D\00\192}\04\0F\E6\22\05\128\C4\04\1D8n 888,U\00\09\E6\22\03\D8\1F\1B8\E6\22\138\CB\04\0D\02\03\02\15\03\00\1E\00\09>\02\02\C6\02\03\1D\00\190\1A\00#4, \00\09\D5\15/75\EC\04\05\02\18\03\037\00\195K\00&7, \00\0F\83\1F\03)77\C5\00\1F8\908\06\0CT\11$45d\01\08;\0E/79\EC\04\08#1,\22\00\04\EC\04\07\A8#\1D4`\00\041\07(46\EC\04/57\82\0A\05\03\228\1D7\1A\0B\1F9I\08$\05)8\0A\8B7(1,\1D\00\09\BD\06\1F0\EC\04\08\02\0B8\1A8?)\07'8\0B\F17(4,m\00\1F3\A03\00)64\EC\04\1F6\B39\02\1F8\A0!\04\02e\12$81\94\00\0C\AD\00\013\02*82\AD\00\03\F3*\1D6\AD\00(8,\BE8\0A\E9-\2268\85\04\08\B8\07\1F8\84\04\02/84\AC\00\04#5,\1E\00\00:\00\0F\C6\02\02\1D8\7F\04\04\\\10\184\\\10\1F8~\04\04\02\DB\12\1F8|\04\06/87\FD\02\06\1F8{\04\06/9:!?\0F_inter!?\0A\08\22\00\0F!?\0B\0F*\00\00\1F1*\00\08\0F!?\22\1F8\03* \1D2!?\0D\02*\1F4\E5B\00\1F3!?\10\08\C9\00\0E\C5)\0F>\00 \0F\C4)\13\1F8a?$\0Ex\01\0Fa?\0D\0E\D4\01\0Fa?\0E\0E1\02\0Fa?\84\0A(!\02:\07\18y:\07\04*!\0F^?\07\0E\13!/16\E6\06\03\0E*!\0C\85\00\1F9\94\1D\04\04(!\0B\BA$\02\93\08\0EW!\06\951\0FL\08\03\04\8D\09\0E\A5\1C\0F7\1E\13\12t&\01\0D\BD1\02\97\18\184G\00\1F6\06\1E\06\177Q\1E\0E9\1E\0E\95)\0C7\1E\07\06\1E\1F0S\09\01&21M\1E\1E2\FC>\155\B7/\0F8\1D1\0B]\00\148\09\01\08V\18\1E,4\04\0F[%\12\04\03\1D\09h*\07\86\1D\0F7\1D\00\04x)\0F@1W\0E\02#\0FA1\1B\096\02\1F2\EFB\06\0C\0D\1F\0E\B3\1D\0B\A7$\03\97\1D\145\9D\1D\0F\B7\1D\04\0E 
\02$17\B9\1D\0Fi#8\088#.0,\06\06\0F\11\0A\10\05\89#\0Fm;\02\04V+\0A\1D\00\04o#\0F\C7\01\01$23\0A#\0F\98\11\03/29\D7?\06\1F9z!\03\0A\0A\1E\138\ED.\1986D/307D\0C.30I/\1B88D\138I+)8_\96\1D&40\A3\04\0CH<\148\E6.\08\8A\01\1F9\8A\01#/30\AF\14\02\181/\15\0F\E5\14\00\04\D78\09\9A\14\0F\D3\16\05\04\E4\14\0D\9B\03)35\FD\14\1F4P9\00\1F5\E5\14\03\1E3\E5\14\0F\9A\04\0E\0F\E4\14*\07\9A\01\0F\E2=\07\06\9A0\08i\18\03\FF\17\1D0\B4\18\182,\15/41\1C:\00,2]\B4\17\04<\17\0E :\1F6\A2\17\02\05\EA\02\1Cf\D2\02\133w\02\08\C9\1F/42\8D<\04\05_1\0F\D7\0B\03/43/\03\04+4:\D0\16\08\EA\1A\062'\0Fq(\0D\0Ft>\00\1Fyp(\09\0E\10'\0D\85\05\03\F6&\143\FC&\0E\08>/28\08>\01\027\00\09\12\02\1F3\0E\11\02\03\B4:\147\E7&\0C\C6\03\145\103\0F\08(#\0CX\17\102\C2\07?subX\17\0F\037(\B05;\0Aret;\0A\0A}\0A\00\00\00\00\00", section ".nv_fatbin", align 8 @__cuda_fatbin_wrapper = internal constant { i32, i32, i8*, i8* } { i32 1180844977, i32 1, i8* getelementptr inbounds ([51057 x i8], [51057 x i8]* @3, i64 0, i64 0), i8* null }, section ".nvFatBinSegment", align 8 @__cuda_gpubin_handle = internal global i8** null, align 8 @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* bitcast (void (i8*)* @__cuda_module_ctor to void ()*), i8* null }] ; Function Attrs: noinline optnone uwtable define dso_local void @_Z12lud_diagonalPfii(float* %m, i32 %matrix_dim, i32 %offset) #0 { entry: %m.addr = alloca float*, align 8 %matrix_dim.addr = alloca i32, align 4 %offset.addr = alloca i32, align 4 %grid_dim = alloca %struct.dim3, align 8 %block_dim = alloca %struct.dim3, align 8 %shmem_size = alloca i64, align 8 %stream = alloca i8*, align 8 %grid_dim.coerce = alloca { i64, i32 }, align 8 %block_dim.coerce = alloca { i64, i32 }, align 8 store float* %m, float** %m.addr, align 8 store i32 %matrix_dim, i32* %matrix_dim.addr, align 4 store i32 %offset, i32* %offset.addr, align 4 %kernel_args = alloca i8*, i64 3, align 16 %0 = bitcast float** %m.addr to i8* %1 = getelementptr 
i8*, i8** %kernel_args, i32 0 store i8* %0, i8** %1 %2 = bitcast i32* %matrix_dim.addr to i8* %3 = getelementptr i8*, i8** %kernel_args, i32 1 store i8* %2, i8** %3 %4 = bitcast i32* %offset.addr to i8* %5 = getelementptr i8*, i8** %kernel_args, i32 2 store i8* %4, i8** %5 %6 = call i32 @__cudaPopCallConfiguration(%struct.dim3* %grid_dim, %struct.dim3* %block_dim, i64* %shmem_size, i8** %stream) %7 = load i64, i64* %shmem_size, align 8 %8 = load i8*, i8** %stream, align 8 %9 = bitcast { i64, i32 }* %grid_dim.coerce to i8* %10 = bitcast %struct.dim3* %grid_dim to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %9, i8* align 8 %10, i64 12, i1 false) %11 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 0 %12 = load i64, i64* %11, align 8 %13 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 1 %14 = load i32, i32* %13, align 8 %15 = bitcast { i64, i32 }* %block_dim.coerce to i8* %16 = bitcast %struct.dim3* %block_dim to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %15, i8* align 8 %16, i64 12, i1 false) %17 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 0 %18 = load i64, i64* %17, align 8 %19 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 1 %20 = load i32, i32* %19, align 8 %21 = bitcast i8* %8 to %struct.CUstream_st* %call = call i32 @cudaLaunchKernel(i8* bitcast (void (float*, i32, i32)* @_Z12lud_diagonalPfii to i8*), i64 %12, i32 %14, i64 %18, i32 %20, i8** %kernel_args, i64 %7, %struct.CUstream_st* %21) br label %setup.end setup.end: ; preds = %entry ret void } declare dso_local i32 @__cudaPopCallConfiguration(%struct.dim3*, %struct.dim3*, i64*, i8**) declare dso_local i32 @cudaLaunchKernel(i8*, i64, i32, i64, i32, i8**, i64, %struct.CUstream_st*) ; Function Attrs: argmemonly nounwind willreturn declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1 
; Function Attrs: noinline optnone uwtable define dso_local void @_Z13lud_perimeterPfii(float* %m, i32 %matrix_dim, i32 %offset) #0 { entry: %m.addr = alloca float*, align 8 %matrix_dim.addr = alloca i32, align 4 %offset.addr = alloca i32, align 4 %grid_dim = alloca %struct.dim3, align 8 %block_dim = alloca %struct.dim3, align 8 %shmem_size = alloca i64, align 8 %stream = alloca i8*, align 8 %grid_dim.coerce = alloca { i64, i32 }, align 8 %block_dim.coerce = alloca { i64, i32 }, align 8 store float* %m, float** %m.addr, align 8 store i32 %matrix_dim, i32* %matrix_dim.addr, align 4 store i32 %offset, i32* %offset.addr, align 4 %kernel_args = alloca i8*, i64 3, align 16 %0 = bitcast float** %m.addr to i8* %1 = getelementptr i8*, i8** %kernel_args, i32 0 store i8* %0, i8** %1 %2 = bitcast i32* %matrix_dim.addr to i8* %3 = getelementptr i8*, i8** %kernel_args, i32 1 store i8* %2, i8** %3 %4 = bitcast i32* %offset.addr to i8* %5 = getelementptr i8*, i8** %kernel_args, i32 2 store i8* %4, i8** %5 %6 = call i32 @__cudaPopCallConfiguration(%struct.dim3* %grid_dim, %struct.dim3* %block_dim, i64* %shmem_size, i8** %stream) %7 = load i64, i64* %shmem_size, align 8 %8 = load i8*, i8** %stream, align 8 %9 = bitcast { i64, i32 }* %grid_dim.coerce to i8* %10 = bitcast %struct.dim3* %grid_dim to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %9, i8* align 8 %10, i64 12, i1 false) %11 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 0 %12 = load i64, i64* %11, align 8 %13 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 1 %14 = load i32, i32* %13, align 8 %15 = bitcast { i64, i32 }* %block_dim.coerce to i8* %16 = bitcast %struct.dim3* %block_dim to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %15, i8* align 8 %16, i64 12, i1 false) %17 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 0 %18 = load i64, i64* %17, align 8 %19 = getelementptr inbounds { i64, i32 }, { i64, 
i32 }* %block_dim.coerce, i32 0, i32 1 %20 = load i32, i32* %19, align 8 %21 = bitcast i8* %8 to %struct.CUstream_st* %call = call i32 @cudaLaunchKernel(i8* bitcast (void (float*, i32, i32)* @_Z13lud_perimeterPfii to i8*), i64 %12, i32 %14, i64 %18, i32 %20, i8** %kernel_args, i64 %7, %struct.CUstream_st* %21) br label %setup.end setup.end: ; preds = %entry ret void } ; Function Attrs: noinline optnone uwtable define dso_local void @_Z12lud_internalPfii(float* %m, i32 %matrix_dim, i32 %offset) #0 { entry: %m.addr = alloca float*, align 8 %matrix_dim.addr = alloca i32, align 4 %offset.addr = alloca i32, align 4 %grid_dim = alloca %struct.dim3, align 8 %block_dim = alloca %struct.dim3, align 8 %shmem_size = alloca i64, align 8 %stream = alloca i8*, align 8 %grid_dim.coerce = alloca { i64, i32 }, align 8 %block_dim.coerce = alloca { i64, i32 }, align 8 store float* %m, float** %m.addr, align 8 store i32 %matrix_dim, i32* %matrix_dim.addr, align 4 store i32 %offset, i32* %offset.addr, align 4 %kernel_args = alloca i8*, i64 3, align 16 %0 = bitcast float** %m.addr to i8* %1 = getelementptr i8*, i8** %kernel_args, i32 0 store i8* %0, i8** %1 %2 = bitcast i32* %matrix_dim.addr to i8* %3 = getelementptr i8*, i8** %kernel_args, i32 1 store i8* %2, i8** %3 %4 = bitcast i32* %offset.addr to i8* %5 = getelementptr i8*, i8** %kernel_args, i32 2 store i8* %4, i8** %5 %6 = call i32 @__cudaPopCallConfiguration(%struct.dim3* %grid_dim, %struct.dim3* %block_dim, i64* %shmem_size, i8** %stream) %7 = load i64, i64* %shmem_size, align 8 %8 = load i8*, i8** %stream, align 8 %9 = bitcast { i64, i32 }* %grid_dim.coerce to i8* %10 = bitcast %struct.dim3* %grid_dim to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %9, i8* align 8 %10, i64 12, i1 false) %11 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 0 %12 = load i64, i64* %11, align 8 %13 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 1 %14 = load i32, i32* %13, 
align 8 %15 = bitcast { i64, i32 }* %block_dim.coerce to i8* %16 = bitcast %struct.dim3* %block_dim to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %15, i8* align 8 %16, i64 12, i1 false) %17 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 0 %18 = load i64, i64* %17, align 8 %19 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 1 %20 = load i32, i32* %19, align 8 %21 = bitcast i8* %8 to %struct.CUstream_st* %call = call i32 @cudaLaunchKernel(i8* bitcast (void (float*, i32, i32)* @_Z12lud_internalPfii to i8*), i64 %12, i32 %14, i64 %18, i32 %20, i8** %kernel_args, i64 %7, %struct.CUstream_st* %21) br label %setup.end setup.end: ; preds = %entry ret void } ; Function Attrs: noinline optnone uwtable define dso_local void @_Z8lud_cudaPfi(float* %m, i32 %matrix_dim) #0 { entry: %m.addr = alloca float*, align 8 %matrix_dim.addr = alloca i32, align 4 %i = alloca i32, align 4 %dimBlock = alloca %struct.dim3, align 4 %m_debug = alloca float*, align 8 %agg.tmp = alloca %struct.dim3, align 4 %agg.tmp2 = alloca %struct.dim3, align 4 %agg.tmp.coerce = alloca { i64, i32 }, align 4 %agg.tmp2.coerce = alloca { i64, i32 }, align 4 %agg.tmp5 = alloca %struct.dim3, align 4 %agg.tmp8 = alloca %struct.dim3, align 4 %agg.tmp5.coerce = alloca { i64, i32 }, align 4 %agg.tmp8.coerce = alloca { i64, i32 }, align 4 %dimGrid = alloca %struct.dim3, align 4 %agg.tmp20 = alloca %struct.dim3, align 4 %agg.tmp21 = alloca %struct.dim3, align 4 %agg.tmp20.coerce = alloca { i64, i32 }, align 4 %agg.tmp21.coerce = alloca { i64, i32 }, align 4 %agg.tmp27 = alloca %struct.dim3, align 4 %agg.tmp28 = alloca %struct.dim3, align 4 %agg.tmp27.coerce = alloca { i64, i32 }, align 4 %agg.tmp28.coerce = alloca { i64, i32 }, align 4 store float* %m, float** %m.addr, align 8 store i32 %matrix_dim, i32* %matrix_dim.addr, align 4 store i32 0, i32* %i, align 4 call void @_ZN4dim3C2Ejjj(%struct.dim3* %dimBlock, i32 16, i32 16, i32 1) %0 = load 
i32, i32* %matrix_dim.addr, align 4 %1 = load i32, i32* %matrix_dim.addr, align 4 %mul = mul nsw i32 %0, %1 %conv = sext i32 %mul to i64 %mul1 = mul i64 %conv, 4 %call = call noalias i8* @malloc(i64 %mul1) #5 %2 = bitcast i8* %call to float* store float* %2, float** %m_debug, align 8 store i32 0, i32* %i, align 4 br label %for.cond for.cond: ; preds = %for.inc, %entry %3 = load i32, i32* %i, align 4 %4 = load i32, i32* %matrix_dim.addr, align 4 %sub = sub nsw i32 %4, 16 %cmp = icmp slt i32 %3, %sub br i1 %cmp, label %for.body, label %for.end for.body: ; preds = %for.cond call void @_ZN4dim3C2Ejjj(%struct.dim3* %agg.tmp, i32 1, i32 1, i32 1) call void @_ZN4dim3C2Ejjj(%struct.dim3* %agg.tmp2, i32 16, i32 1, i32 1) %5 = bitcast { i64, i32 }* %agg.tmp.coerce to i8* %6 = bitcast %struct.dim3* %agg.tmp to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %5, i8* align 4 %6, i64 12, i1 false) %7 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 0 %8 = load i64, i64* %7, align 4 %9 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 1 %10 = load i32, i32* %9, align 4 %11 = bitcast { i64, i32 }* %agg.tmp2.coerce to i8* %12 = bitcast %struct.dim3* %agg.tmp2 to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %11, i8* align 4 %12, i64 12, i1 false) %13 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp2.coerce, i32 0, i32 0 %14 = load i64, i64* %13, align 4 %15 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp2.coerce, i32 0, i32 1 %16 = load i32, i32* %15, align 4 %call3 = call i32 @__cudaPushCallConfiguration(i64 %8, i32 %10, i64 %14, i32 %16, i64 0, i8* null) %tobool = icmp ne i32 %call3, 0 br i1 %tobool, label %kcall.end, label %kcall.configok kcall.configok: ; preds = %for.body %17 = load float*, float** %m.addr, align 8 %18 = load i32, i32* %matrix_dim.addr, align 4 %19 = load i32, i32* %i, align 4 call void @_Z12lud_diagonalPfii(float* %17, i32 %18, i32 %19) br label %kcall.end 
kcall.end: ; preds = %kcall.configok, %for.body %call4 = call i32 @cudaDeviceSynchronize() %20 = load i32, i32* %matrix_dim.addr, align 4 %21 = load i32, i32* %i, align 4 %sub6 = sub nsw i32 %20, %21 %div = sdiv i32 %sub6, 16 %sub7 = sub nsw i32 %div, 1 call void @_ZN4dim3C2Ejjj(%struct.dim3* %agg.tmp5, i32 %sub7, i32 1, i32 1) call void @_ZN4dim3C2Ejjj(%struct.dim3* %agg.tmp8, i32 32, i32 1, i32 1) %22 = bitcast { i64, i32 }* %agg.tmp5.coerce to i8* %23 = bitcast %struct.dim3* %agg.tmp5 to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %22, i8* align 4 %23, i64 12, i1 false) %24 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp5.coerce, i32 0, i32 0 %25 = load i64, i64* %24, align 4 %26 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp5.coerce, i32 0, i32 1 %27 = load i32, i32* %26, align 4 %28 = bitcast { i64, i32 }* %agg.tmp8.coerce to i8* %29 = bitcast %struct.dim3* %agg.tmp8 to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %28, i8* align 4 %29, i64 12, i1 false) %30 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp8.coerce, i32 0, i32 0 %31 = load i64, i64* %30, align 4 %32 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp8.coerce, i32 0, i32 1 %33 = load i32, i32* %32, align 4 %call9 = call i32 @__cudaPushCallConfiguration(i64 %25, i32 %27, i64 %31, i32 %33, i64 0, i8* null) %tobool10 = icmp ne i32 %call9, 0 br i1 %tobool10, label %kcall.end12, label %kcall.configok11 kcall.configok11: ; preds = %kcall.end %34 = load float*, float** %m.addr, align 8 %35 = load i32, i32* %matrix_dim.addr, align 4 %36 = load i32, i32* %i, align 4 call void @_Z13lud_perimeterPfii(float* %34, i32 %35, i32 %36) br label %kcall.end12 kcall.end12: ; preds = %kcall.configok11, %kcall.end %call13 = call i32 @cudaDeviceSynchronize() %37 = load i32, i32* %matrix_dim.addr, align 4 %38 = load i32, i32* %i, align 4 %sub14 = sub nsw i32 %37, %38 %div15 = sdiv i32 %sub14, 16 %sub16 = sub nsw i32 %div15, 1 %39 = load i32, i32* 
%matrix_dim.addr, align 4 %40 = load i32, i32* %i, align 4 %sub17 = sub nsw i32 %39, %40 %div18 = sdiv i32 %sub17, 16 %sub19 = sub nsw i32 %div18, 1 call void @_ZN4dim3C2Ejjj(%struct.dim3* %dimGrid, i32 %sub16, i32 %sub19, i32 1) %41 = bitcast %struct.dim3* %agg.tmp20 to i8* %42 = bitcast %struct.dim3* %dimGrid to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %41, i8* align 4 %42, i64 12, i1 false) %43 = bitcast %struct.dim3* %agg.tmp21 to i8* %44 = bitcast %struct.dim3* %dimBlock to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %43, i8* align 4 %44, i64 12, i1 false) %45 = bitcast { i64, i32 }* %agg.tmp20.coerce to i8* %46 = bitcast %struct.dim3* %agg.tmp20 to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %45, i8* align 4 %46, i64 12, i1 false) %47 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp20.coerce, i32 0, i32 0 %48 = load i64, i64* %47, align 4 %49 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp20.coerce, i32 0, i32 1 %50 = load i32, i32* %49, align 4 %51 = bitcast { i64, i32 }* %agg.tmp21.coerce to i8* %52 = bitcast %struct.dim3* %agg.tmp21 to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %51, i8* align 4 %52, i64 12, i1 false) %53 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp21.coerce, i32 0, i32 0 %54 = load i64, i64* %53, align 4 %55 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp21.coerce, i32 0, i32 1 %56 = load i32, i32* %55, align 4 %call22 = call i32 @__cudaPushCallConfiguration(i64 %48, i32 %50, i64 %54, i32 %56, i64 0, i8* null) %tobool23 = icmp ne i32 %call22, 0 br i1 %tobool23, label %kcall.end25, label %kcall.configok24 kcall.configok24: ; preds = %kcall.end12 %57 = load float*, float** %m.addr, align 8 %58 = load i32, i32* %matrix_dim.addr, align 4 %59 = load i32, i32* %i, align 4 call void @_Z12lud_internalPfii(float* %57, i32 %58, i32 %59) br label %kcall.end25 kcall.end25: ; preds = %kcall.configok24, %kcall.end12 %call26 = call i32 
@cudaDeviceSynchronize()                          ; callee of the `%call26 = call i32` that starts at the end of the preceding line
  br label %for.inc

; Loop step: add 16 to the counter stored in %i, then go back to the loop test.
for.inc:                                          ; preds = %kcall.end25
  %60 = load i32, i32* %i, align 4
  %add = add nsw i32 %60, 16
  store i32 %add, i32* %i, align 4
  br label %for.cond

; Loop exit: construct dim3(1, 1, 1) and dim3(16, 1, 1) temporaries and push
; them as a call configuration.
for.end:                                          ; preds = %for.cond
  call void @_ZN4dim3C2Ejjj(%struct.dim3* %agg.tmp27, i32 1, i32 1, i32 1)
  call void @_ZN4dim3C2Ejjj(%struct.dim3* %agg.tmp28, i32 16, i32 1, i32 1)
  ; Each 12-byte dim3 is copied into a { i64, i32 } slot and read back as an
  ; i64 plus an i32, which is the form the callee below takes it in.
  %61 = bitcast { i64, i32 }* %agg.tmp27.coerce to i8*
  %62 = bitcast %struct.dim3* %agg.tmp27 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %61, i8* align 4 %62, i64 12, i1 false)
  %63 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp27.coerce, i32 0, i32 0
  %64 = load i64, i64* %63, align 4
  %65 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp27.coerce, i32 0, i32 1
  %66 = load i32, i32* %65, align 4
  %67 = bitcast { i64, i32 }* %agg.tmp28.coerce to i8*
  %68 = bitcast %struct.dim3* %agg.tmp28 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %67, i8* align 4 %68, i64 12, i1 false)
  %69 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp28.coerce, i32 0, i32 0
  %70 = load i64, i64* %69, align 4
  %71 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp28.coerce, i32 0, i32 1
  %72 = load i32, i32* %71, align 4
  ; The trailing `i64 0, i8* null` arguments are presumably the shared-memory
  ; size and the stream -- confirm against the CUDA runtime headers.
  %call29 = call i32 @__cudaPushCallConfiguration(i64 %64, i32 %66, i64 %70, i32 %72, i64 0, i8* null)
  ; A nonzero result bypasses the call in kcall.configok31.
  %tobool30 = icmp ne i32 %call29, 0
  br i1 %tobool30, label %kcall.end32, label %kcall.configok31

; Configuration accepted: call @_Z12lud_diagonalPfii with the current values
; of m, matrix_dim and i.
kcall.configok31:                                 ; preds = %for.end
  %73 = load float*, float** %m.addr, align 8
  %74 = load i32, i32* %matrix_dim.addr, align 4
  %75 = load i32, i32* %i, align 4
  call void @_Z12lud_diagonalPfii(float* %73, i32 %74, i32 %75)
  br label %kcall.end32

; Both paths join here; the result of the synchronize call (%call33) is unused.
kcall.end32:                                      ; preds = %kcall.configok31, %for.end
  %call33 = call i32 @cudaDeviceSynchronize()
  ret void
}

; dim3::dim3(unsigned, unsigned, unsigned) (Itanium-mangled name, C2 variant).
; Spills the incoming arguments to stack slots, then stores vx, vy and vz into
; fields 0, 1 and 2 of the %struct.dim3 pointed to by %this.
; Function Attrs: noinline nounwind optnone uwtable
define linkonce_odr dso_local void @_ZN4dim3C2Ejjj(%struct.dim3* %this, i32 %vx, i32 %vy, i32 %vz) unnamed_addr #2 comdat align 2 {
entry:
  %this.addr = alloca %struct.dim3*, align 8
  %vx.addr = alloca i32, align 4
  %vy.addr = alloca i32, align 4
  %vz.addr = alloca i32, align 4
  store %struct.dim3* %this, %struct.dim3** %this.addr, align 8
  store i32 %vx, i32* %vx.addr, align 4
  store i32 %vy, i32* %vy.addr, align 4
  store i32 %vz, i32* %vz.addr, align 4
  %this1 = load %struct.dim3*, %struct.dim3** %this.addr, align 8
  ; this->x = vx
  %x = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 0
  %0 = load i32, i32* %vx.addr, align 4
  store i32 %0, i32* %x, align 4
  ; this->y = vy
  %y = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 1
  %1 = load i32, i32* %vy.addr, align 4
  store i32 %1, i32* %y, align 4
  ; this->z = vz
  %z = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 2
  %2 = load i32, i32* %vz.addr, align 4
  store i32 %2, i32* %z, align 4
  ret void
}

; Function Attrs: nounwind
declare dso_local noalias i8* @malloc(i64) #3

declare dso_local i32 @__cudaPushCallConfiguration(i64, i32, i64, i32, i64, i8*) #4

declare dso_local i32 @cudaDeviceSynchronize() #4

; Calls @__cudaRegisterFunction once each for @_Z12lud_diagonalPfii,
; @_Z13lud_perimeterPfii and @_Z12lud_internalPfii. Every call passes the
; handle %0, the function's address cast to i8*, the matching name constant
; (@0, @1, @2) in both string arguments, i32 -1, and null for all remaining
; pointer arguments. The i32 results (%1, %2, %3) are not used.
define internal void @__cuda_register_globals(i8** %0) {
entry:
  %1 = call i32 @__cudaRegisterFunction(i8** %0, i8* bitcast (void (float*, i32, i32)* @_Z12lud_diagonalPfii to i8*), i8* getelementptr inbounds ([21 x i8], [21 x i8]* @0, i64 0, i64 0), i8* getelementptr inbounds ([21 x i8], [21 x i8]* @0, i64 0, i64 0), i32 -1, i8* null, i8* null, i8* null, i8* null, i32* null)
  %2 = call i32 @__cudaRegisterFunction(i8** %0, i8* bitcast (void (float*, i32, i32)* @_Z13lud_perimeterPfii to i8*), i8* getelementptr inbounds ([22 x i8], [22 x i8]* @1, i64 0, i64 0), i8* getelementptr inbounds ([22 x i8], [22 x i8]* @1, i64 0, i64 0), i32 -1, i8* null, i8* null, i8* null, i8* null, i32* null)
  %3 = call i32 @__cudaRegisterFunction(i8** %0, i8* bitcast (void (float*, i32, i32)* @_Z12lud_internalPfii to i8*), i8* getelementptr inbounds ([21 x i8], [21 x i8]* @2, i64 0, i64 0), i8* getelementptr inbounds ([21 x i8], [21 x i8]* @2, i64 0, i64 0), i32 -1, i8* null, i8* null, i8* null, i8* null, i32* null)
  ret void
}

declare dso_local i32 @__cudaRegisterFunction(i8**, i8*, i8*, i8*, i32, i8*, i8*, i8*, i8*, i32*)

; Declared only; no call to it appears in the lines visible here.
declare dso_local i32 @__cudaRegisterVar(i8**, i8*, i8*, i8*, i32, i32, i32, i32)

declare dso_local i8** @__cudaRegisterFatBinary(i8*)

; Module set-up: registers @__cuda_fatbin_wrapper via @__cudaRegisterFatBinary,
; saves the returned handle in @__cuda_gpubin_handle, runs
; @__cuda_register_globals on it, calls @__cudaRegisterFatBinaryEnd, and hands
; @__cuda_module_dtor to @atexit. The %0 parameter and the atexit result (%2)
; are unused.
; NOTE(review): presumably listed in @llvm.global_ctors -- not visible in this
; part of the file, confirm.
define internal void @__cuda_module_ctor(i8* %0) {
entry:
  %1 = call i8** @__cudaRegisterFatBinary(i8* bitcast ({ i32, i32, i8*, i8* }* @__cuda_fatbin_wrapper to i8*))
  store i8** %1, i8*** @__cuda_gpubin_handle, align 8
  call void @__cuda_register_globals(i8** %1)
  call void @__cudaRegisterFatBinaryEnd(i8** %1)
  %2 = call i32 @atexit(void (i8*)* @__cuda_module_dtor)
  ret void
}

declare dso_local void @__cudaRegisterFatBinaryEnd(i8**)

declare dso_local void @__cudaUnregisterFatBinary(i8**)

; Module tear-down: reloads the handle saved in @__cuda_gpubin_handle and
; passes it to @__cudaUnregisterFatBinary. The %0 parameter is unused.
define internal void @__cuda_module_dtor(i8* %0) {
entry:
  %1 = load i8**, i8*** @__cuda_gpubin_handle, align 8
  call void @__cudaUnregisterFatBinary(i8** %1)
  ret void
}

declare dso_local i32 @atexit(void (i8*)*)

; Attribute groups referenced as #N. Uses visible in the lines above:
;   #2 -> @_ZN4dim3C2Ejjj
;   #3 -> @malloc
;   #4 -> @__cudaPushCallConfiguration, @cudaDeviceSynchronize
attributes #0 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { argmemonly nounwind willreturn }
attributes #2 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #3 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #4 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #5 = { nounwind }

; Module-level metadata: the module flags carry "SDK Version" [10, 1] and
; "wchar_size" 4; !llvm.ident names the compiler that produced this module.
!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}

!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]}
!1 = !{i32 1, !"wchar_size", i32 4}
!2 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"}