; ModuleID = 'backprop_cuda-host-x86_64-unknown-linux-gnu.bc' source_filename = "backprop_cuda.cu" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" %struct.dim3 = type { i32, i32, i32 } %struct.CUstream_st = type opaque %struct.timeval = type { i64, i64 } %struct.timezone = type { i32, i32 } %struct.BPNN = type { i32, i32, i32, float*, float*, float*, float*, float*, float*, float**, float**, float**, float** } $_ZN4dim3C2Ejjj = comdat any $_ZSt3expf = comdat any @num_threads = dso_local global i32 0, align 4 @num_blocks = dso_local global i32 0, align 4 @.str = private unnamed_addr constant [28 x i8] c"Performing GPU computation\0A\00", align 1 @.str.1 = private unnamed_addr constant [23 x i8] c"bpnn kernel error: %s\0A\00", align 1 @.str.2 = private unnamed_addr constant [4 x i8] c"%f \00", align 1 @.str.3 = private unnamed_addr constant [2 x i8] c"\0A\00", align 1 @0 = private unnamed_addr constant [37 x i8] c"_Z22bpnn_layerforward_CUDAPfS_S_S_ii\00", align 1 @1 = private unnamed_addr constant [39 x i8] c"_Z24bpnn_adjust_weights_cudaPfiS_iS_S_\00", align 1 @2 = private constant [26889 x i8] c"P\EDU\BA\01\00\10\00\F8h\00\00\00\00\00\00\02\00\01\01@\00\00\00xY\00\00\00\00\00\00\00\00\00\00\00\00\00\00\07\00\01\00=\00\00\00\00\00\00\00\00\00\00\00\11\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\7FELF\02\01\013\07\00\00\00\00\00\00\00\02\00\BE\00e\00\00\00\00\00\00\00\00\00\00\00\D0X\00\00\00\00\00\00\10U\00\00\00\00\00\00=\05=\00@\008\00\03\00@\00\0F\00\01\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00.text._Z24bpnn_adjust_weights_cudaPfiS_iS_S_\00.nv.info._Z24bpnn_adjust_weights_cudaPfiS_iS_S_\00.nv.shared._Z24bpnn_adjust_weights_cudaPfiS_iS_S_\00.nv.global\00.nv.global.init\00.nv.constant2._Z24bpnn_adjust_weights_cudaPfiS_iS_S_\00.nv.constant0._Z24bpnn_adjust_weights_cudaPfiS_iS_S_\00.text._Z22bpnn_layerforward_CUDAPfS_S_S_ii\00.nv.info._Z22bpnn_layerforward_CUDAPfS_S_S_ii\00.nv.shared._Z22bpnn_layerforward_CUDAPfS_S_S_ii\00.nv.constant0._Z22bpnn_layerforward_CUDAPfS_S_S_ii\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00_Z24bpnn_adjust_weights_cudaPfiS_iS_S_\00.text._Z24bpnn_adjust_weights_cudaPfiS_iS_S_\00.nv.info._Z24bpnn_adjust_weights_cudaPfiS_iS_S_\00.nv.shared._Z24bpnn_adjust_weights_cudaPfiS_iS_S_\00.nv.global\00blockIdx\00threadIdx\00.nv.global.init\00$str\00.nv.constant2._Z24bpnn_adjust_weights_cudaPfiS_iS_S_\00__ocg_const\00.nv.constant0._Z24bpnn_adjust_weights_cudaPfiS_iS_S_\00_param\00_Z22bpnn_layerforward_CUDAPfS_S_S_ii\00.text._Z22bpnn_layerforward_CUDAPfS_S_S_ii\00.nv.info._Z22bpnn_layerforward_CUDAPfS_S_S_ii\00.nv.shared._Z22bpnn_layerforward_CUDAPfS_S_S_ii\00$___ZZ22bpnn_layerforward_CUDAPfS_S_S_iiE10input_node__186\00$___ZZ22bpnn_layerforward_CUDAPfS_S_S_iiE13weight_matrix__188\00.nv.constant0._Z22bpnn_layerforward_CUDAPfS_S_S_ii\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00Y\00\00\00\03\00\0A\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\E8\00\00\00\03\00\0D\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\F3\00\00\00\01\00\0D\00\00\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\FC\00\00\00\01\00\0D\00\01\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\06\01\00\00\03\00\0C\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\16\01\00\00\01\00\0C\00\00\00\00\00\00\00\00\00\0B\00\00\00\00\00\00\00\1B\01\00\00\03\00\07\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\\\01\00\00\03\00\08\
00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\BD\01\00\00\03\00\0B\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\16\02\00\00\03\00\0E\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\BF\02\00\00\03\00\09\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\002\00\00\00\12\10\0A\00\00\00\00\00\00\00\00\00\80\1D\00\00\00\00\00\00\98\01\00\00\12\10\0B\00\00\00\00\00\00\00\00\00\80,\00\00\00\00\00\00\04/\08\00\0D\00\00\00\10\00\00\00\04#\08\00\0D\00\00\00\00\00\00\00\04\12\08\00\0D\00\00\00X\00\00\00\04\11\08\00\0D\00\00\00X\00\00\00\04/\08\00\0C\00\00\00\10\00\00\00\04#\08\00\0C\00\00\00\00\00\00\00\04\12\08\00\0C\00\00\00H\00\00\00\04\11\08\00\0C\00\00\00H\00\00\00\010\00\00\01*\00\00\04\0A\08\00\08\00\00\00@\010\00\03\190\00\04\17\0C\00\00\00\00\00\05\00(\00\00\F0!\00\04\17\0C\00\00\00\00\00\04\00 \00\00\F0!\00\04\17\0C\00\00\00\00\00\03\00\18\00\00\F0\11\00\04\17\0C\00\00\00\00\00\02\00\10\00\00\F0!\00\04\17\0C\00\00\00\00\00\01\00\08\00\00\F0\11\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0!\00\03\1B\FF\00\04\1D\04\00X\05\00\00\04\1C\04\00p\1D\00\00\04\1E\04\000\00\00\00\010\00\00\01*\00\00\04\0A\08\00\0B\00\00\00@\01(\00\03\19(\00\04\17\0C\00\00\00\00\00\05\00$\00\00\F0\11\00\04\17\0C\00\00\00\00\00\04\00 \00\00\F0\11\00\04\17\0C\00\00\00\00\00\03\00\18\00\00\F0!\00\04\17\0C\00\00\00\00\00\02\00\10\00\00\F0!\00\04\17\0C\00\00\00\00\00\01\00\08\00\00\F0!\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0!\00\03\1B\FF\00\04\1D\04\00H\05\00\00\04\1C\04\008,\00\00\04\1E\04\00@\00\00\00333333\D3?\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00
\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\EF\1F\E0\FD\03!\00\D3rd<3>;\0A\0Amov.u2\00\1B,e\00b;\0Acvta\8D\00\04%\00\13,\\\00\22ld\C8\00\02\18\00nrd2, [\CE\00\1E])\00\1F1)\00\01a0];\0Ast#\00\81[%SP+0],,\00\0A\16\00\128\16\00\222;\B6\00\01\D8\00a1, 999(\00\02g\00\00\EF\00\18[\9E\01\03M\00\AF1;\0Aret;\0A\0A}\D6\01\1A\FE\02FuncGetAttributes\E1\01\0D#\00\0E\EC\01\0F+\00\06\0F\F7\01\1B\1F1\F7\01Q\1F1\F7\01!\0E\D9\00\0F\02\02\0F\0E8\01\0F\0D\02\8DhDevice\B4\00\0E\0E\02\0E$\00\0F\0F\02\00/32,\00\0B\1F1,\00\18\1F2<\02\13\1F2<\02\1F\1D4<\02\1F2<\02\0C\1F2<\02\13\01_\00\04;\02\0F\D9\00\07\1D]4\00\1F14\00\06\0Fp\02\10\0E\9A\01\0Fq\02\12(32q\02\0B\15\00!12\16\00\09\86\02\1F3\86\02\15\1F3\86\02#2Get\CB\00\0E}\02\05\1B\00\04\DA\00\0F\1C\02\13?3[8W\04.\0F\1B\02\0D\1F3W\04\19\04\B3\01\0D\D0\00\0F\AA\01\06\0F\05\04W\F0\04OccupancyMaxActiveB\A5\08\FE\03sPerMultiprocessor\9F\01\0F;\00\16\0EB\06\0FC\00%\0EJ\04\0FC\00\1E\1F2\86\00/\1F3\88\02\13O4[32\89\02\1C\1D3\89\02\1F4\89\02\0C\1F4\89\02\19\133\89\02\0F\F1\00\1E\0F\BC\04\00\0FK\00$/2]w\07\00\0FL\00$\0F\1F\05\01\0F\98\00%\0F\A7\07\1D\097\05\186M\05\04,\00\2224-\00\183\CF\03\1F2\CF\03\15\1F2\CF\03L\9FWithFlags\D8\03(\05D\00\0E\E1\03\0FL\00'\0F\EA\030\0CL\00\1F2\98\008\1F3\98\008\1F4H\04\13O5[40\EC\08.\0FH\04\0D\1F5\EC\08\1C\0F\F9\00+\1F]\9C\040\0D\9A\01\0F\A5\040\0D:\02\0F\AE\041\0D\DB\02\0F\B7\041\0D|\03\0F\C0\04I\08-\00\1F3$\0A6\F0\1Evisible .entry _Z22bpnn_layerforward_CUDAPfS_\02\00&ii\AD\04\00\A7\00\0F2\00\11\0E\9B\04\0F:\00\1C\1F1:\00&\1F2:\00&\07e\04\00a\01\0F:\00\18\1F4:\00&\1F5\8D\04\13?6[8^\0B\16\95pred %p<5\8E\04\10f/\02\\f<20>\B1\04\1D7\B2\04\108%\00`\0A\09.shaH\00\03\93\00\124\93\00\1FZ\D9\00\10\FF\02E10input_node[64]O\00-\F0\033weight_matrix[102T\00\0FU\05\08\1F6U\05\1C\0F\AF\01\19\0F\EE\04\00\0FB\00\1B\0F\85\05\01\1F4C\00\1B\1F3\C8\05\01\0FC\00\1B\0Fb\05\01\0F\0B\01\1C\0FP\05\01\0F\0C\01\1C#0]\AA\01#tor\15\04I\00\115\04\05\04f\0B\0A\1C\00\116\1C\00\1F5;\00\05\147?\05\0F;\00\00\118\1C\00\1F7;\00\05\149\A7\05\0F;\00\00!10\1D\00\1F9<\00\05$11\FA\05\0F=\00\01\122\1D\00\0B\EC\05\03\02\06?d12\04\06\03*10\18\00\03\05\06*d8\17\00\134v\06\1A63\06\1F4m\10\02\1F5I\06\03\8B%ctaid.y-\00\02\A1\00\09L\0B\9A4, %tid.x+\00\126\85\00\184+\00\135+\00\0BV\00\126\DF\00\115@\02\02*\00%6,\9E\00q;\0Aadd.s\17\00\227,\1C\00\171+\00%8,\9C\00\83;\0Amul.lo.\00$9,3\00\00!\01#hl\E6\04\02\BA\01G9, 4F\00\00\AF\01\04\8D\00\0BG\00%12H\00(11\8E\00513,O\00(12M\00%4,\05\01\091\00&5,7\00\194\1A\00%6, \00\197\19\00#7,\1F\00\0B\B8\01\136\CE\01\187x\00\09\0C\01\07\F2\00#9,\1E\00\08\F3\00(20\F3\00\06\A6\00521,4\00)20\1A\00#2, \00\0B\8D\00\137E\02\08\05\01(23\05\01rsetp.neI\003p1,!\00\F2\0C0;\0A@%p1 bra LBB6_2;\0Abra.uni\10\0021;\0A\08\00\11:Z\00\03?\03%3,1\03\01r\00\02\9F\00\15dx\01$72\00\01\031\00$5, \00\132\D1\00\03\19\00$6,Q\00\01'\00\01N\00\02\1F\07\01\11\02,rdc\00\1875\01\08c\00$8, \00\172\8E\06 rdI\01\0F)\07\1F\037\04\02'\07\05\92\05\02t\04)19\C4\00\02\B0\01\05\1D\00\02\0D\04\11f\9B\01\00\1D\00\00\9A\01+f1W\01\132W\01\B02:\0Abar.sync\8C\01\06c\01\00\EB\01\04d\04\08\00\01\06\D6\01\1A8\00\01424, \00\0Ac\01425,Q\00\01'\00\07c\01\222,\A4\00\1A5c\00\196\8B\03\06J\01?27,$\08#\0FM\01\03\02\B1\01*27\C7\00$9,\84\00\196\C7\00830,6\00\189\B2\00)31\EB\02\06\15\01432, \00\0A\15\01733,U\00)32\B4\01!33\B4\01,2;\9C\01\04o\00\1F4\84\02\04435, 
\00\0B\BD\00\186\BD\00)35N\00\1F7\BD\00\05\03.\00\1D7\BD\00'9,U\00'38\D2\01\223,\C2\00\1A9\BA\01/40\04\036\124\EE\02)40\AC\00\134i\01\0D\E1\03843,6\00\172\AC\00\124\AC\00#43\F1\05#rn\19\00\225,\CA\00:%f4\98\01\119\98\01\1A5\98\01\07V\12\1E4\19\05\021\07+24\80\03\133\80\03'3:1\05%5,5\00\01\FB\00\03\92\00\02\A9\01\11fn\01)25q\00\00\17\06\9A098907648z\00\03\C0\12\176\F1\00\04\E9\01\00\E8\01\A3lg2.approx\1D\00\00\ED\01#f7\A9\052gtu\17\003p2,{\00!f8\AB\05\162\AB\05\1B8\D4\00\134\D4\00\174\D4\00/33\D4\00\0A\02%\09\193P\08\00\C4\01\AB1073741824\C3\08\02\7F\08\08\97\01\04E\198f11\BD\05\04\BF\04(4]\16\00\04\8E\00\1F8\02\01\01!14G\00\1B2\0E\02\01<\06\01;\00\00#\00L;\0Aex8\00\01\1F\01#15\E0\00 zi\DD\00\02\1C\00\22r3=\00\0B\8A\09\138\87\0F\08\F2\08\193\F6\04\06\17\00%7,4\00\00\B6\0F\13m\B7\03\02\9D\03\02\83\03,37`\07#4,#\00\02`\07\164\B5\01\1B6\B5\01\135\B5\01\185\B5\01\189\8B\00\00\D5\004s64/\04\126y\00\199\E0\03/69\9A\059\127\16\05)69\E3\03471,\80\00\0A\DD\04772,6\00(71\DD\04/73\DD\04\04\127\AF\06\1D7\AF\06875,U\00\08\AF\06\02\FC\07\00#\00\08\A2\01(40\A2\014shr\18\00#1,\1E\00\1936\0A$42\18\00\00$\00\022\00\03\1A\00#3, \00\0A1\00#4,\95\01\00#\00\0D\91\01\117\04\02*44\DF\00$7,\1C\00\0B-\01\198-\01\1A7\1D\00(9,$\00\09\FC\00\138\FC\00\139,\0A\06 \03#9,\1C\01+%f}\08\2275\9A\03\1B9u\02\136u\02\1A6~\08\09$\00\137$\00\177\99\02\194\22\05\07\22\01\02\03\01\1F5\7F\05\05/46\7F\05\04'8:,\02\1F4W\07\05\00Y\00\03 \00\1A6\D7\06\1F6\F7\029\124\B4\01\1A4\B1\01848,\1D\00\09\BB\07/49\DE\02\04450, \00\0A\DE\02751,U\00'50\E2\01\129\E1\01851]\08\0A\1F5\08\0A\04\1F5\08\0A\05454, \00\0B{\00$5,Q\00\01'\00\09*\02\125*\02\1D9\F3\08\03\E9\06\1F7L\0C\07#3,!\00\02\EC\04\163\EC\04\1C1T\02\139\F7\01\189\F7\01/56E\01\05$7, \00\0A\F7\01/58\F7\019\125u\03)58.\01860,\1D\00\08\E6\0D/28\9F\05\09\02[\0D\198\CA\00\126t\01\1D6\87\0A763,m\00\186\1E\09\2210\10\02)63\10\02564,e\10\08\94\00\189z\0E\06\BE\06\180\C8\0F\09V\0F\00\14\0B\028\00\00'\00\08\CE\03\025\0B\153\C9\00\0C\E2\00\01H\02*32\E2\00$6,\1C\00\0B\E2\00$7,\B7\00\01'\00\09v\02\2267\A0\04\0C(\02$10v\0E/0:*\17\0A\114\02\02radjust_\EB\01 s_A\18`PfiS_i\06\02\0D,\17\0F4\00\10\0F.\17\00/32<\00\1B/1,x\00'\1F2x\00(\1F3x\00(\1F4<\00(\0F8\17\14O7[728\17\1D\1C38\17,178\17\162\12\00\10fF\00Nfd<2E \1F5&'\0D\1F7F \19\03%\16\0F#\01\18\0F'\16\01\0FE\00\1D.4]3\17\0FD\00\1D\0F\B0\16\01\0FE\00\1E\0F\C9%\01\0FD\00\1D\0F\B3\16\01\0FE\00\17\0F\B5\16\F4\1A0\B4\16\0E\DA&\0F\E1\16\01\1A0-\00\03\E1\16\1F2\DF\16\1D\0F\B3\16\10/48\B3\16\16/52\B3\16\16/56\B3\16\06\1F8\B2\16\16/48\B2\162/56\B2\168/52\B2\16G\1F0\B2\16\08/48\B2\16\19/56\B2\16-/64\B2\16\08\0B\05\01$24\AF\16\0F\84\17\03\03\D4\14\0C\9D\16\1F0\9C\16\03/68\9C\16;\00\1F\093f64\1A\00#d1\EF\15\04\C4\10\02\8F\06#2,\1C\00h0d3FD3\01\00\09\BC\00\1F7Y\17\04\1F8\F6\16\05$9, 
\00\0B\F6\15\150\0F\17\03\B2\16\0D\F6\15\1E0\BD\00\133\F8\14\07\88\16\1B1p\0A\04%\16\1F2\B6\0B\04%23o\16\0B\92\00(4,.\17\1E3\B6\14.24\92\00\01\14\12\09$\01(25\12\0D\08a\00\03e\15\1E5a\00\03k\14\1F2\B0\01\00\135h\147fma\B0\01#6,}\00\0E\B0\01\01\1A\00\1C51\00&7,\E7\01\02#\00\196p\13\02'\00\01\DA\14*d7\22\18\146\D6\14\07\E4\00\1F7\C4\02\02/28\FD\17\05\03u\06\0D\07\02\046\17\1876\17\14f\D0\14\01U\15\0F\07\02\00\01\8C\14\1C6\C4\02#9,\1C\00\0F\C4\02\0B/31\C4\02\03/326\17\05\03\8C\17\1D2\BD\00\154\BE\17\01'\00\0CK\15\00\22\00\0F\81\03\00\130J\15\07\93\00\1F5\C5\02\03\1F3{\0E\05%37b\17\0B\93\00(8,\E9\17\177\93\00\03\A1\11/38\93\00\00\131\C1\15\0AQ\01\01'\06\02\1E\00\0F\17\04\01\0Aa\02\02\01\15\03\85\01\02\E4\00\01\F4\07\0Dd\02\03\22\00\1A3;\17\1F8\E0\0F\0B\195\B9\1C\0F,\1C\00\1D5,\1C97_3\B7\0D\137,\1C87_1U\17\196a\06\09Z\00\02\1C\06\146Z\00\1F2Z\00\07\132Z\00\182\86\1C/39%\03\02/40%\03\04\05\BA\18\0D\A4\18'2,W\19/41\86\0F\00/42\D6\01\00\02\B0\16\09\B7\1B(43j\02\08d\00)4,\0F\19\08d\00\03N\06\1F4\CD\02\01\175[\17\04d\00\0F\01\05\05\02\87\13\05/\13\0E\90\17\00#\00\0Fd\00\00\146x\17\0Br\02$7,\83\00\0F\06\05\04-164\00$8,\1B\01\0F4\00\05\1F7\B4\02\00\03\DD\02\0B\BF\14\124\1A\05)13\FE\00\0F\1B\05\03/48\F6\01\05\03\D6\01\0D\1B\05)50\D9\13\08\1B\05\1310\06\1F5\1C\05\00\03o\15\09s\08/51\F6\01\04\04m\13\1F1d\00\00\135d\00\0FZ\02\00$20\D2\18\0BF\08$1,\1F\00\0F1\04\0F\01{\09\02\B0\00\0F\8B\01\04/21\8B\01\01\02\FB\06*22 \14$2]I\19\09\99\03\133\99\03\B03:\0Aret;\0A\0A}\0A\00\00\00\00\00\00\00\00\00", section ".nv_fatbin", align 8 @__cuda_fatbin_wrapper = internal constant { i32, i32, i8*, i8* } { i32 1180844977, i32 1, i8* getelementptr inbounds ([26889 x i8], [26889 x i8]* @2, i64 0, i64 0), i8* null }, section ".nvFatBinSegment", align 8 @__cuda_gpubin_handle = internal global i8** null, align 8 @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* bitcast (void (i8*)* @__cuda_module_ctor to void ()*), i8* null }] ; Function Attrs: noinline optnone uwtable define dso_local void @_Z22bpnn_layerforward_CUDAPfS_S_S_ii(float* %input_cuda, float* %output_hidden_cuda, float* %input_hidden_cuda, float* %hidden_partial_sum, i32 %in, i32 %hid) #0 { entry: %input_cuda.addr = alloca float*, align 8 %output_hidden_cuda.addr = alloca float*, align 8 %input_hidden_cuda.addr = alloca float*, align 8 %hidden_partial_sum.addr = alloca float*, align 8 %in.addr = alloca i32, align 4 %hid.addr = alloca i32, align 4 %grid_dim = alloca %struct.dim3, align 8 %block_dim = alloca %struct.dim3, align 8 %shmem_size = alloca i64, align 8 %stream = alloca i8*, align 8 %grid_dim.coerce = alloca { i64, i32 }, align 8 %block_dim.coerce = alloca { i64, i32 }, align 8 store float* %input_cuda, float** %input_cuda.addr, align 8 store float* %output_hidden_cuda, float** %output_hidden_cuda.addr, align 8 store float* %input_hidden_cuda, float** %input_hidden_cuda.addr, align 8 store float* %hidden_partial_sum, float** %hidden_partial_sum.addr, align 8 store i32 %in, i32* %in.addr, align 4 store i32 %hid, i32* %hid.addr, align 4 %kernel_args = alloca i8*, i64 6, align 16 %0 = bitcast float** %input_cuda.addr to i8* %1 = getelementptr i8*, i8** %kernel_args, i32 0 store i8* %0, i8** %1 %2 = bitcast float** %output_hidden_cuda.addr to i8* %3 = getelementptr i8*, i8** %kernel_args, i32 1 store i8* %2, i8** %3 %4 = bitcast float** %input_hidden_cuda.addr to i8* %5 = getelementptr i8*, i8** %kernel_args, i32 2 store i8* %4, i8** %5 %6 = bitcast float** %hidden_partial_sum.addr to i8* %7 = 
getelementptr i8*, i8** %kernel_args, i32 3 store i8* %6, i8** %7 %8 = bitcast i32* %in.addr to i8* %9 = getelementptr i8*, i8** %kernel_args, i32 4 store i8* %8, i8** %9 %10 = bitcast i32* %hid.addr to i8* %11 = getelementptr i8*, i8** %kernel_args, i32 5 store i8* %10, i8** %11 %12 = call i32 @__cudaPopCallConfiguration(%struct.dim3* %grid_dim, %struct.dim3* %block_dim, i64* %shmem_size, i8** %stream) %13 = load i64, i64* %shmem_size, align 8 %14 = load i8*, i8** %stream, align 8 %15 = bitcast { i64, i32 }* %grid_dim.coerce to i8* %16 = bitcast %struct.dim3* %grid_dim to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %15, i8* align 8 %16, i64 12, i1 false) %17 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 0 %18 = load i64, i64* %17, align 8 %19 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 1 %20 = load i32, i32* %19, align 8 %21 = bitcast { i64, i32 }* %block_dim.coerce to i8* %22 = bitcast %struct.dim3* %block_dim to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %21, i8* align 8 %22, i64 12, i1 false) %23 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 0 %24 = load i64, i64* %23, align 8 %25 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 1 %26 = load i32, i32* %25, align 8 %27 = bitcast i8* %14 to %struct.CUstream_st* %call = call i32 @cudaLaunchKernel(i8* bitcast (void (float*, float*, float*, float*, i32, i32)* @_Z22bpnn_layerforward_CUDAPfS_S_S_ii to i8*), i64 %18, i32 %20, i64 %24, i32 %26, i8** %kernel_args, i64 %13, %struct.CUstream_st* %27) br label %setup.end setup.end: ; preds = %entry ret void } declare dso_local i32 @__cudaPopCallConfiguration(%struct.dim3*, %struct.dim3*, i64*, i8**) declare dso_local i32 @cudaLaunchKernel(i8*, i64, i32, i64, i32, i8**, i64, %struct.CUstream_st*) ; Function Attrs: argmemonly nounwind willreturn declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1 ; Function Attrs: noinline optnone uwtable define dso_local void @_Z24bpnn_adjust_weights_cudaPfiS_iS_S_(float* %delta, i32 %hid, float* %ly, i32 %in, float* %w, float* %oldw) #0 { entry: %delta.addr = alloca float*, align 8 %hid.addr = alloca i32, align 4 %ly.addr = alloca float*, align 8 %in.addr = alloca i32, align 4 %w.addr = alloca float*, align 8 %oldw.addr = alloca float*, align 8 %grid_dim = alloca %struct.dim3, align 8 %block_dim = alloca %struct.dim3, align 8 %shmem_size = alloca i64, align 8 %stream = alloca i8*, align 8 %grid_dim.coerce = alloca { i64, i32 }, align 8 %block_dim.coerce = alloca { i64, i32 }, align 8 store float* %delta, float** %delta.addr, align 8 store i32 %hid, i32* %hid.addr, align 4 store float* %ly, float** %ly.addr, align 8 store i32 %in, i32* %in.addr, align 4 store float* %w, float** %w.addr, align 8 store float* %oldw, float** %oldw.addr, align 8 %kernel_args = alloca i8*, i64 6, align 16 %0 = bitcast float** %delta.addr to i8* %1 = getelementptr i8*, i8** %kernel_args, i32 0 store i8* %0, i8** %1 %2 = bitcast i32* %hid.addr to i8* %3 = getelementptr i8*, i8** %kernel_args, i32 1 store i8* %2, i8** %3 %4 = bitcast float** %ly.addr to i8* %5 = getelementptr i8*, i8** %kernel_args, i32 2 store i8* %4, i8** %5 %6 = bitcast i32* %in.addr to i8* %7 = getelementptr i8*, i8** %kernel_args, i32 3 store i8* %6, i8** %7 %8 = bitcast float** %w.addr to i8* %9 = getelementptr i8*, i8** %kernel_args, i32 4 store i8* %8, i8** %9 %10 = bitcast float** 
%oldw.addr to i8* %11 = getelementptr i8*, i8** %kernel_args, i32 5 store i8* %10, i8** %11 %12 = call i32 @__cudaPopCallConfiguration(%struct.dim3* %grid_dim, %struct.dim3* %block_dim, i64* %shmem_size, i8** %stream) %13 = load i64, i64* %shmem_size, align 8 %14 = load i8*, i8** %stream, align 8 %15 = bitcast { i64, i32 }* %grid_dim.coerce to i8* %16 = bitcast %struct.dim3* %grid_dim to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %15, i8* align 8 %16, i64 12, i1 false) %17 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 0 %18 = load i64, i64* %17, align 8 %19 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 1 %20 = load i32, i32* %19, align 8 %21 = bitcast { i64, i32 }* %block_dim.coerce to i8* %22 = bitcast %struct.dim3* %block_dim to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %21, i8* align 8 %22, i64 12, i1 false) %23 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 0 %24 = load i64, i64* %23, align 8 %25 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 1 %26 = load i32, i32* %25, align 8 %27 = bitcast i8* %14 to %struct.CUstream_st* %call = call i32 @cudaLaunchKernel(i8* bitcast (void (float*, i32, float*, i32, float*, float*)* @_Z24bpnn_adjust_weights_cudaPfiS_iS_S_ to i8*), i64 %18, i32 %20, i64 %24, i32 %26, i8** %kernel_args, i64 %13, %struct.CUstream_st* %27) br label %setup.end setup.end: ; preds = %entry ret void } ; Function Attrs: noinline nounwind optnone uwtable define dso_local double @_Z7gettimev() #2 { entry: %t = alloca %struct.timeval, align 8 %call = call i32 @gettimeofday(%struct.timeval* %t, %struct.timezone* null) #7 %tv_sec = getelementptr inbounds %struct.timeval, %struct.timeval* %t, i32 0, i32 0 %0 = load i64, i64* %tv_sec, align 8 %conv = sitofp i64 %0 to double %tv_usec = getelementptr inbounds %struct.timeval, %struct.timeval* %t, i32 0, i32 1 %1 = load i64, i64* %tv_usec, align 8 %conv1 = sitofp i64 %1 to double %mul = fmul contract double %conv1, 0x3EB0C6F7A0B5ED8D %add = fadd contract double %conv, %mul ret double %add } ; Function Attrs: nounwind declare dso_local i32 @gettimeofday(%struct.timeval*, %struct.timezone*) #3 ; Function Attrs: noinline norecurse optnone uwtable define dso_local i32 @main(i32 %argc, i8** %argv) #4 { entry: %argc.addr = alloca i32, align 4 %argv.addr = alloca i8**, align 8 store i32 %argc, i32* %argc.addr, align 4 store i8** %argv, i8*** %argv.addr, align 8 %call = call i32 @cudaSetDevice(i32 0) %0 = load i32, i32* %argc.addr, align 4 %1 = load i8**, i8*** %argv.addr, align 8 %call1 = call i32 @setup(i32 %0, i8** %1) ret i32 0 } declare dso_local i32 @cudaSetDevice(i32) #5 declare dso_local i32 @setup(i32, i8**) #5 ; Function Attrs: noinline optnone uwtable define dso_local void @bpnn_train_cuda(%struct.BPNN* %net, float* %eo, float* %eh) #0 { entry: %net.addr = alloca %struct.BPNN*, align 8 %eo.addr = alloca float*, align 8 %eh.addr = alloca float*, align 8 %in = alloca i32, align 4 %hid = alloca i32, align 4 %out = alloca i32, align 4 %out_err = alloca float, align 4 %hid_err = alloca float, align 4 %m = alloca i32, align 4 %input_hidden_cuda = alloca float*, align 8 %input_cuda = alloca float*, align 8 %output_hidden_cuda = alloca float*, align 8 %partial_sum = alloca float*, align 8 %hidden_partial_sum = alloca float*, align 8 %hidden_delta_cuda = alloca float*, align 8 %input_prev_weights_cuda = alloca float*, align 8 %sum = alloca float, align 4 %input_weights_one_dim = 
alloca float*, align 8 %input_weights_prev_one_dim = alloca float*, align 8 %grid = alloca %struct.dim3, align 4 %threads = alloca %struct.dim3, align 4 %k = alloca i32, align 4 %j = alloca i32, align 4 %agg.tmp = alloca %struct.dim3, align 4 %agg.tmp59 = alloca %struct.dim3, align 4 %agg.tmp.coerce = alloca { i64, i32 }, align 4 %agg.tmp59.coerce = alloca { i64, i32 }, align 4 %error = alloca i32, align 4 %j70 = alloca i32, align 4 %k74 = alloca i32, align 4 %agg.tmp136 = alloca %struct.dim3, align 4 %agg.tmp137 = alloca %struct.dim3, align 4 %agg.tmp136.coerce = alloca { i64, i32 }, align 4 %agg.tmp137.coerce = alloca { i64, i32 }, align 4 %i = alloca i32, align 4 store %struct.BPNN* %net, %struct.BPNN** %net.addr, align 8 store float* %eo, float** %eo.addr, align 8 store float* %eh, float** %eh.addr, align 8 %0 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 %input_n = getelementptr inbounds %struct.BPNN, %struct.BPNN* %0, i32 0, i32 0 %1 = load i32, i32* %input_n, align 8 store i32 %1, i32* %in, align 4 %2 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 %hidden_n = getelementptr inbounds %struct.BPNN, %struct.BPNN* %2, i32 0, i32 1 %3 = load i32, i32* %hidden_n, align 4 store i32 %3, i32* %hid, align 4 %4 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 %output_n = getelementptr inbounds %struct.BPNN, %struct.BPNN* %4, i32 0, i32 2 %5 = load i32, i32* %output_n, align 8 store i32 %5, i32* %out, align 4 store i32 0, i32* %m, align 4 %6 = load i32, i32* %in, align 4 %div = sdiv i32 %6, 16 store i32 %div, i32* @num_blocks, align 4 %7 = load i32, i32* @num_blocks, align 4 call void @_ZN4dim3C2Ejjj(%struct.dim3* %grid, i32 1, i32 %7, i32 1) call void @_ZN4dim3C2Ejjj(%struct.dim3* %threads, i32 16, i32 16, i32 1) %8 = load i32, i32* %in, align 4 %add = add nsw i32 %8, 1 %9 = load i32, i32* %hid, align 4 %add1 = add nsw i32 %9, 1 %mul = mul nsw i32 %add, %add1 %conv = sext i32 %mul to i64 %mul2 = mul i64 %conv, 4 %call = call noalias i8* @malloc(i64 %mul2) #7 %10 = bitcast i8* %call to float* store float* %10, float** %input_weights_one_dim, align 8 %11 = load i32, i32* %in, align 4 %add3 = add nsw i32 %11, 1 %12 = load i32, i32* %hid, align 4 %add4 = add nsw i32 %12, 1 %mul5 = mul nsw i32 %add3, %add4 %conv6 = sext i32 %mul5 to i64 %mul7 = mul i64 %conv6, 4 %call8 = call noalias i8* @malloc(i64 %mul7) #7 %13 = bitcast i8* %call8 to float* store float* %13, float** %input_weights_prev_one_dim, align 8 %14 = load i32, i32* @num_blocks, align 4 %mul9 = mul i32 %14, 16 %conv10 = zext i32 %mul9 to i64 %mul11 = mul i64 %conv10, 4 %call12 = call noalias i8* @malloc(i64 %mul11) #7 %15 = bitcast i8* %call12 to float* store float* %15, float** %partial_sum, align 8 store i32 0, i32* %k, align 4 br label %for.cond for.cond: ; preds = %for.inc27, %entry %16 = load i32, i32* %k, align 4 %17 = load i32, i32* %in, align 4 %cmp = icmp sle i32 %16, %17 br i1 %cmp, label %for.body, label %for.end29 for.body: ; preds = %for.cond store i32 0, i32* %j, align 4 br label %for.cond13 for.cond13: ; preds = %for.inc, %for.body %18 = load i32, i32* %j, align 4 %19 = load i32, i32* %hid, align 4 %cmp14 = icmp sle i32 %18, %19 br i1 %cmp14, label %for.body15, label %for.end for.body15: ; preds = %for.cond13 %20 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 %input_weights = getelementptr inbounds %struct.BPNN, %struct.BPNN* %20, i32 0, i32 9 %21 = load float**, float*** %input_weights, align 8 %22 = load i32, i32* %k, align 4 %idxprom = sext i32 %22 to i64 %arrayidx = getelementptr 
inbounds float*, float** %21, i64 %idxprom %23 = load float*, float** %arrayidx, align 8 %24 = load i32, i32* %j, align 4 %idxprom16 = sext i32 %24 to i64 %arrayidx17 = getelementptr inbounds float, float* %23, i64 %idxprom16 %25 = load float, float* %arrayidx17, align 4 %26 = load float*, float** %input_weights_one_dim, align 8 %27 = load i32, i32* %m, align 4 %idxprom18 = sext i32 %27 to i64 %arrayidx19 = getelementptr inbounds float, float* %26, i64 %idxprom18 store float %25, float* %arrayidx19, align 4 %28 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 %input_prev_weights = getelementptr inbounds %struct.BPNN, %struct.BPNN* %28, i32 0, i32 11 %29 = load float**, float*** %input_prev_weights, align 8 %30 = load i32, i32* %k, align 4 %idxprom20 = sext i32 %30 to i64 %arrayidx21 = getelementptr inbounds float*, float** %29, i64 %idxprom20 %31 = load float*, float** %arrayidx21, align 8 %32 = load i32, i32* %j, align 4 %idxprom22 = sext i32 %32 to i64 %arrayidx23 = getelementptr inbounds float, float* %31, i64 %idxprom22 %33 = load float, float* %arrayidx23, align 4 %34 = load float*, float** %input_weights_prev_one_dim, align 8 %35 = load i32, i32* %m, align 4 %idxprom24 = sext i32 %35 to i64 %arrayidx25 = getelementptr inbounds float, float* %34, i64 %idxprom24 store float %33, float* %arrayidx25, align 4 %36 = load i32, i32* %m, align 4 %inc = add nsw i32 %36, 1 store i32 %inc, i32* %m, align 4 br label %for.inc for.inc: ; preds = %for.body15 %37 = load i32, i32* %j, align 4 %inc26 = add nsw i32 %37, 1 store i32 %inc26, i32* %j, align 4 br label %for.cond13 for.end: ; preds = %for.cond13 br label %for.inc27 for.inc27: ; preds = %for.end %38 = load i32, i32* %k, align 4 %inc28 = add nsw i32 %38, 1 store i32 %inc28, i32* %k, align 4 br label %for.cond for.end29: ; preds = %for.cond %39 = bitcast float** %input_cuda to i8** %40 = load i32, i32* %in, align 4 %add30 = add nsw i32 %40, 1 %conv31 = sext i32 %add30 to i64 %mul32 = mul i64 %conv31, 4 %call33 = call i32 @cudaMalloc(i8** %39, i64 %mul32) %41 = bitcast float** %output_hidden_cuda to i8** %42 = load i32, i32* %hid, align 4 %add34 = add nsw i32 %42, 1 %conv35 = sext i32 %add34 to i64 %mul36 = mul i64 %conv35, 4 %call37 = call i32 @cudaMalloc(i8** %41, i64 %mul36) %43 = bitcast float** %input_hidden_cuda to i8** %44 = load i32, i32* %in, align 4 %add38 = add nsw i32 %44, 1 %45 = load i32, i32* %hid, align 4 %add39 = add nsw i32 %45, 1 %mul40 = mul nsw i32 %add38, %add39 %conv41 = sext i32 %mul40 to i64 %mul42 = mul i64 %conv41, 4 %call43 = call i32 @cudaMalloc(i8** %43, i64 %mul42) %46 = bitcast float** %hidden_partial_sum to i8** %47 = load i32, i32* @num_blocks, align 4 %mul44 = mul i32 %47, 16 %conv45 = zext i32 %mul44 to i64 %mul46 = mul i64 %conv45, 4 %call47 = call i32 @cudaMalloc(i8** %46, i64 %mul46) %call48 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([28 x i8], [28 x i8]* @.str, i64 0, i64 0)) %48 = load float*, float** %input_cuda, align 8 %49 = bitcast float* %48 to i8* %50 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 %input_units = getelementptr inbounds %struct.BPNN, %struct.BPNN* %50, i32 0, i32 3 %51 = load float*, float** %input_units, align 8 %52 = bitcast float* %51 to i8* %53 = load i32, i32* %in, align 4 %add49 = add nsw i32 %53, 1 %conv50 = sext i32 %add49 to i64 %mul51 = mul i64 %conv50, 4 %call52 = call i32 @cudaMemcpy(i8* %49, i8* %52, i64 %mul51, i32 1) %54 = load float*, float** %input_hidden_cuda, align 8 %55 = bitcast float* %54 to i8* %56 = load float*, float** %input_weights_one_dim, align 8 %57 = bitcast float* %56 to i8* %58 = load i32, i32* %in, align 4 %add53 = add nsw i32 %58, 1 %59 = load i32, i32* %hid, align 4 %add54 = add nsw i32 %59, 1 %mul55 = mul nsw i32 %add53, %add54 %conv56 = sext i32 %mul55 to i64 %mul57 = mul i64 %conv56, 4 %call58 = call i32 @cudaMemcpy(i8* %55, i8* %57, i64 %mul57, i32 1) %60 = bitcast %struct.dim3* %agg.tmp to i8* %61 = bitcast %struct.dim3* %grid to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %60, i8* align 4 %61, i64 12, i1 false) %62 = bitcast %struct.dim3* %agg.tmp59 to i8* %63 = bitcast %struct.dim3* %threads to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %62, i8* align 4 %63, i64 12, i1 false) %64 = bitcast { i64, i32 }* %agg.tmp.coerce to i8* %65 = bitcast %struct.dim3* %agg.tmp to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %64, i8* align 4 %65, i64 12, i1 false) %66 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 0 %67 = load i64, i64* %66, align 4 %68 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 1 %69 = load i32, i32* %68, align 4 %70 = bitcast { i64, i32 }* %agg.tmp59.coerce to i8* %71 = bitcast %struct.dim3* %agg.tmp59 to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %70, i8* align 4 %71, i64 12, i1 false) %72 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp59.coerce, i32 0, i32 0 %73 = load i64, i64* %72, align 4 %74 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp59.coerce, i32 0, i32 1 %75 = load i32, i32* %74, align 4 %call60 = call i32 @__cudaPushCallConfiguration(i64 %67, i32 %69, i64 %73, i32 %75, i64 0, i8* null) %tobool = icmp ne i32 %call60, 0 br i1 %tobool, label %kcall.end, label %kcall.configok kcall.configok: ; preds = %for.end29 %76 = load float*, float** %input_cuda, align 8 %77 = load float*, float** %output_hidden_cuda, align 8 %78 = load float*, float** %input_hidden_cuda, align 8 %79 = load float*, float** %hidden_partial_sum, align 8 %80 = load i32, i32* %in, align 4 %81 = load i32, i32* %hid, align 4 call void @_Z22bpnn_layerforward_CUDAPfS_S_S_ii(float* %76, float* %77, float* %78, float* %79, i32 %80, i32 %81) br label %kcall.end kcall.end: ; preds = %kcall.configok, %for.end29 %call61 = call i32 @cudaThreadSynchronize() %call62 = call i32 @cudaGetLastError() store i32 %call62, i32* %error, align 4 %82 = load i32, i32* %error, align 4 %cmp63 = icmp ne i32 %82, 0 br i1 %cmp63, label %if.then, label %if.end if.then: ; preds = %kcall.end %83 = load i32, i32* %error, align 4 %call64 = call i8* @cudaGetErrorString(i32 %83) %call65 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str.1, i64 0, i64 0), i8* %call64) call void @exit(i32 1) #8 unreachable if.end: ; preds = %kcall.end %84 = load float*, float** %partial_sum, align 8 %85 = bitcast float* %84 to i8* %86 = load float*, float** %hidden_partial_sum, align 8 %87 = bitcast float* %86 to i8* %88 = load i32, i32* @num_blocks, align 4 %mul66 = mul i32 %88, 16 %conv67 = zext i32 %mul66 to i64 %mul68 = mul i64 %conv67, 4 %call69 = call i32 @cudaMemcpy(i8* %85, i8* %87, i64 %mul68, i32 2) store i32 1, i32* %j70, align 4 br label %for.cond71 for.cond71: ; preds = %for.inc98, %if.end %89 = load i32, i32* %j70, align 4 %90 = load i32, i32* %hid, align 4 %cmp72 = icmp sle i32 %89, %90 br i1 %cmp72, label %for.body73, label %for.end100 for.body73: ; preds = %for.cond71 store float 0.000000e+00, float* %sum, align 4 store i32 0, i32* %k74, align 4 br label %for.cond75 for.cond75: ; preds = %for.inc83, %for.body73 %91 = load i32, i32* %k74, align 4 %92 = load i32, i32* @num_blocks, align 4 %cmp76 = icmp ult i32 %91, %92 br i1 %cmp76, label %for.body77, label %for.end85 for.body77: ; preds = %for.cond75 %93 = load float*, float** %partial_sum, align 8 %94 = load i32, i32* %k74, align 4 %95 = load i32, i32* %hid, align 4 %mul78 = mul nsw i32 %94, %95 %96 = load i32, i32* %j70, align 4 %add79 = add nsw i32 %mul78, %96 %sub = sub nsw i32 %add79, 1 %idxprom80 = sext i32 %sub to i64 %arrayidx81 = getelementptr inbounds float, float* %93, i64 %idxprom80 %97 = load float, float* %arrayidx81, align 4 %98 = load float, float* %sum, align 4 %add82 = fadd contract float %98, %97 store float %add82, float* %sum, align 4 br label %for.inc83 for.inc83: ; preds = %for.body77 %99 = load i32, i32* %k74, align 4 %inc84 = add nsw i32 %99, 1 store i32 %inc84, i32* %k74, align 4 br label %for.cond75 for.end85: ; preds = %for.cond75 %100 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 %input_weights86 = getelementptr inbounds %struct.BPNN, %struct.BPNN* %100, i32 0, i32 9 %101 = load float**, float*** %input_weights86, align 8 %arrayidx87 = getelementptr inbounds float*, float** %101, i64 0 %102 = load float*, float** %arrayidx87, align 8 %103 = load i32, i32* %j70, align 4 %idxprom88 = sext i32 %103 to i64 %arrayidx89 = getelementptr inbounds float, float* %102, i64 %idxprom88 %104 = load float, float* %arrayidx89, align 4 %105 = load float, float* %sum, align 4 %add90 = fadd contract float %105, %104 store float %add90, float* %sum, align 4 %106 = load float, float* %sum, align 4 %fneg = fneg float %106 %call91 = call float @_ZSt3expf(float %fneg) %conv92 = fpext float %call91 to double %add93 = fadd contract double 1.000000e+00, %conv92 %div94 = fdiv double 1.000000e+00, %add93 %conv95 = fptrunc double %div94 to float %107 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 %hidden_units = getelementptr inbounds %struct.BPNN, %struct.BPNN* %107, i32 0, i32 4 %108 = load float*, float** %hidden_units, align 8 %109 = load i32, i32* %j70, align 4 %idxprom96 = sext i32 %109 to i64 %arrayidx97 = getelementptr inbounds float, float* %108, i64 %idxprom96 store float %conv95, float* %arrayidx97, align 4 br label %for.inc98 for.inc98: ; preds = %for.end85 %110 = load i32, i32* %j70, align 4 %inc99 = add nsw i32 %110, 1 store i32 %inc99, i32* %j70, align 4 br label %for.cond71 for.end100: ; preds = %for.cond71 %111 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 %hidden_units101 = getelementptr inbounds %struct.BPNN, %struct.BPNN* %111, i32 0, i32 4 %112 = load float*, 
float** %hidden_units101, align 8 %113 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 %output_units = getelementptr inbounds %struct.BPNN, %struct.BPNN* %113, i32 0, i32 5 %114 = load float*, float** %output_units, align 8 %115 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 %hidden_weights = getelementptr inbounds %struct.BPNN, %struct.BPNN* %115, i32 0, i32 10 %116 = load float**, float*** %hidden_weights, align 8 %117 = load i32, i32* %hid, align 4 %118 = load i32, i32* %out, align 4 call void @bpnn_layerforward(float* %112, float* %114, float** %116, i32 %117, i32 %118) %119 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 %output_delta = getelementptr inbounds %struct.BPNN, %struct.BPNN* %119, i32 0, i32 7 %120 = load float*, float** %output_delta, align 8 %121 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 %target = getelementptr inbounds %struct.BPNN, %struct.BPNN* %121, i32 0, i32 8 %122 = load float*, float** %target, align 8 %123 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 %output_units102 = getelementptr inbounds %struct.BPNN, %struct.BPNN* %123, i32 0, i32 5 %124 = load float*, float** %output_units102, align 8 %125 = load i32, i32* %out, align 4 call void @bpnn_output_error(float* %120, float* %122, float* %124, i32 %125, float* %out_err) %126 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 %hidden_delta = getelementptr inbounds %struct.BPNN, %struct.BPNN* %126, i32 0, i32 6 %127 = load float*, float** %hidden_delta, align 8 %128 = load i32, i32* %hid, align 4 %129 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 %output_delta103 = getelementptr inbounds %struct.BPNN, %struct.BPNN* %129, i32 0, i32 7 %130 = load float*, float** %output_delta103, align 8 %131 = load i32, i32* %out, align 4 %132 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 %hidden_weights104 = getelementptr inbounds %struct.BPNN, %struct.BPNN* %132, i32 0, i32 10 %133 = load float**, float*** %hidden_weights104, align 8 %134 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 %hidden_units105 = getelementptr inbounds %struct.BPNN, %struct.BPNN* %134, i32 0, i32 4 %135 = load float*, float** %hidden_units105, align 8 call void @bpnn_hidden_error(float* %127, i32 %128, float* %130, i32 %131, float** %133, float* %135, float* %hid_err) %136 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 %output_delta106 = getelementptr inbounds %struct.BPNN, %struct.BPNN* %136, i32 0, i32 7 %137 = load float*, float** %output_delta106, align 8 %138 = load i32, i32* %out, align 4 %139 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 %hidden_units107 = getelementptr inbounds %struct.BPNN, %struct.BPNN* %139, i32 0, i32 4 %140 = load float*, float** %hidden_units107, align 8 %141 = load i32, i32* %hid, align 4 %142 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 %hidden_weights108 = getelementptr inbounds %struct.BPNN, %struct.BPNN* %142, i32 0, i32 10 %143 = load float**, float*** %hidden_weights108, align 8 %144 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 %hidden_prev_weights = getelementptr inbounds %struct.BPNN, %struct.BPNN* %144, i32 0, i32 12 %145 = load float**, float*** %hidden_prev_weights, align 8 call void @bpnn_adjust_weights(float* %137, i32 %138, float* %140, i32 %141, float** %143, float** %145) %146 = bitcast float** %hidden_delta_cuda to i8** %147 = load i32, i32* %hid, align 4 %add109 = add nsw i32 %147, 1 %conv110 = sext i32 %add109 to i64 %mul111 = mul i64 %conv110, 4 %call112 = call i32 @cudaMalloc(i8** 
%146, i64 %mul111) %148 = bitcast float** %input_prev_weights_cuda to i8** %149 = load i32, i32* %in, align 4 %add113 = add nsw i32 %149, 1 %150 = load i32, i32* %hid, align 4 %add114 = add nsw i32 %150, 1 %mul115 = mul nsw i32 %add113, %add114 %conv116 = sext i32 %mul115 to i64 %mul117 = mul i64 %conv116, 4 %call118 = call i32 @cudaMalloc(i8** %148, i64 %mul117) %151 = load float*, float** %hidden_delta_cuda, align 8 %152 = bitcast float* %151 to i8* %153 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 %hidden_delta119 = getelementptr inbounds %struct.BPNN, %struct.BPNN* %153, i32 0, i32 6 %154 = load float*, float** %hidden_delta119, align 8 %155 = bitcast float* %154 to i8* %156 = load i32, i32* %hid, align 4 %add120 = add nsw i32 %156, 1 %conv121 = sext i32 %add120 to i64 %mul122 = mul i64 %conv121, 4 %call123 = call i32 @cudaMemcpy(i8* %152, i8* %155, i64 %mul122, i32 1) %157 = load float*, float** %input_prev_weights_cuda, align 8 %158 = bitcast float* %157 to i8* %159 = load float*, float** %input_weights_prev_one_dim, align 8 %160 = bitcast float* %159 to i8* %161 = load i32, i32* %in, align 4 %add124 = add nsw i32 %161, 1 %162 = load i32, i32* %hid, align 4 %add125 = add nsw i32 %162, 1 %mul126 = mul nsw i32 %add124, %add125 %conv127 = sext i32 %mul126 to i64 %mul128 = mul i64 %conv127, 4 %call129 = call i32 @cudaMemcpy(i8* %158, i8* %160, i64 %mul128, i32 1) %163 = load float*, float** %input_hidden_cuda, align 8 %164 = bitcast float* %163 to i8* %165 = load float*, float** %input_weights_one_dim, align 8 %166 = bitcast float* %165 to i8* %167 = load i32, i32* %in, align 4 %add130 = add nsw i32 %167, 1 %168 = load i32, i32* %hid, align 4 %add131 = add nsw i32 %168, 1 %mul132 = mul nsw i32 %add130, %add131 %conv133 = sext i32 %mul132 to i64 %mul134 = mul i64 %conv133, 4 %call135 = call i32 @cudaMemcpy(i8* %164, i8* %166, i64 %mul134, i32 1) %169 = bitcast %struct.dim3* %agg.tmp136 to i8* %170 = bitcast %struct.dim3* %grid to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %169, i8* align 4 %170, i64 12, i1 false) %171 = bitcast %struct.dim3* %agg.tmp137 to i8* %172 = bitcast %struct.dim3* %threads to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %171, i8* align 4 %172, i64 12, i1 false) %173 = bitcast { i64, i32 }* %agg.tmp136.coerce to i8* %174 = bitcast %struct.dim3* %agg.tmp136 to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %173, i8* align 4 %174, i64 12, i1 false) %175 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp136.coerce, i32 0, i32 0 %176 = load i64, i64* %175, align 4 %177 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp136.coerce, i32 0, i32 1 %178 = load i32, i32* %177, align 4 %179 = bitcast { i64, i32 }* %agg.tmp137.coerce to i8* %180 = bitcast %struct.dim3* %agg.tmp137 to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %179, i8* align 4 %180, i64 12, i1 false) %181 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp137.coerce, i32 0, i32 0 %182 = load i64, i64* %181, align 4 %183 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp137.coerce, i32 0, i32 1 %184 = load i32, i32* %183, align 4 %call138 = call i32 @__cudaPushCallConfiguration(i64 %176, i32 %178, i64 %182, i32 %184, i64 0, i8* null) %tobool139 = icmp ne i32 %call138, 0 br i1 %tobool139, label %kcall.end141, label %kcall.configok140 kcall.configok140: ; preds = %for.end100 %185 = load float*, float** %hidden_delta_cuda, align 8 %186 = load i32, i32* %hid, align 4 %187 = load float*, float** %input_cuda, align 8 %188 = load 
i32, i32* %in, align 4 %189 = load float*, float** %input_hidden_cuda, align 8 %190 = load float*, float** %input_prev_weights_cuda, align 8 call void @_Z24bpnn_adjust_weights_cudaPfiS_iS_S_(float* %185, i32 %186, float* %187, i32 %188, float* %189, float* %190) br label %kcall.end141 kcall.end141: ; preds = %kcall.configok140, %for.end100 %191 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 %input_units142 = getelementptr inbounds %struct.BPNN, %struct.BPNN* %191, i32 0, i32 3 %192 = load float*, float** %input_units142, align 8 %193 = bitcast float* %192 to i8* %194 = load float*, float** %input_cuda, align 8 %195 = bitcast float* %194 to i8* %196 = load i32, i32* %in, align 4 %add143 = add nsw i32 %196, 1 %conv144 = sext i32 %add143 to i64 %mul145 = mul i64 %conv144, 4 %call146 = call i32 @cudaMemcpy(i8* %193, i8* %195, i64 %mul145, i32 2) %197 = load float*, float** %input_weights_one_dim, align 8 %198 = bitcast float* %197 to i8* %199 = load float*, float** %input_hidden_cuda, align 8 %200 = bitcast float* %199 to i8* %201 = load i32, i32* %in, align 4 %add147 = add nsw i32 %201, 1 %202 = load i32, i32* %hid, align 4 %add148 = add nsw i32 %202, 1 %mul149 = mul nsw i32 %add147, %add148 %conv150 = sext i32 %mul149 to i64 %mul151 = mul i64 %conv150, 4 %call152 = call i32 @cudaMemcpy(i8* %198, i8* %200, i64 %mul151, i32 2) store i32 0, i32* %i, align 4 br label %for.cond153 for.cond153: ; preds = %for.inc163, %kcall.end141 %203 = load i32, i32* %i, align 4 %204 = load i32, i32* %in, align 4 %add154 = add nsw i32 %204, 1 %205 = load i32, i32* %hid, align 4 %add155 = add nsw i32 %205, 1 %mul156 = mul nsw i32 %add154, %add155 %cmp157 = icmp slt i32 %203, %mul156 br i1 %cmp157, label %for.body158, label %for.end165 for.body158: ; preds = %for.cond153 %206 = load float*, float** %input_weights_one_dim, align 8 %207 = load i32, i32* %i, align 4 %idxprom159 = sext i32 %207 to i64 %arrayidx160 = getelementptr inbounds float, float* %206, i64 %idxprom159 %208 = load float, float* %arrayidx160, align 4 %conv161 = fpext float %208 to double %call162 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.2, i64 0, i64 0), double %conv161) br label %for.inc163 for.inc163: ; preds = %for.body158 %209 = load i32, i32* %i, align 4 %inc164 = add nsw i32 %209, 1 store i32 %inc164, i32* %i, align 4 br label %for.cond153 for.end165: ; preds = %for.cond153 %call166 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.3, i64 0, i64 0)) %210 = load float*, float** %input_cuda, align 8 %211 = bitcast float* %210 to i8* %call167 = call i32 @cudaFree(i8* %211) %212 = load float*, float** %output_hidden_cuda, align 8 %213 = bitcast float* %212 to i8* %call168 = call i32 @cudaFree(i8* %213) %214 = load float*, float** %input_hidden_cuda, align 8 %215 = bitcast float* %214 to i8* %call169 = call i32 @cudaFree(i8* %215) %216 = load float*, float** %hidden_partial_sum, align 8 %217 = bitcast float* %216 to i8* %call170 = call i32 @cudaFree(i8* %217) %218 = load float*, float** %input_prev_weights_cuda, align 8 %219 = bitcast float* %218 to i8* %call171 = call i32 @cudaFree(i8* %219) %220 = load float*, float** %hidden_delta_cuda, align 8 %221 = bitcast float* %220 to i8* %call172 = call i32 @cudaFree(i8* %221) %222 = load float*, float** %partial_sum, align 8 %223 = bitcast float* %222 to i8* call void @free(i8* %223) #7 %224 = load float*, float** %input_weights_one_dim, align 8 %225 = bitcast float* %224 to i8* call void @free(i8* %225) #7 %226 = load float*, float** %input_weights_prev_one_dim, align 8 %227 = bitcast float* %226 to i8* call void @free(i8* %227) #7 ret void } ; Function Attrs: noinline nounwind optnone uwtable define linkonce_odr dso_local void @_ZN4dim3C2Ejjj(%struct.dim3* %this, i32 %vx, i32 %vy, i32 %vz) unnamed_addr #2 comdat align 2 { entry: %this.addr = alloca %struct.dim3*, align 8 %vx.addr = alloca i32, align 4 %vy.addr = alloca i32, align 4 %vz.addr = alloca i32, align 4 store %struct.dim3* %this, %struct.dim3** %this.addr, align 8 store i32 %vx, i32* %vx.addr, align 4 store i32 %vy, i32* %vy.addr, align 4 store i32 %vz, i32* %vz.addr, align 4 %this1 = load %struct.dim3*, %struct.dim3** %this.addr, align 8 %x = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 0 %0 = load i32, i32* %vx.addr, align 4 store i32 %0, i32* %x, align 4 %y = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 1 %1 = load i32, i32* %vy.addr, align 4 store i32 %1, i32* %y, align 4 %z = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 2 %2 = load i32, i32* %vz.addr, align 4 store i32 %2, i32* %z, align 4 ret void } ; Function Attrs: nounwind declare dso_local noalias i8* @malloc(i64) #3 declare dso_local i32 @cudaMalloc(i8**, i64) #5 declare dso_local i32 @printf(i8*, ...) 
#5

declare dso_local i32 @cudaMemcpy(i8*, i8*, i64, i32) #5

declare dso_local i32 @__cudaPushCallConfiguration(i64, i32, i64, i32, i64, i8*) #5

declare dso_local i32 @cudaThreadSynchronize() #5

declare dso_local i32 @cudaGetLastError() #5

declare dso_local i8* @cudaGetErrorString(i32) #5

; Function Attrs: noreturn nounwind
declare dso_local void @exit(i32) #6

; Function Attrs: noinline nounwind optnone uwtable
define linkonce_odr dso_local float @_ZSt3expf(float %__x) #2 comdat {
entry:
  %__x.addr = alloca float, align 4
  store float %__x, float* %__x.addr, align 4
  %0 = load float, float* %__x.addr, align 4
  %call = call float @expf(float %0) #7
  ret float %call
}

declare dso_local void @bpnn_layerforward(float*, float*, float**, i32, i32) #5

declare dso_local void @bpnn_output_error(float*, float*, float*, i32, float*) #5

declare dso_local void @bpnn_hidden_error(float*, i32, float*, i32, float**, float*, float*) #5

declare dso_local void @bpnn_adjust_weights(float*, i32, float*, i32, float**, float**) #5

declare dso_local i32 @cudaFree(i8*) #5

; Function Attrs: nounwind
declare dso_local void @free(i8*) #3

; Function Attrs: nounwind
declare dso_local float @expf(float) #3

define internal void @__cuda_register_globals(i8** %0) {
entry:
  %1 = call i32 @__cudaRegisterFunction(i8** %0, i8* bitcast (void (float*, float*, float*, float*, i32, i32)* @_Z22bpnn_layerforward_CUDAPfS_S_S_ii to i8*), i8* getelementptr inbounds ([37 x i8], [37 x i8]* @0, i64 0, i64 0), i8* getelementptr inbounds ([37 x i8], [37 x i8]* @0, i64 0, i64 0), i32 -1, i8* null, i8* null, i8* null, i8* null, i32* null)
  %2 = call i32 @__cudaRegisterFunction(i8** %0, i8* bitcast (void (float*, i32, float*, i32, float*, float*)* @_Z24bpnn_adjust_weights_cudaPfiS_iS_S_ to i8*), i8* getelementptr inbounds ([39 x i8], [39 x i8]* @1, i64 0, i64 0), i8* getelementptr inbounds ([39 x i8], [39 x i8]* @1, i64 0, i64 0), i32 -1, i8* null, i8* null, i8* null, i8* null, i32* null)
  ret void
}

declare dso_local i32 @__cudaRegisterFunction(i8**, i8*, i8*, i8*, i32, i8*, i8*, i8*, i8*, i32*)

declare dso_local i32 @__cudaRegisterVar(i8**, i8*, i8*, i8*, i32, i32, i32, i32)

declare dso_local i8** @__cudaRegisterFatBinary(i8*)

define internal void @__cuda_module_ctor(i8* %0) {
entry:
  %1 = call i8** @__cudaRegisterFatBinary(i8* bitcast ({ i32, i32, i8*, i8* }* @__cuda_fatbin_wrapper to i8*))
  store i8** %1, i8*** @__cuda_gpubin_handle, align 8
  call void @__cuda_register_globals(i8** %1)
  call void @__cudaRegisterFatBinaryEnd(i8** %1)
  %2 = call i32 @atexit(void (i8*)* @__cuda_module_dtor)
  ret void
}

declare dso_local void @__cudaRegisterFatBinaryEnd(i8**)

declare dso_local void @__cudaUnregisterFatBinary(i8**)

define internal void @__cuda_module_dtor(i8* %0) {
entry:
  %1 = load i8**, i8*** @__cuda_gpubin_handle, align 8
  call void @__cudaUnregisterFatBinary(i8** %1)
  ret void
}

declare dso_local i32 @atexit(void (i8*)*)

attributes #0 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { argmemonly nounwind willreturn }
attributes #2 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #3 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #4 = { noinline norecurse optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #5 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #6 = { noreturn nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #7 = { nounwind }
attributes #8 = { noreturn nounwind }

!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}

!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]}
!1 = !{i32 1, !"wchar_size", i32 4}
!2 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"}