; ModuleID = 'bfs-host-x86_64-unknown-linux-gnu.bc' source_filename = "bfs.cu" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" %struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } %struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } %struct.Node = type { i32, i32 } %struct.dim3 = type { i32, i32, i32 } %struct.CUstream_st = type opaque $_ZN4dim3C2Ejjj = comdat any @no_of_nodes = dso_local global i32 0, align 4 @edge_list_size = dso_local global i32 0, align 4 @fp = dso_local global %struct._IO_FILE* null, align 8 @stderr = external dso_local global %struct._IO_FILE*, align 8 @.str = private unnamed_addr constant [24 x i8] c"Usage: %s \0A\00", align 1 @.str.1 = private unnamed_addr constant [14 x i8] c"Reading File\0A\00", align 1 @.str.2 = private unnamed_addr constant [2 x i8] c"r\00", align 1 @.str.3 = private unnamed_addr constant [26 x i8] c"Error Reading graph file\0A\00", align 1 @.str.4 = private unnamed_addr constant [3 x i8] c"%d\00", align 1 @.str.5 = private unnamed_addr constant [6 x i8] c"%d %d\00", align 1 @.str.6 = private unnamed_addr constant [11 x i8] c"Read File\0A\00", align 1 @.str.7 = private unnamed_addr constant [33 x i8] c"Copied Everything to GPU memory\0A\00", align 1 @.str.8 = private unnamed_addr constant [27 x i8] c"Start traversing the tree\0A\00", align 1 @.str.9 = private unnamed_addr constant [26 x i8] c"Kernel Executed %d times\0A\00", align 1 @.str.10 = private unnamed_addr constant [11 x i8] c"result.txt\00", align 1 @.str.11 = private unnamed_addr constant [2 x i8] c"w\00", align 1 @.str.12 = private unnamed_addr constant [13 x i8] c"%d) cost:%d\0A\00", align 1 @.str.13 = private unnamed_addr constant [29 x i8] c"Result stored in result.txt\0A\00", align 1 @0 = 
private unnamed_addr constant [30 x i8] c"_Z6KernelP4NodePiPbS2_S2_S1_i\00", align 1 @1 = private unnamed_addr constant [20 x i8] c"_Z7Kernel2PbS_S_S_i\00", align 1 @2 = private constant [15329 x i8] c"P\EDU\BA\01\00\10\00\D0;\00\00\00\00\00\00\02\00\01\01@\00\00\00H2\00\00\00\00\00\00\00\00\00\00\00\00\00\00\07\00\01\00=\00\00\00\00\00\00\00\00\00\00\00\11\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\7FELF\02\01\013\07\00\00\00\00\00\00\00\02\00\BE\00e\00\00\00\00\00\00\00\00\00\00\00\A01\00\00\00\00\00\00\A0.\00\00\00\00\00\00=\05=\00@\008\00\03\00@\00\0C\00\01\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00.text._Z7Kernel2PbS_S_S_i\00.nv.info._Z7Kernel2PbS_S_S_i\00.nv.shared._Z7Kernel2PbS_S_S_i\00.nv.global\00.nv.constant0._Z7Kernel2PbS_S_S_i\00.text._Z6KernelP4NodePiPbS2_S2_S1_i\00.nv.info._Z6KernelP4NodePiPbS2_S2_S1_i\00.nv.shared._Z6KernelP4NodePiPbS2_S2_S1_i\00.nv.constant0._Z6KernelP4NodePiPbS2_S2_S1_i\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00_Z7Kernel2PbS_S_S_i\00.text._Z7Kernel2PbS_S_S_i\00.nv.info._Z7Kernel2PbS_S_S_i\00.nv.shared._Z7Kernel2PbS_S_S_i\00.nv.global\00blockIdx\00threadIdx\00.nv.constant0._Z7Kernel2PbS_S_S_i\00_param\00_Z6KernelP4NodePiPbS2_S2_S1_i\00.text._Z6KernelP4NodePiPbS2_S2_S1_i\00.nv.info._Z6KernelP4NodePiPbS2_S2_S1_i\00.nv.shared._Z6KernelP4NodePiPbS2_S2_S1_i\00.nv.constant0._Z6KernelP4NodePiPbS2_S2_S1_i\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00F\00\00\00\03\00\09\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\9C\00\00\00\03\00\0B\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\A7\00\00\00\01\00\0B\00\00\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\B0\00\00\00\01\00\0B\00\01\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\BA\00\00\00\03\00\07\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\01\01\00\00\03\00\0A\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00u\01\00\00\03\00\08\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\
002\00\00\00\12\10\09\00\00\00\00\00\00\00\00\00\80\0D\00\00\00\00\00\00\E3\00\00\00\12\10\0A\00\00\00\00\00\00\00\00\00\80\18\00\00\00\00\00\00\04/\08\00\09\00\00\00\13\00\00\00\04#\08\00\09\00\00\00\00\00\00\00\04\12\08\00\09\00\00\00@\00\00\00\04\11\08\00\09\00\00\00@\00\00\00\04/\08\00\08\00\00\00\0F\00\00\00\04#\08\00\08\00\00\00\00\00\00\00\04\12\08\00\08\00\00\00(\00\00\00\04\11\08\00\08\00\00\00(\00\00\00\010\00\00\01*\00\00\04\0A\08\00\05\00\00\00@\01$\00\03\19$\00\04\17\0C\00\00\00\00\00\04\00 \00\00\F0\11\00\04\17\0C\00\00\00\00\00\03\00\18\00\00\F0!\00\04\17\0C\00\00\00\00\00\02\00\10\00\00\F0!\00\04\17\0C\00\00\00\00\00\01\00\08\00\00\F0!\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0!\00\03\1B\FF\00\04\1D\04\00\C8\04\00\00\04\1C\04\00H\0D\00\00\04\1E\04\000\00\00\00\010\00\00\01*\00\00\04\0A\08\00\07\00\00\00@\014\00\03\194\00\04\17\0C\00\00\00\00\00\06\000\00\00\F0\11\00\04\17\0C\00\00\00\00\00\05\00(\00\00\F0!\00\04\17\0C\00\00\00\00\00\04\00 \00\00\F0!\00\04\17\0C\00\00\00\00\00\03\00\18\00\00\F0!\00\04\17\0C\00\00\00\00\00\02\00\10\00\00\F0!\00\04\17\0C\00\00\00\00\00\01\00\08\00\00\F0!\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0!\00\03\1B\FF\00\04\1D\04\00\A8\06\00\00\04\1C\04\008\18\00\00\04\1E\04\00p\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\0
0\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\EF\1F\E0\FD\03!\00\D3rd<3>;\0A\0Amov.u2\00\1B,e\00b;\0Acvta\8D\00\04%\00\13,\\\00\22ld\C8\00\02\18\00nrd2, [\CE\00\1E])\00\1F1)\00\01a0];\0Ast#\00\81[%SP+0],,\00\0A\16\00\128\16\00\222;\B6\00\01\D8\00a1, 
999(\00\02g\00\00\EF\00\18[\9E\01\03M\00\AF1;\0Aret;\0A\0A}\D6\01\1A\FE\02FuncGetAttributes\E1\01\0D#\00\0E\EC\01\0F+\00\06\0F\F7\01\1B\1F1\F7\01Q\1F1\F7\01!\0E\D9\00\0F\02\02\0F\0E8\01\0F\0D\02\8DhDevice\B4\00\0E\0E\02\0E$\00\0F\0F\02\00/32,\00\0B\1F1,\00\18\1F2<\02\13\1F2<\02\1F\1D4<\02\1F2<\02\0C\1F2<\02\13\01_\00\04;\02\0F\D9\00\07\1D]4\00\1F14\00\06\0Fp\02\10\0E\9A\01\0Fq\02\12(32q\02\0B\15\00!12\16\00\09\86\02\1F3\86\02\15\1F3\86\02#2Get\CB\00\0E}\02\05\1B\00\04\DA\00\0F\1C\02\13?3[8W\04.\0F\1B\02\0D\1F3W\04\19\04\B3\01\0D\D0\00\0F\AA\01\06\0F\05\04W\F0\04OccupancyMaxActiveBV\08\FE\03sPerMultiprocessor\9F\01\0F;\00\16\0EB\06\0FC\00%\0EJ\04\0FC\00\1E\1F2\86\00/\1F3\88\02\13O4[32\89\02\1C\1D3\89\02\1F4\89\02\0C\1F4\89\02\19\133\89\02\0F\F1\00\1E\0F\BC\04\00\0FK\00$/2]w\07\00\0FL\00$\0F\1F\05\01\0F\98\00%\0F\A7\07\1D\097\05\186M\05\04,\00\2224-\00\183\CF\03\1F2\CF\03\15\1F2\CF\03L\9FWithFlags\D8\03(\05D\00\0E\E1\03\0FL\00'\0F\EA\030\0CL\00\1F2\98\008\1F3\98\008\1F4H\04\13O5[40\EC\08.\0FH\04\0D\1F5\EC\08\1C\0F\F9\00+\1F]\9C\040\0D\9A\01\0F\A5\040\0D:\02\0F\AE\041\0D\DB\02\0F\B7\041\0D|\03\0F\C0\04I\08-\00\1F3$\0A6\F0\16visible .entry _Z6KernelP4NodePiPbS2_\03\0061_i\A6\04\00\A0\00\0F+\00\0A\0E\8D\04\0F3\00\15\1F13\00\1F\1F23\00\1F\1F33\00\1F\1F43\00\1F\1753\00/323\00\12\1F6\8F\04\13O6[64\8F\04\15\96pred %p<5\90\04\9B16 %rs<7>\B3\04-18\B4\04/50\B5\04\0C\1F6\B5\04\18\00b\03\0F\06\01\12\0F\9E\03\00\1F6<\00\14\1F5<\00\00\1F5<\00\14\0F\14\05\01\1F4<\00\14\1F3P\05\01\0F<\00\14\0F\E3\04\02\0F<\00\14\0F\CA\04\01\0Fh\01\15#0]\BD\01#to\BB\14\04B\00\117w\04\04\DC\01\0A\1C\00\118\1C\00\1F7;\00\05\119\1F\00\1F5;\00\02!10\1D\00\1F9<\00\05!11 \00\1F4=\00\03\122O\05\1F1>\00\06\143-\05\0F>\00\01\124>\00\1F3>\00\06\145\98\05\0F>\00\01\126>\00\1F5>\00\06\147\EC\05\0F>\00\01\023\01/17 
\06\03\1F8!\06\02*16\17\00\03\22\06?d14$\06\03*12\18\00\03%\06:d10\18\00\134w\00\1A8T\06\154\8E\10\09*\0B\F4\00%ctaid.x;\0Ashl.bk\06\02F\0B\08,\00\00_\01\12t*\00Qadd.s\15\00$5,/\00\1A4n\00\125\9D\00\115\BC\02\02A\00%6,\1B\00\07\16\00%7,\9F\00\92;\0Asetp.ge]\002p1,6\00\F2\0E%r7;\0A@%p1 bra LBB6_9;\0Abra.uni\10\0021;\0A\08\00\11:Z\00\03\96\01%9,Z\01\01r\00\02\B4\008d20\8A\00\01\CD\00\03\93\03$1,8\00\01'\00\02\A7\00\108L\05\00r\03\01\22\00\002\00$ndc\05#2,\1D\00\131\BD\00\22eq\1B\003p2, \00\8F1;\0A@!%p2\BD\00\07\132\BD\00\182\BD\00/22\BD\00\04\1F3\BD\00\05$4,8\00\01'\00\03\BC\01\02\A8\0033, &\02\128\C8\00\02T\028s3;s\00$5,\B8\02\09r\00\09\B9\01\01&\02\030\00$7, \00\1A3\8B\00$8,P\00\01'\00\07\EF\01\138H\01+8]0\02\02\FD\02\1B8\1B\01\133\1B\01&3:C\00%9,3\00\09\BE\00\1F9\BE\00\02/30\BE\00\04431, \00\0A\BE\00432,P\00\01'\00\07\BE\00\2210\BF\00X32+4]\18\00\141\18\00\18]\1E\03\02\D1\04\02\14\05,11\DF\02\223,\CD\00\00(\00\01\E0\02\163#\02\0C\08\01\134\08\01\184#\02\143\0B\02\1A8\F2\00\184!\01\08\F2\00$5, \00\1A2\F2\00$6,P\00\01'\00\08\DA\00\133\DA\00\1B6\B1\01\136\12\13\09B\02537,\9C\04\09\93\00%8,6\00\0Az\00$9,8\00\01'\00\07r\03\134y\00\1A9r\03#5,\1D\00\0Dr\03#4, \00\111N\01\164N\01\1B6N\01\135N\01\185N\01\144)\02\1A4A\02/41A\02\04442, \00\0AO\01443,Q\00\01'\00\08O\01\03\D6\00*43)\02#5,\1D\00\191{\00\1847\01\08{\00$5, 
\00\0B{\00\196{\00\175\B4\01\00\1D\00\02\B3\01(5;\F7\00%7,g\06\09|\00\0F\B3\01\05449,8\00\01'\00\09h\04\126\98\01\05h\04\2249h\04\0C\82\01\136\82\01*6:\18\00\137\18\00\177\F0\03(16\D0\02\075\01\01\82\00\161s\00\0BL\04/17M\04\04\1B8u\00\139\18\00/9:9\0D\09\127n\09P2PbS_\02\00\0D/\0D\0D!\00\0E%\0D\0F)\00\0B\1F1)\00\15\1F2)\00\15\1C3\A1\0C\0E)\00\0F&\11\1A\1E7&\11\0F\97\0C\0E\1D3\97\0C\1C5\97\0C\0E\96\0C/26\96\0C\0C\1F7\96\0C\1E\0E\FB\00\0F\14\0C\0D\0EV\01\0F\0A\0C\0D\0E\B1\01\0F\00\0C\0D\0E\0C\02\0F\F6\0B\0D\0Eg\02\0F\EC\0B\0D\1F5t\0B\08\196\CD\0B\0F'\0C\04\1F3'\0C'\1F2'\0C)\1F1'\0C\0D\0Fm\0B\01\1F2m\0B\03\1F0m\0B\03\1F8l\0B\03\1E6\90\11\0F<\0BW/36<\0B\06/36<\0B\01/32<\0B\1797_3\17\05\137<\0B\1B7<\0B\0F\\\08\03(14\89\00\07\16\06\03]\0D%13\C3\0C\0C;\0B/15;\0B.\0E\BC\00\132\BC\00\09;\0B/16\0A\0A\02/17\BC\00\05(8,\03\0E\1C7:\0B\09\D2\06.18:\0B\0Fj\0C\0F/36j\0C\0D\07\9C\0B\1F1\9C\0B\01\182\A7\07\07-\00\1F2-\00\01\0F\BD\01\03/24\8E\00\05$5,7\00\01'\00\09\01\01\1C4;\0C\125r\00\1B4\8B\01\133\8B\01\B03:\0Aret;\0A\0A}\0A\00\00\00", section ".nv_fatbin", align 8 @__cuda_fatbin_wrapper = internal constant { i32, i32, i8*, i8* } { i32 1180844977, i32 1, i8* getelementptr inbounds ([15329 x i8], [15329 x i8]* @2, i64 0, i64 0), i8* null }, section ".nvFatBinSegment", align 8 @__cuda_gpubin_handle = internal global i8** null, align 8 @llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* bitcast (void (i8*)* @__cuda_module_ctor to void ()*), i8* null }] ; Function Attrs: noinline optnone uwtable define dso_local void @_Z6KernelP4NodePiPbS2_S2_S1_i(%struct.Node* %g_graph_nodes, i32* %g_graph_edges, i8* %g_graph_mask, i8* %g_updating_graph_mask, i8* %g_graph_visited, i32* %g_cost, i32 %no_of_nodes) #0 { entry: %g_graph_nodes.addr = alloca %struct.Node*, align 8 %g_graph_edges.addr = alloca i32*, align 8 %g_graph_mask.addr = alloca i8*, align 8 %g_updating_graph_mask.addr = alloca i8*, align 8 %g_graph_visited.addr = alloca 
i8*, align 8 %g_cost.addr = alloca i32*, align 8 %no_of_nodes.addr = alloca i32, align 4 %grid_dim = alloca %struct.dim3, align 8 %block_dim = alloca %struct.dim3, align 8 %shmem_size = alloca i64, align 8 %stream = alloca i8*, align 8 %grid_dim.coerce = alloca { i64, i32 }, align 8 %block_dim.coerce = alloca { i64, i32 }, align 8 store %struct.Node* %g_graph_nodes, %struct.Node** %g_graph_nodes.addr, align 8 store i32* %g_graph_edges, i32** %g_graph_edges.addr, align 8 store i8* %g_graph_mask, i8** %g_graph_mask.addr, align 8 store i8* %g_updating_graph_mask, i8** %g_updating_graph_mask.addr, align 8 store i8* %g_graph_visited, i8** %g_graph_visited.addr, align 8 store i32* %g_cost, i32** %g_cost.addr, align 8 store i32 %no_of_nodes, i32* %no_of_nodes.addr, align 4 %kernel_args = alloca i8*, i64 7, align 16 %0 = bitcast %struct.Node** %g_graph_nodes.addr to i8* %1 = getelementptr i8*, i8** %kernel_args, i32 0 store i8* %0, i8** %1 %2 = bitcast i32** %g_graph_edges.addr to i8* %3 = getelementptr i8*, i8** %kernel_args, i32 1 store i8* %2, i8** %3 %4 = bitcast i8** %g_graph_mask.addr to i8* %5 = getelementptr i8*, i8** %kernel_args, i32 2 store i8* %4, i8** %5 %6 = bitcast i8** %g_updating_graph_mask.addr to i8* %7 = getelementptr i8*, i8** %kernel_args, i32 3 store i8* %6, i8** %7 %8 = bitcast i8** %g_graph_visited.addr to i8* %9 = getelementptr i8*, i8** %kernel_args, i32 4 store i8* %8, i8** %9 %10 = bitcast i32** %g_cost.addr to i8* %11 = getelementptr i8*, i8** %kernel_args, i32 5 store i8* %10, i8** %11 %12 = bitcast i32* %no_of_nodes.addr to i8* %13 = getelementptr i8*, i8** %kernel_args, i32 6 store i8* %12, i8** %13 %14 = call i32 @__cudaPopCallConfiguration(%struct.dim3* %grid_dim, %struct.dim3* %block_dim, i64* %shmem_size, i8** %stream) %15 = load i64, i64* %shmem_size, align 8 %16 = load i8*, i8** %stream, align 8 %17 = bitcast { i64, i32 }* %grid_dim.coerce to i8* %18 = bitcast %struct.dim3* %grid_dim to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* 
align 8 %17, i8* align 8 %18, i64 12, i1 false) %19 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 0 %20 = load i64, i64* %19, align 8 %21 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 1 %22 = load i32, i32* %21, align 8 %23 = bitcast { i64, i32 }* %block_dim.coerce to i8* %24 = bitcast %struct.dim3* %block_dim to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %23, i8* align 8 %24, i64 12, i1 false) %25 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 0 %26 = load i64, i64* %25, align 8 %27 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 1 %28 = load i32, i32* %27, align 8 %29 = bitcast i8* %16 to %struct.CUstream_st* %call = call i32 @cudaLaunchKernel(i8* bitcast (void (%struct.Node*, i32*, i8*, i8*, i8*, i32*, i32)* @_Z6KernelP4NodePiPbS2_S2_S1_i to i8*), i64 %20, i32 %22, i64 %26, i32 %28, i8** %kernel_args, i64 %15, %struct.CUstream_st* %29) br label %setup.end setup.end: ; preds = %entry ret void } declare dso_local i32 @__cudaPopCallConfiguration(%struct.dim3*, %struct.dim3*, i64*, i8**) declare dso_local i32 @cudaLaunchKernel(i8*, i64, i32, i64, i32, i8**, i64, %struct.CUstream_st*) ; Function Attrs: argmemonly nounwind willreturn declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1 ; Function Attrs: noinline optnone uwtable define dso_local void @_Z7Kernel2PbS_S_S_i(i8* %g_graph_mask, i8* %g_updating_graph_mask, i8* %g_graph_visited, i8* %g_over, i32 %no_of_nodes) #0 { entry: %g_graph_mask.addr = alloca i8*, align 8 %g_updating_graph_mask.addr = alloca i8*, align 8 %g_graph_visited.addr = alloca i8*, align 8 %g_over.addr = alloca i8*, align 8 %no_of_nodes.addr = alloca i32, align 4 %grid_dim = alloca %struct.dim3, align 8 %block_dim = alloca %struct.dim3, align 8 %shmem_size = alloca i64, align 8 %stream = alloca i8*, align 8 %grid_dim.coerce = 
alloca { i64, i32 }, align 8 %block_dim.coerce = alloca { i64, i32 }, align 8 store i8* %g_graph_mask, i8** %g_graph_mask.addr, align 8 store i8* %g_updating_graph_mask, i8** %g_updating_graph_mask.addr, align 8 store i8* %g_graph_visited, i8** %g_graph_visited.addr, align 8 store i8* %g_over, i8** %g_over.addr, align 8 store i32 %no_of_nodes, i32* %no_of_nodes.addr, align 4 %kernel_args = alloca i8*, i64 5, align 16 %0 = bitcast i8** %g_graph_mask.addr to i8* %1 = getelementptr i8*, i8** %kernel_args, i32 0 store i8* %0, i8** %1 %2 = bitcast i8** %g_updating_graph_mask.addr to i8* %3 = getelementptr i8*, i8** %kernel_args, i32 1 store i8* %2, i8** %3 %4 = bitcast i8** %g_graph_visited.addr to i8* %5 = getelementptr i8*, i8** %kernel_args, i32 2 store i8* %4, i8** %5 %6 = bitcast i8** %g_over.addr to i8* %7 = getelementptr i8*, i8** %kernel_args, i32 3 store i8* %6, i8** %7 %8 = bitcast i32* %no_of_nodes.addr to i8* %9 = getelementptr i8*, i8** %kernel_args, i32 4 store i8* %8, i8** %9 %10 = call i32 @__cudaPopCallConfiguration(%struct.dim3* %grid_dim, %struct.dim3* %block_dim, i64* %shmem_size, i8** %stream) %11 = load i64, i64* %shmem_size, align 8 %12 = load i8*, i8** %stream, align 8 %13 = bitcast { i64, i32 }* %grid_dim.coerce to i8* %14 = bitcast %struct.dim3* %grid_dim to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %13, i8* align 8 %14, i64 12, i1 false) %15 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 0 %16 = load i64, i64* %15, align 8 %17 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 1 %18 = load i32, i32* %17, align 8 %19 = bitcast { i64, i32 }* %block_dim.coerce to i8* %20 = bitcast %struct.dim3* %block_dim to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %19, i8* align 8 %20, i64 12, i1 false) %21 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 0 %22 = load i64, i64* %21, align 8 %23 = getelementptr inbounds { i64, i32 }, { i64, 
i32 }* %block_dim.coerce, i32 0, i32 1 %24 = load i32, i32* %23, align 8 %25 = bitcast i8* %12 to %struct.CUstream_st* %call = call i32 @cudaLaunchKernel(i8* bitcast (void (i8*, i8*, i8*, i8*, i32)* @_Z7Kernel2PbS_S_S_i to i8*), i64 %16, i32 %18, i64 %22, i32 %24, i8** %kernel_args, i64 %11, %struct.CUstream_st* %25) br label %setup.end setup.end: ; preds = %entry ret void } ; Function Attrs: noinline norecurse optnone uwtable define dso_local i32 @main(i32 %argc, i8** %argv) #2 { entry: %argc.addr = alloca i32, align 4 %argv.addr = alloca i8**, align 8 store i32 %argc, i32* %argc.addr, align 4 store i8** %argv, i8*** %argv.addr, align 8 %call = call i32 @cudaSetDevice(i32 0) store i32 0, i32* @no_of_nodes, align 4 store i32 0, i32* @edge_list_size, align 4 %0 = load i32, i32* %argc.addr, align 4 %1 = load i8**, i8*** %argv.addr, align 8 call void @_Z8BFSGraphiPPc(i32 %0, i8** %1) ret i32 0 } declare dso_local i32 @cudaSetDevice(i32) #3 ; Function Attrs: noinline optnone uwtable define dso_local void @_Z8BFSGraphiPPc(i32 %argc, i8** %argv) #0 { entry: %argc.addr = alloca i32, align 4 %argv.addr = alloca i8**, align 8 %input_f = alloca i8*, align 8 %source = alloca i32, align 4 %num_of_blocks = alloca i32, align 4 %num_of_threads_per_block = alloca i32, align 4 %h_graph_nodes = alloca %struct.Node*, align 8 %h_graph_mask = alloca i8*, align 8 %h_updating_graph_mask = alloca i8*, align 8 %h_graph_visited = alloca i8*, align 8 %start = alloca i32, align 4 %edgeno = alloca i32, align 4 %i = alloca i32, align 4 %id = alloca i32, align 4 %cost = alloca i32, align 4 %h_graph_edges = alloca i32*, align 8 %i41 = alloca i32, align 4 %d_graph_nodes = alloca %struct.Node*, align 8 %d_graph_edges = alloca i32*, align 8 %d_graph_mask = alloca i8*, align 8 %d_updating_graph_mask = alloca i8*, align 8 %d_graph_visited = alloca i8*, align 8 %h_cost = alloca i32*, align 8 %i90 = alloca i32, align 4 %d_cost = alloca i32*, align 8 %d_over = alloca i8*, align 8 %grid = alloca 
%struct.dim3, align 4 %threads = alloca %struct.dim3, align 4 %k = alloca i32, align 4 %stop = alloca i8, align 1 %agg.tmp = alloca %struct.dim3, align 4 %agg.tmp111 = alloca %struct.dim3, align 4 %agg.tmp.coerce = alloca { i64, i32 }, align 4 %agg.tmp111.coerce = alloca { i64, i32 }, align 4 %agg.tmp115 = alloca %struct.dim3, align 4 %agg.tmp116 = alloca %struct.dim3, align 4 %agg.tmp115.coerce = alloca { i64, i32 }, align 4 %agg.tmp116.coerce = alloca { i64, i32 }, align 4 %fpo = alloca %struct._IO_FILE*, align 8 %i130 = alloca i32, align 4 store i32 %argc, i32* %argc.addr, align 4 store i8** %argv, i8*** %argv.addr, align 8 %0 = load i32, i32* %argc.addr, align 4 %cmp = icmp ne i32 %0, 2 br i1 %cmp, label %if.then, label %if.end if.then: ; preds = %entry %1 = load i32, i32* %argc.addr, align 4 %2 = load i8**, i8*** %argv.addr, align 8 call void @_Z5UsageiPPc(i32 %1, i8** %2) call void @exit(i32 0) #8 unreachable if.end: ; preds = %entry %3 = load i8**, i8*** %argv.addr, align 8 %arrayidx = getelementptr inbounds i8*, i8** %3, i64 1 %4 = load i8*, i8** %arrayidx, align 8 store i8* %4, i8** %input_f, align 8 %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0)) %5 = load i8*, i8** %input_f, align 8 %call1 = call %struct._IO_FILE* @fopen(i8* %5, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.2, i64 0, i64 0)) store %struct._IO_FILE* %call1, %struct._IO_FILE** @fp, align 8 %6 = load %struct._IO_FILE*, %struct._IO_FILE** @fp, align 8 %tobool = icmp ne %struct._IO_FILE* %6, null br i1 %tobool, label %if.end4, label %if.then2 if.then2: ; preds = %if.end %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([26 x i8], [26 x i8]* @.str.3, i64 0, i64 0)) br label %return if.end4: ; preds = %if.end store i32 0, i32* %source, align 4 %7 = load %struct._IO_FILE*, %struct._IO_FILE** @fp, align 8 %call5 = call i32 (%struct._IO_FILE*, i8*, ...) 
@fscanf(%struct._IO_FILE* %7, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.4, i64 0, i64 0), i32* @no_of_nodes) store i32 1, i32* %num_of_blocks, align 4 %8 = load i32, i32* @no_of_nodes, align 4 store i32 %8, i32* %num_of_threads_per_block, align 4 %9 = load i32, i32* @no_of_nodes, align 4 %cmp6 = icmp sgt i32 %9, 512 br i1 %cmp6, label %if.then7, label %if.end9 if.then7: ; preds = %if.end4 %10 = load i32, i32* @no_of_nodes, align 4 %conv = sitofp i32 %10 to double %div = fdiv double %conv, 5.120000e+02 %11 = call double @llvm.ceil.f64(double %div) %conv8 = fptosi double %11 to i32 store i32 %conv8, i32* %num_of_blocks, align 4 store i32 512, i32* %num_of_threads_per_block, align 4 br label %if.end9 if.end9: ; preds = %if.then7, %if.end4 %12 = load i32, i32* @no_of_nodes, align 4 %conv10 = sext i32 %12 to i64 %mul = mul i64 8, %conv10 %call11 = call noalias i8* @malloc(i64 %mul) #9 %13 = bitcast i8* %call11 to %struct.Node* store %struct.Node* %13, %struct.Node** %h_graph_nodes, align 8 %14 = load i32, i32* @no_of_nodes, align 4 %conv12 = sext i32 %14 to i64 %mul13 = mul i64 1, %conv12 %call14 = call noalias i8* @malloc(i64 %mul13) #9 store i8* %call14, i8** %h_graph_mask, align 8 %15 = load i32, i32* @no_of_nodes, align 4 %conv15 = sext i32 %15 to i64 %mul16 = mul i64 1, %conv15 %call17 = call noalias i8* @malloc(i64 %mul16) #9 store i8* %call17, i8** %h_updating_graph_mask, align 8 %16 = load i32, i32* @no_of_nodes, align 4 %conv18 = sext i32 %16 to i64 %mul19 = mul i64 1, %conv18 %call20 = call noalias i8* @malloc(i64 %mul19) #9 store i8* %call20, i8** %h_graph_visited, align 8 store i32 0, i32* %i, align 4 br label %for.cond for.cond: ; preds = %for.inc, %if.end9 %17 = load i32, i32* %i, align 4 %18 = load i32, i32* @no_of_nodes, align 4 %cmp21 = icmp ult i32 %17, %18 br i1 %cmp21, label %for.body, label %for.end for.body: ; preds = %for.cond %19 = load %struct._IO_FILE*, %struct._IO_FILE** @fp, align 8 %call22 = call i32 (%struct._IO_FILE*, i8*, ...) 
@fscanf(%struct._IO_FILE* %19, i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.5, i64 0, i64 0), i32* %start, i32* %edgeno) %20 = load i32, i32* %start, align 4 %21 = load %struct.Node*, %struct.Node** %h_graph_nodes, align 8 %22 = load i32, i32* %i, align 4 %idxprom = zext i32 %22 to i64 %arrayidx23 = getelementptr inbounds %struct.Node, %struct.Node* %21, i64 %idxprom %starting = getelementptr inbounds %struct.Node, %struct.Node* %arrayidx23, i32 0, i32 0 store i32 %20, i32* %starting, align 4 %23 = load i32, i32* %edgeno, align 4 %24 = load %struct.Node*, %struct.Node** %h_graph_nodes, align 8 %25 = load i32, i32* %i, align 4 %idxprom24 = zext i32 %25 to i64 %arrayidx25 = getelementptr inbounds %struct.Node, %struct.Node* %24, i64 %idxprom24 %no_of_edges = getelementptr inbounds %struct.Node, %struct.Node* %arrayidx25, i32 0, i32 1 store i32 %23, i32* %no_of_edges, align 4 %26 = load i8*, i8** %h_graph_mask, align 8 %27 = load i32, i32* %i, align 4 %idxprom26 = zext i32 %27 to i64 %arrayidx27 = getelementptr inbounds i8, i8* %26, i64 %idxprom26 store i8 0, i8* %arrayidx27, align 1 %28 = load i8*, i8** %h_updating_graph_mask, align 8 %29 = load i32, i32* %i, align 4 %idxprom28 = zext i32 %29 to i64 %arrayidx29 = getelementptr inbounds i8, i8* %28, i64 %idxprom28 store i8 0, i8* %arrayidx29, align 1 %30 = load i8*, i8** %h_graph_visited, align 8 %31 = load i32, i32* %i, align 4 %idxprom30 = zext i32 %31 to i64 %arrayidx31 = getelementptr inbounds i8, i8* %30, i64 %idxprom30 store i8 0, i8* %arrayidx31, align 1 br label %for.inc for.inc: ; preds = %for.body %32 = load i32, i32* %i, align 4 %inc = add i32 %32, 1 store i32 %inc, i32* %i, align 4 br label %for.cond for.end: ; preds = %for.cond %33 = load %struct._IO_FILE*, %struct._IO_FILE** @fp, align 8 %call32 = call i32 (%struct._IO_FILE*, i8*, ...) 
@fscanf(%struct._IO_FILE* %33, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.4, i64 0, i64 0), i32* %source) store i32 0, i32* %source, align 4 %34 = load i8*, i8** %h_graph_mask, align 8 %35 = load i32, i32* %source, align 4 %idxprom33 = sext i32 %35 to i64 %arrayidx34 = getelementptr inbounds i8, i8* %34, i64 %idxprom33 store i8 1, i8* %arrayidx34, align 1 %36 = load i8*, i8** %h_graph_visited, align 8 %37 = load i32, i32* %source, align 4 %idxprom35 = sext i32 %37 to i64 %arrayidx36 = getelementptr inbounds i8, i8* %36, i64 %idxprom35 store i8 1, i8* %arrayidx36, align 1 %38 = load %struct._IO_FILE*, %struct._IO_FILE** @fp, align 8 %call37 = call i32 (%struct._IO_FILE*, i8*, ...) @fscanf(%struct._IO_FILE* %38, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.4, i64 0, i64 0), i32* @edge_list_size) %39 = load i32, i32* @edge_list_size, align 4 %conv38 = sext i32 %39 to i64 %mul39 = mul i64 4, %conv38 %call40 = call noalias i8* @malloc(i64 %mul39) #9 %40 = bitcast i8* %call40 to i32* store i32* %40, i32** %h_graph_edges, align 8 store i32 0, i32* %i41, align 4 br label %for.cond42 for.cond42: ; preds = %for.inc49, %for.end %41 = load i32, i32* %i41, align 4 %42 = load i32, i32* @edge_list_size, align 4 %cmp43 = icmp slt i32 %41, %42 br i1 %cmp43, label %for.body44, label %for.end51 for.body44: ; preds = %for.cond42 %43 = load %struct._IO_FILE*, %struct._IO_FILE** @fp, align 8 %call45 = call i32 (%struct._IO_FILE*, i8*, ...) @fscanf(%struct._IO_FILE* %43, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.4, i64 0, i64 0), i32* %id) %44 = load %struct._IO_FILE*, %struct._IO_FILE** @fp, align 8 %call46 = call i32 (%struct._IO_FILE*, i8*, ...) 
@fscanf(%struct._IO_FILE* %44, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.4, i64 0, i64 0), i32* %cost) %45 = load i32, i32* %id, align 4 %46 = load i32*, i32** %h_graph_edges, align 8 %47 = load i32, i32* %i41, align 4 %idxprom47 = sext i32 %47 to i64 %arrayidx48 = getelementptr inbounds i32, i32* %46, i64 %idxprom47 store i32 %45, i32* %arrayidx48, align 4 br label %for.inc49 for.inc49: ; preds = %for.body44 %48 = load i32, i32* %i41, align 4 %inc50 = add nsw i32 %48, 1 store i32 %inc50, i32* %i41, align 4 br label %for.cond42 for.end51: ; preds = %for.cond42 %49 = load %struct._IO_FILE*, %struct._IO_FILE** @fp, align 8 %tobool52 = icmp ne %struct._IO_FILE* %49, null br i1 %tobool52, label %if.then53, label %if.end55 if.then53: ; preds = %for.end51 %50 = load %struct._IO_FILE*, %struct._IO_FILE** @fp, align 8 %call54 = call i32 @fclose(%struct._IO_FILE* %50) br label %if.end55 if.end55: ; preds = %if.then53, %for.end51 %call56 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.6, i64 0, i64 0)) %51 = bitcast %struct.Node** %d_graph_nodes to i8** %52 = load i32, i32* @no_of_nodes, align 4 %conv57 = sext i32 %52 to i64 %mul58 = mul i64 8, %conv57 %call59 = call i32 @cudaMalloc(i8** %51, i64 %mul58) %53 = load %struct.Node*, %struct.Node** %d_graph_nodes, align 8 %54 = bitcast %struct.Node* %53 to i8* %55 = load %struct.Node*, %struct.Node** %h_graph_nodes, align 8 %56 = bitcast %struct.Node* %55 to i8* %57 = load i32, i32* @no_of_nodes, align 4 %conv60 = sext i32 %57 to i64 %mul61 = mul i64 8, %conv60 %call62 = call i32 @cudaMemcpy(i8* %54, i8* %56, i64 %mul61, i32 1) %58 = bitcast i32** %d_graph_edges to i8** %59 = load i32, i32* @edge_list_size, align 4 %conv63 = sext i32 %59 to i64 %mul64 = mul i64 4, %conv63 %call65 = call i32 @cudaMalloc(i8** %58, i64 %mul64) %60 = load i32*, i32** %d_graph_edges, align 8 %61 = bitcast i32* %60 to i8* %62 = load i32*, i32** %h_graph_edges, align 8 %63 = bitcast i32* %62 to i8* %64 
= load i32, i32* @edge_list_size, align 4 %conv66 = sext i32 %64 to i64 %mul67 = mul i64 4, %conv66 %call68 = call i32 @cudaMemcpy(i8* %61, i8* %63, i64 %mul67, i32 1) %65 = load i32, i32* @no_of_nodes, align 4 %conv69 = sext i32 %65 to i64 %mul70 = mul i64 1, %conv69 %call71 = call i32 @cudaMalloc(i8** %d_graph_mask, i64 %mul70) %66 = load i8*, i8** %d_graph_mask, align 8 %67 = load i8*, i8** %h_graph_mask, align 8 %68 = load i32, i32* @no_of_nodes, align 4 %conv72 = sext i32 %68 to i64 %mul73 = mul i64 1, %conv72 %call74 = call i32 @cudaMemcpy(i8* %66, i8* %67, i64 %mul73, i32 1) %69 = load i32, i32* @no_of_nodes, align 4 %conv75 = sext i32 %69 to i64 %mul76 = mul i64 1, %conv75 %call77 = call i32 @cudaMalloc(i8** %d_updating_graph_mask, i64 %mul76) %70 = load i8*, i8** %d_updating_graph_mask, align 8 %71 = load i8*, i8** %h_updating_graph_mask, align 8 %72 = load i32, i32* @no_of_nodes, align 4 %conv78 = sext i32 %72 to i64 %mul79 = mul i64 1, %conv78 %call80 = call i32 @cudaMemcpy(i8* %70, i8* %71, i64 %mul79, i32 1) %73 = load i32, i32* @no_of_nodes, align 4 %conv81 = sext i32 %73 to i64 %mul82 = mul i64 1, %conv81 %call83 = call i32 @cudaMalloc(i8** %d_graph_visited, i64 %mul82) %74 = load i8*, i8** %d_graph_visited, align 8 %75 = load i8*, i8** %h_graph_visited, align 8 %76 = load i32, i32* @no_of_nodes, align 4 %conv84 = sext i32 %76 to i64 %mul85 = mul i64 1, %conv84 %call86 = call i32 @cudaMemcpy(i8* %74, i8* %75, i64 %mul85, i32 1) %77 = load i32, i32* @no_of_nodes, align 4 %conv87 = sext i32 %77 to i64 %mul88 = mul i64 4, %conv87 %call89 = call noalias i8* @malloc(i64 %mul88) #9 %78 = bitcast i8* %call89 to i32* store i32* %78, i32** %h_cost, align 8 store i32 0, i32* %i90, align 4 br label %for.cond91 for.cond91: ; preds = %for.inc96, %if.end55 %79 = load i32, i32* %i90, align 4 %80 = load i32, i32* @no_of_nodes, align 4 %cmp92 = icmp slt i32 %79, %80 br i1 %cmp92, label %for.body93, label %for.end98 for.body93: ; preds = %for.cond91 %81 = load i32*, 
i32** %h_cost, align 8 %82 = load i32, i32* %i90, align 4 %idxprom94 = sext i32 %82 to i64 %arrayidx95 = getelementptr inbounds i32, i32* %81, i64 %idxprom94 store i32 -1, i32* %arrayidx95, align 4 br label %for.inc96 for.inc96: ; preds = %for.body93 %83 = load i32, i32* %i90, align 4 %inc97 = add nsw i32 %83, 1 store i32 %inc97, i32* %i90, align 4 br label %for.cond91 for.end98: ; preds = %for.cond91 %84 = load i32*, i32** %h_cost, align 8 %85 = load i32, i32* %source, align 4 %idxprom99 = sext i32 %85 to i64 %arrayidx100 = getelementptr inbounds i32, i32* %84, i64 %idxprom99 store i32 0, i32* %arrayidx100, align 4 %86 = bitcast i32** %d_cost to i8** %87 = load i32, i32* @no_of_nodes, align 4 %conv101 = sext i32 %87 to i64 %mul102 = mul i64 4, %conv101 %call103 = call i32 @cudaMalloc(i8** %86, i64 %mul102) %88 = load i32*, i32** %d_cost, align 8 %89 = bitcast i32* %88 to i8* %90 = load i32*, i32** %h_cost, align 8 %91 = bitcast i32* %90 to i8* %92 = load i32, i32* @no_of_nodes, align 4 %conv104 = sext i32 %92 to i64 %mul105 = mul i64 4, %conv104 %call106 = call i32 @cudaMemcpy(i8* %89, i8* %91, i64 %mul105, i32 1) %call107 = call i32 @cudaMalloc(i8** %d_over, i64 1) %call108 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([33 x i8], [33 x i8]* @.str.7, i64 0, i64 0)) %93 = load i32, i32* %num_of_blocks, align 4 call void @_ZN4dim3C2Ejjj(%struct.dim3* %grid, i32 %93, i32 1, i32 1) %94 = load i32, i32* %num_of_threads_per_block, align 4 call void @_ZN4dim3C2Ejjj(%struct.dim3* %threads, i32 %94, i32 1, i32 1) store i32 0, i32* %k, align 4 %call109 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([27 x i8], [27 x i8]* @.str.8, i64 0, i64 0)) br label %do.body do.body: ; preds = %do.cond, %for.end98 store i8 0, i8* %stop, align 1 %95 = load i8*, i8** %d_over, align 8 %call110 = call i32 @cudaMemcpy(i8* %95, i8* %stop, i64 1, i32 1) %96 = bitcast %struct.dim3* %agg.tmp to i8* %97 = bitcast %struct.dim3* %grid to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %96, i8* align 4 %97, i64 12, i1 false) %98 = bitcast %struct.dim3* %agg.tmp111 to i8* %99 = bitcast %struct.dim3* %threads to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %98, i8* align 4 %99, i64 12, i1 false) %100 = bitcast { i64, i32 }* %agg.tmp.coerce to i8* %101 = bitcast %struct.dim3* %agg.tmp to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %100, i8* align 4 %101, i64 12, i1 false) %102 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 0 %103 = load i64, i64* %102, align 4 %104 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 1 %105 = load i32, i32* %104, align 4 %106 = bitcast { i64, i32 }* %agg.tmp111.coerce to i8* %107 = bitcast %struct.dim3* %agg.tmp111 to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %106, i8* align 4 %107, i64 12, i1 false) %108 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp111.coerce, i32 0, i32 0 %109 = load i64, i64* %108, align 4 %110 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp111.coerce, i32 0, i32 1 %111 = load i32, i32* %110, align 4 %call112 = call i32 @__cudaPushCallConfiguration(i64 %103, i32 %105, i64 %109, i32 %111, i64 0, i8* null) %tobool113 = icmp ne i32 %call112, 0 br i1 %tobool113, label %kcall.end, label %kcall.configok kcall.configok: ; preds = %do.body %112 = load %struct.Node*, %struct.Node** %d_graph_nodes, align 8 %113 = load i32*, i32** %d_graph_edges, align 8 %114 = load i8*, i8** %d_graph_mask, align 8 %115 = load i8*, i8** %d_updating_graph_mask, align 8 %116 = load i8*, i8** 
%d_graph_visited, align 8 %117 = load i32*, i32** %d_cost, align 8 %118 = load i32, i32* @no_of_nodes, align 4 call void @_Z6KernelP4NodePiPbS2_S2_S1_i(%struct.Node* %112, i32* %113, i8* %114, i8* %115, i8* %116, i32* %117, i32 %118) br label %kcall.end kcall.end: ; preds = %kcall.configok, %do.body %call114 = call i32 @cudaDeviceSynchronize() %119 = bitcast %struct.dim3* %agg.tmp115 to i8* %120 = bitcast %struct.dim3* %grid to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %119, i8* align 4 %120, i64 12, i1 false) %121 = bitcast %struct.dim3* %agg.tmp116 to i8* %122 = bitcast %struct.dim3* %threads to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %121, i8* align 4 %122, i64 12, i1 false) %123 = bitcast { i64, i32 }* %agg.tmp115.coerce to i8* %124 = bitcast %struct.dim3* %agg.tmp115 to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %123, i8* align 4 %124, i64 12, i1 false) %125 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp115.coerce, i32 0, i32 0 %126 = load i64, i64* %125, align 4 %127 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp115.coerce, i32 0, i32 1 %128 = load i32, i32* %127, align 4 %129 = bitcast { i64, i32 }* %agg.tmp116.coerce to i8* %130 = bitcast %struct.dim3* %agg.tmp116 to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %129, i8* align 4 %130, i64 12, i1 false) %131 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp116.coerce, i32 0, i32 0 %132 = load i64, i64* %131, align 4 %133 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp116.coerce, i32 0, i32 1 %134 = load i32, i32* %133, align 4 %call117 = call i32 @__cudaPushCallConfiguration(i64 %126, i32 %128, i64 %132, i32 %134, i64 0, i8* null) %tobool118 = icmp ne i32 %call117, 0 br i1 %tobool118, label %kcall.end120, label %kcall.configok119 kcall.configok119: ; preds = %kcall.end %135 = load i8*, i8** %d_graph_mask, align 8 %136 = load i8*, i8** %d_updating_graph_mask, align 8 %137 = load i8*, i8** %d_graph_visited, align 
8 %138 = load i8*, i8** %d_over, align 8 %139 = load i32, i32* @no_of_nodes, align 4 call void @_Z7Kernel2PbS_S_S_i(i8* %135, i8* %136, i8* %137, i8* %138, i32 %139) br label %kcall.end120 kcall.end120: ; preds = %kcall.configok119, %kcall.end %call121 = call i32 @cudaDeviceSynchronize() %140 = load i8*, i8** %d_over, align 8 %call122 = call i32 @cudaMemcpy(i8* %stop, i8* %140, i64 1, i32 2) %141 = load i32, i32* %k, align 4 %inc123 = add nsw i32 %141, 1 store i32 %inc123, i32* %k, align 4 br label %do.cond do.cond: ; preds = %kcall.end120 %142 = load i8, i8* %stop, align 1 %tobool124 = trunc i8 %142 to i1 br i1 %tobool124, label %do.body, label %do.end do.end: ; preds = %do.cond %143 = load i32, i32* %k, align 4 %call125 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([26 x i8], [26 x i8]* @.str.9, i64 0, i64 0), i32 %143) %144 = load i32*, i32** %h_cost, align 8 %145 = bitcast i32* %144 to i8* %146 = load i32*, i32** %d_cost, align 8 %147 = bitcast i32* %146 to i8* %148 = load i32, i32* @no_of_nodes, align 4 %conv126 = sext i32 %148 to i64 %mul127 = mul i64 4, %conv126 %call128 = call i32 @cudaMemcpy(i8* %145, i8* %147, i64 %mul127, i32 2) %call129 = call %struct._IO_FILE* @fopen(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.10, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.11, i64 0, i64 0)) store %struct._IO_FILE* %call129, %struct._IO_FILE** %fpo, align 8 store i32 0, i32* %i130, align 4 br label %for.cond131 for.cond131: ; preds = %for.inc137, %do.end %149 = load i32, i32* %i130, align 4 %150 = load i32, i32* @no_of_nodes, align 4 %cmp132 = icmp slt i32 %149, %150 br i1 %cmp132, label %for.body133, label %for.end139 for.body133: ; preds = %for.cond131 %151 = load %struct._IO_FILE*, %struct._IO_FILE** %fpo, align 8 %152 = load i32, i32* %i130, align 4 %153 = load i32*, i32** %h_cost, align 8 %154 = load i32, i32* %i130, align 4 %idxprom134 = sext i32 %154 to i64 %arrayidx135 = getelementptr inbounds i32, i32* %153, 
i64 %idxprom134 %155 = load i32, i32* %arrayidx135, align 4 %call136 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %151, i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str.12, i64 0, i64 0), i32 %152, i32 %155) br label %for.inc137 for.inc137: ; preds = %for.body133 %156 = load i32, i32* %i130, align 4 %inc138 = add nsw i32 %156, 1 store i32 %inc138, i32* %i130, align 4 br label %for.cond131 for.end139: ; preds = %for.cond131 %157 = load %struct._IO_FILE*, %struct._IO_FILE** %fpo, align 8 %call140 = call i32 @fclose(%struct._IO_FILE* %157) %call141 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([29 x i8], [29 x i8]* @.str.13, i64 0, i64 0)) %158 = load %struct.Node*, %struct.Node** %h_graph_nodes, align 8 %159 = bitcast %struct.Node* %158 to i8* call void @free(i8* %159) #9 %160 = load i32*, i32** %h_graph_edges, align 8 %161 = bitcast i32* %160 to i8* call void @free(i8* %161) #9 %162 = load i8*, i8** %h_graph_mask, align 8 call void @free(i8* %162) #9 %163 = load i8*, i8** %h_updating_graph_mask, align 8 call void @free(i8* %163) #9 %164 = load i8*, i8** %h_graph_visited, align 8 call void @free(i8* %164) #9 %165 = load i32*, i32** %h_cost, align 8 %166 = bitcast i32* %165 to i8* call void @free(i8* %166) #9 %167 = load %struct.Node*, %struct.Node** %d_graph_nodes, align 8 %168 = bitcast %struct.Node* %167 to i8* %call142 = call i32 @cudaFree(i8* %168) %169 = load i32*, i32** %d_graph_edges, align 8 %170 = bitcast i32* %169 to i8* %call143 = call i32 @cudaFree(i8* %170) %171 = load i8*, i8** %d_graph_mask, align 8 %call144 = call i32 @cudaFree(i8* %171) %172 = load i8*, i8** %d_updating_graph_mask, align 8 %call145 = call i32 @cudaFree(i8* %172) %173 = load i8*, i8** %d_graph_visited, align 8 %call146 = call i32 @cudaFree(i8* %173) %174 = load i32*, i32** %d_cost, align 8 %175 = bitcast i32* %174 to i8* %call147 = call i32 @cudaFree(i8* %175) br label %return return: ; preds = %for.end139, %if.then2 ret void } ; Function 
; ---------------------------------------------------------------------------
; Usage(int argc, char **argv)
;
; Prints the usage message to stderr: loads the global @stderr stream, loads
; argv[0], and passes both to fprintf together with the format string @.str.
; argc is spilled to a stack slot but is never read back in this function.
; Returns void; the fprintf result (%call) is ignored.
; ---------------------------------------------------------------------------
; Function Attrs: noinline optnone uwtable
define dso_local void @_Z5UsageiPPc(i32 %argc, i8** %argv) #0 {
entry:
  ; Stack slots for the two incoming parameters.
  %argc.addr = alloca i32, align 4
  %argv.addr = alloca i8**, align 8
  store i32 %argc, i32* %argc.addr, align 4
  store i8** %argv, i8*** %argv.addr, align 8
  ; %0 = stderr, %2 = argv[0]
  %0 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
  %1 = load i8**, i8*** %argv.addr, align 8
  %arrayidx = getelementptr inbounds i8*, i8** %1, i64 0
  %2 = load i8*, i8** %arrayidx, align 8
  ; fprintf(stderr, @.str, argv[0])
  %call = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %0, i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str, i64 0, i64 0), i8* %2)
  ret void
}

; External C library and CUDA runtime entry points referenced by this module.
declare dso_local i32 @fprintf(%struct._IO_FILE*, i8*, ...) #3

; Function Attrs: noreturn nounwind
declare dso_local void @exit(i32) #4

declare dso_local i32 @printf(i8*, ...) #3

declare dso_local %struct._IO_FILE* @fopen(i8*, i8*) #3

declare dso_local i32 @fscanf(%struct._IO_FILE*, i8*, ...) #3

; Function Attrs: nounwind readnone speculatable willreturn
declare double @llvm.ceil.f64(double) #5

; Function Attrs: nounwind
declare dso_local noalias i8* @malloc(i64) #6

declare dso_local i32 @fclose(%struct._IO_FILE*) #3

declare dso_local i32 @cudaMalloc(i8**, i64) #3

declare dso_local i32 @cudaMemcpy(i8*, i8*, i64, i32) #3

; ---------------------------------------------------------------------------
; dim3::dim3(unsigned vx, unsigned vy, unsigned vz)   (Itanium "C2" variant)
;
; Copies the three i32 arguments into fields 0, 1 and 2 of the %struct.dim3
; object addressed by %this. Emitted linkonce_odr in a comdat, so duplicate
; copies from other translation units can be merged at link time.
; ---------------------------------------------------------------------------
; Function Attrs: noinline nounwind optnone uwtable
define linkonce_odr dso_local void @_ZN4dim3C2Ejjj(%struct.dim3* %this, i32 %vx, i32 %vy, i32 %vz) unnamed_addr #7 comdat align 2 {
entry:
  ; Spill all parameters to the stack, then reload them as needed below.
  %this.addr = alloca %struct.dim3*, align 8
  %vx.addr = alloca i32, align 4
  %vy.addr = alloca i32, align 4
  %vz.addr = alloca i32, align 4
  store %struct.dim3* %this, %struct.dim3** %this.addr, align 8
  store i32 %vx, i32* %vx.addr, align 4
  store i32 %vy, i32* %vy.addr, align 4
  store i32 %vz, i32* %vz.addr, align 4
  %this1 = load %struct.dim3*, %struct.dim3** %this.addr, align 8
  ; field 0 <- vx
  %x = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 0
  %0 = load i32, i32* %vx.addr, align 4
  store i32 %0, i32* %x, align 4
  ; field 1 <- vy
  %y = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 1
  %1 = load i32, i32* %vy.addr, align 4
  store i32 %1, i32* %y, align 4
  ; field 2 <- vz
  %z = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 2
  %2 = load i32, i32* %vz.addr, align 4
  store i32 %2, i32* %z, align 4
  ret void
}

declare dso_local i32 @__cudaPushCallConfiguration(i64, i32, i64, i32, i64, i8*) #3

declare dso_local i32 @cudaDeviceSynchronize() #3

; Function Attrs: nounwind
declare dso_local void @free(i8*) #6

declare dso_local i32 @cudaFree(i8*) #3

; ---------------------------------------------------------------------------
; __cuda_register_globals(handle)
;
; Calls __cudaRegisterFunction once per kernel host stub, passing the handle
; received in %0, the stub's address cast to i8*, and the kernel's mangled
; name string (@0 for _Z6KernelP4NodePiPbS2_S2_S1_i, @1 for
; _Z7Kernel2PbS_S_S_i). The same string is supplied for both name arguments;
; the remaining arguments are the constant -1 followed by null pointers.
; Both i32 results (%1, %2) are discarded.
; ---------------------------------------------------------------------------
define internal void @__cuda_register_globals(i8** %0) {
entry:
  %1 = call i32 @__cudaRegisterFunction(i8** %0, i8* bitcast (void (%struct.Node*, i32*, i8*, i8*, i8*, i32*, i32)* @_Z6KernelP4NodePiPbS2_S2_S1_i to i8*), i8* getelementptr inbounds ([30 x i8], [30 x i8]* @0, i64 0, i64 0), i8* getelementptr inbounds ([30 x i8], [30 x i8]* @0, i64 0, i64 0), i32 -1, i8* null, i8* null, i8* null, i8* null, i32* null)
  %2 = call i32 @__cudaRegisterFunction(i8** %0, i8* bitcast (void (i8*, i8*, i8*, i8*, i32)* @_Z7Kernel2PbS_S_S_i to i8*), i8* getelementptr inbounds ([20 x i8], [20 x i8]* @1, i64 0, i64 0), i8* getelementptr inbounds ([20 x i8], [20 x i8]* @1, i64 0, i64 0), i32 -1, i8* null, i8* null, i8* null, i8* null, i32* null)
  ret void
}

declare dso_local i32 @__cudaRegisterFunction(i8**, i8*, i8*, i8*, i32, i8*, i8*, i8*, i8*, i32*)

; Declared but not called by any definition visible in this portion of the file.
declare dso_local i32 @__cudaRegisterVar(i8**, i8*, i8*, i8*, i32, i32, i32, i32)

declare dso_local i8** @__cudaRegisterFatBinary(i8*)

; ---------------------------------------------------------------------------
; __cuda_module_ctor
;
; Registers the embedded GPU binary: passes @__cuda_fatbin_wrapper to
; __cudaRegisterFatBinary, saves the returned handle in
; @__cuda_gpubin_handle, registers the kernels through
; __cuda_register_globals, calls __cudaRegisterFatBinaryEnd, and finally
; schedules __cuda_module_dtor with atexit. The i8* parameter is unused and
; the atexit result (%2) is ignored.
; NOTE(review): presumably listed in @llvm.global_ctors so it runs before
; main; that global is not visible in this portion of the file - confirm.
; ---------------------------------------------------------------------------
define internal void @__cuda_module_ctor(i8* %0) {
entry:
  %1 = call i8** @__cudaRegisterFatBinary(i8* bitcast ({ i32, i32, i8*, i8* }* @__cuda_fatbin_wrapper to i8*))
  ; Keep the handle so the destructor can unregister the same binary.
  store i8** %1, i8*** @__cuda_gpubin_handle, align 8
  call void @__cuda_register_globals(i8** %1)
  call void @__cudaRegisterFatBinaryEnd(i8** %1)
  %2 = call i32 @atexit(void (i8*)* @__cuda_module_dtor)
  ret void
}

declare dso_local void @__cudaRegisterFatBinaryEnd(i8**)

declare dso_local void @__cudaUnregisterFatBinary(i8**)

; ---------------------------------------------------------------------------
; __cuda_module_dtor
;
; Loads the handle stored in @__cuda_gpubin_handle and passes it to
; __cudaUnregisterFatBinary. The i8* parameter is unused.
; ---------------------------------------------------------------------------
define internal void @__cuda_module_dtor(i8* %0) {
entry:
  %1 = load i8**, i8*** @__cuda_gpubin_handle, align 8
  call void @__cudaUnregisterFatBinary(i8** %1)
  ret void
}

; Declared here with the void (i8*)* callback type used by the call in
; __cuda_module_ctor.
declare dso_local i32 @atexit(void (i8*)*)

; ---------------------------------------------------------------------------
; Attribute groups.
;   #0 - @_Z5UsageiPPc            #3 - plain external declarations
;   #4 - @exit                    #5 - @llvm.ceil.f64
;   #6 - @malloc / @free          #7 - @_ZN4dim3C2Ejjj
;   #9 - call-site attribute on the malloc/free calls
; Groups #1, #2 and #8 are not referenced by any definition visible in this
; portion of the file.
; ---------------------------------------------------------------------------
attributes #0 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { argmemonly nounwind willreturn }
attributes #2 = { noinline norecurse optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #4 = { noreturn nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #5 = { nounwind readnone speculatable willreturn }
attributes #6 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #7 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #8 = { noreturn nounwind }
attributes #9 = { nounwind }

; Module-level metadata: module flags ("SDK Version" = [10, 1],
; "wchar_size" = 4) and the identification string of the producing compiler.
!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}

!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]}
!1 = !{i32 1, !"wchar_size", i32 4}
!2 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"}