; Module prologue: target description, named struct types and COMDAT keys.
;  * source_filename is "streamcluster_cuda_cpu.cu"; the target triple is
;    x86_64-unknown-linux-gnu.
;  * %struct.Point is { float, float*, i64, float } and %struct.Points is
;    { i64, i32, %struct.Point* }. Field names are not recorded in the IR;
;    code in this module addresses the fields by index.
;  * %class.SimStream and %class.FileStream each start with a %class.PStream
;    member, which itself is a single vtable-style pointer (i32 (...)**).
;  * %struct.dim3 is three i32 values; %struct.CUstream_st is opaque.
;  * Each `$name = comdat any` line declares a COMDAT key with that name.
; NOTE(review): in this copy the "; ModuleID" comment and the declarations that
; follow it sit on one physical line - confirm the original line breaks before
; handing the file to an IR parser.
; ModuleID = 'streamcluster_cuda_cpu-host-x86_64-unknown-linux-gnu.bc' source_filename = "streamcluster_cuda_cpu.cu" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" %"class.std::ios_base::Init" = type { i8 } %struct.Point = type { float, float*, i64, float } %struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } %struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } %struct.dim3 = type { i32, i32, i32 } %struct.CUstream_st = type opaque %struct.Points = type { i64, i32, %struct.Point* } %struct.timeval = type { i64, i64 } %struct.timezone = type { i32, i32 } %union.pthread_barrier_t = type { i64, [24 x i8] } %struct.pkmedian_arg_t = type { %struct.Points*, i64, i64, i64*, i32, %union.pthread_barrier_t* } %class.PStream = type { i32 (...)** } %class.SimStream = type { %class.PStream, i64 } %class.FileStream = type { %class.PStream, %struct._IO_FILE* } $_ZN4dim3C2Ejjj = comdat any $_ZSt3logf = comdat any $_ZN9SimStreamC2El = comdat any $_ZN10FileStreamC2EPc = comdat any $_ZN7PStreamC2Ev = comdat any $_ZN9SimStream4readEPfii = comdat any $_ZN9SimStream6ferrorEv = comdat any $_ZN9SimStream4feofEv = comdat any $_ZN9SimStreamD2Ev = comdat any $_ZN9SimStreamD0Ev = comdat any $_ZN7PStreamD2Ev = comdat any $_ZN7PStreamD0Ev = comdat any $__clang_call_terminate = comdat any $_ZN10FileStream4readEPfii = comdat any $_ZN10FileStream6ferrorEv = comdat any $_ZN10FileStream4feofEv = comdat any $_ZN10FileStreamD2Ev = comdat any $_ZN10FileStreamD0Ev = comdat any $_ZTV9SimStream = comdat any $_ZTS9SimStream = comdat any $_ZTS7PStream = comdat any $_ZTI7PStream = comdat any $_ZTI9SimStream = comdat any $_ZTV7PStream = comdat any $_ZTV10FileStream = comdat any $_ZTS10FileStream = comdat any $_ZTI10FileStream = comdat any 
; Global variables and string constants (@_ZStL8__ioinit through @.str.37).
;  * @work_mem_h, @coord_h, @work_mem_d, @coord_d, @center_table_d,
;    @switch_membership_d and @p are pointer globals initialised to null.
;    Functions later in this module store malloc/cudaMalloc results into them.
;  * @.str is the format "Cuda error in file '%s' in line %i : %s.\n" and
;    @.str.1 is the file name "./streamcluster_cuda.cu" that is passed with it.
;  * @isCoordChanged is an i8 flag initialised to 0. The double globals
;    (@serial_t, @cpu_to_gpu_t, @gpu_to_cpu_t, @alloc_t, @kernel_t, @free_t and
;    the @time_* group) are all initialised to 0.0.
;  * Globals whose names start with _ZL / _ZZ have internal linkage.
;  * @.str.15 .. @.str.27 hold the usage/help text and @.str.28 .. @.str.37
;    hold "time ... = %lfs" report formats, as their contents show.
; All string constants are NUL-terminated ("\00") private unnamed_addr arrays.
@_ZStL8__ioinit = internal global %"class.std::ios_base::Init" zeroinitializer, align 1 @__dso_handle = external hidden global i8 @work_mem_h = dso_local global float* null, align 8 @coord_h = dso_local global float* null, align 8 @work_mem_d = dso_local global float* null, align 8 @coord_d = dso_local global float* null, align 8 @center_table_d = dso_local global i32* null, align 8 @switch_membership_d = dso_local global i8* null, align 8 @p = dso_local global %struct.Point* null, align 8 @stderr = external dso_local global %struct._IO_FILE*, align 8 @.str = private unnamed_addr constant [42 x i8] c"Cuda error in file '%s' in line %i : %s.\0A\00", align 1 @.str.1 = private unnamed_addr constant [24 x i8] c"./streamcluster_cuda.cu\00", align 1 @_ZL4iter = internal global i32 0, align 4 @.str.2 = private unnamed_addr constant [18 x i8] c"kernel error: %s\0A\00", align 1 @isCoordChanged = dso_local global i8 0, align 1 @serial_t = dso_local global double 0.000000e+00, align 8 @cpu_to_gpu_t = dso_local global double 0.000000e+00, align 8 @gpu_to_cpu_t = dso_local global double 0.000000e+00, align 8 @alloc_t = dso_local global double 0.000000e+00, align 8 @kernel_t = dso_local global double 0.000000e+00, align 8 @free_t = dso_local global double 0.000000e+00, align 8 @time_local_search = dso_local global double 0.000000e+00, align 8 @time_speedy = dso_local global double 0.000000e+00, align 8 @time_select_feasible = dso_local global double 0.000000e+00, align 8 @time_gain = dso_local global double 0.000000e+00, align 8 @time_shuffle = dso_local global double 0.000000e+00, align 8 @time_gain_dist = dso_local global double 0.000000e+00, align 8 @time_gain_init = dso_local global double 0.000000e+00, align 8 @.str.3 = private unnamed_addr constant [2 x i8] c"w\00", align 1 @.str.4 = private unnamed_addr constant [4 x i8] c"%d \00", align 1 @_ZL5nproc = internal global i32 0, align 4 @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE9totalcost = internal global float 
0.000000e+00, align 4 @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE4open = internal global i8 0, align 1 @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE5costs = internal global float* null, align 8 @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE1i = internal global i32 0, align 4 @_ZL9is_center = internal global i8* null, align 8 @_ZL12center_table = internal global i32* null, align 8 @_ZL17switch_membership = internal global i8* null, align 8 @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE1k = internal global i64 0, align 8 @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE8feasible = internal global i32* null, align 8 @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE11numfeasible = internal global i32 0, align 4 @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE4hizs = internal global float* null, align 8 @.str.5 = private unnamed_addr constant [18 x i8] c"error opening %s\0A\00", align 1 @.str.6 = private unnamed_addr constant [4 x i8] c"%u\0A\00", align 1 @.str.7 = private unnamed_addr constant [5 x i8] c"%lf\0A\00", align 1 @.str.8 = private unnamed_addr constant [5 x i8] c"%lf \00", align 1 @.str.9 = private unnamed_addr constant [3 x i8] c"\0A\0A\00", align 1 @.str.10 = private unnamed_addr constant [32 x i8] c"not enough memory for a chunk!\0A\00", align 1 @.str.11 = private unnamed_addr constant [16 x i8] c"read %d points\0A\00", align 1 @.str.12 = private unnamed_addr constant [21 x i8] c"error reading data!\0A\00", align 1 @.str.13 = private unnamed_addr constant [21 x i8] c"finish local search\0A\00", align 1 @.str.14 = private unnamed_addr constant [33 x i8] c"oops! no more space for centers\0A\00", align 1 @.str.15 = private unnamed_addr constant [24 x i8] c"PARSEC Benchmark Suite\0A\00", align 1 @.str.16 = private unnamed_addr constant [64 x i8] c"usage: %s k1 k2 d n chunksize clustersize infile outfile nproc\0A\00", align 1 @.str.17 = private unnamed_addr constant [47 x i8] c" k1: Min. 
number of centers allowed\0A\00", align 1 @.str.18 = private unnamed_addr constant [47 x i8] c" k2: Max. number of centers allowed\0A\00", align 1 @.str.19 = private unnamed_addr constant [45 x i8] c" d: Dimension of each data point\0A\00", align 1 @.str.20 = private unnamed_addr constant [38 x i8] c" n: Number of data points\0A\00", align 1 @.str.21 = private unnamed_addr constant [57 x i8] c" chunksize: Number of data points to handle per step\0A\00", align 1 @.str.22 = private unnamed_addr constant [55 x i8] c" clustersize: Maximum number of intermediate centers\0A\00", align 1 @.str.23 = private unnamed_addr constant [37 x i8] c" infile: Input file (if n<=0)\0A\00", align 1 @.str.24 = private unnamed_addr constant [28 x i8] c" outfile: Output file\0A\00", align 1 @.str.25 = private unnamed_addr constant [41 x i8] c" nproc: Number of threads to use\0A\00", align 1 @.str.26 = private unnamed_addr constant [2 x i8] c"\0A\00", align 1 @.str.27 = private unnamed_addr constant [77 x i8] c"if n > 0, points will be randomly generated instead of reading from infile.\0A\00", align 1 @.str.28 = private unnamed_addr constant [13 x i8] c"time = %lfs\0A\00", align 1 @.str.29 = private unnamed_addr constant [19 x i8] c"time pgain = %lfs\0A\00", align 1 @.str.30 = private unnamed_addr constant [24 x i8] c"time pgain_dist = %lfs\0A\00", align 1 @.str.31 = private unnamed_addr constant [24 x i8] c"time pgain_init = %lfs\0A\00", align 1 @.str.32 = private unnamed_addr constant [21 x i8] c"time pselect = %lfs\0A\00", align 1 @.str.33 = private unnamed_addr constant [21 x i8] c"time pspeedy = %lfs\0A\00", align 1 @.str.34 = private unnamed_addr constant [22 x i8] c"time pshuffle = %lfs\0A\00", align 1 @.str.35 = private unnamed_addr constant [25 x i8] c"time localSearch = %lfs\0A\00", align 1 @.str.36 = private unnamed_addr constant [34 x i8] c"====CUDA Timing info (pgain)====\0A\00", align 1 @.str.37 = private unnamed_addr constant [20 x i8] c"time serial = %lfs\0A\00", align 1 
; Remaining constants, C++ runtime tables, CUDA registration data and the
; device-memory helper functions. The definitions below are chained across the
; physical line breaks of this copy, so they are documented together here.
;
; Constants and tables
;  * @.str.38 .. @.str.42 are further "time ... = %lfs" report formats.
;  * @_ZTV9SimStream, @_ZTV7PStream and @_ZTV10FileStream are 7-entry i8*
;    tables: entry 0 is null, entry 1 points at the matching @_ZTI* object,
;    entries 2-4 are read / ferror / feof, entries 5-6 are the D2 and D0
;    destructors. In @_ZTV7PStream entries 2-4 are @__cxa_pure_virtual.
;  * @_ZTI9SimStream and @_ZTI10FileStream both reference @_ZTI7PStream.
;  * @0 is the NUL-terminated name of the kernel stub defined below.
;  * @1 is placed in section ".nv_fatbin"; @__cuda_fatbin_wrapper
;    ({ i32 1180844977, i32 1, pointer to @1, null }) is placed in
;    ".nvFatBinSegment".
;    NOTE(review): as it appears here the @1 initializer has no c"..." opener
;    and is much shorter than the declared 15713 bytes - it looks truncated by
;    text extraction. Regenerate the module rather than hand-editing it.
;  * @llvm.global_ctors registers two constructors with value 65535:
;    @_GLOBAL__sub_I_streamcluster_cuda_cpu.cu and @__cuda_module_ctor.
;
; @__cxx_global_var_init
;    Calls the std::ios_base::Init constructor on @_ZStL8__ioinit and registers
;    the matching D1 destructor through __cxa_atexit with @__dso_handle.
;
; @_Z19kernel_compute_costiilP5PointiiPfS1_PiPb (host-side launch stub)
;    Spills its ten arguments to stack slots, fills the ten-entry %kernel_args
;    array with pointers to those slots, calls __cudaPopCallConfiguration to
;    obtain grid_dim, block_dim, shmem_size and stream, copies each 12-byte
;    dim3 into an { i64, i32 } temporary, and calls cudaLaunchKernel with the
;    stub's own address as the first argument.
;    NOTE(review): the results of __cudaPopCallConfiguration (%20) and
;    cudaLaunchKernel (%call) are never inspected inside the stub.
;
; @_Z11allocDevMemii(i32 %num, i32 %dim)
;    Four cudaMalloc calls, in order:
;      @center_table_d       num * 4 bytes
;      @switch_membership_d  num * 1 bytes
;      @p                    num * 32 bytes
;      @coord_d              (num * dim) * 4 bytes
;    After each call a non-zero status prints @.str to stderr with @.str.1,
;    the literal line number (91, 92, 93, 94) and cudaGetErrorString(status),
;    then calls exit(1).
;    NOTE(review): num * dim is a `mul nsw i32` performed before the sext to
;    i64, so the product is computed in 32 bits - confirm callers keep it
;    below 2^31.
;
; @_Z12allocHostMemii(i32 %num, i32 %dim)
;    Stores malloc((num * dim) * 4) into @coord_h. The same 32-bit multiply
;    applies, and the malloc result is stored without a null check.
;
; @_Z10freeDevMemv()
;    Calls cudaFree on @center_table_d, @switch_membership_d, @p and @coord_d
;    in that order, with the same print-and-exit(1) handling (line numbers
;    108, 109, 110, 111). No store back to the globals is visible, so they
;    keep their old pointer values after the call.
;
; @_Z11freeHostMemv()
;    Calls free on the pointer loaded from @coord_h.
;
; The last definition that begins in this block
; (@_Z5pgainlP6PointsfPliPbPiS2_bPdS4_S4_S4_S4_S4_) continues past it and is
; not described here.
@.str.38 = private unnamed_addr constant [36 x i8] c"time CPU to GPU memory copy = %lfs\0A\00", align 1 @.str.39 = private unnamed_addr constant [41 x i8] c"time GPU to CPU memory copy back = %lfs\0A\00", align 1 @.str.40 = private unnamed_addr constant [24 x i8] c"time GPU malloc = %lfs\0A\00", align 1 @.str.41 = private unnamed_addr constant [22 x i8] c"time GPU free = %lfs\0A\00", align 1 @.str.42 = private unnamed_addr constant [20 x i8] c"time kernel = %lfs\0A\00", align 1 @_ZTV9SimStream = linkonce_odr dso_local unnamed_addr constant { [7 x i8*] } { [7 x i8*] [i8* null, i8* bitcast ({ i8*, i8*, i8* }* @_ZTI9SimStream to i8*), i8* bitcast (i64 (%class.SimStream*, float*, i32, i32)* @_ZN9SimStream4readEPfii to i8*), i8* bitcast (i32 (%class.SimStream*)* @_ZN9SimStream6ferrorEv to i8*), i8* bitcast (i32 (%class.SimStream*)* @_ZN9SimStream4feofEv to i8*), i8* bitcast (void (%class.SimStream*)* @_ZN9SimStreamD2Ev to i8*), i8* bitcast (void (%class.SimStream*)* @_ZN9SimStreamD0Ev to i8*)] }, comdat, align 8 @_ZTVN10__cxxabiv120__si_class_type_infoE = external dso_local global i8* @_ZTS9SimStream = linkonce_odr dso_local constant [11 x i8] c"9SimStream\00", comdat, align 1 @_ZTVN10__cxxabiv117__class_type_infoE = external dso_local global i8* @_ZTS7PStream = linkonce_odr dso_local constant [9 x i8] c"7PStream\00", comdat, align 1 @_ZTI7PStream = linkonce_odr dso_local constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv117__class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @_ZTS7PStream, i32 0, i32 0) }, comdat, align 8 @_ZTI9SimStream = linkonce_odr dso_local constant { i8*, i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([11 x i8], [11 x i8]* @_ZTS9SimStream, i32 0, i32 0), i8* bitcast ({ i8*, i8* }* @_ZTI7PStream to i8*) }, comdat, align 8 @_ZTV7PStream = linkonce_odr dso_local 
unnamed_addr constant { [7 x i8*] } { [7 x i8*] [i8* null, i8* bitcast ({ i8*, i8* }* @_ZTI7PStream to i8*), i8* bitcast (void ()* @__cxa_pure_virtual to i8*), i8* bitcast (void ()* @__cxa_pure_virtual to i8*), i8* bitcast (void ()* @__cxa_pure_virtual to i8*), i8* bitcast (void (%class.PStream*)* @_ZN7PStreamD2Ev to i8*), i8* bitcast (void (%class.PStream*)* @_ZN7PStreamD0Ev to i8*)] }, comdat, align 8 @_ZTV10FileStream = linkonce_odr dso_local unnamed_addr constant { [7 x i8*] } { [7 x i8*] [i8* null, i8* bitcast ({ i8*, i8*, i8* }* @_ZTI10FileStream to i8*), i8* bitcast (i64 (%class.FileStream*, float*, i32, i32)* @_ZN10FileStream4readEPfii to i8*), i8* bitcast (i32 (%class.FileStream*)* @_ZN10FileStream6ferrorEv to i8*), i8* bitcast (i32 (%class.FileStream*)* @_ZN10FileStream4feofEv to i8*), i8* bitcast (void (%class.FileStream*)* @_ZN10FileStreamD2Ev to i8*), i8* bitcast (void (%class.FileStream*)* @_ZN10FileStreamD0Ev to i8*)] }, comdat, align 8 @.str.43 = private unnamed_addr constant [3 x i8] c"rb\00", align 1 @.str.44 = private unnamed_addr constant [24 x i8] c"error opening file %s\0A.\00", align 1 @_ZTS10FileStream = linkonce_odr dso_local constant [13 x i8] c"10FileStream\00", comdat, align 1 @_ZTI10FileStream = linkonce_odr dso_local constant { i8*, i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([13 x i8], [13 x i8]* @_ZTS10FileStream, i32 0, i32 0), i8* bitcast ({ i8*, i8* }* @_ZTI7PStream to i8*) }, comdat, align 8 @.str.45 = private unnamed_addr constant [21 x i8] c"closing file stream\0A\00", align 1 @0 = private unnamed_addr constant [45 x i8] c"_Z19kernel_compute_costiilP5PointiiPfS1_PiPb\00", align 1 @1 = private constant [15713 x i8] \00\00\A5\00\07\AE\02\01-\00\01\16\00\1B6\ED\01\134\ED\01\183\ED\01/30c\01\03/31c\01\04432, 
\00\0Ac\01433,Q\00\01'\00\07\C7\00\127\C6\00\2233\15\01\05\1A\12\188\F7\01\09x\01$9,\1F\12(8;\AE\00\1B4u\01\04\18\00%5,\E7\08\09\18\00\156z\00+16\C8\00\02+\02\1D3!\13438,S\00\01'\00\09\16\01\139P\00\0Cx\02\03\F5\00\1C9M\00441,\B8\00\01'\00\08\DC\01\130x\02\1D1\DC\01#1, \009%f9\DB\01\2241\DB\01\1F1\DB\01\04*4:\18\00\135\18\00\B05:\0Aret;\0A\0A}\0A\00\00", section ".nv_fatbin", align 8 @__cuda_fatbin_wrapper = internal constant { i32, i32, i8*, i8* } { i32 1180844977, i32 1, i8* getelementptr inbounds ([15713 x i8], [15713 x i8]* @1, i64 0, i64 0), i8* null }, section ".nvFatBinSegment", align 8 @__cuda_gpubin_handle = internal global i8** null, align 8 @llvm.global_ctors = appending global [2 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__sub_I_streamcluster_cuda_cpu.cu, i8* null }, { i32, void ()*, i8* } { i32 65535, void ()* bitcast (void (i8*)* @__cuda_module_ctor to void ()*), i8* null }] ; Function Attrs: noinline uwtable define internal void @__cxx_global_var_init() #0 section ".text.startup" { entry: call void @_ZNSt8ios_base4InitC1Ev(%"class.std::ios_base::Init"* @_ZStL8__ioinit) %0 = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%"class.std::ios_base::Init"*)* @_ZNSt8ios_base4InitD1Ev to void (i8*)*), i8* getelementptr inbounds (%"class.std::ios_base::Init", %"class.std::ios_base::Init"* @_ZStL8__ioinit, i32 0, i32 0), i8* @__dso_handle) #2 ret void } declare dso_local void @_ZNSt8ios_base4InitC1Ev(%"class.std::ios_base::Init"*) unnamed_addr #1 declare dso_local void @_ZNSt8ios_base4InitD1Ev(%"class.std::ios_base::Init"*) unnamed_addr #1 ; Function Attrs: nounwind declare dso_local i32 @__cxa_atexit(void (i8*)*, i8*, i8*) #2 ; Function Attrs: noinline optnone uwtable define dso_local void @_Z19kernel_compute_costiilP5PointiiPfS1_PiPb(i32 %num, i32 %dim, i64 %x, %struct.Point* %p, i32 %K, i32 %stride, float* %coord_d, float* %work_mem_d, i32* %center_table_d, i8* %switch_membership_d) #3 { entry: %num.addr = alloca i32, 
align 4 %dim.addr = alloca i32, align 4 %x.addr = alloca i64, align 8 %p.addr = alloca %struct.Point*, align 8 %K.addr = alloca i32, align 4 %stride.addr = alloca i32, align 4 %coord_d.addr = alloca float*, align 8 %work_mem_d.addr = alloca float*, align 8 %center_table_d.addr = alloca i32*, align 8 %switch_membership_d.addr = alloca i8*, align 8 %grid_dim = alloca %struct.dim3, align 8 %block_dim = alloca %struct.dim3, align 8 %shmem_size = alloca i64, align 8 %stream = alloca i8*, align 8 %grid_dim.coerce = alloca { i64, i32 }, align 8 %block_dim.coerce = alloca { i64, i32 }, align 8 store i32 %num, i32* %num.addr, align 4 store i32 %dim, i32* %dim.addr, align 4 store i64 %x, i64* %x.addr, align 8 store %struct.Point* %p, %struct.Point** %p.addr, align 8 store i32 %K, i32* %K.addr, align 4 store i32 %stride, i32* %stride.addr, align 4 store float* %coord_d, float** %coord_d.addr, align 8 store float* %work_mem_d, float** %work_mem_d.addr, align 8 store i32* %center_table_d, i32** %center_table_d.addr, align 8 store i8* %switch_membership_d, i8** %switch_membership_d.addr, align 8 %kernel_args = alloca i8*, i64 10, align 16 %0 = bitcast i32* %num.addr to i8* %1 = getelementptr i8*, i8** %kernel_args, i32 0 store i8* %0, i8** %1 %2 = bitcast i32* %dim.addr to i8* %3 = getelementptr i8*, i8** %kernel_args, i32 1 store i8* %2, i8** %3 %4 = bitcast i64* %x.addr to i8* %5 = getelementptr i8*, i8** %kernel_args, i32 2 store i8* %4, i8** %5 %6 = bitcast %struct.Point** %p.addr to i8* %7 = getelementptr i8*, i8** %kernel_args, i32 3 store i8* %6, i8** %7 %8 = bitcast i32* %K.addr to i8* %9 = getelementptr i8*, i8** %kernel_args, i32 4 store i8* %8, i8** %9 %10 = bitcast i32* %stride.addr to i8* %11 = getelementptr i8*, i8** %kernel_args, i32 5 store i8* %10, i8** %11 %12 = bitcast float** %coord_d.addr to i8* %13 = getelementptr i8*, i8** %kernel_args, i32 6 store i8* %12, i8** %13 %14 = bitcast float** %work_mem_d.addr to i8* %15 = getelementptr i8*, i8** %kernel_args, 
i32 7 store i8* %14, i8** %15 %16 = bitcast i32** %center_table_d.addr to i8* %17 = getelementptr i8*, i8** %kernel_args, i32 8 store i8* %16, i8** %17 %18 = bitcast i8** %switch_membership_d.addr to i8* %19 = getelementptr i8*, i8** %kernel_args, i32 9 store i8* %18, i8** %19 %20 = call i32 @__cudaPopCallConfiguration(%struct.dim3* %grid_dim, %struct.dim3* %block_dim, i64* %shmem_size, i8** %stream) %21 = load i64, i64* %shmem_size, align 8 %22 = load i8*, i8** %stream, align 8 %23 = bitcast { i64, i32 }* %grid_dim.coerce to i8* %24 = bitcast %struct.dim3* %grid_dim to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %23, i8* align 8 %24, i64 12, i1 false) %25 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 0 %26 = load i64, i64* %25, align 8 %27 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 1 %28 = load i32, i32* %27, align 8 %29 = bitcast { i64, i32 }* %block_dim.coerce to i8* %30 = bitcast %struct.dim3* %block_dim to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %29, i8* align 8 %30, i64 12, i1 false) %31 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 0 %32 = load i64, i64* %31, align 8 %33 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 1 %34 = load i32, i32* %33, align 8 %35 = bitcast i8* %22 to %struct.CUstream_st* %call = call i32 @cudaLaunchKernel(i8* bitcast (void (i32, i32, i64, %struct.Point*, i32, i32, float*, float*, i32*, i8*)* @_Z19kernel_compute_costiilP5PointiiPfS1_PiPb to i8*), i64 %26, i32 %28, i64 %32, i32 %34, i8** %kernel_args, i64 %21, %struct.CUstream_st* %35) br label %setup.end setup.end: ; preds = %entry ret void } declare dso_local i32 @__cudaPopCallConfiguration(%struct.dim3*, %struct.dim3*, i64*, i8**) declare dso_local i32 @cudaLaunchKernel(i8*, i64, i32, i64, i32, i8**, i64, %struct.CUstream_st*) ; Function Attrs: argmemonly nounwind willreturn declare void 
@llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #4 ; Function Attrs: noinline optnone uwtable define dso_local void @_Z11allocDevMemii(i32 %num, i32 %dim) #3 { entry: %num.addr = alloca i32, align 4 %dim.addr = alloca i32, align 4 %err = alloca i32, align 4 %err4 = alloca i32, align 4 %err15 = alloca i32, align 4 %err26 = alloca i32, align 4 store i32 %num, i32* %num.addr, align 4 store i32 %dim, i32* %dim.addr, align 4 br label %do.body do.body: ; preds = %entry %0 = load i32, i32* %num.addr, align 4 %conv = sext i32 %0 to i64 %mul = mul i64 %conv, 4 %call = call i32 @cudaMalloc(i8** bitcast (i32** @center_table_d to i8**), i64 %mul) store i32 %call, i32* %err, align 4 %1 = load i32, i32* %err, align 4 %cmp = icmp ne i32 0, %1 br i1 %cmp, label %if.then, label %if.end if.then: ; preds = %do.body %2 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 %3 = load i32, i32* %err, align 4 %call1 = call i8* @cudaGetErrorString(i32 %3) %call2 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %2, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.1, i64 0, i64 0), i32 91, i8* %call1) call void @exit(i32 1) #15 unreachable if.end: ; preds = %do.body br label %do.end do.end: ; preds = %if.end br label %do.body3 do.body3: ; preds = %do.end %4 = load i32, i32* %num.addr, align 4 %conv5 = sext i32 %4 to i64 %mul6 = mul i64 %conv5, 1 %call7 = call i32 @cudaMalloc(i8** @switch_membership_d, i64 %mul6) store i32 %call7, i32* %err4, align 4 %5 = load i32, i32* %err4, align 4 %cmp8 = icmp ne i32 0, %5 br i1 %cmp8, label %if.then9, label %if.end12 if.then9: ; preds = %do.body3 %6 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 %7 = load i32, i32* %err4, align 4 %call10 = call i8* @cudaGetErrorString(i32 %7) %call11 = call i32 (%struct._IO_FILE*, i8*, ...) 
@fprintf(%struct._IO_FILE* %6, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.1, i64 0, i64 0), i32 92, i8* %call10) call void @exit(i32 1) #15 unreachable if.end12: ; preds = %do.body3 br label %do.end13 do.end13: ; preds = %if.end12 br label %do.body14 do.body14: ; preds = %do.end13 %8 = load i32, i32* %num.addr, align 4 %conv16 = sext i32 %8 to i64 %mul17 = mul i64 %conv16, 32 %call18 = call i32 @cudaMalloc(i8** bitcast (%struct.Point** @p to i8**), i64 %mul17) store i32 %call18, i32* %err15, align 4 %9 = load i32, i32* %err15, align 4 %cmp19 = icmp ne i32 0, %9 br i1 %cmp19, label %if.then20, label %if.end23 if.then20: ; preds = %do.body14 %10 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 %11 = load i32, i32* %err15, align 4 %call21 = call i8* @cudaGetErrorString(i32 %11) %call22 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %10, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.1, i64 0, i64 0), i32 93, i8* %call21) call void @exit(i32 1) #15 unreachable if.end23: ; preds = %do.body14 br label %do.end24 do.end24: ; preds = %if.end23 br label %do.body25 do.body25: ; preds = %do.end24 %12 = load i32, i32* %num.addr, align 4 %13 = load i32, i32* %dim.addr, align 4 %mul27 = mul nsw i32 %12, %13 %conv28 = sext i32 %mul27 to i64 %mul29 = mul i64 %conv28, 4 %call30 = call i32 @cudaMalloc(i8** bitcast (float** @coord_d to i8**), i64 %mul29) store i32 %call30, i32* %err26, align 4 %14 = load i32, i32* %err26, align 4 %cmp31 = icmp ne i32 0, %14 br i1 %cmp31, label %if.then32, label %if.end35 if.then32: ; preds = %do.body25 %15 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 %16 = load i32, i32* %err26, align 4 %call33 = call i8* @cudaGetErrorString(i32 %16) %call34 = call i32 (%struct._IO_FILE*, i8*, ...) 
@fprintf(%struct._IO_FILE* %15, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.1, i64 0, i64 0), i32 94, i8* %call33) call void @exit(i32 1) #15 unreachable if.end35: ; preds = %do.body25 br label %do.end36 do.end36: ; preds = %if.end35 ret void } declare dso_local i32 @cudaMalloc(i8**, i64) #1 declare dso_local i32 @fprintf(%struct._IO_FILE*, i8*, ...) #1 declare dso_local i8* @cudaGetErrorString(i32) #1 ; Function Attrs: noreturn nounwind declare dso_local void @exit(i32) #5 ; Function Attrs: noinline nounwind optnone uwtable define dso_local void @_Z12allocHostMemii(i32 %num, i32 %dim) #6 { entry: %num.addr = alloca i32, align 4 %dim.addr = alloca i32, align 4 store i32 %num, i32* %num.addr, align 4 store i32 %dim, i32* %dim.addr, align 4 %0 = load i32, i32* %num.addr, align 4 %1 = load i32, i32* %dim.addr, align 4 %mul = mul nsw i32 %0, %1 %conv = sext i32 %mul to i64 %mul1 = mul i64 %conv, 4 %call = call noalias i8* @malloc(i64 %mul1) #2 %2 = bitcast i8* %call to float* store float* %2, float** @coord_h, align 8 ret void } ; Function Attrs: nounwind declare dso_local noalias i8* @malloc(i64) #7 ; Function Attrs: noinline optnone uwtable define dso_local void @_Z10freeDevMemv() #3 { entry: %err = alloca i32, align 4 %err4 = alloca i32, align 4 %err13 = alloca i32, align 4 %err22 = alloca i32, align 4 br label %do.body do.body: ; preds = %entry %0 = load i32*, i32** @center_table_d, align 8 %1 = bitcast i32* %0 to i8* %call = call i32 @cudaFree(i8* %1) store i32 %call, i32* %err, align 4 %2 = load i32, i32* %err, align 4 %cmp = icmp ne i32 0, %2 br i1 %cmp, label %if.then, label %if.end if.then: ; preds = %do.body %3 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 %4 = load i32, i32* %err, align 4 %call1 = call i8* @cudaGetErrorString(i32 %4) %call2 = call i32 (%struct._IO_FILE*, i8*, ...) 
@fprintf(%struct._IO_FILE* %3, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.1, i64 0, i64 0), i32 108, i8* %call1) call void @exit(i32 1) #15 unreachable if.end: ; preds = %do.body br label %do.end do.end: ; preds = %if.end br label %do.body3 do.body3: ; preds = %do.end %5 = load i8*, i8** @switch_membership_d, align 8 %call5 = call i32 @cudaFree(i8* %5) store i32 %call5, i32* %err4, align 4 %6 = load i32, i32* %err4, align 4 %cmp6 = icmp ne i32 0, %6 br i1 %cmp6, label %if.then7, label %if.end10 if.then7: ; preds = %do.body3 %7 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 %8 = load i32, i32* %err4, align 4 %call8 = call i8* @cudaGetErrorString(i32 %8) %call9 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %7, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.1, i64 0, i64 0), i32 109, i8* %call8) call void @exit(i32 1) #15 unreachable if.end10: ; preds = %do.body3 br label %do.end11 do.end11: ; preds = %if.end10 br label %do.body12 do.body12: ; preds = %do.end11 %9 = load %struct.Point*, %struct.Point** @p, align 8 %10 = bitcast %struct.Point* %9 to i8* %call14 = call i32 @cudaFree(i8* %10) store i32 %call14, i32* %err13, align 4 %11 = load i32, i32* %err13, align 4 %cmp15 = icmp ne i32 0, %11 br i1 %cmp15, label %if.then16, label %if.end19 if.then16: ; preds = %do.body12 %12 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 %13 = load i32, i32* %err13, align 4 %call17 = call i8* @cudaGetErrorString(i32 %13) %call18 = call i32 (%struct._IO_FILE*, i8*, ...) 
@fprintf(%struct._IO_FILE* %12, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.1, i64 0, i64 0), i32 110, i8* %call17) call void @exit(i32 1) #15 unreachable if.end19: ; preds = %do.body12 br label %do.end20 do.end20: ; preds = %if.end19 br label %do.body21 do.body21: ; preds = %do.end20 %14 = load float*, float** @coord_d, align 8 %15 = bitcast float* %14 to i8* %call23 = call i32 @cudaFree(i8* %15) store i32 %call23, i32* %err22, align 4 %16 = load i32, i32* %err22, align 4 %cmp24 = icmp ne i32 0, %16 br i1 %cmp24, label %if.then25, label %if.end28 if.then25: ; preds = %do.body21 %17 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 %18 = load i32, i32* %err22, align 4 %call26 = call i8* @cudaGetErrorString(i32 %18) %call27 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %17, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.1, i64 0, i64 0), i32 111, i8* %call26) call void @exit(i32 1) #15 unreachable if.end28: ; preds = %do.body21 br label %do.end29 do.end29: ; preds = %if.end28 ret void } declare dso_local i32 @cudaFree(i8*) #1 ; Function Attrs: noinline nounwind optnone uwtable define dso_local void @_Z11freeHostMemv() #6 { entry: %0 = load float*, float** @coord_h, align 8 %1 = bitcast float* %0 to i8* call void @free(i8* %1) #2 ret void } ; Function Attrs: nounwind declare dso_local void @free(i8*) #7 ; Function Attrs: noinline optnone uwtable define dso_local float @_Z5pgainlP6PointsfPliPbPiS2_bPdS4_S4_S4_S4_S4_(i64 %x, %struct.Points* %points, float %z, i64* %numcenters, i32 %kmax, i8* %is_center, i32* %center_table, i8* %switch_membership, i1 zeroext %isCoordChanged, double* %serial_t, double* %cpu_to_gpu_t, double* %gpu_to_cpu_t, double* %alloc_t, double* %kernel_t, double* %free_t) #3 { entry: %x.addr = alloca i64, align 8 %points.addr = alloca %struct.Points*, 
align 8 %z.addr = alloca float, align 4 %numcenters.addr = alloca i64*, align 8 %kmax.addr = alloca i32, align 4 %is_center.addr = alloca i8*, align 8 %center_table.addr = alloca i32*, align 8 %switch_membership.addr = alloca i8*, align 8 %isCoordChanged.addr = alloca i8, align 1 %serial_t.addr = alloca double*, align 8 %cpu_to_gpu_t.addr = alloca double*, align 8 %gpu_to_cpu_t.addr = alloca double*, align 8 %alloc_t.addr = alloca double*, align 8 %kernel_t.addr = alloca double*, align 8 %free_t.addr = alloca double*, align 8 %error = alloca i32, align 4 %stride = alloca i32, align 4 %K = alloca i32, align 4 %num = alloca i32, align 4 %dim = alloca i32, align 4 %nThread = alloca i32, align 4 %count = alloca i32, align 4 %i = alloca i32, align 4 %i17 = alloca i32, align 4 %j = alloca i32, align 4 %err = alloca i32, align 4 %err57 = alloca i32, align 4 %err70 = alloca i32, align 4 %err81 = alloca i32, align 4 %err93 = alloca i32, align 4 %err104 = alloca i32, align 4 %num_blocks = alloca i32, align 4 %num_blocks_y = alloca i32, align 4 %num_blocks_x = alloca i32, align 4 %grid_size = alloca %struct.dim3, align 4 %agg.tmp = alloca %struct.dim3, align 4 %agg.tmp130 = alloca %struct.dim3, align 4 %agg.tmp.coerce = alloca { i64, i32 }, align 4 %agg.tmp130.coerce = alloca { i64, i32 }, align 4 %err141 = alloca i32, align 4 %err154 = alloca i32, align 4 %number_of_centers_to_close = alloca i32, align 4 %gl_cost_of_opening_x = alloca float, align 4 %gl_lower = alloca float*, align 8 %i167 = alloca i32, align 4 %low = alloca float, align 4 %j175 = alloca i32, align 4 %i213 = alloca i32, align 4 %close_center = alloca i8, align 1 %agg.tmp231 = alloca %struct.Point, align 8 %agg.tmp235 = alloca %struct.Point, align 8 %i254 = alloca i32, align 4 %err285 = alloca i32, align 4 store i64 %x, i64* %x.addr, align 8 store %struct.Points* %points, %struct.Points** %points.addr, align 8 store float %z, float* %z.addr, align 4 store i64* %numcenters, i64** %numcenters.addr, align 8 
store i32 %kmax, i32* %kmax.addr, align 4 store i8* %is_center, i8** %is_center.addr, align 8 store i32* %center_table, i32** %center_table.addr, align 8 store i8* %switch_membership, i8** %switch_membership.addr, align 8 %frombool = zext i1 %isCoordChanged to i8 store i8 %frombool, i8* %isCoordChanged.addr, align 1 store double* %serial_t, double** %serial_t.addr, align 8 store double* %cpu_to_gpu_t, double** %cpu_to_gpu_t.addr, align 8 store double* %gpu_to_cpu_t, double** %gpu_to_cpu_t.addr, align 8 store double* %alloc_t, double** %alloc_t.addr, align 8 store double* %kernel_t, double** %kernel_t.addr, align 8 store double* %free_t, double** %free_t.addr, align 8 %0 = load i64*, i64** %numcenters.addr, align 8 %1 = load i64, i64* %0, align 8 %add = add nsw i64 %1, 1 %conv = trunc i64 %add to i32 store i32 %conv, i32* %stride, align 4 %2 = load i64*, i64** %numcenters.addr, align 8 %3 = load i64, i64* %2, align 8 %conv1 = trunc i64 %3 to i32 store i32 %conv1, i32* %K, align 4 %4 = load %struct.Points*, %struct.Points** %points.addr, align 8 %num2 = getelementptr inbounds %struct.Points, %struct.Points* %4, i32 0, i32 0 %5 = load i64, i64* %num2, align 8 %conv3 = trunc i64 %5 to i32 store i32 %conv3, i32* %num, align 4 %6 = load %struct.Points*, %struct.Points** %points.addr, align 8 %dim4 = getelementptr inbounds %struct.Points, %struct.Points* %6, i32 0, i32 1 %7 = load i32, i32* %dim4, align 8 store i32 %7, i32* %dim, align 4 %8 = load i32, i32* %num, align 4 store i32 %8, i32* %nThread, align 4 %9 = load i32, i32* %stride, align 4 %10 = load i32, i32* %nThread, align 4 %add5 = add nsw i32 %10, 1 %mul = mul nsw i32 %9, %add5 %conv6 = sext i32 %mul to i64 %mul7 = mul i64 %conv6, 4 %call = call noalias i8* @malloc(i64 %mul7) #2 %11 = bitcast i8* %call to float* store float* %11, float** @work_mem_h, align 8 %12 = load i32, i32* @_ZL4iter, align 4 %cmp = icmp eq i32 %12, 0 br i1 %cmp, label %if.then, label %if.end if.then: ; preds = %entry %13 = load i32, i32* 
%num, align 4 %14 = load i32, i32* %dim, align 4 call void @_Z12allocHostMemii(i32 %13, i32 %14) br label %if.end if.end: ; preds = %if.then, %entry store i32 0, i32* %count, align 4 store i32 0, i32* %i, align 4 br label %for.cond for.cond: ; preds = %for.inc, %if.end %15 = load i32, i32* %i, align 4 %16 = load i32, i32* %num, align 4 %cmp8 = icmp slt i32 %15, %16 br i1 %cmp8, label %for.body, label %for.end for.body: ; preds = %for.cond %17 = load i8*, i8** %is_center.addr, align 8 %18 = load i32, i32* %i, align 4 %idxprom = sext i32 %18 to i64 %arrayidx = getelementptr inbounds i8, i8* %17, i64 %idxprom %19 = load i8, i8* %arrayidx, align 1 %tobool = trunc i8 %19 to i1 br i1 %tobool, label %if.then9, label %if.end12 if.then9: ; preds = %for.body %20 = load i32, i32* %count, align 4 %inc = add nsw i32 %20, 1 store i32 %inc, i32* %count, align 4 %21 = load i32*, i32** %center_table.addr, align 8 %22 = load i32, i32* %i, align 4 %idxprom10 = sext i32 %22 to i64 %arrayidx11 = getelementptr inbounds i32, i32* %21, i64 %idxprom10 store i32 %20, i32* %arrayidx11, align 4 br label %if.end12 if.end12: ; preds = %if.then9, %for.body br label %for.inc for.inc: ; preds = %if.end12 %23 = load i32, i32* %i, align 4 %inc13 = add nsw i32 %23, 1 store i32 %inc13, i32* %i, align 4 br label %for.cond for.end: ; preds = %for.cond %24 = load i8, i8* %isCoordChanged.addr, align 1 %tobool14 = trunc i8 %24 to i1 br i1 %tobool14, label %if.then16, label %lor.lhs.false lor.lhs.false: ; preds = %for.end %25 = load i32, i32* @_ZL4iter, align 4 %cmp15 = icmp eq i32 %25, 0 br i1 %cmp15, label %if.then16, label %if.end38 if.then16: ; preds = %lor.lhs.false, %for.end store i32 0, i32* %i17, align 4 br label %for.cond18 for.cond18: ; preds = %for.inc35, %if.then16 %26 = load i32, i32* %i17, align 4 %27 = load i32, i32* %dim, align 4 %cmp19 = icmp slt i32 %26, %27 br i1 %cmp19, label %for.body20, label %for.end37 for.body20: ; preds = %for.cond18 store i32 0, i32* %j, align 4 br label 
%for.cond21 for.cond21: ; preds = %for.inc32, %for.body20 %28 = load i32, i32* %j, align 4 %29 = load i32, i32* %num, align 4 %cmp22 = icmp slt i32 %28, %29 br i1 %cmp22, label %for.body23, label %for.end34 for.body23: ; preds = %for.cond21 %30 = load %struct.Points*, %struct.Points** %points.addr, align 8 %p = getelementptr inbounds %struct.Points, %struct.Points* %30, i32 0, i32 2 %31 = load %struct.Point*, %struct.Point** %p, align 8 %32 = load i32, i32* %j, align 4 %idxprom24 = sext i32 %32 to i64 %arrayidx25 = getelementptr inbounds %struct.Point, %struct.Point* %31, i64 %idxprom24 %coord = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx25, i32 0, i32 1 %33 = load float*, float** %coord, align 8 %34 = load i32, i32* %i17, align 4 %idxprom26 = sext i32 %34 to i64 %arrayidx27 = getelementptr inbounds float, float* %33, i64 %idxprom26 %35 = load float, float* %arrayidx27, align 4 %36 = load float*, float** @coord_h, align 8 %37 = load i32, i32* %num, align 4 %38 = load i32, i32* %i17, align 4 %mul28 = mul nsw i32 %37, %38 %39 = load i32, i32* %j, align 4 %add29 = add nsw i32 %mul28, %39 %idxprom30 = sext i32 %add29 to i64 %arrayidx31 = getelementptr inbounds float, float* %36, i64 %idxprom30 store float %35, float* %arrayidx31, align 4 br label %for.inc32 for.inc32: ; preds = %for.body23 %40 = load i32, i32* %j, align 4 %inc33 = add nsw i32 %40, 1 store i32 %inc33, i32* %j, align 4 br label %for.cond21 for.end34: ; preds = %for.cond21 br label %for.inc35 for.inc35: ; preds = %for.end34 %41 = load i32, i32* %i17, align 4 %inc36 = add nsw i32 %41, 1 store i32 %inc36, i32* %i17, align 4 br label %for.cond18 for.end37: ; preds = %for.cond18 br label %if.end38 if.end38: ; preds = %for.end37, %lor.lhs.false br label %do.body do.body: ; preds = %if.end38 %42 = load i32, i32* %stride, align 4 %43 = load i32, i32* %nThread, align 4 %add39 = add nsw i32 %43, 1 %mul40 = mul nsw i32 %42, %add39 %conv41 = sext i32 %mul40 to i64 %mul42 = mul i64 %conv41, 4 
%call43 = call i32 @cudaMalloc(i8** bitcast (float** @work_mem_d to i8**), i64 %mul42) store i32 %call43, i32* %err, align 4 %44 = load i32, i32* %err, align 4 %cmp44 = icmp ne i32 0, %44 br i1 %cmp44, label %if.then45, label %if.end48 if.then45: ; preds = %do.body %45 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 %46 = load i32, i32* %err, align 4 %call46 = call i8* @cudaGetErrorString(i32 %46) %call47 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %45, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.1, i64 0, i64 0), i32 184, i8* %call46) call void @exit(i32 1) #15 unreachable if.end48: ; preds = %do.body br label %do.end do.end: ; preds = %if.end48 %47 = load i32, i32* @_ZL4iter, align 4 %cmp49 = icmp eq i32 %47, 0 br i1 %cmp49, label %if.then50, label %if.end51 if.then50: ; preds = %do.end %48 = load i32, i32* %num, align 4 %49 = load i32, i32* %dim, align 4 call void @_Z11allocDevMemii(i32 %48, i32 %49) br label %if.end51 if.end51: ; preds = %if.then50, %do.end %50 = load i8, i8* %isCoordChanged.addr, align 1 %tobool52 = trunc i8 %50 to i1 br i1 %tobool52, label %if.then55, label %lor.lhs.false53 lor.lhs.false53: ; preds = %if.end51 %51 = load i32, i32* @_ZL4iter, align 4 %cmp54 = icmp eq i32 %51, 0 br i1 %cmp54, label %if.then55, label %if.end68 if.then55: ; preds = %lor.lhs.false53, %if.end51 br label %do.body56 do.body56: ; preds = %if.then55 %52 = load float*, float** @coord_d, align 8 %53 = bitcast float* %52 to i8* %54 = load float*, float** @coord_h, align 8 %55 = bitcast float* %54 to i8* %56 = load i32, i32* %num, align 4 %57 = load i32, i32* %dim, align 4 %mul58 = mul nsw i32 %56, %57 %conv59 = sext i32 %mul58 to i64 %mul60 = mul i64 %conv59, 4 %call61 = call i32 @cudaMemcpy(i8* %53, i8* %55, i64 %mul60, i32 1) store i32 %call61, i32* %err57, align 4 %58 = load i32, i32* %err57, align 4 %cmp62 = icmp ne i32 0, %58 br i1 %cmp62, label 
%if.then63, label %if.end66 if.then63: ; preds = %do.body56 %59 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 %60 = load i32, i32* %err57, align 4 %call64 = call i8* @cudaGetErrorString(i32 %60) %call65 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %59, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.1, i64 0, i64 0), i32 205, i8* %call64) call void @exit(i32 1) #15 unreachable if.end66: ; preds = %do.body56 br label %do.end67 do.end67: ; preds = %if.end66 br label %if.end68 if.end68: ; preds = %do.end67, %lor.lhs.false53 br label %do.body69 do.body69: ; preds = %if.end68 %61 = load i32*, i32** @center_table_d, align 8 %62 = bitcast i32* %61 to i8* %63 = load i32*, i32** %center_table.addr, align 8 %64 = bitcast i32* %63 to i8* %65 = load i32, i32* %num, align 4 %conv71 = sext i32 %65 to i64 %mul72 = mul i64 %conv71, 4 %call73 = call i32 @cudaMemcpy(i8* %62, i8* %64, i64 %mul72, i32 1) store i32 %call73, i32* %err70, align 4 %66 = load i32, i32* %err70, align 4 %cmp74 = icmp ne i32 0, %66 br i1 %cmp74, label %if.then75, label %if.end78 if.then75: ; preds = %do.body69 %67 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 %68 = load i32, i32* %err70, align 4 %call76 = call i8* @cudaGetErrorString(i32 %68) %call77 = call i32 (%struct._IO_FILE*, i8*, ...) 
@fprintf(%struct._IO_FILE* %67, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.1, i64 0, i64 0), i32 208, i8* %call76) call void @exit(i32 1) #15 unreachable if.end78: ; preds = %do.body69 br label %do.end79 do.end79: ; preds = %if.end78 br label %do.body80 do.body80: ; preds = %do.end79 %69 = load %struct.Point*, %struct.Point** @p, align 8 %70 = bitcast %struct.Point* %69 to i8* %71 = load %struct.Points*, %struct.Points** %points.addr, align 8 %p82 = getelementptr inbounds %struct.Points, %struct.Points* %71, i32 0, i32 2 %72 = load %struct.Point*, %struct.Point** %p82, align 8 %73 = bitcast %struct.Point* %72 to i8* %74 = load i32, i32* %num, align 4 %conv83 = sext i32 %74 to i64 %mul84 = mul i64 %conv83, 32 %call85 = call i32 @cudaMemcpy(i8* %70, i8* %73, i64 %mul84, i32 1) store i32 %call85, i32* %err81, align 4 %75 = load i32, i32* %err81, align 4 %cmp86 = icmp ne i32 0, %75 br i1 %cmp86, label %if.then87, label %if.end90 if.then87: ; preds = %do.body80 %76 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 %77 = load i32, i32* %err81, align 4 %call88 = call i8* @cudaGetErrorString(i32 %77) %call89 = call i32 (%struct._IO_FILE*, i8*, ...) 
@fprintf(%struct._IO_FILE* %76, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.1, i64 0, i64 0), i32 210, i8* %call88) call void @exit(i32 1) #15 unreachable if.end90: ; preds = %do.body80 br label %do.end91 do.end91: ; preds = %if.end90 br label %do.body92 do.body92: ; preds = %do.end91 %78 = load i8*, i8** @switch_membership_d, align 8 %79 = load i32, i32* %num, align 4 %conv94 = sext i32 %79 to i64 %mul95 = mul i64 %conv94, 1 %call96 = call i32 @cudaMemset(i8* %78, i32 0, i64 %mul95) store i32 %call96, i32* %err93, align 4 %80 = load i32, i32* %err93, align 4 %cmp97 = icmp ne i32 0, %80 br i1 %cmp97, label %if.then98, label %if.end101 if.then98: ; preds = %do.body92 %81 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 %82 = load i32, i32* %err93, align 4 %call99 = call i8* @cudaGetErrorString(i32 %82) %call100 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %81, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.1, i64 0, i64 0), i32 213, i8* %call99) call void @exit(i32 1) #15 unreachable if.end101: ; preds = %do.body92 br label %do.end102 do.end102: ; preds = %if.end101 br label %do.body103 do.body103: ; preds = %do.end102 %83 = load float*, float** @work_mem_d, align 8 %84 = bitcast float* %83 to i8* %85 = load i32, i32* %stride, align 4 %86 = load i32, i32* %nThread, align 4 %add105 = add nsw i32 %86, 1 %mul106 = mul nsw i32 %85, %add105 %conv107 = sext i32 %mul106 to i64 %mul108 = mul i64 %conv107, 4 %call109 = call i32 @cudaMemset(i8* %84, i32 0, i64 %mul108) store i32 %call109, i32* %err104, align 4 %87 = load i32, i32* %err104, align 4 %cmp110 = icmp ne i32 0, %87 br i1 %cmp110, label %if.then111, label %if.end114 if.then111: ; preds = %do.body103 %88 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 %89 = load i32, i32* %err104, align 4 %call112 = call 
i8* @cudaGetErrorString(i32 %89) %call113 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %88, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.1, i64 0, i64 0), i32 215, i8* %call112) call void @exit(i32 1) #15 unreachable if.end114: ; preds = %do.body103 br label %do.end115 do.end115: ; preds = %if.end114 %90 = load i32, i32* %num, align 4 %add116 = add nsw i32 %90, 512 %sub = sub nsw i32 %add116, 1 %conv117 = sitofp i32 %sub to float %div = fdiv float %conv117, 5.120000e+02 %conv118 = fptosi float %div to i32 store i32 %conv118, i32* %num_blocks, align 4 %91 = load i32, i32* %num_blocks, align 4 %add119 = add nsw i32 %91, 65536 %sub120 = sub nsw i32 %add119, 1 %conv121 = sitofp i32 %sub120 to float %div122 = fdiv float %conv121, 6.553600e+04 %conv123 = fptosi float %div122 to i32 store i32 %conv123, i32* %num_blocks_y, align 4 %92 = load i32, i32* %num_blocks, align 4 %93 = load i32, i32* %num_blocks_y, align 4 %add124 = add nsw i32 %92, %93 %sub125 = sub nsw i32 %add124, 1 %conv126 = sitofp i32 %sub125 to float %94 = load i32, i32* %num_blocks_y, align 4 %conv127 = sitofp i32 %94 to float %div128 = fdiv float %conv126, %conv127 %conv129 = fptosi float %div128 to i32 store i32 %conv129, i32* %num_blocks_x, align 4 %95 = load i32, i32* %num_blocks_x, align 4 %96 = load i32, i32* %num_blocks_y, align 4 call void @_ZN4dim3C2Ejjj(%struct.dim3* %grid_size, i32 %95, i32 %96, i32 1) %97 = bitcast %struct.dim3* %agg.tmp to i8* %98 = bitcast %struct.dim3* %grid_size to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %97, i8* align 4 %98, i64 12, i1 false) call void @_ZN4dim3C2Ejjj(%struct.dim3* %agg.tmp130, i32 512, i32 1, i32 1) %99 = bitcast { i64, i32 }* %agg.tmp.coerce to i8* %100 = bitcast %struct.dim3* %agg.tmp to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %99, i8* align 4 %100, i64 12, i1 false) %101 = getelementptr inbounds { i64, i32 }, { i64, i32 }* 
%agg.tmp.coerce, i32 0, i32 0 %102 = load i64, i64* %101, align 4 %103 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 1 %104 = load i32, i32* %103, align 4 %105 = bitcast { i64, i32 }* %agg.tmp130.coerce to i8* %106 = bitcast %struct.dim3* %agg.tmp130 to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %105, i8* align 4 %106, i64 12, i1 false) %107 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp130.coerce, i32 0, i32 0 %108 = load i64, i64* %107, align 4 %109 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp130.coerce, i32 0, i32 1 %110 = load i32, i32* %109, align 4 %call131 = call i32 @__cudaPushCallConfiguration(i64 %102, i32 %104, i64 %108, i32 %110, i64 0, i8* null) %tobool132 = icmp ne i32 %call131, 0 br i1 %tobool132, label %kcall.end, label %kcall.configok kcall.configok: ; preds = %do.end115 %111 = load i32, i32* %num, align 4 %112 = load i32, i32* %dim, align 4 %113 = load i64, i64* %x.addr, align 8 %114 = load %struct.Point*, %struct.Point** @p, align 8 %115 = load i32, i32* %K, align 4 %116 = load i32, i32* %stride, align 4 %117 = load float*, float** @coord_d, align 8 %118 = load float*, float** @work_mem_d, align 8 %119 = load i32*, i32** @center_table_d, align 8 %120 = load i8*, i8** @switch_membership_d, align 8 call void @_Z19kernel_compute_costiilP5PointiiPfS1_PiPb(i32 %111, i32 %112, i64 %113, %struct.Point* %114, i32 %115, i32 %116, float* %117, float* %118, i32* %119, i8* %120) br label %kcall.end kcall.end: ; preds = %kcall.configok, %do.end115 %call133 = call i32 @cudaThreadSynchronize() %call134 = call i32 @cudaGetLastError() store i32 %call134, i32* %error, align 4 %121 = load i32, i32* %error, align 4 %cmp135 = icmp ne i32 %121, 0 br i1 %cmp135, label %if.then136, label %if.end139 if.then136: ; preds = %kcall.end %122 = load i32, i32* %error, align 4 %call137 = call i8* @cudaGetErrorString(i32 %122) %call138 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str.2, i64 0, i64 0), i8* %call137) call void @exit(i32 1) #15 unreachable if.end139: ; preds = %kcall.end br label %do.body140 do.body140: ; preds = %if.end139 %123 = load float*, float** @work_mem_h, align 8 %124 = bitcast float* %123 to i8* %125 = load float*, float** @work_mem_d, align 8 %126 = bitcast float* %125 to i8* %127 = load i32, i32* %stride, align 4 %128 = load i32, i32* %nThread, align 4 %add142 = add nsw i32 %128, 1 %mul143 = mul nsw i32 %127, %add142 %conv144 = sext i32 %mul143 to i64 %mul145 = mul i64 %conv144, 4 %call146 = call i32 @cudaMemcpy(i8* %124, i8* %126, i64 %mul145, i32 2) store i32 %call146, i32* %err141, align 4 %129 = load i32, i32* %err141, align 4 %cmp147 = icmp ne i32 0, %129 br i1 %cmp147, label %if.then148, label %if.end151 if.then148: ; preds = %do.body140 %130 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 %131 = load i32, i32* %err141, align 4 %call149 = call i8* @cudaGetErrorString(i32 %131) %call150 = call i32 (%struct._IO_FILE*, i8*, ...) 
@fprintf(%struct._IO_FILE* %130, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.1, i64 0, i64 0), i32 273, i8* %call149) call void @exit(i32 1) #15 unreachable if.end151: ; preds = %do.body140 br label %do.end152 do.end152: ; preds = %if.end151 br label %do.body153 do.body153: ; preds = %do.end152 %132 = load i8*, i8** %switch_membership.addr, align 8 %133 = load i8*, i8** @switch_membership_d, align 8 %134 = load i32, i32* %num, align 4 %conv155 = sext i32 %134 to i64 %mul156 = mul i64 %conv155, 1 %call157 = call i32 @cudaMemcpy(i8* %132, i8* %133, i64 %mul156, i32 2) store i32 %call157, i32* %err154, align 4 %135 = load i32, i32* %err154, align 4 %cmp158 = icmp ne i32 0, %135 br i1 %cmp158, label %if.then159, label %if.end162 if.then159: ; preds = %do.body153 %136 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 %137 = load i32, i32* %err154, align 4 %call160 = call i8* @cudaGetErrorString(i32 %137) %call161 = call i32 (%struct._IO_FILE*, i8*, ...) 
@fprintf(%struct._IO_FILE* %136, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.1, i64 0, i64 0), i32 275, i8* %call160) call void @exit(i32 1) #15 unreachable if.end162: ; preds = %do.body153 br label %do.end163 do.end163: ; preds = %if.end162 store i32 0, i32* %number_of_centers_to_close, align 4 %138 = load float, float* %z.addr, align 4 store float %138, float* %gl_cost_of_opening_x, align 4 %139 = load float*, float** @work_mem_h, align 8 %140 = load i32, i32* %stride, align 4 %141 = load i32, i32* %nThread, align 4 %mul164 = mul nsw i32 %140, %141 %idxprom165 = sext i32 %mul164 to i64 %arrayidx166 = getelementptr inbounds float, float* %139, i64 %idxprom165 store float* %arrayidx166, float** %gl_lower, align 8 store i32 0, i32* %i167, align 4 br label %for.cond168 for.cond168: ; preds = %for.inc208, %do.end163 %142 = load i32, i32* %i167, align 4 %143 = load i32, i32* %num, align 4 %cmp169 = icmp slt i32 %142, %143 br i1 %cmp169, label %for.body170, label %for.end210 for.body170: ; preds = %for.cond168 %144 = load i8*, i8** %is_center.addr, align 8 %145 = load i32, i32* %i167, align 4 %idxprom171 = sext i32 %145 to i64 %arrayidx172 = getelementptr inbounds i8, i8* %144, i64 %idxprom171 %146 = load i8, i8* %arrayidx172, align 1 %tobool173 = trunc i8 %146 to i1 br i1 %tobool173, label %if.then174, label %if.end202 if.then174: ; preds = %for.body170 %147 = load float, float* %z.addr, align 4 store float %147, float* %low, align 4 store i32 0, i32* %j175, align 4 br label %for.cond176 for.cond176: ; preds = %for.inc186, %if.then174 %148 = load i32, i32* %j175, align 4 %149 = load i32, i32* %num, align 4 %cmp177 = icmp slt i32 %148, %149 br i1 %cmp177, label %for.body178, label %for.end188 for.body178: ; preds = %for.cond176 %150 = load float*, float** @work_mem_h, align 8 %151 = load i32, i32* %j175, align 4 %152 = load i32, i32* %stride, align 4 %mul179 = mul nsw i32 %151, %152 %153 
= load i32*, i32** %center_table.addr, align 8 %154 = load i32, i32* %i167, align 4 %idxprom180 = sext i32 %154 to i64 %arrayidx181 = getelementptr inbounds i32, i32* %153, i64 %idxprom180 %155 = load i32, i32* %arrayidx181, align 4 %add182 = add nsw i32 %mul179, %155 %idxprom183 = sext i32 %add182 to i64 %arrayidx184 = getelementptr inbounds float, float* %150, i64 %idxprom183 %156 = load float, float* %arrayidx184, align 4 %157 = load float, float* %low, align 4 %add185 = fadd contract float %157, %156 store float %add185, float* %low, align 4 br label %for.inc186 for.inc186: ; preds = %for.body178 %158 = load i32, i32* %j175, align 4 %inc187 = add nsw i32 %158, 1 store i32 %inc187, i32* %j175, align 4 br label %for.cond176 for.end188: ; preds = %for.cond176 %159 = load float, float* %low, align 4 %160 = load float*, float** %gl_lower, align 8 %161 = load i32*, i32** %center_table.addr, align 8 %162 = load i32, i32* %i167, align 4 %idxprom189 = sext i32 %162 to i64 %arrayidx190 = getelementptr inbounds i32, i32* %161, i64 %idxprom189 %163 = load i32, i32* %arrayidx190, align 4 %idxprom191 = sext i32 %163 to i64 %arrayidx192 = getelementptr inbounds float, float* %160, i64 %idxprom191 store float %159, float* %arrayidx192, align 4 %164 = load float, float* %low, align 4 %cmp193 = fcmp ogt float %164, 0.000000e+00 br i1 %cmp193, label %if.then194, label %if.end201 if.then194: ; preds = %for.end188 %165 = load i32, i32* %number_of_centers_to_close, align 4 %inc195 = add nsw i32 %165, 1 store i32 %inc195, i32* %number_of_centers_to_close, align 4 %166 = load float, float* %low, align 4 %167 = load float*, float** @work_mem_h, align 8 %168 = load i32, i32* %i167, align 4 %169 = load i32, i32* %stride, align 4 %mul196 = mul nsw i32 %168, %169 %170 = load i32, i32* %K, align 4 %add197 = add nsw i32 %mul196, %170 %idxprom198 = sext i32 %add197 to i64 %arrayidx199 = getelementptr inbounds float, float* %167, i64 %idxprom198 %171 = load float, float* %arrayidx199, align 4 
%sub200 = fsub contract float %171, %166 store float %sub200, float* %arrayidx199, align 4 br label %if.end201 if.end201: ; preds = %if.then194, %for.end188 br label %if.end202 if.end202: ; preds = %if.end201, %for.body170 %172 = load float*, float** @work_mem_h, align 8 %173 = load i32, i32* %i167, align 4 %174 = load i32, i32* %stride, align 4 %mul203 = mul nsw i32 %173, %174 %175 = load i32, i32* %K, align 4 %add204 = add nsw i32 %mul203, %175 %idxprom205 = sext i32 %add204 to i64 %arrayidx206 = getelementptr inbounds float, float* %172, i64 %idxprom205 %176 = load float, float* %arrayidx206, align 4 %177 = load float, float* %gl_cost_of_opening_x, align 4 %add207 = fadd contract float %177, %176 store float %add207, float* %gl_cost_of_opening_x, align 4 br label %for.inc208 for.inc208: ; preds = %if.end202 %178 = load i32, i32* %i167, align 4 %inc209 = add nsw i32 %178, 1 store i32 %inc209, i32* %i167, align 4 br label %for.cond168 for.end210: ; preds = %for.cond168 %179 = load float, float* %gl_cost_of_opening_x, align 4 %cmp211 = fcmp olt float %179, 0.000000e+00 br i1 %cmp211, label %if.then212, label %if.else if.then212: ; preds = %for.end210 store i32 0, i32* %i213, align 4 br label %for.cond214 for.cond214: ; preds = %for.inc251, %if.then212 %180 = load i32, i32* %i213, align 4 %181 = load i32, i32* %num, align 4 %cmp215 = icmp slt i32 %180, %181 br i1 %cmp215, label %for.body216, label %for.end253 for.body216: ; preds = %for.cond214 %182 = load float*, float** %gl_lower, align 8 %183 = load i32*, i32** %center_table.addr, align 8 %184 = load %struct.Points*, %struct.Points** %points.addr, align 8 %p217 = getelementptr inbounds %struct.Points, %struct.Points* %184, i32 0, i32 2 %185 = load %struct.Point*, %struct.Point** %p217, align 8 %186 = load i32, i32* %i213, align 4 %idxprom218 = sext i32 %186 to i64 %arrayidx219 = getelementptr inbounds %struct.Point, %struct.Point* %185, i64 %idxprom218 %assign = getelementptr inbounds %struct.Point, 
%struct.Point* %arrayidx219, i32 0, i32 2 %187 = load i64, i64* %assign, align 8 %arrayidx220 = getelementptr inbounds i32, i32* %183, i64 %187 %188 = load i32, i32* %arrayidx220, align 4 %idxprom221 = sext i32 %188 to i64 %arrayidx222 = getelementptr inbounds float, float* %182, i64 %idxprom221 %189 = load float, float* %arrayidx222, align 4 %cmp223 = fcmp ogt float %189, 0.000000e+00 %frombool224 = zext i1 %cmp223 to i8 store i8 %frombool224, i8* %close_center, align 1 %190 = load i8*, i8** %switch_membership.addr, align 8 %191 = load i32, i32* %i213, align 4 %idxprom225 = sext i32 %191 to i64 %arrayidx226 = getelementptr inbounds i8, i8* %190, i64 %idxprom225 %192 = load i8, i8* %arrayidx226, align 1 %tobool227 = trunc i8 %192 to i1 br i1 %tobool227, label %if.then230, label %lor.lhs.false228 lor.lhs.false228: ; preds = %for.body216 %193 = load i8, i8* %close_center, align 1 %tobool229 = trunc i8 %193 to i1 br i1 %tobool229, label %if.then230, label %if.end250 if.then230: ; preds = %lor.lhs.false228, %for.body216 %194 = load %struct.Points*, %struct.Points** %points.addr, align 8 %p232 = getelementptr inbounds %struct.Points, %struct.Points* %194, i32 0, i32 2 %195 = load %struct.Point*, %struct.Point** %p232, align 8 %196 = load i32, i32* %i213, align 4 %idxprom233 = sext i32 %196 to i64 %arrayidx234 = getelementptr inbounds %struct.Point, %struct.Point* %195, i64 %idxprom233 %197 = bitcast %struct.Point* %agg.tmp231 to i8* %198 = bitcast %struct.Point* %arrayidx234 to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %197, i8* align 8 %198, i64 32, i1 false) %199 = load %struct.Points*, %struct.Points** %points.addr, align 8 %p236 = getelementptr inbounds %struct.Points, %struct.Points* %199, i32 0, i32 2 %200 = load %struct.Point*, %struct.Point** %p236, align 8 %201 = load i64, i64* %x.addr, align 8 %arrayidx237 = getelementptr inbounds %struct.Point, %struct.Point* %200, i64 %201 %202 = bitcast %struct.Point* %agg.tmp235 to i8* %203 = bitcast 
%struct.Point* %arrayidx237 to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %202, i8* align 8 %203, i64 32, i1 false) %204 = load i32, i32* %dim, align 4 %call238 = call float @_Z4dist5PointS_i(%struct.Point* byval(%struct.Point) align 8 %agg.tmp231, %struct.Point* byval(%struct.Point) align 8 %agg.tmp235, i32 %204) %205 = load %struct.Points*, %struct.Points** %points.addr, align 8 %p239 = getelementptr inbounds %struct.Points, %struct.Points* %205, i32 0, i32 2 %206 = load %struct.Point*, %struct.Point** %p239, align 8 %207 = load i32, i32* %i213, align 4 %idxprom240 = sext i32 %207 to i64 %arrayidx241 = getelementptr inbounds %struct.Point, %struct.Point* %206, i64 %idxprom240 %weight = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx241, i32 0, i32 0 %208 = load float, float* %weight, align 8 %mul242 = fmul contract float %call238, %208 %209 = load %struct.Points*, %struct.Points** %points.addr, align 8 %p243 = getelementptr inbounds %struct.Points, %struct.Points* %209, i32 0, i32 2 %210 = load %struct.Point*, %struct.Point** %p243, align 8 %211 = load i32, i32* %i213, align 4 %idxprom244 = sext i32 %211 to i64 %arrayidx245 = getelementptr inbounds %struct.Point, %struct.Point* %210, i64 %idxprom244 %cost = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx245, i32 0, i32 3 store float %mul242, float* %cost, align 8 %212 = load i64, i64* %x.addr, align 8 %213 = load %struct.Points*, %struct.Points** %points.addr, align 8 %p246 = getelementptr inbounds %struct.Points, %struct.Points* %213, i32 0, i32 2 %214 = load %struct.Point*, %struct.Point** %p246, align 8 %215 = load i32, i32* %i213, align 4 %idxprom247 = sext i32 %215 to i64 %arrayidx248 = getelementptr inbounds %struct.Point, %struct.Point* %214, i64 %idxprom247 %assign249 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx248, i32 0, i32 2 store i64 %212, i64* %assign249, align 8 br label %if.end250 if.end250: ; preds = %if.then230, %lor.lhs.false228 br 
label %for.inc251 for.inc251: ; preds = %if.end250 %216 = load i32, i32* %i213, align 4 %inc252 = add nsw i32 %216, 1 store i32 %inc252, i32* %i213, align 4 br label %for.cond214 for.end253: ; preds = %for.cond214 store i32 0, i32* %i254, align 4 br label %for.cond255 for.cond255: ; preds = %for.inc270, %for.end253 %217 = load i32, i32* %i254, align 4 %218 = load i32, i32* %num, align 4 %cmp256 = icmp slt i32 %217, %218 br i1 %cmp256, label %for.body257, label %for.end272 for.body257: ; preds = %for.cond255 %219 = load i8*, i8** %is_center.addr, align 8 %220 = load i32, i32* %i254, align 4 %idxprom258 = sext i32 %220 to i64 %arrayidx259 = getelementptr inbounds i8, i8* %219, i64 %idxprom258 %221 = load i8, i8* %arrayidx259, align 1 %tobool260 = trunc i8 %221 to i1 br i1 %tobool260, label %land.lhs.true, label %if.end269 land.lhs.true: ; preds = %for.body257 %222 = load float*, float** %gl_lower, align 8 %223 = load i32*, i32** %center_table.addr, align 8 %224 = load i32, i32* %i254, align 4 %idxprom261 = sext i32 %224 to i64 %arrayidx262 = getelementptr inbounds i32, i32* %223, i64 %idxprom261 %225 = load i32, i32* %arrayidx262, align 4 %idxprom263 = sext i32 %225 to i64 %arrayidx264 = getelementptr inbounds float, float* %222, i64 %idxprom263 %226 = load float, float* %arrayidx264, align 4 %cmp265 = fcmp ogt float %226, 0.000000e+00 br i1 %cmp265, label %if.then266, label %if.end269 if.then266: ; preds = %land.lhs.true %227 = load i8*, i8** %is_center.addr, align 8 %228 = load i32, i32* %i254, align 4 %idxprom267 = sext i32 %228 to i64 %arrayidx268 = getelementptr inbounds i8, i8* %227, i64 %idxprom267 store i8 0, i8* %arrayidx268, align 1 br label %if.end269 if.end269: ; preds = %if.then266, %land.lhs.true, %for.body257 br label %for.inc270 for.inc270: ; preds = %if.end269 %229 = load i32, i32* %i254, align 4 %inc271 = add nsw i32 %229, 1 store i32 %inc271, i32* %i254, align 4 br label %for.cond255 for.end272: ; preds = %for.cond255 %230 = load i64, i64* %x.addr, 
align 8 %cmp273 = icmp sge i64 %230, 0 br i1 %cmp273, label %land.lhs.true274, label %if.end279 land.lhs.true274: ; preds = %for.end272 %231 = load i64, i64* %x.addr, align 8 %232 = load i32, i32* %num, align 4 %conv275 = sext i32 %232 to i64 %cmp276 = icmp slt i64 %231, %conv275 br i1 %cmp276, label %if.then277, label %if.end279 if.then277: ; preds = %land.lhs.true274 %233 = load i8*, i8** %is_center.addr, align 8 %234 = load i64, i64* %x.addr, align 8 %arrayidx278 = getelementptr inbounds i8, i8* %233, i64 %234 store i8 1, i8* %arrayidx278, align 1 br label %if.end279 if.end279: ; preds = %if.then277, %land.lhs.true274, %for.end272 %235 = load i64*, i64** %numcenters.addr, align 8 %236 = load i64, i64* %235, align 8 %add280 = add nsw i64 %236, 1 %237 = load i32, i32* %number_of_centers_to_close, align 4 %conv281 = sext i32 %237 to i64 %sub282 = sub nsw i64 %add280, %conv281 %238 = load i64*, i64** %numcenters.addr, align 8 store i64 %sub282, i64* %238, align 8 br label %if.end283 if.else: ; preds = %for.end210 store float 0.000000e+00, float* %gl_cost_of_opening_x, align 4 br label %if.end283 if.end283: ; preds = %if.else, %if.end279 %239 = load float*, float** @work_mem_h, align 8 %240 = bitcast float* %239 to i8* call void @free(i8* %240) #2 br label %do.body284 do.body284: ; preds = %if.end283 %241 = load float*, float** @work_mem_d, align 8 %242 = bitcast float* %241 to i8* %call286 = call i32 @cudaFree(i8* %242) store i32 %call286, i32* %err285, align 4 %243 = load i32, i32* %err285, align 4 %cmp287 = icmp ne i32 0, %243 br i1 %cmp287, label %if.then288, label %if.end291 if.then288: ; preds = %do.body284 %244 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 %245 = load i32, i32* %err285, align 4 %call289 = call i8* @cudaGetErrorString(i32 %245) %call290 = call i32 (%struct._IO_FILE*, i8*, ...) 
@fprintf(%struct._IO_FILE* %244, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.1, i64 0, i64 0), i32 353, i8* %call289)
  call void @exit(i32 1) #15
  unreachable

if.end291:                                        ; preds = %do.body284
  br label %do.end292

do.end292:                                        ; preds = %if.end291
  %246 = load i32, i32* @_ZL4iter, align 4
  %inc293 = add nsw i32 %246, 1
  store i32 %inc293, i32* @_ZL4iter, align 4
  %247 = load float, float* %gl_cost_of_opening_x, align 4
  %fneg = fneg float %247
  ret float %fneg
}

declare dso_local i32 @cudaMemcpy(i8*, i8*, i64, i32) #1

declare dso_local i32 @cudaMemset(i8*, i32, i64) #1

; dim3::dim3(unsigned, unsigned, unsigned) -- base-object constructor.
;
; Writes the three i32 arguments into fields 0, 1 and 2 of the %struct.dim3
; pointed to by %this, in that order, and returns nothing.
;
;   %this  destination object (must be writable for 12 bytes)
;   %vx    value stored into field 0
;   %vy    value stored into field 1
;   %vz    value stored into field 2
;
; The arguments are stored directly; no stack temporaries are needed because
; each one is used exactly once.
; Function Attrs: noinline nounwind optnone uwtable
define linkonce_odr dso_local void @_ZN4dim3C2Ejjj(%struct.dim3* %this, i32 %vx, i32 %vy, i32 %vz) unnamed_addr #6 comdat align 2 {
entry:
  ; field 0 <- vx
  %x.field = getelementptr inbounds %struct.dim3, %struct.dim3* %this, i32 0, i32 0
  store i32 %vx, i32* %x.field, align 4
  ; field 1 <- vy
  %y.field = getelementptr inbounds %struct.dim3, %struct.dim3* %this, i32 0, i32 1
  store i32 %vy, i32* %y.field, align 4
  ; field 2 <- vz
  %z.field = getelementptr inbounds %struct.dim3, %struct.dim3* %this, i32 0, i32 2
  store i32 %vz, i32* %z.field, align 4
  ret void
}

declare dso_local i32 @__cudaPushCallConfiguration(i64, i32, i64, i32, i64, i8*) #1

declare dso_local i32 @cudaThreadSynchronize() #1

declare dso_local i32 @cudaGetLastError() #1

declare dso_local i32 @printf(i8*, ...)
#1

; dist(Point p1, Point p2, int dim)  (mangled name _Z4dist5PointS_i)
;
; Both points are passed byval. For i in [0, dim) the loop reads element i of
; each point's coordinate array (field 1 of %struct.Point), forms the
; difference p1[i] - p2[i] twice, multiplies the two differences and adds the
; product to a running float total that starts at 0.0.
; Returns that total, i.e. the sum of squared coordinate differences; no
; square root is taken. A non-positive %dim returns 0.0.
; Function Attrs: noinline nounwind optnone uwtable
define dso_local float @_Z4dist5PointS_i(%struct.Point* byval(%struct.Point) align 8 %p1, %struct.Point* byval(%struct.Point) align 8 %p2, i32 %dim) #6 {
entry:
  %dim.addr = alloca i32, align 4
  %i = alloca i32, align 4
  %result = alloca float, align 4
  store i32 %dim, i32* %dim.addr, align 4
  store float 0.000000e+00, float* %result, align 4
  store i32 0, i32* %i, align 4
  br label %for.cond

for.cond:                                         ; preds = %for.inc, %entry
  %0 = load i32, i32* %i, align 4
  %1 = load i32, i32* %dim.addr, align 4
  %cmp = icmp slt i32 %0, %1
  br i1 %cmp, label %for.body, label %for.end

for.body:                                         ; preds = %for.cond
  ; First copy of the difference p1.coord[i] - p2.coord[i].
  %coord = getelementptr inbounds %struct.Point, %struct.Point* %p1, i32 0, i32 1
  %2 = load float*, float** %coord, align 8
  %3 = load i32, i32* %i, align 4
  %idxprom = sext i32 %3 to i64
  %arrayidx = getelementptr inbounds float, float* %2, i64 %idxprom
  %4 = load float, float* %arrayidx, align 4
  %coord1 = getelementptr inbounds %struct.Point, %struct.Point* %p2, i32 0, i32 1
  %5 = load float*, float** %coord1, align 8
  %6 = load i32, i32* %i, align 4
  %idxprom2 = sext i32 %6 to i64
  %arrayidx3 = getelementptr inbounds float, float* %5, i64 %idxprom2
  %7 = load float, float* %arrayidx3, align 4
  %sub = fsub contract float %4, %7
  ; Second, independently reloaded copy of the same difference.
  %coord4 = getelementptr inbounds %struct.Point, %struct.Point* %p1, i32 0, i32 1
  %8 = load float*, float** %coord4, align 8
  %9 = load i32, i32* %i, align 4
  %idxprom5 = sext i32 %9 to i64
  %arrayidx6 = getelementptr inbounds float, float* %8, i64 %idxprom5
  %10 = load float, float* %arrayidx6, align 4
  %coord7 = getelementptr inbounds %struct.Point, %struct.Point* %p2, i32 0, i32 1
  %11 = load float*, float** %coord7, align 8
  %12 = load i32, i32* %i, align 4
  %idxprom8 = sext i32 %12 to i64
  %arrayidx9 = getelementptr inbounds float, float* %11, i64 %idxprom8
  %13 = load float, float* %arrayidx9, align 4
  %sub10 = fsub contract float %10, %13
  ; result += diff * diff
  %mul = fmul contract float %sub, %sub10
  %14 = load float, float* %result, align 4
  %add = fadd contract float %14, %mul
  store float %add, float* %result, align 4
  br label %for.inc

for.inc:                                          ; preds = %for.body
  %15 = load i32, i32* %i, align 4
  %inc = add nsw i32 %15, 1
  store i32 %inc, i32* %i, align 4
  br label %for.cond

for.end:                                          ; preds = %for.cond
  %16 = load float, float* %result, align 4
  ret float %16
}

; Diagnostic format used by inttofile when the output file cannot be opened.
@.str.inttofile.open = private unnamed_addr constant [41 x i8] c"inttofile: cannot open '%s' for writing\0A\00", align 1

; inttofile(int data, char *filename)  (mangled name _Z9inttofileiPc)
;
; Opens %filename with mode @.str.3 ("w"), writes %data using the format
; @.str.4 ("%d "), then closes the stream.
;
; The fopen result is tested against null before use. When the open fails,
; nothing is written or closed; a one-line diagnostic naming the file is
; printed to stderr and the function returns normally. (Previously a null
; stream was handed straight to fprintf and fclose.)
; The return values of fprintf and fclose are not inspected.
; Function Attrs: noinline optnone uwtable
define dso_local void @_Z9inttofileiPc(i32 %data, i8* %filename) #3 {
entry:
  %data.addr = alloca i32, align 4
  %filename.addr = alloca i8*, align 8
  %fp = alloca %struct._IO_FILE*, align 8
  store i32 %data, i32* %data.addr, align 4
  store i8* %filename, i8** %filename.addr, align 8
  %0 = load i8*, i8** %filename.addr, align 8
  %call = call %struct._IO_FILE* @fopen(i8* %0, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.3, i64 0, i64 0))
  store %struct._IO_FILE* %call, %struct._IO_FILE** %fp, align 8
  ; Guard: only touch the stream if fopen produced one.
  %1 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8
  %tobool = icmp ne %struct._IO_FILE* %1, null
  br i1 %tobool, label %if.then, label %if.else

if.then:                                          ; preds = %entry
  %2 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8
  %3 = load i32, i32* %data.addr, align 4
  %call1 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %2, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.4, i64 0, i64 0), i32 %3)
  %4 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8
  %call2 = call i32 @fclose(%struct._IO_FILE* %4)
  br label %if.end

if.else:                                          ; preds = %entry
  ; Open failed: report it on stderr instead of dereferencing a null stream.
  %5 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
  %6 = load i8*, i8** %filename.addr, align 8
  %call3 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %5, i8* getelementptr inbounds ([41 x i8], [41 x i8]* @.str.inttofile.open, i64 0, i64 0), i8* %6)
  br label %if.end

if.end:                                           ; preds = %if.else, %if.then
  ret void
}

declare dso_local %struct._IO_FILE* @fopen(i8*, i8*) #1

declare dso_local i32 @fclose(%struct._IO_FILE*) #1

; gettime()  (mangled name _Z7gettimev)
;
; Calls gettimeofday into a local %struct.timeval (timezone argument null) and
; returns tv_sec + tv_usec * 1.0e-6 as a double (0x3EB0C6F7A0B5ED8D is the
; double constant 1.0e-6). The return value of gettimeofday is not inspected.
; Function Attrs: noinline nounwind optnone uwtable
define dso_local double @_Z7gettimev() #6 {
entry:
  %t = alloca %struct.timeval, align 8
  %call = call i32 @gettimeofday(%struct.timeval* %t, %struct.timezone* null) #2
  %tv_sec = getelementptr inbounds %struct.timeval, %struct.timeval* %t, i32 0, i32 0
  %0 = load i64, i64* %tv_sec, align 8
  %conv = sitofp i64 %0 to double
  %tv_usec = getelementptr inbounds %struct.timeval, %struct.timeval* %t, i32 0, i32 1
  %1 = load i64, i64* %tv_usec, align 8
  %conv1 = sitofp i64 %1 to double
  %mul = fmul contract double %conv1, 0x3EB0C6F7A0B5ED8D
  %add = fadd contract double %conv, %mul
  ret double %add
}

; Function Attrs: nounwind
declare dso_local i32 @gettimeofday(%struct.timeval*, %struct.timezone*) #7

; isIdentical(float *i, float *j, int D)  (mangled name _Z11isIdenticalPfS_i)
;
; Compares the first %D floats of the two arrays element by element and stops
; at the first mismatch. Returns 1 when no mismatch was found (including when
; %D <= 0, where no element is read), otherwise 0.
; The comparison is `fcmp une`, which is true when the operands differ or
; either one is NaN, so a NaN element always counts as a mismatch.
; Function Attrs: noinline nounwind optnone uwtable
define dso_local i32 @_Z11isIdenticalPfS_i(float* %i, float* %j, i32 %D) #6 {
entry:
  %retval = alloca i32, align 4
  %i.addr = alloca float*, align 8
  %j.addr = alloca float*, align 8
  %D.addr = alloca i32, align 4
  %a = alloca i32, align 4
  %equal = alloca i32, align 4
  store float* %i, float** %i.addr, align 8
  store float* %j, float** %j.addr, align 8
  store i32 %D, i32* %D.addr, align 4
  ; a = 0 is the running index; equal = 1 until a mismatch is seen.
  store i32 0, i32* %a, align 4
  store i32 1, i32* %equal, align 4
  br label %while.cond

while.cond:                                       ; preds = %if.end, %entry
  ; Loop condition: equal != 0 && a < D (short-circuit).
  %0 = load i32, i32* %equal, align 4
  %tobool = icmp ne i32 %0, 0
  br i1 %tobool, label %land.rhs, label %land.end

land.rhs:                                         ; preds = %while.cond
  %1 = load i32, i32* %a, align 4
  %2 = load i32, i32* %D.addr, align 4
  %cmp = icmp slt i32 %1, %2
  br label %land.end

land.end:                                         ; preds = %land.rhs, %while.cond
  %3 = phi i1 [ false, %while.cond ], [ %cmp, %land.rhs ]
  br i1 %3, label %while.body, label %while.end

while.body:                                       ; preds = %land.end
  %4 = load float*, float** %i.addr, align 8
  %5 = load i32, i32* %a, align 4
  %idxprom = sext i32 %5 to i64
  %arrayidx = getelementptr inbounds float, float* %4, i64 %idxprom
  %6 = load float, float* %arrayidx, align 4
  %7 = load float*, float** %j.addr, align 8
  %8 = load i32, i32* %a, align 4
  %idxprom1 = sext i32 %8 to i64
  %arrayidx2 = getelementptr inbounds float, float* %7, i64 %idxprom1
  %9 = load float, float* %arrayidx2, align 4
  %cmp3 = fcmp une float %6, %9
  br i1 %cmp3, label %if.then, label %if.else

if.then:                                          ; preds = %while.body
  ; Mismatch: clear the flag, which ends the loop at the next condition check.
  store i32 0, i32* %equal, align 4
  br label %if.end

if.else:                                          ; preds = %while.body
  %10 = load i32, i32* %a, align 4
  %inc = add nsw i32 %10, 1
  store i32 %inc, i32* %a, align 4
  br label %if.end

if.end:                                           ; preds = %if.else, %if.then
  br label %while.cond

while.end:                                        ; preds = %land.end
  %11 = load i32, i32* %equal, align 4
  %tobool4 = icmp ne i32 %11, 0
  br i1 %tobool4, label %if.then5, label %if.else6

if.then5:                                         ; preds = %while.end
  store i32 1, i32* %retval, align 4
  br label %return

if.else6:                                         ; preds = %while.end
  store i32 0, i32* %retval, align 4
  br label %return

return:                                           ; preds = %if.else6, %if.then5
  %12 = load i32, i32* %retval, align 4
  ret i32 %12
}

; shuffle(Points *points)  (mangled name _Z7shuffleP6Points)
;
; Randomly permutes, in place, the Point array stored in field 2 of *points.
; For every i in [0, num - 1), where num is field 0 of *points, it picks
;   j = i + lrand48() % (num - i)
; and swaps the 32-byte records at i and j through a stack temporary.
; The wall-clock time spent, measured with gettime() before and after the
; loop, is added to the global @time_shuffle.
; Function Attrs: noinline nounwind optnone uwtable
define dso_local void @_Z7shuffleP6Points(%struct.Points* %points) #6 {
entry:
  %points.addr = alloca %struct.Points*, align 8
  %t1 = alloca double, align 8
  %i = alloca i64, align 8
  %j = alloca i64, align 8
  %temp = alloca %struct.Point, align 8
  %t2 = alloca double, align 8
  store %struct.Points* %points, %struct.Points** %points.addr, align 8
  %call = call double @_Z7gettimev()
  store double %call, double* %t1, align 8
  store i64 0, i64* %i, align 8
  br label %for.cond

for.cond:                                         ; preds = %for.inc, %entry
  ; Continue while i < num - 1.
  %0 = load i64, i64* %i, align 8
  %1 = load %struct.Points*, %struct.Points** %points.addr, align 8
  %num = getelementptr inbounds %struct.Points, %struct.Points* %1, i32 0, i32 0
  %2 = load i64, i64* %num, align 8
  %sub = sub nsw i64 %2, 1
  %cmp = icmp slt i64 %0, %sub
  br i1 %cmp, label %for.body, label %for.end

for.body:                                         ; preds = %for.cond
  ; j = lrand48() % (num - i) + i
  %call1 = call i64 @lrand48() #2
  %3 = load %struct.Points*, %struct.Points** %points.addr, align 8
  %num2 = getelementptr inbounds %struct.Points, %struct.Points* %3, i32 0, i32 0
  %4 = load i64, i64* %num2, align 8
  %5 = load i64, i64* %i, align 8
  %sub3 = sub nsw i64 %4, %5
  %rem = srem i64 %call1, %sub3
  %6 = load i64, i64* %i, align 8
  %add = add nsw i64 %rem, %6
  store i64 %add, i64* %j, align 8
  ; temp = p[i]
  %7 = load %struct.Points*, %struct.Points** %points.addr, align 8
  %p = getelementptr inbounds %struct.Points, %struct.Points* %7, i32 0, i32 2
  %8 = load %struct.Point*, %struct.Point** %p, align 8
  %9 = load i64, i64* %i, align 8
  %arrayidx = getelementptr inbounds %struct.Point, %struct.Point* %8, i64 %9
  %10 = bitcast %struct.Point* %temp to i8*
  %11 = bitcast %struct.Point* %arrayidx to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %10, i8* align 8 %11, i64 32, i1 false)
  ; p[i] = p[j]
  %12 = load %struct.Points*, %struct.Points** %points.addr, align 8
  %p4 = getelementptr inbounds %struct.Points, %struct.Points* %12, i32 0, i32 2
  %13 = load %struct.Point*, %struct.Point** %p4, align 8
  %14 = load i64, i64* %j, align 8
  %arrayidx5 = getelementptr inbounds %struct.Point, %struct.Point* %13, i64 %14
  %15 = load %struct.Points*, %struct.Points** %points.addr, align 8
  %p6 = getelementptr inbounds %struct.Points, %struct.Points* %15, i32 0, i32 2
  %16 = load %struct.Point*, %struct.Point** %p6, align 8
  %17 = load i64, i64* %i, align 8
  %arrayidx7 = getelementptr inbounds %struct.Point, %struct.Point* %16, i64 %17
  %18 = bitcast %struct.Point* %arrayidx7 to i8*
  %19 = bitcast %struct.Point* %arrayidx5 to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %18, i8* align 8 %19, i64 32, i1 false)
  ; p[j] = temp
  %20 = load %struct.Points*, %struct.Points** %points.addr, align 8
  %p8 = getelementptr inbounds %struct.Points, %struct.Points* %20, i32 0, i32 2
  %21 = load %struct.Point*, %struct.Point** %p8, align 8
  %22 = load i64, i64* %j, align 8
  %arrayidx9 = getelementptr inbounds %struct.Point, %struct.Point* %21, i64 %22
  %23 = bitcast %struct.Point* %arrayidx9 to i8*
  %24 = bitcast %struct.Point* %temp to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %23, i8* align 8 %24, i64 32, i1 false)
  br label %for.inc

for.inc:                                          ; preds = %for.body
  %25 = load i64, i64* %i, align 8
  %inc = add nsw i64 %25, 1
  store i64 %inc, i64* %i, align 8
  br label %for.cond

for.end:                                          ; preds = %for.cond
  ; time_shuffle += (t2 - t1)
  %call10 = call double @_Z7gettimev()
  store double %call10, double* %t2, align 8
  %26 = load double, double* %t2, align 8
  %27 = load double, double* %t1, align 8
  %sub11 = fsub contract double %26, %27
  %28 = load double, double* @time_shuffle, align 8
  %add12 = fadd contract double %28, %sub11
  store double %add12, double* @time_shuffle, align 8
  ret void
}

; Function Attrs: nounwind
declare dso_local i64 @lrand48() #7

; intshuffle(int *intarray, int length)  (mangled name _Z10intshufflePii)
;
; Randomly permutes, in place, the first %length i32 elements of %intarray.
; For every i in [0, length) it picks
;   j = i + lrand48() % (length - i)
; and swaps elements i and j through a stack temporary.
; The wall-clock time spent, measured with gettime() before and after the
; loop, is added to the global @time_shuffle.
; Function Attrs: noinline nounwind optnone uwtable
define dso_local void @_Z10intshufflePii(i32* %intarray, i32 %length) #6 {
entry:
  %intarray.addr = alloca i32*, align 8
  %length.addr = alloca i32, align 4
  %t1 = alloca double, align 8
  %i = alloca i64, align 8
  %j = alloca i64, align 8
  %temp = alloca i32, align 4
  %t2 = alloca double, align 8
  store i32* %intarray, i32** %intarray.addr, align 8
  store i32 %length, i32* %length.addr, align 4
  %call = call double @_Z7gettimev()
  store double %call, double* %t1, align 8
  store i64 0, i64* %i, align 8
  br label %for.cond

for.cond:                                         ; preds = %for.inc, %entry
  ; Continue while i < length (length sign-extended to i64).
  %0 = load i64, i64* %i, align 8
  %1 = load i32, i32* %length.addr, align 4
  %conv = sext i32 %1 to i64
  %cmp = icmp slt i64 %0, %conv
  br i1 %cmp, label %for.body, label %for.end

for.body:                                         ; preds = %for.cond
  ; j = lrand48() % (length - i) + i
  %call1 = call i64 @lrand48() #2
  %2 = load i32, i32* %length.addr, align 4
  %conv2 = sext i32 %2 to i64
  %3 = load i64, i64* %i, align 8
  %sub = sub nsw i64 %conv2, %3
  %rem = srem i64 %call1, %sub
  %4 = load i64, i64* %i, align 8
  %add = add nsw i64 %rem, %4
  store i64 %add, i64* %j, align 8
  ; temp = intarray[i]
  %5 = load i32*, i32** %intarray.addr, align 8
  %6 = load i64, i64* %i, align 8
  %arrayidx = getelementptr inbounds i32, i32* %5, i64 %6
  %7 = load i32, i32* %arrayidx, align 4
  store i32 %7, i32* %temp, align 4
  ; intarray[i] = intarray[j]
  %8 = load i32*, i32** %intarray.addr, align 8
  %9 = load i64, i64* %j, align 8
  %arrayidx3 = getelementptr inbounds i32, i32* %8, i64 %9
  %10 = load i32, i32* %arrayidx3, align 4
  %11 = load i32*, i32** %intarray.addr, align 8
  %12 = load i64, i64* %i, align 8
  %arrayidx4 = getelementptr inbounds i32, i32* %11, i64 %12
  store i32 %10, i32* %arrayidx4, align 4
  ; intarray[j] = temp
  %13 = load i32, i32* %temp, align 4
  %14 = load i32*, i32** %intarray.addr, align 8
  %15 = load i64, i64* %j, align 8
  %arrayidx5 = getelementptr inbounds i32, i32* %14, i64 %15
  store i32 %13, i32* %arrayidx5, align 4
  br label %for.inc

for.inc:                                          ; preds = %for.body
  %16 = load i64, i64* %i, align 8
  %inc = add nsw i64 %16, 1
  store i64 %inc, i64* %i, align 8
  br label %for.cond

for.end:                                          ; preds = %for.cond
  ; time_shuffle += (t2 - t1)
  %call6 = call double @_Z7gettimev()
  store double %call6, double* %t2, align 8
  %17 = load double, double* %t2, align 8
  %18 = load double, double* %t1, align 8
  %sub7 = fsub contract double %17, %18
  %19 = load double, double* @time_shuffle, align 8
  %add8 = fadd contract double %19, %sub7
  store double %add8, double* @time_shuffle, align 8
  ret void
}

; NOTE(review): the definition below continues past the end of this span; its
; opening lines are reproduced unchanged.
; Function Attrs: noinline nounwind optnone uwtable
define dso_local float @_Z7pspeedyP6PointsfPliP17pthread_barrier_t(%struct.Points* %points, float %z, i64* %kcenter, i32 %pid, %union.pthread_barrier_t* %barrier) #6 {
entry:
  %points.addr = alloca %struct.Points*, align 8
  %z.addr = alloca float, align 4
  %kcenter.addr = alloca i64*, align 8
  %pid.addr = alloca i32, align 4
  %barrier.addr = alloca %union.pthread_barrier_t*, align 8
  %t1 = alloca double, align 8
  %bsize = alloca i64, align 8
  %k1 = alloca i64, align 8
  %k2 = alloca i64, align 8
  %k = alloca i32, align 4
  %distance = alloca float, align 4
  %agg.tmp = alloca %struct.Point, align
8 %agg.tmp6 = alloca %struct.Point, align 8 %k33 = alloca i32, align 4 %distance39 = alloca float, align 4 %agg.tmp40 = alloca %struct.Point, align 8 %agg.tmp44 = alloca %struct.Point, align 8 %to_open = alloca i8, align 1 %k95 = alloca i32, align 4 %distance101 = alloca float, align 4 %agg.tmp102 = alloca %struct.Point, align 8 %agg.tmp106 = alloca %struct.Point, align 8 %mytotal = alloca float, align 4 %k146 = alloca i32, align 4 %i = alloca i32, align 4 %t2 = alloca double, align 8 store %struct.Points* %points, %struct.Points** %points.addr, align 8 store float %z, float* %z.addr, align 4 store i64* %kcenter, i64** %kcenter.addr, align 8 store i32 %pid, i32* %pid.addr, align 4 store %union.pthread_barrier_t* %barrier, %union.pthread_barrier_t** %barrier.addr, align 8 %call = call double @_Z7gettimev() store double %call, double* %t1, align 8 %0 = load %struct.Points*, %struct.Points** %points.addr, align 8 %num = getelementptr inbounds %struct.Points, %struct.Points* %0, i32 0, i32 0 %1 = load i64, i64* %num, align 8 %2 = load i32, i32* @_ZL5nproc, align 4 %conv = sext i32 %2 to i64 %div = sdiv i64 %1, %conv store i64 %div, i64* %bsize, align 8 %3 = load i64, i64* %bsize, align 8 %4 = load i32, i32* %pid.addr, align 4 %conv1 = sext i32 %4 to i64 %mul = mul nsw i64 %3, %conv1 store i64 %mul, i64* %k1, align 8 %5 = load i64, i64* %k1, align 8 %6 = load i64, i64* %bsize, align 8 %add = add nsw i64 %5, %6 store i64 %add, i64* %k2, align 8 %7 = load i32, i32* %pid.addr, align 4 %8 = load i32, i32* @_ZL5nproc, align 4 %sub = sub nsw i32 %8, 1 %cmp = icmp eq i32 %7, %sub br i1 %cmp, label %if.then, label %if.end if.then: ; preds = %entry %9 = load %struct.Points*, %struct.Points** %points.addr, align 8 %num2 = getelementptr inbounds %struct.Points, %struct.Points* %9, i32 0, i32 0 %10 = load i64, i64* %num2, align 8 store i64 %10, i64* %k2, align 8 br label %if.end if.end: ; preds = %if.then, %entry %11 = load i64, i64* %k1, align 8 %conv3 = trunc i64 %11 to i32 store 
i32 %conv3, i32* %k, align 4 br label %for.cond for.cond: ; preds = %for.inc, %if.end %12 = load i32, i32* %k, align 4 %conv4 = sext i32 %12 to i64 %13 = load i64, i64* %k2, align 8 %cmp5 = icmp slt i64 %conv4, %13 br i1 %cmp5, label %for.body, label %for.end for.body: ; preds = %for.cond %14 = load %struct.Points*, %struct.Points** %points.addr, align 8 %p = getelementptr inbounds %struct.Points, %struct.Points* %14, i32 0, i32 2 %15 = load %struct.Point*, %struct.Point** %p, align 8 %16 = load i32, i32* %k, align 4 %idxprom = sext i32 %16 to i64 %arrayidx = getelementptr inbounds %struct.Point, %struct.Point* %15, i64 %idxprom %17 = bitcast %struct.Point* %agg.tmp to i8* %18 = bitcast %struct.Point* %arrayidx to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %17, i8* align 8 %18, i64 32, i1 false) %19 = load %struct.Points*, %struct.Points** %points.addr, align 8 %p7 = getelementptr inbounds %struct.Points, %struct.Points* %19, i32 0, i32 2 %20 = load %struct.Point*, %struct.Point** %p7, align 8 %arrayidx8 = getelementptr inbounds %struct.Point, %struct.Point* %20, i64 0 %21 = bitcast %struct.Point* %agg.tmp6 to i8* %22 = bitcast %struct.Point* %arrayidx8 to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %21, i8* align 8 %22, i64 32, i1 false) %23 = load %struct.Points*, %struct.Points** %points.addr, align 8 %dim = getelementptr inbounds %struct.Points, %struct.Points* %23, i32 0, i32 1 %24 = load i32, i32* %dim, align 8 %call9 = call float @_Z4dist5PointS_i(%struct.Point* byval(%struct.Point) align 8 %agg.tmp, %struct.Point* byval(%struct.Point) align 8 %agg.tmp6, i32 %24) store float %call9, float* %distance, align 4 %25 = load float, float* %distance, align 4 %26 = load %struct.Points*, %struct.Points** %points.addr, align 8 %p10 = getelementptr inbounds %struct.Points, %struct.Points* %26, i32 0, i32 2 %27 = load %struct.Point*, %struct.Point** %p10, align 8 %28 = load i32, i32* %k, align 4 %idxprom11 = sext i32 %28 to i64 %arrayidx12 = 
getelementptr inbounds %struct.Point, %struct.Point* %27, i64 %idxprom11 %weight = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx12, i32 0, i32 0 %29 = load float, float* %weight, align 8 %mul13 = fmul contract float %25, %29 %30 = load %struct.Points*, %struct.Points** %points.addr, align 8 %p14 = getelementptr inbounds %struct.Points, %struct.Points* %30, i32 0, i32 2 %31 = load %struct.Point*, %struct.Point** %p14, align 8 %32 = load i32, i32* %k, align 4 %idxprom15 = sext i32 %32 to i64 %arrayidx16 = getelementptr inbounds %struct.Point, %struct.Point* %31, i64 %idxprom15 %cost = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx16, i32 0, i32 3 store float %mul13, float* %cost, align 8 %33 = load %struct.Points*, %struct.Points** %points.addr, align 8 %p17 = getelementptr inbounds %struct.Points, %struct.Points* %33, i32 0, i32 2 %34 = load %struct.Point*, %struct.Point** %p17, align 8 %35 = load i32, i32* %k, align 4 %idxprom18 = sext i32 %35 to i64 %arrayidx19 = getelementptr inbounds %struct.Point, %struct.Point* %34, i64 %idxprom18 %assign = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx19, i32 0, i32 2 store i64 0, i64* %assign, align 8 br label %for.inc for.inc: ; preds = %for.body %36 = load i32, i32* %k, align 4 %inc = add nsw i32 %36, 1 store i32 %inc, i32* %k, align 4 br label %for.cond for.end: ; preds = %for.cond %37 = load i32, i32* %pid.addr, align 4 %cmp20 = icmp eq i32 %37, 0 br i1 %cmp20, label %if.then21, label %if.end25 if.then21: ; preds = %for.end %38 = load i64*, i64** %kcenter.addr, align 8 store i64 1, i64* %38, align 8 %39 = load i32, i32* @_ZL5nproc, align 4 %conv22 = sext i32 %39 to i64 %mul23 = mul i64 4, %conv22 %call24 = call noalias i8* @malloc(i64 %mul23) #2 %40 = bitcast i8* %call24 to float* store float* %40, float** @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE5costs, align 8 br label %if.end25 if.end25: ; preds = %if.then21, %for.end %41 = load i32, i32* %pid.addr, align 4 %cmp26 = 
icmp ne i32 %41, 0 br i1 %cmp26, label %if.then27, label %if.else if.then27: ; preds = %if.end25 br label %while.body while.body: ; preds = %if.then27, %for.end78 %42 = load i32, i32* @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE1i, align 4 %conv28 = sext i32 %42 to i64 %43 = load %struct.Points*, %struct.Points** %points.addr, align 8 %num29 = getelementptr inbounds %struct.Points, %struct.Points* %43, i32 0, i32 0 %44 = load i64, i64* %num29, align 8 %cmp30 = icmp sge i64 %conv28, %44 br i1 %cmp30, label %if.then31, label %if.end32 if.then31: ; preds = %while.body br label %while.end if.end32: ; preds = %while.body %45 = load i64, i64* %k1, align 8 %conv34 = trunc i64 %45 to i32 store i32 %conv34, i32* %k33, align 4 br label %for.cond35 for.cond35: ; preds = %for.inc76, %if.end32 %46 = load i32, i32* %k33, align 4 %conv36 = sext i32 %46 to i64 %47 = load i64, i64* %k2, align 8 %cmp37 = icmp slt i64 %conv36, %47 br i1 %cmp37, label %for.body38, label %for.end78 for.body38: ; preds = %for.cond35 %48 = load %struct.Points*, %struct.Points** %points.addr, align 8 %p41 = getelementptr inbounds %struct.Points, %struct.Points* %48, i32 0, i32 2 %49 = load %struct.Point*, %struct.Point** %p41, align 8 %50 = load i32, i32* @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE1i, align 4 %idxprom42 = sext i32 %50 to i64 %arrayidx43 = getelementptr inbounds %struct.Point, %struct.Point* %49, i64 %idxprom42 %51 = bitcast %struct.Point* %agg.tmp40 to i8* %52 = bitcast %struct.Point* %arrayidx43 to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %51, i8* align 8 %52, i64 32, i1 false) %53 = load %struct.Points*, %struct.Points** %points.addr, align 8 %p45 = getelementptr inbounds %struct.Points, %struct.Points* %53, i32 0, i32 2 %54 = load %struct.Point*, %struct.Point** %p45, align 8 %55 = load i32, i32* %k33, align 4 %idxprom46 = sext i32 %55 to i64 %arrayidx47 = getelementptr inbounds %struct.Point, %struct.Point* %54, i64 %idxprom46 %56 = bitcast %struct.Point* %agg.tmp44 to 
i8* %57 = bitcast %struct.Point* %arrayidx47 to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %56, i8* align 8 %57, i64 32, i1 false) %58 = load %struct.Points*, %struct.Points** %points.addr, align 8 %dim48 = getelementptr inbounds %struct.Points, %struct.Points* %58, i32 0, i32 1 %59 = load i32, i32* %dim48, align 8 %call49 = call float @_Z4dist5PointS_i(%struct.Point* byval(%struct.Point) align 8 %agg.tmp40, %struct.Point* byval(%struct.Point) align 8 %agg.tmp44, i32 %59) store float %call49, float* %distance39, align 4 %60 = load float, float* %distance39, align 4 %61 = load %struct.Points*, %struct.Points** %points.addr, align 8 %p50 = getelementptr inbounds %struct.Points, %struct.Points* %61, i32 0, i32 2 %62 = load %struct.Point*, %struct.Point** %p50, align 8 %63 = load i32, i32* %k33, align 4 %idxprom51 = sext i32 %63 to i64 %arrayidx52 = getelementptr inbounds %struct.Point, %struct.Point* %62, i64 %idxprom51 %weight53 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx52, i32 0, i32 0 %64 = load float, float* %weight53, align 8 %mul54 = fmul contract float %60, %64 %65 = load %struct.Points*, %struct.Points** %points.addr, align 8 %p55 = getelementptr inbounds %struct.Points, %struct.Points* %65, i32 0, i32 2 %66 = load %struct.Point*, %struct.Point** %p55, align 8 %67 = load i32, i32* %k33, align 4 %idxprom56 = sext i32 %67 to i64 %arrayidx57 = getelementptr inbounds %struct.Point, %struct.Point* %66, i64 %idxprom56 %cost58 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx57, i32 0, i32 3 %68 = load float, float* %cost58, align 8 %cmp59 = fcmp olt float %mul54, %68 br i1 %cmp59, label %if.then60, label %if.end75 if.then60: ; preds = %for.body38 %69 = load float, float* %distance39, align 4 %70 = load %struct.Points*, %struct.Points** %points.addr, align 8 %p61 = getelementptr inbounds %struct.Points, %struct.Points* %70, i32 0, i32 2 %71 = load %struct.Point*, %struct.Point** %p61, align 8 %72 = load i32, i32* %k33, 
align 4 %idxprom62 = sext i32 %72 to i64 %arrayidx63 = getelementptr inbounds %struct.Point, %struct.Point* %71, i64 %idxprom62 %weight64 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx63, i32 0, i32 0 %73 = load float, float* %weight64, align 8 %mul65 = fmul contract float %69, %73 %74 = load %struct.Points*, %struct.Points** %points.addr, align 8 %p66 = getelementptr inbounds %struct.Points, %struct.Points* %74, i32 0, i32 2 %75 = load %struct.Point*, %struct.Point** %p66, align 8 %76 = load i32, i32* %k33, align 4 %idxprom67 = sext i32 %76 to i64 %arrayidx68 = getelementptr inbounds %struct.Point, %struct.Point* %75, i64 %idxprom67 %cost69 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx68, i32 0, i32 3 store float %mul65, float* %cost69, align 8 %77 = load i32, i32* @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE1i, align 4 %conv70 = sext i32 %77 to i64 %78 = load %struct.Points*, %struct.Points** %points.addr, align 8 %p71 = getelementptr inbounds %struct.Points, %struct.Points* %78, i32 0, i32 2 %79 = load %struct.Point*, %struct.Point** %p71, align 8 %80 = load i32, i32* %k33, align 4 %idxprom72 = sext i32 %80 to i64 %arrayidx73 = getelementptr inbounds %struct.Point, %struct.Point* %79, i64 %idxprom72 %assign74 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx73, i32 0, i32 2 store i64 %conv70, i64* %assign74, align 8 br label %if.end75 if.end75: ; preds = %if.then60, %for.body38 br label %for.inc76 for.inc76: ; preds = %if.end75 %81 = load i32, i32* %k33, align 4 %inc77 = add nsw i32 %81, 1 store i32 %inc77, i32* %k33, align 4 br label %for.cond35 for.end78: ; preds = %for.cond35 br label %while.body while.end: ; preds = %if.then31 br label %if.end145 if.else: ; preds = %if.end25 store i32 1, i32* @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE1i, align 4 br label %for.cond79 for.cond79: ; preds = %for.inc142, %if.else %82 = load i32, i32* @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE1i, align 4 %conv80 = sext 
i32 %82 to i64 %83 = load %struct.Points*, %struct.Points** %points.addr, align 8 %num81 = getelementptr inbounds %struct.Points, %struct.Points* %83, i32 0, i32 0 %84 = load i64, i64* %num81, align 8 %cmp82 = icmp slt i64 %conv80, %84 br i1 %cmp82, label %for.body83, label %for.end144 for.body83: ; preds = %for.cond79 %call84 = call i64 @lrand48() #2 %conv85 = sitofp i64 %call84 to float %div86 = fdiv float %conv85, 0x41E0000000000000 %85 = load %struct.Points*, %struct.Points** %points.addr, align 8 %p87 = getelementptr inbounds %struct.Points, %struct.Points* %85, i32 0, i32 2 %86 = load %struct.Point*, %struct.Point** %p87, align 8 %87 = load i32, i32* @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE1i, align 4 %idxprom88 = sext i32 %87 to i64 %arrayidx89 = getelementptr inbounds %struct.Point, %struct.Point* %86, i64 %idxprom88 %cost90 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx89, i32 0, i32 3 %88 = load float, float* %cost90, align 8 %89 = load float, float* %z.addr, align 4 %div91 = fdiv float %88, %89 %cmp92 = fcmp olt float %div86, %div91 %frombool = zext i1 %cmp92 to i8 store i8 %frombool, i8* %to_open, align 1 %90 = load i8, i8* %to_open, align 1 %tobool = trunc i8 %90 to i1 br i1 %tobool, label %if.then93, label %if.end141 if.then93: ; preds = %for.body83 %91 = load i64*, i64** %kcenter.addr, align 8 %92 = load i64, i64* %91, align 8 %inc94 = add nsw i64 %92, 1 store i64 %inc94, i64* %91, align 8 store i8 1, i8* @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE4open, align 1 %93 = load i64, i64* %k1, align 8 %conv96 = trunc i64 %93 to i32 store i32 %conv96, i32* %k95, align 4 br label %for.cond97 for.cond97: ; preds = %for.inc138, %if.then93 %94 = load i32, i32* %k95, align 4 %conv98 = sext i32 %94 to i64 %95 = load i64, i64* %k2, align 8 %cmp99 = icmp slt i64 %conv98, %95 br i1 %cmp99, label %for.body100, label %for.end140 for.body100: ; preds = %for.cond97 %96 = load %struct.Points*, %struct.Points** %points.addr, align 8 %p103 = 
getelementptr inbounds %struct.Points, %struct.Points* %96, i32 0, i32 2 %97 = load %struct.Point*, %struct.Point** %p103, align 8 %98 = load i32, i32* @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE1i, align 4 %idxprom104 = sext i32 %98 to i64 %arrayidx105 = getelementptr inbounds %struct.Point, %struct.Point* %97, i64 %idxprom104 %99 = bitcast %struct.Point* %agg.tmp102 to i8* %100 = bitcast %struct.Point* %arrayidx105 to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %99, i8* align 8 %100, i64 32, i1 false) %101 = load %struct.Points*, %struct.Points** %points.addr, align 8 %p107 = getelementptr inbounds %struct.Points, %struct.Points* %101, i32 0, i32 2 %102 = load %struct.Point*, %struct.Point** %p107, align 8 %103 = load i32, i32* %k95, align 4 %idxprom108 = sext i32 %103 to i64 %arrayidx109 = getelementptr inbounds %struct.Point, %struct.Point* %102, i64 %idxprom108 %104 = bitcast %struct.Point* %agg.tmp106 to i8* %105 = bitcast %struct.Point* %arrayidx109 to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %104, i8* align 8 %105, i64 32, i1 false) %106 = load %struct.Points*, %struct.Points** %points.addr, align 8 %dim110 = getelementptr inbounds %struct.Points, %struct.Points* %106, i32 0, i32 1 %107 = load i32, i32* %dim110, align 8 %call111 = call float @_Z4dist5PointS_i(%struct.Point* byval(%struct.Point) align 8 %agg.tmp102, %struct.Point* byval(%struct.Point) align 8 %agg.tmp106, i32 %107) store float %call111, float* %distance101, align 4 %108 = load float, float* %distance101, align 4 %109 = load %struct.Points*, %struct.Points** %points.addr, align 8 %p112 = getelementptr inbounds %struct.Points, %struct.Points* %109, i32 0, i32 2 %110 = load %struct.Point*, %struct.Point** %p112, align 8 %111 = load i32, i32* %k95, align 4 %idxprom113 = sext i32 %111 to i64 %arrayidx114 = getelementptr inbounds %struct.Point, %struct.Point* %110, i64 %idxprom113 %weight115 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx114, i32 0, i32 0 
%112 = load float, float* %weight115, align 8 %mul116 = fmul contract float %108, %112 %113 = load %struct.Points*, %struct.Points** %points.addr, align 8 %p117 = getelementptr inbounds %struct.Points, %struct.Points* %113, i32 0, i32 2 %114 = load %struct.Point*, %struct.Point** %p117, align 8 %115 = load i32, i32* %k95, align 4 %idxprom118 = sext i32 %115 to i64 %arrayidx119 = getelementptr inbounds %struct.Point, %struct.Point* %114, i64 %idxprom118 %cost120 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx119, i32 0, i32 3 %116 = load float, float* %cost120, align 8 %cmp121 = fcmp olt float %mul116, %116 br i1 %cmp121, label %if.then122, label %if.end137 if.then122: ; preds = %for.body100 %117 = load float, float* %distance101, align 4 %118 = load %struct.Points*, %struct.Points** %points.addr, align 8 %p123 = getelementptr inbounds %struct.Points, %struct.Points* %118, i32 0, i32 2 %119 = load %struct.Point*, %struct.Point** %p123, align 8 %120 = load i32, i32* %k95, align 4 %idxprom124 = sext i32 %120 to i64 %arrayidx125 = getelementptr inbounds %struct.Point, %struct.Point* %119, i64 %idxprom124 %weight126 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx125, i32 0, i32 0 %121 = load float, float* %weight126, align 8 %mul127 = fmul contract float %117, %121 %122 = load %struct.Points*, %struct.Points** %points.addr, align 8 %p128 = getelementptr inbounds %struct.Points, %struct.Points* %122, i32 0, i32 2 %123 = load %struct.Point*, %struct.Point** %p128, align 8 %124 = load i32, i32* %k95, align 4 %idxprom129 = sext i32 %124 to i64 %arrayidx130 = getelementptr inbounds %struct.Point, %struct.Point* %123, i64 %idxprom129 %cost131 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx130, i32 0, i32 3 store float %mul127, float* %cost131, align 8 %125 = load i32, i32* @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE1i, align 4 %conv132 = sext i32 %125 to i64 %126 = load %struct.Points*, %struct.Points** %points.addr, align 
8 %p133 = getelementptr inbounds %struct.Points, %struct.Points* %126, i32 0, i32 2 %127 = load %struct.Point*, %struct.Point** %p133, align 8 %128 = load i32, i32* %k95, align 4 %idxprom134 = sext i32 %128 to i64 %arrayidx135 = getelementptr inbounds %struct.Point, %struct.Point* %127, i64 %idxprom134 %assign136 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx135, i32 0, i32 2 store i64 %conv132, i64* %assign136, align 8 br label %if.end137 if.end137: ; preds = %if.then122, %for.body100 br label %for.inc138 for.inc138: ; preds = %if.end137 %129 = load i32, i32* %k95, align 4 %inc139 = add nsw i32 %129, 1 store i32 %inc139, i32* %k95, align 4 br label %for.cond97 for.end140: ; preds = %for.cond97 store i8 0, i8* @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE4open, align 1 br label %if.end141 if.end141: ; preds = %for.end140, %for.body83 br label %for.inc142 for.inc142: ; preds = %if.end141 %130 = load i32, i32* @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE1i, align 4 %inc143 = add nsw i32 %130, 1 store i32 %inc143, i32* @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE1i, align 4 br label %for.cond79 for.end144: ; preds = %for.cond79 store i8 1, i8* @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE4open, align 1 br label %if.end145 if.end145: ; preds = %for.end144, %while.end store i8 0, i8* @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE4open, align 1 store float 0.000000e+00, float* %mytotal, align 4 %131 = load i64, i64* %k1, align 8 %conv147 = trunc i64 %131 to i32 store i32 %conv147, i32* %k146, align 4 br label %for.cond148 for.cond148: ; preds = %for.inc157, %if.end145 %132 = load i32, i32* %k146, align 4 %conv149 = sext i32 %132 to i64 %133 = load i64, i64* %k2, align 8 %cmp150 = icmp slt i64 %conv149, %133 br i1 %cmp150, label %for.body151, label %for.end159 for.body151: ; preds = %for.cond148 %134 = load %struct.Points*, %struct.Points** %points.addr, align 8 %p152 = getelementptr inbounds %struct.Points, %struct.Points* %134, i32 0, i32 2 %135 = 
load %struct.Point*, %struct.Point** %p152, align 8 %136 = load i32, i32* %k146, align 4 %idxprom153 = sext i32 %136 to i64 %arrayidx154 = getelementptr inbounds %struct.Point, %struct.Point* %135, i64 %idxprom153 %cost155 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx154, i32 0, i32 3 %137 = load float, float* %cost155, align 8 %138 = load float, float* %mytotal, align 4 %add156 = fadd contract float %138, %137 store float %add156, float* %mytotal, align 4 br label %for.inc157 for.inc157: ; preds = %for.body151 %139 = load i32, i32* %k146, align 4 %inc158 = add nsw i32 %139, 1 store i32 %inc158, i32* %k146, align 4 br label %for.cond148 for.end159: ; preds = %for.cond148 %140 = load float, float* %mytotal, align 4 %141 = load float*, float** @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE5costs, align 8 %142 = load i32, i32* %pid.addr, align 4 %idxprom160 = sext i32 %142 to i64 %arrayidx161 = getelementptr inbounds float, float* %141, i64 %idxprom160 store float %140, float* %arrayidx161, align 4 %143 = load i32, i32* %pid.addr, align 4 %cmp162 = icmp eq i32 %143, 0 br i1 %cmp162, label %if.then163, label %if.end175 if.then163: ; preds = %for.end159 %144 = load float, float* %z.addr, align 4 %145 = load i64*, i64** %kcenter.addr, align 8 %146 = load i64, i64* %145, align 8 %conv164 = sitofp i64 %146 to float %mul165 = fmul contract float %144, %conv164 store float %mul165, float* @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE9totalcost, align 4 store i32 0, i32* %i, align 4 br label %for.cond166 for.cond166: ; preds = %for.inc172, %if.then163 %147 = load i32, i32* %i, align 4 %148 = load i32, i32* @_ZL5nproc, align 4 %cmp167 = icmp slt i32 %147, %148 br i1 %cmp167, label %for.body168, label %for.end174 for.body168: ; preds = %for.cond166 %149 = load float*, float** @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE5costs, align 8 %150 = load i32, i32* %i, align 4 %idxprom169 = sext i32 %150 to i64 %arrayidx170 = getelementptr inbounds float, float* %149, 
i64 %idxprom169
  %151 = load float, float* %arrayidx170, align 4
  %152 = load float, float* @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE9totalcost, align 4
  %add171 = fadd contract float %152, %151
  store float %add171, float* @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE9totalcost, align 4
  br label %for.inc172

for.inc172:                                       ; preds = %for.body168
  %153 = load i32, i32* %i, align 4
  %inc173 = add nsw i32 %153, 1
  store i32 %inc173, i32* %i, align 4
  br label %for.cond166

for.end174:                                       ; preds = %for.cond166
  %154 = load float*, float** @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE5costs, align 8
  %155 = bitcast float* %154 to i8*
  call void @free(i8* %155) #2
  br label %if.end175

if.end175:                                        ; preds = %for.end174, %for.end159
  %call176 = call double @_Z7gettimev()
  store double %call176, double* %t2, align 8
  %156 = load i32, i32* %pid.addr, align 4
  %cmp177 = icmp eq i32 %156, 0
  br i1 %cmp177, label %if.then178, label %if.end181

if.then178:                                       ; preds = %if.end175
  %157 = load double, double* %t2, align 8
  %158 = load double, double* %t1, align 8
  %sub179 = fsub contract double %157, %158
  %159 = load double, double* @time_speedy, align 8
  %add180 = fadd contract double %159, %sub179
  store double %add180, double* @time_speedy, align 8
  br label %if.end181

if.end181:                                        ; preds = %if.then178, %if.end175
  %160 = load float, float* @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE9totalcost, align 4
  ret float %160
}

; pFL: runs rounds of pgain calls over the %feasible index array, subtracting
; each round's accumulated result from the running cost, until the ratio
; change / cost is no longer greater than %e.
;
; Parameters (as used in the body below):
;   %points       - forwarded to pgain; its field 0 is also loaded once per round
;   %feasible     - i32 array, indexed with (i srem numfeasible)
;   %numfeasible  - passed to intshuffle and used as the srem divisor
;   %z, %k, %kmax - forwarded unchanged to pgain
;   %cost         - starting cost; also the initial value of %change
;   %iter         - number of pgain calls made per round
;   %e            - threshold compared against change / cost
;   %pid          - only the caller with pid == 0 invokes intshuffle
;   %barrier      - stored to a stack slot but never loaded in this function
; Returns the value held in the cost slot when the loop condition fails.
; Function Attrs: noinline optnone uwtable
define dso_local float @_Z3pFLP6PointsPiifPliflfiP17pthread_barrier_t(%struct.Points* %points, i32* %feasible, i32 %numfeasible, float %z, i64* %k, i32 %kmax, float %cost, i64 %iter, float %e, i32 %pid, %union.pthread_barrier_t* %barrier) #3 {
entry:
  ; Stack slots for the arguments and for the locals i, x, change, numberOfPoints.
  %points.addr = alloca %struct.Points*, align 8
  %feasible.addr = alloca i32*, align 8
  %numfeasible.addr = alloca i32, align 4
  %z.addr = alloca float, align 4
  %k.addr = alloca i64*, align 8
  %kmax.addr = alloca i32, align 4
  %cost.addr = alloca float, align 4
  %iter.addr = alloca i64, align 8
  %e.addr = alloca float, align 4
  %pid.addr = alloca i32, align 4
  %barrier.addr = alloca %union.pthread_barrier_t*, align 8
  %i = alloca i64, align 8
  %x = alloca i64, align 8
  %change = alloca float, align 4
  %numberOfPoints = alloca i64, align 8
  ; Copy each incoming argument into its slot.
  store %struct.Points* %points, %struct.Points** %points.addr, align 8
  store i32* %feasible, i32** %feasible.addr, align 8
  store i32 %numfeasible, i32* %numfeasible.addr, align 4
  store float %z, float* %z.addr, align 4
  store i64* %k, i64** %k.addr, align 8
  store i32 %kmax, i32* %kmax.addr, align 4
  store float %cost, float* %cost.addr, align 4
  store i64 %iter, i64* %iter.addr, align 8
  store float %e, float* %e.addr, align 4
  store i32 %pid, i32* %pid.addr, align 4
  store %union.pthread_barrier_t* %barrier, %union.pthread_barrier_t** %barrier.addr, align 8
  ; change starts out equal to cost, so the first quotient tested is cost / cost.
  %0 = load float, float* %cost.addr, align 4
  store float %0, float* %change, align 4
  br label %while.cond

while.cond:                                       ; preds = %for.end, %entry
  ; Loop while (double)(change / cost) > 1.0 * (double)e.
  ; fcmp ogt is an ordered comparison: a NaN quotient (e.g. 0.0 / 0.0) yields
  ; false and therefore leaves the loop.
  ; NOTE(review): cost is not checked against zero before the fdiv - confirm
  ; that an infinite or NaN quotient is acceptable to callers.
  %1 = load float, float* %change, align 4
  %2 = load float, float* %cost.addr, align 4
  %div = fdiv float %1, %2
  %conv = fpext float %div to double
  %3 = load float, float* %e.addr, align 4
  %conv1 = fpext float %3 to double
  %mul = fmul contract double 1.000000e+00, %conv1
  %cmp = fcmp ogt double %conv, %mul
  br i1 %cmp, label %while.body, label %while.end

while.body:                                       ; preds = %while.cond
  ; Reset the per-round accumulator.
  store float 0.000000e+00, float* %change, align 4
  ; Field 0 of %points is copied to %numberOfPoints; that slot is not read
  ; again anywhere in this function.
  %4 = load %struct.Points*, %struct.Points** %points.addr, align 8
  %num = getelementptr inbounds %struct.Points, %struct.Points* %4, i32 0, i32 0
  %5 = load i64, i64* %num, align 8
  store i64 %5, i64* %numberOfPoints, align 8
  %6 = load i32, i32* %pid.addr, align 4
  %cmp2 = icmp eq i32 %6, 0
  br i1 %cmp2, label %if.then, label %if.end

if.then:                                          ; preds = %while.body
  ; pid == 0 only: call intshuffle(feasible, numfeasible).
  %7 = load i32*, i32** %feasible.addr, align 8
  %8 = load i32, i32* %numfeasible.addr, align 4
  call void @_Z10intshufflePii(i32* %7, i32 %8)
  br label %if.end

if.end:                                           ; preds = %if.then, %while.body
  store i64 0, i64* %i, align 8
  br label %for.cond

for.cond:                                         ; preds = %for.inc, %if.end
  ; Inner loop: i counts from 0 while i < iter (signed compare).
  %9 = load i64, i64* %i, align 8
  %10 = load i64, i64* %iter.addr, align 8
  %cmp3 = icmp slt i64 %9, %10
  br i1 %cmp3, label %for.body, label %for.end

for.body:                                         ; preds = %for.cond
  ; x = i srem (i64)numfeasible.
  ; NOTE(review): the divisor is not checked; srem with a zero divisor is
  ; undefined behavior in LLVM IR - confirm callers never pass numfeasible == 0.
  %11 = load i64, i64* %i, align 8
  %12 = load i32, i32* %numfeasible.addr, align 4
  %conv4 = sext i32 %12 to i64
  %rem = srem i64 %11, %conv4
  store i64 %rem, i64* %x, align 8
  ; First pgain argument: feasible[x], sign-extended to i64.
  %13 = load i32*, i32** %feasible.addr, align 8
  %14 = load i64, i64* %x, align 8
  %arrayidx = getelementptr inbounds i32, i32* %13, i64 %14
  %15 = load i32, i32* %arrayidx, align 4
  %conv5 = sext i32 %15 to i64
  ; Remaining pgain arguments: points, z, k, kmax, the current values of
  ; @_ZL9is_center, @_ZL12center_table and @_ZL17switch_membership, the
  ; @isCoordChanged byte truncated to i1, and the addresses of six double
  ; globals (@serial_t ... @free_t).
  %16 = load %struct.Points*, %struct.Points** %points.addr, align 8
  %17 = load float, float* %z.addr, align 4
  %18 = load i64*, i64** %k.addr, align 8
  %19 = load i32, i32* %kmax.addr, align 4
  %20 = load i8*, i8** @_ZL9is_center, align 8
  %21 = load i32*, i32** @_ZL12center_table, align 8
  %22 = load i8*, i8** @_ZL17switch_membership, align 8
  %23 = load i8, i8* @isCoordChanged, align 1
  %tobool = trunc i8 %23 to i1
  %call = call float @_Z5pgainlP6PointsfPliPbPiS2_bPdS4_S4_S4_S4_S4_(i64 %conv5, %struct.Points* %16, float %17, i64* %18, i32 %19, i8* %20, i32* %21, i8* %22, i1 zeroext %tobool, double* @serial_t, double* @cpu_to_gpu_t, double* @gpu_to_cpu_t, double* @alloc_t, double* @kernel_t, double* @free_t)
  ; change += pgain(...)
  %24 = load float, float* %change, align 4
  %add = fadd contract float %24, %call
  store float %add, float* %change, align 4
  br label %for.inc

for.inc:                                          ; preds = %for.body
  %25 = load i64, i64* %i, align 8
  %inc = add nsw i64 %25, 1
  store i64 %inc, i64* %i, align 8
  br label %for.cond

for.end:                                          ; preds = %for.cond
  ; cost -= change, then re-evaluate the outer loop condition.
  %26 = load float, float* %change, align 4
  %27 = load float, float* %cost.addr, align 4
  %sub = fsub contract float %27, %26
  store float %sub, float* %cost.addr, align 4
  br label %while.cond

while.end:                                        ; preds = %while.cond
  ; Return the (possibly reduced) cost.
  %28 = load float, float* %cost.addr, align 4
  ret float %28
}

; Function Attrs: noinline optnone uwtable
define dso_local i32
@_Z19selectfeasible_fastP6PointsPPiiiP17pthread_barrier_t(%struct.Points* %points, i32** %feasible, i32 %kmin, i32 %pid, %union.pthread_barrier_t* %barrier) #3 {
; selectfeasible_fast: allocates an i32 array, stores its address through
; %feasible, fills it with point indices and returns the number of entries.
;
;   1. numfeasible = (i32) field 0 of %points (loaded as %num), replaced by
;      (i32)(3 * kmin * logf(kmin)) when the point count is larger than that.
;   2. *feasible = malloc(numfeasible * 4).
;   3. If numfeasible equals the point count, entry i is set to i and the
;      function returns; this path does not update @time_select_feasible.
;   4. Otherwise a float prefix-sum array of the points' field-0 values
;      (%weight) is built, and each entry is chosen by drawing
;      w = lrand48() / 2^31 * totalweight and bisecting the prefix sums.
;   5. The prefix-sum array is freed, the elapsed gettime() difference is added
;      to @time_select_feasible, and numfeasible is returned.
;
; %pid and %barrier are stored to stack slots but never loaded in this function.
; NOTE(review): neither malloc result is checked for null before it is written.
entry:
  %retval = alloca i32, align 4
  %points.addr = alloca %struct.Points*, align 8
  %feasible.addr = alloca i32**, align 8
  %kmin.addr = alloca i32, align 4
  %pid.addr = alloca i32, align 4
  %barrier.addr = alloca %union.pthread_barrier_t*, align 8
  %t1 = alloca double, align 8
  %numfeasible = alloca i32, align 4
  %accumweight = alloca float*, align 8
  %totalweight = alloca float, align 4
  %k1 = alloca i64, align 8
  %k2 = alloca i64, align 8
  %w = alloca float, align 4
  %l = alloca i32, align 4
  %r = alloca i32, align 4
  %k = alloca i32, align 4
  %i = alloca i32, align 4
  %i29 = alloca i32, align 4
  %i49 = alloca i32, align 4
  %t2 = alloca double, align 8
  store %struct.Points* %points, %struct.Points** %points.addr, align 8
  store i32** %feasible, i32*** %feasible.addr, align 8
  store i32 %kmin, i32* %kmin.addr, align 4
  store i32 %pid, i32* %pid.addr, align 4
  store %union.pthread_barrier_t* %barrier, %union.pthread_barrier_t** %barrier.addr, align 8
  ; t1 = gettime(): start of the interval accumulated at the end.
  %call = call double @_Z7gettimev()
  store double %call, double* %t1, align 8
  ; numfeasible = (i32) points field 0; the i64 count is truncated.
  %0 = load %struct.Points*, %struct.Points** %points.addr, align 8
  %num = getelementptr inbounds %struct.Points, %struct.Points* %0, i32 0, i32 0
  %1 = load i64, i64* %num, align 8
  %conv = trunc i64 %1 to i32
  store i32 %conv, i32* %numfeasible, align 4
  ; Compare (float)numfeasible against (float)(3 * kmin) * logf((float)kmin).
  %2 = load i32, i32* %numfeasible, align 4
  %conv1 = sitofp i32 %2 to float
  %3 = load i32, i32* %kmin.addr, align 4
  %mul = mul nsw i32 3, %3
  %conv2 = sitofp i32 %mul to float
  %4 = load i32, i32* %kmin.addr, align 4
  %conv3 = sitofp i32 %4 to float
  %call4 = call float @_ZSt3logf(float %conv3)
  %mul5 = fmul contract float %conv2, %call4
  %cmp = fcmp ogt float %conv1, %mul5
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  ; Cap: numfeasible = (i32)(3 * kmin * logf(kmin)), recomputed from scratch.
  ; NOTE(review): for kmin == 1 the product is 3 * logf(1.0); if that is 0 the
  ; function returns 0 entries - confirm callers tolerate numfeasible == 0.
  %5 = load i32, i32* %kmin.addr, align 4
  %mul6 = mul nsw i32 3, %5
  %conv7 = sitofp i32 %mul6 to float
  %6 = load i32, i32* %kmin.addr, align 4
  %conv8 = sitofp i32 %6 to float
  %call9 = call float @_ZSt3logf(float %conv8)
  %mul10 = fmul contract float %conv7, %call9
  %conv11 = fptosi float %mul10 to i32
  store i32 %conv11, i32* %numfeasible, align 4
  br label %if.end

if.end:                                           ; preds = %if.then, %entry
  ; *feasible = (i32*) malloc(numfeasible * 4)
  %7 = load i32, i32* %numfeasible, align 4
  %conv12 = sext i32 %7 to i64
  %mul13 = mul i64 %conv12, 4
  %call14 = call noalias i8* @malloc(i64 %mul13) #2
  %8 = bitcast i8* %call14 to i32*
  %9 = load i32**, i32*** %feasible.addr, align 8
  store i32* %8, i32** %9, align 8
  ; Fill range [k1, k2) = [0, numfeasible).
  store i64 0, i64* %k1, align 8
  %10 = load i32, i32* %numfeasible, align 4
  %conv15 = sext i32 %10 to i64
  store i64 %conv15, i64* %k2, align 8
  ; If numfeasible still equals the point count, take the identity-fill path.
  %11 = load i32, i32* %numfeasible, align 4
  %conv16 = sext i32 %11 to i64
  %12 = load %struct.Points*, %struct.Points** %points.addr, align 8
  %num17 = getelementptr inbounds %struct.Points, %struct.Points* %12, i32 0, i32 0
  %13 = load i64, i64* %num17, align 8
  %cmp18 = icmp eq i64 %conv16, %13
  br i1 %cmp18, label %if.then19, label %if.end23

if.then19:                                        ; preds = %if.end
  %14 = load i64, i64* %k1, align 8
  %conv20 = trunc i64 %14 to i32
  store i32 %conv20, i32* %i, align 4
  br label %for.cond

for.cond:                                         ; preds = %for.inc, %if.then19
  %15 = load i32, i32* %i, align 4
  %conv21 = sext i32 %15 to i64
  %16 = load i64, i64* %k2, align 8
  %cmp22 = icmp slt i64 %conv21, %16
  br i1 %cmp22, label %for.body, label %for.end

for.body:                                         ; preds = %for.cond
  ; (*feasible)[i] = i
  %17 = load i32, i32* %i, align 4
  %18 = load i32**, i32*** %feasible.addr, align 8
  %19 = load i32*, i32** %18, align 8
  %20 = load i32, i32* %i, align 4
  %idxprom = sext i32 %20 to i64
  %arrayidx = getelementptr inbounds i32, i32* %19, i64 %idxprom
  store i32 %17, i32* %arrayidx, align 4
  br label %for.inc

for.inc:                                          ; preds = %for.body
  %21 = load i32, i32* %i, align 4
  %inc = add nsw i32 %21, 1
  store i32 %inc, i32* %i, align 4
  br label %for.cond

for.end:                                          ; preds = %for.cond
  ; Early return of numfeasible; no timing update happens on this path.
  %22 = load i32, i32* %numfeasible, align 4
  store i32 %22, i32* %retval, align 4
  br label %return

if.end23:                                         ; preds = %if.end
  ; accumweight = (float*) malloc(4 * point count)
  %23 = load %struct.Points*, %struct.Points** %points.addr, align 8
  %num24 = getelementptr inbounds %struct.Points, %struct.Points* %23, i32 0, i32 0
  %24 = load i64, i64* %num24, align 8
  %mul25 = mul i64 4, %24
  %call26 = call noalias i8* @malloc(i64 %mul25) #2
  %25 = bitcast i8* %call26 to float*
  store float* %25, float** %accumweight, align 8
  ; accumweight[0] = field 0 (%weight) of the first point.
  %26 = load %struct.Points*, %struct.Points** %points.addr, align 8
  %p = getelementptr inbounds %struct.Points, %struct.Points* %26, i32 0, i32 2
  %27 = load %struct.Point*, %struct.Point** %p, align 8
  %arrayidx27 = getelementptr inbounds %struct.Point, %struct.Point* %27, i64 0
  %weight = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx27, i32 0, i32 0
  %28 = load float, float* %weight, align 8
  %29 = load float*, float** %accumweight, align 8
  %arrayidx28 = getelementptr inbounds float, float* %29, i64 0
  store float %28, float* %arrayidx28, align 4
  ; totalweight is zeroed here and overwritten after the prefix-sum loop.
  store float 0.000000e+00, float* %totalweight, align 4
  store i32 1, i32* %i29, align 4
  br label %for.cond30

for.cond30:                                       ; preds = %for.inc43, %if.end23
  ; Prefix-sum loop: i29 runs from 1 while i29 < point count.
  %30 = load i32, i32* %i29, align 4
  %conv31 = sext i32 %30 to i64
  %31 = load %struct.Points*, %struct.Points** %points.addr, align 8
  %num32 = getelementptr inbounds %struct.Points, %struct.Points* %31, i32 0, i32 0
  %32 = load i64, i64* %num32, align 8
  %cmp33 = icmp slt i64 %conv31, %32
  br i1 %cmp33, label %for.body34, label %for.end45

for.body34:                                       ; preds = %for.cond30
  ; accumweight[i29] = accumweight[i29 - 1] + weight of point i29
  %33 = load float*, float** %accumweight, align 8
  %34 = load i32, i32* %i29, align 4
  %sub = sub nsw i32 %34, 1
  %idxprom35 = sext i32 %sub to i64
  %arrayidx36 = getelementptr inbounds float, float* %33, i64 %idxprom35
  %35 = load float, float* %arrayidx36, align 4
  %36 = load %struct.Points*, %struct.Points** %points.addr, align 8
  %p37 = getelementptr inbounds %struct.Points, %struct.Points* %36, i32 0, i32 2
  %37 = load %struct.Point*, %struct.Point** %p37, align 8
  %38 = load i32, i32* %i29, align 4
  %idxprom38 = sext i32 %38 to i64
  %arrayidx39 = getelementptr inbounds %struct.Point, %struct.Point* %37, i64 %idxprom38
  %weight40 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx39, i32 0, i32 0
  %39 = load float, float* %weight40, align 8
  %add = fadd contract float %35, %39
  %40 = load float*, float** %accumweight, align 8
  %41 = load i32, i32* %i29, align 4
  %idxprom41 = sext i32 %41 to i64
  %arrayidx42 = getelementptr inbounds float, float* %40, i64 %idxprom41
  store float %add, float* %arrayidx42, align 4
  br label %for.inc43

for.inc43:                                        ; preds = %for.body34
  %42 = load i32, i32* %i29, align 4
  %inc44 = add nsw i32 %42, 1
  store i32 %inc44, i32* %i29, align 4
  br label %for.cond30

for.end45:                                        ; preds = %for.cond30
  ; totalweight = accumweight[point count - 1]
  %43 = load float*, float** %accumweight, align 8
  %44 = load %struct.Points*, %struct.Points** %points.addr, align 8
  %num46 = getelementptr inbounds %struct.Points, %struct.Points* %44, i32 0, i32 0
  %45 = load i64, i64* %num46, align 8
  %sub47 = sub nsw i64 %45, 1
  %arrayidx48 = getelementptr inbounds float, float* %43, i64 %sub47
  %46 = load float, float* %arrayidx48, align 4
  store float %46, float* %totalweight, align 4
  ; Selection loop: i49 runs over [k1, k2).
  %47 = load i64, i64* %k1, align 8
  %conv50 = trunc i64 %47 to i32
  store i32 %conv50, i32* %i49, align 4
  br label %for.cond51

for.cond51:                                       ; preds = %for.inc78, %for.end45
  %48 = load i32, i32* %i49, align 4
  %conv52 = sext i32 %48 to i64
  %49 = load i64, i64* %k2, align 8
  %cmp53 = icmp slt i64 %conv52, %49
  br i1 %cmp53, label %for.body54, label %for.end80

for.body54:                                       ; preds = %for.cond51
  ; w = ((float)lrand48() / 2147483648.0) * totalweight.
  ; 0x41E0000000000000 is the hexadecimal encoding of 2^31.
  %call55 = call i64 @lrand48() #2
  %conv56 = sitofp i64 %call55 to float
  %div = fdiv float %conv56, 0x41E0000000000000
  %50 = load float, float* %totalweight, align 4
  %mul57 = fmul contract float %div, %50
  store float %mul57, float* %w, align 4
  ; Bisection bounds: l = 0, r = (i32)(point count - 1).
  store i32 0, i32* %l, align 4
  %51 = load %struct.Points*, %struct.Points** %points.addr, align 8
  %num58 = getelementptr inbounds %struct.Points, %struct.Points* %51, i32 0, i32 0
  %52 = load i64, i64* %num58, align 8
  %sub59 = sub nsw i64 %52, 1
  %conv60 = trunc i64 %sub59 to i32
  store i32 %conv60, i32* %r, align 4
  ; Shortcut: if accumweight[0] > w, index 0 is selected without bisecting.
  %53 = load float*, float** %accumweight, align 8
  %arrayidx61 = getelementptr inbounds float, float* %53, i64 0
  %54 = load float, float* %arrayidx61, align 4
  %55 = load float, float* %w, align 4
  %cmp62 = fcmp ogt float %54, %55
  br i1 %cmp62, label %if.then63, label %if.end66

if.then63:                                        ; preds = %for.body54
  ; (*feasible)[i49] = 0
  %56 = load i32**, i32*** %feasible.addr, align 8
  %57 = load i32*, i32** %56, align 8
  %58 = load i32, i32* %i49, align 4
  %idxprom64 = sext i32 %58 to i64
  %arrayidx65 = getelementptr inbounds i32, i32* %57, i64 %idxprom64
  store i32 0, i32* %arrayidx65, align 4
  br label %for.inc78

if.end66:                                         ; preds = %for.body54
  br label %while.cond

while.cond:                                       ; preds = %if.end75, %if.end66
  ; Bisect while l + 1 < r.
  %59 = load i32, i32* %l, align 4
  %add67 = add nsw i32 %59, 1
  %60 = load i32, i32* %r, align 4
  %cmp68 = icmp slt i32 %add67, %60
  br i1 %cmp68, label %while.body, label %while.end

while.body:                                       ; preds = %while.cond
  ; k = (l + r) / 2; move r down to k when accumweight[k] > w, else move l up.
  %61 = load i32, i32* %l, align 4
  %62 = load i32, i32* %r, align 4
  %add69 = add nsw i32 %61, %62
  %div70 = sdiv i32 %add69, 2
  store i32 %div70, i32* %k, align 4
  %63 = load float*, float** %accumweight, align 8
  %64 = load i32, i32* %k, align 4
  %idxprom71 = sext i32 %64 to i64
  %arrayidx72 = getelementptr inbounds float, float* %63, i64 %idxprom71
  %65 = load float, float* %arrayidx72, align 4
  %66 = load float, float* %w, align 4
  %cmp73 = fcmp ogt float %65, %66
  br i1 %cmp73, label %if.then74, label %if.else

if.then74:                                        ; preds = %while.body
  %67 = load i32, i32* %k, align 4
  store i32 %67, i32* %r, align 4
  br label %if.end75

if.else:                                          ; preds = %while.body
  %68 = load i32, i32* %k, align 4
  store i32 %68, i32* %l, align 4
  br label %if.end75

if.end75:                                         ; preds = %if.else, %if.then74
  br label %while.cond

while.end:                                        ; preds = %while.cond
  ; (*feasible)[i49] = r
  %69 = load i32, i32* %r, align 4
  %70 = load i32**, i32*** %feasible.addr, align 8
  %71 = load i32*, i32** %70, align 8
  %72 = load i32, i32* %i49, align 4
  %idxprom76 = sext i32 %72 to i64
  %arrayidx77 = getelementptr
inbounds i32, i32* %71, i64 %idxprom76
  store i32 %69, i32* %arrayidx77, align 4
  br label %for.inc78

for.inc78:                                        ; preds = %while.end, %if.then63
  %73 = load i32, i32* %i49, align 4
  %inc79 = add nsw i32 %73, 1
  store i32 %inc79, i32* %i49, align 4
  br label %for.cond51

for.end80:                                        ; preds = %for.cond51
  ; Release the accumweight buffer, then add (t2 - t1) to @time_select_feasible.
  %74 = load float*, float** %accumweight, align 8
  %75 = bitcast float* %74 to i8*
  call void @free(i8* %75) #2
  %call81 = call double @_Z7gettimev()
  store double %call81, double* %t2, align 8
  %76 = load double, double* %t2, align 8
  %77 = load double, double* %t1, align 8
  %sub82 = fsub contract double %76, %77
  %78 = load double, double* @time_select_feasible, align 8
  %add83 = fadd contract double %78, %sub82
  store double %add83, double* @time_select_feasible, align 8
  %79 = load i32, i32* %numfeasible, align 4
  store i32 %79, i32* %retval, align 4
  br label %return

return:                                           ; preds = %for.end80, %for.end
  ; Common exit: return whatever was stored in %retval.
  %80 = load i32, i32* %retval, align 4
  ret i32 %80
}

; _ZSt3logf (std::log for a float argument): spills %__x to a stack slot,
; reloads it, passes it to the C library logf and returns that result unchanged.
; Function Attrs: noinline nounwind optnone uwtable
define linkonce_odr dso_local float @_ZSt3logf(float %__x) #6 comdat {
entry:
  %__x.addr = alloca float, align 4
  store float %__x, float* %__x.addr, align 4
  %0 = load float, float* %__x.addr, align 4
  ; Direct forward to logf; no range check is made on the argument here.
  %call = call float @logf(float %0) #2
  ret float %call
}

; Function Attrs: noinline optnone uwtable
define dso_local float @_Z8pkmedianP6PointsllPliP17pthread_barrier_t(%struct.Points* %points, i64 %kmin, i64 %kmax, i64* %kfinal, i32 %pid, %union.pthread_barrier_t* %barrier) #3 {
entry:
  %retval = alloca float, align 4
  %points.addr = alloca %struct.Points*, align 8
  %kmin.addr = alloca i64, align 8
  %kmax.addr = alloca i64, align 8
  %kfinal.addr = alloca i64*, align 8
  %pid.addr = alloca i32, align 4
  %barrier.addr = alloca %union.pthread_barrier_t*, align 8
  %i = alloca i32, align 4
  %cost = alloca float, align 4
  %lastcost = alloca float, align 4
  %hiz = alloca float, align 4
  %loz = alloca float, align 4
  %z = alloca float, align 4
  %numberOfPoints = alloca i64, align 8
  %ptDimension = alloca
i64, align 8 %bsize = alloca i64, align 8 %k1 = alloca i64, align 8 %k2 = alloca i64, align 8 %myhiz = alloca float, align 4 %kk = alloca i64, align 8 %agg.tmp = alloca %struct.Point, align 8 %agg.tmp10 = alloca %struct.Point, align 8 %i20 = alloca i32, align 4 %kk37 = alloca i64, align 8 %i81 = alloca i32, align 4 store %struct.Points* %points, %struct.Points** %points.addr, align 8 store i64 %kmin, i64* %kmin.addr, align 8 store i64 %kmax, i64* %kmax.addr, align 8 store i64* %kfinal, i64** %kfinal.addr, align 8 store i32 %pid, i32* %pid.addr, align 4 store %union.pthread_barrier_t* %barrier, %union.pthread_barrier_t** %barrier.addr, align 8 %0 = load i32, i32* %pid.addr, align 4 %cmp = icmp eq i32 %0, 0 br i1 %cmp, label %if.then, label %if.end if.then: ; preds = %entry %1 = load i32, i32* @_ZL5nproc, align 4 %conv = sext i32 %1 to i64 %call = call noalias i8* @calloc(i64 %conv, i64 4) #2 %2 = bitcast i8* %call to float* store float* %2, float** @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE4hizs, align 8 br label %if.end if.end: ; preds = %if.then, %entry store float 0.000000e+00, float* %loz, align 4 store float 0.000000e+00, float* %hiz, align 4 %3 = load %struct.Points*, %struct.Points** %points.addr, align 8 %num = getelementptr inbounds %struct.Points, %struct.Points* %3, i32 0, i32 0 %4 = load i64, i64* %num, align 8 store i64 %4, i64* %numberOfPoints, align 8 %5 = load %struct.Points*, %struct.Points** %points.addr, align 8 %dim = getelementptr inbounds %struct.Points, %struct.Points* %5, i32 0, i32 1 %6 = load i32, i32* %dim, align 8 %conv1 = sext i32 %6 to i64 store i64 %conv1, i64* %ptDimension, align 8 %7 = load %struct.Points*, %struct.Points** %points.addr, align 8 %num2 = getelementptr inbounds %struct.Points, %struct.Points* %7, i32 0, i32 0 %8 = load i64, i64* %num2, align 8 %9 = load i32, i32* @_ZL5nproc, align 4 %conv3 = sext i32 %9 to i64 %div = sdiv i64 %8, %conv3 store i64 %div, i64* %bsize, align 8 %10 = load i64, i64* %bsize, align 8 %11 
= load i32, i32* %pid.addr, align 4 %conv4 = sext i32 %11 to i64 %mul = mul nsw i64 %10, %conv4 store i64 %mul, i64* %k1, align 8 %12 = load i64, i64* %k1, align 8 %13 = load i64, i64* %bsize, align 8 %add = add nsw i64 %12, %13 store i64 %add, i64* %k2, align 8 %14 = load i32, i32* %pid.addr, align 4 %15 = load i32, i32* @_ZL5nproc, align 4 %sub = sub nsw i32 %15, 1 %cmp5 = icmp eq i32 %14, %sub br i1 %cmp5, label %if.then6, label %if.end8 if.then6: ; preds = %if.end %16 = load %struct.Points*, %struct.Points** %points.addr, align 8 %num7 = getelementptr inbounds %struct.Points, %struct.Points* %16, i32 0, i32 0 %17 = load i64, i64* %num7, align 8 store i64 %17, i64* %k2, align 8 br label %if.end8 if.end8: ; preds = %if.then6, %if.end store float 0.000000e+00, float* %myhiz, align 4 %18 = load i64, i64* %k1, align 8 store i64 %18, i64* %kk, align 8 br label %for.cond for.cond: ; preds = %for.inc, %if.end8 %19 = load i64, i64* %kk, align 8 %20 = load i64, i64* %k2, align 8 %cmp9 = icmp slt i64 %19, %20 br i1 %cmp9, label %for.body, label %for.end for.body: ; preds = %for.cond %21 = load %struct.Points*, %struct.Points** %points.addr, align 8 %p = getelementptr inbounds %struct.Points, %struct.Points* %21, i32 0, i32 2 %22 = load %struct.Point*, %struct.Point** %p, align 8 %23 = load i64, i64* %kk, align 8 %arrayidx = getelementptr inbounds %struct.Point, %struct.Point* %22, i64 %23 %24 = bitcast %struct.Point* %agg.tmp to i8* %25 = bitcast %struct.Point* %arrayidx to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %24, i8* align 8 %25, i64 32, i1 false) %26 = load %struct.Points*, %struct.Points** %points.addr, align 8 %p11 = getelementptr inbounds %struct.Points, %struct.Points* %26, i32 0, i32 2 %27 = load %struct.Point*, %struct.Point** %p11, align 8 %arrayidx12 = getelementptr inbounds %struct.Point, %struct.Point* %27, i64 0 %28 = bitcast %struct.Point* %agg.tmp10 to i8* %29 = bitcast %struct.Point* %arrayidx12 to i8* call void 
@llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %28, i8* align 8 %29, i64 32, i1 false) %30 = load i64, i64* %ptDimension, align 8 %conv13 = trunc i64 %30 to i32 %call14 = call float @_Z4dist5PointS_i(%struct.Point* byval(%struct.Point) align 8 %agg.tmp, %struct.Point* byval(%struct.Point) align 8 %agg.tmp10, i32 %conv13) %31 = load %struct.Points*, %struct.Points** %points.addr, align 8 %p15 = getelementptr inbounds %struct.Points, %struct.Points* %31, i32 0, i32 2 %32 = load %struct.Point*, %struct.Point** %p15, align 8 %33 = load i64, i64* %kk, align 8 %arrayidx16 = getelementptr inbounds %struct.Point, %struct.Point* %32, i64 %33 %weight = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx16, i32 0, i32 0 %34 = load float, float* %weight, align 8 %mul17 = fmul contract float %call14, %34 %35 = load float, float* %myhiz, align 4 %add18 = fadd contract float %35, %mul17 store float %add18, float* %myhiz, align 4 br label %for.inc for.inc: ; preds = %for.body %36 = load i64, i64* %kk, align 8 %inc = add nsw i64 %36, 1 store i64 %inc, i64* %kk, align 8 br label %for.cond for.end: ; preds = %for.cond %37 = load float, float* %myhiz, align 4 %38 = load float*, float** @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE4hizs, align 8 %39 = load i32, i32* %pid.addr, align 4 %idxprom = sext i32 %39 to i64 %arrayidx19 = getelementptr inbounds float, float* %38, i64 %idxprom store float %37, float* %arrayidx19, align 4 store i32 0, i32* %i20, align 4 br label %for.cond21 for.cond21: ; preds = %for.inc27, %for.end %40 = load i32, i32* %i20, align 4 %41 = load i32, i32* @_ZL5nproc, align 4 %cmp22 = icmp slt i32 %40, %41 br i1 %cmp22, label %for.body23, label %for.end29 for.body23: ; preds = %for.cond21 %42 = load float*, float** @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE4hizs, align 8 %43 = load i32, i32* %i20, align 4 %idxprom24 = sext i32 %43 to i64 %arrayidx25 = getelementptr inbounds float, float* %42, i64 %idxprom24 %44 = load float, float* %arrayidx25, align 4 %45 
= load float, float* %hiz, align 4 %add26 = fadd contract float %45, %44 store float %add26, float* %hiz, align 4 br label %for.inc27 for.inc27: ; preds = %for.body23 %46 = load i32, i32* %i20, align 4 %inc28 = add nsw i32 %46, 1 store i32 %inc28, i32* %i20, align 4 br label %for.cond21 for.end29: ; preds = %for.cond21 store float 0.000000e+00, float* %loz, align 4 %47 = load float, float* %hiz, align 4 %48 = load float, float* %loz, align 4 %add30 = fadd contract float %47, %48 %conv31 = fpext float %add30 to double %div32 = fdiv double %conv31, 2.000000e+00 %conv33 = fptrunc double %div32 to float store float %conv33, float* %z, align 4 %49 = load %struct.Points*, %struct.Points** %points.addr, align 8 %num34 = getelementptr inbounds %struct.Points, %struct.Points* %49, i32 0, i32 0 %50 = load i64, i64* %num34, align 8 %51 = load i64, i64* %kmax.addr, align 8 %cmp35 = icmp sle i64 %50, %51 br i1 %cmp35, label %if.then36, label %if.end52 if.then36: ; preds = %for.end29 %52 = load i64, i64* %k1, align 8 store i64 %52, i64* %kk37, align 8 br label %for.cond38 for.cond38: ; preds = %for.inc46, %if.then36 %53 = load i64, i64* %kk37, align 8 %54 = load i64, i64* %k2, align 8 %cmp39 = icmp slt i64 %53, %54 br i1 %cmp39, label %for.body40, label %for.end48 for.body40: ; preds = %for.cond38 %55 = load i64, i64* %kk37, align 8 %56 = load %struct.Points*, %struct.Points** %points.addr, align 8 %p41 = getelementptr inbounds %struct.Points, %struct.Points* %56, i32 0, i32 2 %57 = load %struct.Point*, %struct.Point** %p41, align 8 %58 = load i64, i64* %kk37, align 8 %arrayidx42 = getelementptr inbounds %struct.Point, %struct.Point* %57, i64 %58 %assign = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx42, i32 0, i32 2 store i64 %55, i64* %assign, align 8 %59 = load %struct.Points*, %struct.Points** %points.addr, align 8 %p43 = getelementptr inbounds %struct.Points, %struct.Points* %59, i32 0, i32 2 %60 = load %struct.Point*, %struct.Point** %p43, align 8 %61 = 
load i64, i64* %kk37, align 8 %arrayidx44 = getelementptr inbounds %struct.Point, %struct.Point* %60, i64 %61 %cost45 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx44, i32 0, i32 3 store float 0.000000e+00, float* %cost45, align 8 br label %for.inc46 for.inc46: ; preds = %for.body40 %62 = load i64, i64* %kk37, align 8 %inc47 = add nsw i64 %62, 1 store i64 %inc47, i64* %kk37, align 8 br label %for.cond38 for.end48: ; preds = %for.cond38 store float 0.000000e+00, float* %cost, align 4 %63 = load i32, i32* %pid.addr, align 4 %cmp49 = icmp eq i32 %63, 0 br i1 %cmp49, label %if.then50, label %if.end51 if.then50: ; preds = %for.end48 %64 = load float*, float** @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE4hizs, align 8 %65 = bitcast float* %64 to i8* call void @free(i8* %65) #2 %66 = load i64, i64* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE1k, align 8 %67 = load i64*, i64** %kfinal.addr, align 8 store i64 %66, i64* %67, align 8 br label %if.end51 if.end51: ; preds = %if.then50, %for.end48 %68 = load float, float* %cost, align 4 store float %68, float* %retval, align 4 br label %return if.end52: ; preds = %for.end29 %69 = load i32, i32* %pid.addr, align 4 %cmp53 = icmp eq i32 %69, 0 br i1 %cmp53, label %if.then54, label %if.end55 if.then54: ; preds = %if.end52 %70 = load %struct.Points*, %struct.Points** %points.addr, align 8 call void @_Z7shuffleP6Points(%struct.Points* %70) br label %if.end55 if.end55: ; preds = %if.then54, %if.end52 %71 = load %struct.Points*, %struct.Points** %points.addr, align 8 %72 = load float, float* %z, align 4 %73 = load i32, i32* %pid.addr, align 4 %74 = load %union.pthread_barrier_t*, %union.pthread_barrier_t** %barrier.addr, align 8 %call56 = call float @_Z7pspeedyP6PointsfPliP17pthread_barrier_t(%struct.Points* %71, float %72, i64* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE1k, i32 %73, %union.pthread_barrier_t* %74) store float %call56, float* %cost, align 4 store i32 0, i32* %i, align 4 br label %while.cond 
while.cond: ; preds = %while.body, %if.end55 %75 = load i64, i64* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE1k, align 8 %76 = load i64, i64* %kmin.addr, align 8 %cmp57 = icmp slt i64 %75, %76 br i1 %cmp57, label %land.rhs, label %land.end land.rhs: ; preds = %while.cond %77 = load i32, i32* %i, align 4 %cmp58 = icmp slt i32 %77, 1 br label %land.end land.end: ; preds = %land.rhs, %while.cond %78 = phi i1 [ false, %while.cond ], [ %cmp58, %land.rhs ] br i1 %78, label %while.body, label %while.end while.body: ; preds = %land.end %79 = load %struct.Points*, %struct.Points** %points.addr, align 8 %80 = load float, float* %z, align 4 %81 = load i32, i32* %pid.addr, align 4 %82 = load %union.pthread_barrier_t*, %union.pthread_barrier_t** %barrier.addr, align 8 %call59 = call float @_Z7pspeedyP6PointsfPliP17pthread_barrier_t(%struct.Points* %79, float %80, i64* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE1k, i32 %81, %union.pthread_barrier_t* %82) store float %call59, float* %cost, align 4 %83 = load i32, i32* %i, align 4 %inc60 = add nsw i32 %83, 1 store i32 %inc60, i32* %i, align 4 br label %while.cond while.end: ; preds = %land.end br label %while.cond61 while.cond61: ; preds = %if.end73, %while.end %84 = load i64, i64* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE1k, align 8 %85 = load i64, i64* %kmin.addr, align 8 %cmp62 = icmp slt i64 %84, %85 br i1 %cmp62, label %while.body63, label %while.end76 while.body63: ; preds = %while.cond61 %86 = load i32, i32* %i, align 4 %cmp64 = icmp sge i32 %86, 1 br i1 %cmp64, label %if.then65, label %if.end70 if.then65: ; preds = %while.body63 %87 = load float, float* %z, align 4 store float %87, float* %hiz, align 4 %88 = load float, float* %hiz, align 4 %89 = load float, float* %loz, align 4 %add66 = fadd contract float %88, %89 %conv67 = fpext float %add66 to double %div68 = fdiv double %conv67, 2.000000e+00 %conv69 = fptrunc double %div68 to float store float %conv69, float* %z, align 4 store i32 0, i32* %i, align 4 br 
label %if.end70 if.end70: ; preds = %if.then65, %while.body63 %90 = load i32, i32* %pid.addr, align 4 %cmp71 = icmp eq i32 %90, 0 br i1 %cmp71, label %if.then72, label %if.end73 if.then72: ; preds = %if.end70 %91 = load %struct.Points*, %struct.Points** %points.addr, align 8 call void @_Z7shuffleP6Points(%struct.Points* %91) br label %if.end73 if.end73: ; preds = %if.then72, %if.end70 %92 = load %struct.Points*, %struct.Points** %points.addr, align 8 %93 = load float, float* %z, align 4 %94 = load i32, i32* %pid.addr, align 4 %95 = load %union.pthread_barrier_t*, %union.pthread_barrier_t** %barrier.addr, align 8 %call74 = call float @_Z7pspeedyP6PointsfPliP17pthread_barrier_t(%struct.Points* %92, float %93, i64* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE1k, i32 %94, %union.pthread_barrier_t* %95) store float %call74, float* %cost, align 4 %96 = load i32, i32* %i, align 4 %inc75 = add nsw i32 %96, 1 store i32 %inc75, i32* %i, align 4 br label %while.cond61 while.end76: ; preds = %while.cond61 %97 = load i32, i32* %pid.addr, align 4 %cmp77 = icmp eq i32 %97, 0 br i1 %cmp77, label %if.then78, label %if.end95 if.then78: ; preds = %while.end76 %98 = load %struct.Points*, %struct.Points** %points.addr, align 8 %99 = load i64, i64* %kmin.addr, align 8 %conv79 = trunc i64 %99 to i32 %100 = load i32, i32* %pid.addr, align 4 %101 = load %union.pthread_barrier_t*, %union.pthread_barrier_t** %barrier.addr, align 8 %call80 = call i32 @_Z19selectfeasible_fastP6PointsPPiiiP17pthread_barrier_t(%struct.Points* %98, i32** @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE8feasible, i32 %conv79, i32 %100, %union.pthread_barrier_t* %101) store i32 %call80, i32* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE11numfeasible, align 4 store i32 0, i32* %i81, align 4 br label %for.cond82 for.cond82: ; preds = %for.inc92, %if.then78 %102 = load i32, i32* %i81, align 4 %conv83 = sext i32 %102 to i64 %103 = load %struct.Points*, %struct.Points** %points.addr, align 8 %num84 = getelementptr 
inbounds %struct.Points, %struct.Points* %103, i32 0, i32 0 %104 = load i64, i64* %num84, align 8 %cmp85 = icmp slt i64 %conv83, %104 br i1 %cmp85, label %for.body86, label %for.end94 for.body86: ; preds = %for.cond82 %105 = load i8*, i8** @_ZL9is_center, align 8 %106 = load %struct.Points*, %struct.Points** %points.addr, align 8 %p87 = getelementptr inbounds %struct.Points, %struct.Points* %106, i32 0, i32 2 %107 = load %struct.Point*, %struct.Point** %p87, align 8 %108 = load i32, i32* %i81, align 4 %idxprom88 = sext i32 %108 to i64 %arrayidx89 = getelementptr inbounds %struct.Point, %struct.Point* %107, i64 %idxprom88 %assign90 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx89, i32 0, i32 2 %109 = load i64, i64* %assign90, align 8 %arrayidx91 = getelementptr inbounds i8, i8* %105, i64 %109 store i8 1, i8* %arrayidx91, align 1 br label %for.inc92 for.inc92: ; preds = %for.body86 %110 = load i32, i32* %i81, align 4 %inc93 = add nsw i32 %110, 1 store i32 %inc93, i32* %i81, align 4 br label %for.cond82 for.end94: ; preds = %for.cond82 br label %if.end95 if.end95: ; preds = %for.end94, %while.end76 br label %while.body97 while.body97: ; preds = %if.end95, %if.end160 %111 = load float, float* %cost, align 4 store float %111, float* %lastcost, align 4 %112 = load %struct.Points*, %struct.Points** %points.addr, align 8 %113 = load i32*, i32** @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE8feasible, align 8 %114 = load i32, i32* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE11numfeasible, align 4 %115 = load float, float* %z, align 4 %116 = load i64, i64* %kmax.addr, align 8 %conv98 = trunc i64 %116 to i32 %117 = load float, float* %cost, align 4 %118 = load i64, i64* %kmax.addr, align 8 %mul99 = mul nsw i64 3, %118 %conv100 = sitofp i64 %mul99 to float %119 = load i64, i64* %kmax.addr, align 8 %conv101 = sitofp i64 %119 to float %call102 = call float @_ZSt3logf(float %conv101) %mul103 = fmul contract float %conv100, %call102 %conv104 = fptosi float 
%mul103 to i64 %120 = load i32, i32* %pid.addr, align 4 %121 = load %union.pthread_barrier_t*, %union.pthread_barrier_t** %barrier.addr, align 8 %call105 = call float @_Z3pFLP6PointsPiifPliflfiP17pthread_barrier_t(%struct.Points* %112, i32* %113, i32 %114, float %115, i64* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE1k, i32 %conv98, float %117, i64 %conv104, float 0x3FB99999A0000000, i32 %120, %union.pthread_barrier_t* %121) store float %call105, float* %cost, align 4 %122 = load i64, i64* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE1k, align 8 %conv106 = sitofp i64 %122 to double %123 = load i64, i64* %kmax.addr, align 8 %conv107 = sitofp i64 %123 to double %mul108 = fmul contract double 1.100000e+00, %conv107 %cmp109 = fcmp ole double %conv106, %mul108 br i1 %cmp109, label %land.lhs.true, label %lor.lhs.false land.lhs.true: ; preds = %while.body97 %124 = load i64, i64* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE1k, align 8 %conv110 = sitofp i64 %124 to double %125 = load i64, i64* %kmin.addr, align 8 %conv111 = sitofp i64 %125 to double %mul112 = fmul contract double 9.000000e-01, %conv111 %cmp113 = fcmp oge double %conv110, %mul112 br i1 %cmp113, label %if.then119, label %lor.lhs.false lor.lhs.false: ; preds = %land.lhs.true, %while.body97 %126 = load i64, i64* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE1k, align 8 %127 = load i64, i64* %kmax.addr, align 8 %add114 = add nsw i64 %127, 2 %cmp115 = icmp sle i64 %126, %add114 br i1 %cmp115, label %land.lhs.true116, label %if.end128 land.lhs.true116: ; preds = %lor.lhs.false %128 = load i64, i64* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE1k, align 8 %129 = load i64, i64* %kmin.addr, align 8 %sub117 = sub nsw i64 %129, 2 %cmp118 = icmp sge i64 %128, %sub117 br i1 %cmp118, label %if.then119, label %if.end128 if.then119: ; preds = %land.lhs.true116, %land.lhs.true %130 = load %struct.Points*, %struct.Points** %points.addr, align 8 %131 = load i32*, i32** 
@_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE8feasible, align 8 %132 = load i32, i32* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE11numfeasible, align 4 %133 = load float, float* %z, align 4 %134 = load i64, i64* %kmax.addr, align 8 %conv120 = trunc i64 %134 to i32 %135 = load float, float* %cost, align 4 %136 = load i64, i64* %kmax.addr, align 8 %mul121 = mul nsw i64 3, %136 %conv122 = sitofp i64 %mul121 to float %137 = load i64, i64* %kmax.addr, align 8 %conv123 = sitofp i64 %137 to float %call124 = call float @_ZSt3logf(float %conv123) %mul125 = fmul contract float %conv122, %call124 %conv126 = fptosi float %mul125 to i64 %138 = load i32, i32* %pid.addr, align 4 %139 = load %union.pthread_barrier_t*, %union.pthread_barrier_t** %barrier.addr, align 8 %call127 = call float @_Z3pFLP6PointsPiifPliflfiP17pthread_barrier_t(%struct.Points* %130, i32* %131, i32 %132, float %133, i64* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE1k, i32 %conv120, float %135, i64 %conv126, float 0x3F50624DE0000000, i32 %138, %union.pthread_barrier_t* %139) store float %call127, float* %cost, align 4 br label %if.end128 if.end128: ; preds = %if.then119, %land.lhs.true116, %lor.lhs.false %140 = load i64, i64* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE1k, align 8 %141 = load i64, i64* %kmax.addr, align 8 %cmp129 = icmp sgt i64 %140, %141 br i1 %cmp129, label %if.then130, label %if.end139 if.then130: ; preds = %if.end128 %142 = load float, float* %z, align 4 store float %142, float* %loz, align 4 %143 = load float, float* %hiz, align 4 %144 = load float, float* %loz, align 4 %add131 = fadd contract float %143, %144 %conv132 = fpext float %add131 to double %div133 = fdiv double %conv132, 2.000000e+00 %conv134 = fptrunc double %div133 to float store float %conv134, float* %z, align 4 %145 = load float, float* %z, align 4 %146 = load float, float* %loz, align 4 %sub135 = fsub contract float %145, %146 %147 = load i64, i64* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE1k, align 8 
%conv136 = sitofp i64 %147 to float %mul137 = fmul contract float %sub135, %conv136 %148 = load float, float* %cost, align 4 %add138 = fadd contract float %148, %mul137 store float %add138, float* %cost, align 4 br label %if.end139 if.end139: ; preds = %if.then130, %if.end128 %149 = load i64, i64* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE1k, align 8 %150 = load i64, i64* %kmin.addr, align 8 %cmp140 = icmp slt i64 %149, %150 br i1 %cmp140, label %if.then141, label %if.end150 if.then141: ; preds = %if.end139 %151 = load float, float* %z, align 4 store float %151, float* %hiz, align 4 %152 = load float, float* %hiz, align 4 %153 = load float, float* %loz, align 4 %add142 = fadd contract float %152, %153 %conv143 = fpext float %add142 to double %div144 = fdiv double %conv143, 2.000000e+00 %conv145 = fptrunc double %div144 to float store float %conv145, float* %z, align 4 %154 = load float, float* %z, align 4 %155 = load float, float* %hiz, align 4 %sub146 = fsub contract float %154, %155 %156 = load i64, i64* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE1k, align 8 %conv147 = sitofp i64 %156 to float %mul148 = fmul contract float %sub146, %conv147 %157 = load float, float* %cost, align 4 %add149 = fadd contract float %157, %mul148 store float %add149, float* %cost, align 4 br label %if.end150 if.end150: ; preds = %if.then141, %if.end139 %158 = load i64, i64* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE1k, align 8 %159 = load i64, i64* %kmax.addr, align 8 %cmp151 = icmp sle i64 %158, %159 br i1 %cmp151, label %land.lhs.true152, label %lor.lhs.false154 land.lhs.true152: ; preds = %if.end150 %160 = load i64, i64* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE1k, align 8 %161 = load i64, i64* %kmin.addr, align 8 %cmp153 = icmp sge i64 %160, %161 br i1 %cmp153, label %if.then159, label %lor.lhs.false154 lor.lhs.false154: ; preds = %land.lhs.true152, %if.end150 %162 = load float, float* %loz, align 4 %conv155 = fpext float %162 to double %163 = load float, float* 
; ---------------------------------------------------------------------------
; Tail of a function definition that begins before this span (it returns a
; float via %retval). Reproduced unchanged; only line breaks were restored.
; ---------------------------------------------------------------------------
  %hiz, align 4
  %conv156 = fpext float %163 to double
  %mul157 = fmul contract double 0x3FEFF7CED916872B, %conv156
  %cmp158 = fcmp oge double %conv155, %mul157
  br i1 %cmp158, label %if.then159, label %if.end160

if.then159:                                       ; preds = %lor.lhs.false154, %land.lhs.true152
  br label %while.end161

if.end160:                                        ; preds = %lor.lhs.false154
  br label %while.body97

while.end161:                                     ; preds = %if.then159
  %164 = load i32, i32* %pid.addr, align 4
  %cmp162 = icmp eq i32 %164, 0
  br i1 %cmp162, label %if.then163, label %if.end164

if.then163:                                       ; preds = %while.end161
  %165 = load i32*, i32** @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE8feasible, align 8
  %166 = bitcast i32* %165 to i8*
  call void @free(i8* %166) #2
  %167 = load float*, float** @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE4hizs, align 8
  %168 = bitcast float* %167 to i8*
  call void @free(i8* %168) #2
  %169 = load i64, i64* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE1k, align 8
  %170 = load i64*, i64** %kfinal.addr, align 8
  store i64 %169, i64* %170, align 8
  br label %if.end164

if.end164:                                        ; preds = %if.then163, %while.end161
  %171 = load float, float* %cost, align 4
  store float %171, float* %retval, align 4
  br label %return

return:                                           ; preds = %if.end164, %if.end51
  %172 = load float, float* %retval, align 4
  ret float %172
}

; Function Attrs: nounwind
declare dso_local noalias i8* @calloc(i64, i64) #7

; ---------------------------------------------------------------------------
; i32 contcenters(%struct.Points* %points)
;
; Walks every element i of the point array (Points field 2, value name %p) up
; to the count held in Points field 0 (value name %num). For each element whose
; Point field 2 (value name %assign) is different from i, the element is folded
; into the element it is assigned to:
;   relweight = p[assign].weight + p[i].weight
;   relweight = p[i].weight / relweight
;   for ii in [0, Points field 1 (value name %dim)):
;       p[assign].coord[ii] = p[assign].coord[ii] * (1.0 - relweight)   (computed in double)
;       p[assign].coord[ii] = p[assign].coord[ii] + p[i].coord[ii] * relweight
;   p[assign].weight = p[assign].weight + p[i].weight
; Elements with assign == i are left untouched.
;
; Returns: always 0.
;
; Field names above are taken from the IR value names (%weight = Point field 0,
; %coord = Point field 1, %assign = Point field 2).
; NOTE(review): the division has no guard for a zero weight sum — confirm the
; callers guarantee positive weights.
; ---------------------------------------------------------------------------
; Function Attrs: noinline nounwind optnone uwtable
define dso_local i32 @_Z11contcentersP6Points(%struct.Points* %points) #6 {
entry:
  %points.addr = alloca %struct.Points*, align 8
  %i = alloca i64, align 8
  %ii = alloca i64, align 8
  %relweight = alloca float, align 4
  store %struct.Points* %points, %struct.Points** %points.addr, align 8
  store i64 0, i64* %i, align 8
  br label %for.cond

; Outer loop header: continue while i < points->num.
for.cond:                                         ; preds = %for.inc48, %entry
  %0 = load i64, i64* %i, align 8
  %1 = load %struct.Points*, %struct.Points** %points.addr, align 8
  %num = getelementptr inbounds %struct.Points, %struct.Points* %1, i32 0, i32 0
  %2 = load i64, i64* %num, align 8
  %cmp = icmp slt i64 %0, %2
  br i1 %cmp, label %for.body, label %for.end50

; Only elements with p[i].assign != i are processed.
for.body:                                         ; preds = %for.cond
  %3 = load %struct.Points*, %struct.Points** %points.addr, align 8
  %p = getelementptr inbounds %struct.Points, %struct.Points* %3, i32 0, i32 2
  %4 = load %struct.Point*, %struct.Point** %p, align 8
  %5 = load i64, i64* %i, align 8
  %arrayidx = getelementptr inbounds %struct.Point, %struct.Point* %4, i64 %5
  %assign = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx, i32 0, i32 2
  %6 = load i64, i64* %assign, align 8
  %7 = load i64, i64* %i, align 8
  %cmp1 = icmp ne i64 %6, %7
  br i1 %cmp1, label %if.then, label %if.end

; relweight = p[i].weight / (p[p[i].assign].weight + p[i].weight); then ii = 0.
if.then:                                          ; preds = %for.body
  %8 = load %struct.Points*, %struct.Points** %points.addr, align 8
  %p2 = getelementptr inbounds %struct.Points, %struct.Points* %8, i32 0, i32 2
  %9 = load %struct.Point*, %struct.Point** %p2, align 8
  %10 = load %struct.Points*, %struct.Points** %points.addr, align 8
  %p3 = getelementptr inbounds %struct.Points, %struct.Points* %10, i32 0, i32 2
  %11 = load %struct.Point*, %struct.Point** %p3, align 8
  %12 = load i64, i64* %i, align 8
  %arrayidx4 = getelementptr inbounds %struct.Point, %struct.Point* %11, i64 %12
  %assign5 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx4, i32 0, i32 2
  %13 = load i64, i64* %assign5, align 8
  %arrayidx6 = getelementptr inbounds %struct.Point, %struct.Point* %9, i64 %13
  %weight = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx6, i32 0, i32 0
  %14 = load float, float* %weight, align 8
  %15 = load %struct.Points*, %struct.Points** %points.addr, align 8
  %p7 = getelementptr inbounds %struct.Points, %struct.Points* %15, i32 0, i32 2
  %16 = load %struct.Point*, %struct.Point** %p7, align 8
  %17 = load i64, i64* %i, align 8
  %arrayidx8 = getelementptr inbounds %struct.Point, %struct.Point* %16, i64 %17
  %weight9 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx8, i32 0, i32 0
  %18 = load float, float* %weight9, align 8
  %add = fadd contract float %14, %18
  store float %add, float* %relweight, align 4
  %19 = load %struct.Points*, %struct.Points** %points.addr, align 8
  %p10 = getelementptr inbounds %struct.Points, %struct.Points* %19, i32 0, i32 2
  %20 = load %struct.Point*, %struct.Point** %p10, align 8
  %21 = load i64, i64* %i, align 8
  %arrayidx11 = getelementptr inbounds %struct.Point, %struct.Point* %20, i64 %21
  %weight12 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx11, i32 0, i32 0
  %22 = load float, float* %weight12, align 8
  %23 = load float, float* %relweight, align 4
  %div = fdiv float %22, %23
  store float %div, float* %relweight, align 4
  store i64 0, i64* %ii, align 8
  br label %for.cond13

; Inner loop header: continue while ii < (i64) points->dim.
for.cond13:                                       ; preds = %for.inc, %if.then
  %24 = load i64, i64* %ii, align 8
  %25 = load %struct.Points*, %struct.Points** %points.addr, align 8
  %dim = getelementptr inbounds %struct.Points, %struct.Points* %25, i32 0, i32 1
  %26 = load i32, i32* %dim, align 8
  %conv = sext i32 %26 to i64
  %cmp14 = icmp slt i64 %24, %conv
  br i1 %cmp14, label %for.body15, label %for.end

; Step 1: scale the assigned element's coord[ii] by (1.0 - relweight) in double
;         and store it back as float.
; Step 2: add p[i].coord[ii] * relweight to that same slot (re-loaded after
;         the first store).
for.body15:                                       ; preds = %for.cond13
  %27 = load float, float* %relweight, align 4
  %conv16 = fpext float %27 to double
  %sub = fsub contract double 1.000000e+00, %conv16
  %28 = load %struct.Points*, %struct.Points** %points.addr, align 8
  %p17 = getelementptr inbounds %struct.Points, %struct.Points* %28, i32 0, i32 2
  %29 = load %struct.Point*, %struct.Point** %p17, align 8
  %30 = load %struct.Points*, %struct.Points** %points.addr, align 8
  %p18 = getelementptr inbounds %struct.Points, %struct.Points* %30, i32 0, i32 2
  %31 = load %struct.Point*, %struct.Point** %p18, align 8
  %32 = load i64, i64* %i, align 8
  %arrayidx19 = getelementptr inbounds %struct.Point, %struct.Point* %31, i64 %32
  %assign20 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx19, i32 0, i32 2
  %33 = load i64, i64* %assign20, align 8
  %arrayidx21 = getelementptr inbounds %struct.Point, %struct.Point* %29, i64 %33
  %coord = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx21, i32 0, i32 1
  %34 = load float*, float** %coord, align 8
  %35 = load i64, i64* %ii, align 8
  %arrayidx22 = getelementptr inbounds float, float* %34, i64 %35
  %36 = load float, float* %arrayidx22, align 4
  %conv23 = fpext float %36 to double
  %mul = fmul contract double %conv23, %sub
  %conv24 = fptrunc double %mul to float
  store float %conv24, float* %arrayidx22, align 4
  %37 = load %struct.Points*, %struct.Points** %points.addr, align 8
  %p25 = getelementptr inbounds %struct.Points, %struct.Points* %37, i32 0, i32 2
  %38 = load %struct.Point*, %struct.Point** %p25, align 8
  %39 = load i64, i64* %i, align 8
  %arrayidx26 = getelementptr inbounds %struct.Point, %struct.Point* %38, i64 %39
  %coord27 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx26, i32 0, i32 1
  %40 = load float*, float** %coord27, align 8
  %41 = load i64, i64* %ii, align 8
  %arrayidx28 = getelementptr inbounds float, float* %40, i64 %41
  %42 = load float, float* %arrayidx28, align 4
  %43 = load float, float* %relweight, align 4
  %mul29 = fmul contract float %42, %43
  %44 = load %struct.Points*, %struct.Points** %points.addr, align 8
  %p30 = getelementptr inbounds %struct.Points, %struct.Points* %44, i32 0, i32 2
  %45 = load %struct.Point*, %struct.Point** %p30, align 8
  %46 = load %struct.Points*, %struct.Points** %points.addr, align 8
  %p31 = getelementptr inbounds %struct.Points, %struct.Points* %46, i32 0, i32 2
  %47 = load %struct.Point*, %struct.Point** %p31, align 8
  %48 = load i64, i64* %i, align 8
  %arrayidx32 = getelementptr inbounds %struct.Point, %struct.Point* %47, i64 %48
  %assign33 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx32, i32 0, i32 2
  %49 = load i64, i64* %assign33, align 8
  %arrayidx34 = getelementptr inbounds %struct.Point, %struct.Point* %45, i64 %49
  %coord35 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx34, i32 0, i32 1
  %50 = load float*, float** %coord35, align 8
  %51 = load i64, i64* %ii, align 8
  %arrayidx36 = getelementptr inbounds float, float* %50, i64 %51
  %52 = load float, float* %arrayidx36, align 4
  %add37 = fadd contract float %52, %mul29
  store float %add37, float* %arrayidx36, align 4
  br label %for.inc

; ii++
for.inc:                                          ; preds = %for.body15
  %53 = load i64, i64* %ii, align 8
  %inc = add nsw i64 %53, 1
  store i64 %inc, i64* %ii, align 8
  br label %for.cond13

; After the coordinate loop: p[p[i].assign].weight += p[i].weight.
for.end:                                          ; preds = %for.cond13
  %54 = load %struct.Points*, %struct.Points** %points.addr, align 8
  %p38 = getelementptr inbounds %struct.Points, %struct.Points* %54, i32 0, i32 2
  %55 = load %struct.Point*, %struct.Point** %p38, align 8
  %56 = load i64, i64* %i, align 8
  %arrayidx39 = getelementptr inbounds %struct.Point, %struct.Point* %55, i64 %56
  %weight40 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx39, i32 0, i32 0
  %57 = load float, float* %weight40, align 8
  %58 = load %struct.Points*, %struct.Points** %points.addr, align 8
  %p41 = getelementptr inbounds %struct.Points, %struct.Points* %58, i32 0, i32 2
  %59 = load %struct.Point*, %struct.Point** %p41, align 8
  %60 = load %struct.Points*, %struct.Points** %points.addr, align 8
  %p42 = getelementptr inbounds %struct.Points, %struct.Points* %60, i32 0, i32 2
  %61 = load %struct.Point*, %struct.Point** %p42, align 8
  %62 = load i64, i64* %i, align 8
  %arrayidx43 = getelementptr inbounds %struct.Point, %struct.Point* %61, i64 %62
  %assign44 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx43, i32 0, i32 2
  %63 = load i64, i64* %assign44, align 8
  %arrayidx45 = getelementptr inbounds %struct.Point, %struct.Point* %59, i64 %63
  %weight46 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx45, i32 0, i32 0
  %64 = load float, float* %weight46, align 8
  %add47 = fadd contract float %64, %57
  store float %add47, float* %weight46, align 8
  br label %if.end

if.end:                                           ; preds = %for.end, %for.body
  br label %for.inc48

; i++
for.inc48:                                        ; preds = %if.end
  %65 = load i64, i64* %i, align 8
  %inc49 = add nsw i64 %65, 1
  store i64 %inc49, i64* %i, align 8
  br label %for.cond

for.end50:                                        ; preds = %for.cond
  ret i32 0
}

; ---------------------------------------------------------------------------
; void copycenters(%struct.Points* %points, %struct.Points* %centers,
;                  i64* %centerIDs, i64 %offset)
;
; 1. Allocates a zeroed byte array of points->num entries (%is_a_median) and
;    sets is_a_median[p[i].assign] = 1 for every element i of %points.
; 2. Starting at k = centers->num, scans i over [0, points->num). For every i
;    with a non-zero flag it
;      - memcpy's points->dim * 4 bytes from points->p[i].coord to
;        centers->p[k].coord,
;      - copies points->p[i].weight to centers->p[k].weight,
;      - stores i + offset into centerIDs[k],
;      - increments k.
; 3. Writes the final k back to centers->num and frees the flag array.
;
; NOTE(review): the calloc result is stored and dereferenced without a null
; check.
; NOTE(review): no bound on k is checked here; presumably the caller sizes
; centers->p and centerIDs for all appended entries — verify against caller.
; ---------------------------------------------------------------------------
; Function Attrs: noinline nounwind optnone uwtable
define dso_local void @_Z11copycentersP6PointsS0_Pll(%struct.Points* %points, %struct.Points* %centers, i64* %centerIDs, i64 %offset) #6 {
entry:
  %points.addr = alloca %struct.Points*, align 8
  %centers.addr = alloca %struct.Points*, align 8
  %centerIDs.addr = alloca i64*, align 8
  %offset.addr = alloca i64, align 8
  %i = alloca i64, align 8
  %k = alloca i64, align 8
  %is_a_median = alloca i8*, align 8
  store %struct.Points* %points, %struct.Points** %points.addr, align 8
  store %struct.Points* %centers, %struct.Points** %centers.addr, align 8
  store i64* %centerIDs, i64** %centerIDs.addr, align 8
  store i64 %offset, i64* %offset.addr, align 8
  %0 = load %struct.Points*, %struct.Points** %points.addr, align 8
  %num = getelementptr inbounds %struct.Points, %struct.Points* %0, i32 0, i32 0
  %1 = load i64, i64* %num, align 8
  ; calloc(points->num, 1): one zero-initialised flag byte per point.
  %call = call noalias i8* @calloc(i64 %1, i64 1) #2
  store i8* %call, i8** %is_a_median, align 8
  store i64 0, i64* %i, align 8
  br label %for.cond

; First loop header: i < points->num.
for.cond:                                         ; preds = %for.inc, %entry
  %2 = load i64, i64* %i, align 8
  %3 = load %struct.Points*, %struct.Points** %points.addr, align 8
  %num1 = getelementptr inbounds %struct.Points, %struct.Points* %3, i32 0, i32 0
  %4 = load i64, i64* %num1, align 8
  %cmp = icmp slt i64 %2, %4
  br i1 %cmp, label %for.body, label %for.end

; is_a_median[points->p[i].assign] = 1
for.body:                                         ; preds = %for.cond
  %5 = load i8*, i8** %is_a_median, align 8
  %6 = load %struct.Points*, %struct.Points** %points.addr, align 8
  %p = getelementptr inbounds %struct.Points, %struct.Points* %6, i32 0, i32 2
  %7 = load %struct.Point*, %struct.Point** %p, align 8
  %8 = load i64, i64* %i, align 8
  %arrayidx = getelementptr inbounds %struct.Point, %struct.Point* %7, i64 %8
  %assign = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx, i32 0, i32 2
  %9 = load i64, i64* %assign, align 8
  %arrayidx2 = getelementptr inbounds i8, i8* %5, i64 %9
  store i8 1, i8* %arrayidx2, align 1
  br label %for.inc

for.inc:                                          ; preds = %for.body
  %10 = load i64, i64* %i, align 8
  %inc = add nsw i64 %10, 1
  store i64 %inc, i64* %i, align 8
  br label %for.cond

; k = centers->num; i = 0
for.end:                                          ; preds = %for.cond
  %11 = load %struct.Points*, %struct.Points** %centers.addr, align 8
  %num3 = getelementptr inbounds %struct.Points, %struct.Points* %11, i32 0, i32 0
  %12 = load i64, i64* %num3, align 8
  store i64 %12, i64* %k, align 8
  store i64 0, i64* %i, align 8
  br label %for.cond4

; Second loop header: i < points->num.
for.cond4:                                        ; preds = %for.inc21, %for.end
  %13 = load i64, i64* %i, align 8
  %14 = load %struct.Points*, %struct.Points** %points.addr, align 8
  %num5 = getelementptr inbounds %struct.Points, %struct.Points* %14, i32 0, i32 0
  %15 = load i64, i64* %num5, align 8
  %cmp6 = icmp slt i64 %13, %15
  br i1 %cmp6, label %for.body7, label %for.end23

; Skip elements whose flag byte is zero.
for.body7:                                        ; preds = %for.cond4
  %16 = load i8*, i8** %is_a_median, align 8
  %17 = load i64, i64* %i, align 8
  %arrayidx8 = getelementptr inbounds i8, i8* %16, i64 %17
  %18 = load i8, i8* %arrayidx8, align 1
  %tobool = trunc i8 %18 to i1
  br i1 %tobool, label %if.then, label %if.end

; Copy coordinates, weight and id of flagged element i into slot k, then k++.
if.then:                                          ; preds = %for.body7
  %19 = load %struct.Points*, %struct.Points** %centers.addr, align 8
  %p9 = getelementptr inbounds %struct.Points, %struct.Points* %19, i32 0, i32 2
  %20 = load %struct.Point*, %struct.Point** %p9, align 8
  %21 = load i64, i64* %k, align 8
  %arrayidx10 = getelementptr inbounds %struct.Point, %struct.Point* %20, i64 %21
  %coord = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx10, i32 0, i32 1
  %22 = load float*, float** %coord, align 8
  %23 = bitcast float* %22 to i8*
  %24 = load %struct.Points*, %struct.Points** %points.addr, align 8
  %p11 = getelementptr inbounds %struct.Points, %struct.Points* %24, i32 0, i32 2
  %25 = load %struct.Point*, %struct.Point** %p11, align 8
  %26 = load i64, i64* %i, align 8
  %arrayidx12 = getelementptr inbounds %struct.Point, %struct.Point* %25, i64 %26
  %coord13 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx12, i32 0, i32 1
  %27 = load float*, float** %coord13, align 8
  %28 = bitcast float* %27 to i8*
  %29 = load %struct.Points*, %struct.Points** %points.addr, align 8
  %dim = getelementptr inbounds %struct.Points, %struct.Points* %29, i32 0, i32 1
  %30 = load i32, i32* %dim, align 8
  %conv = sext i32 %30 to i64
  ; byte count = points->dim * 4 (4 bytes per float coordinate)
  %mul = mul i64 %conv, 4
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %23, i8* align 4 %28, i64 %mul, i1 false)
  %31 = load %struct.Points*, %struct.Points** %points.addr, align 8
  %p14 = getelementptr inbounds %struct.Points, %struct.Points* %31, i32 0, i32 2
  %32 = load %struct.Point*, %struct.Point** %p14, align 8
  %33 = load i64, i64* %i, align 8
  %arrayidx15 = getelementptr inbounds %struct.Point, %struct.Point* %32, i64 %33
  %weight = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx15, i32 0, i32 0
  %34 = load float, float* %weight, align 8
  %35 = load %struct.Points*, %struct.Points** %centers.addr, align 8
  %p16 = getelementptr inbounds %struct.Points, %struct.Points* %35, i32 0, i32 2
  %36 = load %struct.Point*, %struct.Point** %p16, align 8
  %37 = load i64, i64* %k, align 8
  %arrayidx17 = getelementptr inbounds %struct.Point, %struct.Point* %36, i64 %37
  %weight18 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx17, i32 0, i32 0
  store float %34, float* %weight18, align 8
  %38 = load i64, i64* %i, align 8
  %39 = load i64, i64* %offset.addr, align 8
  ; centerIDs[k] = i + offset
  %add = add nsw i64 %38, %39
  %40 = load i64*, i64** %centerIDs.addr, align 8
  %41 = load i64, i64* %k, align 8
  %arrayidx19 = getelementptr inbounds i64, i64* %40, i64 %41
  store i64 %add, i64* %arrayidx19, align 8
  %42 = load i64, i64* %k, align 8
  %inc20 = add nsw i64 %42, 1
  store i64 %inc20, i64* %k, align 8
  br label %if.end

if.end:                                           ; preds = %if.then, %for.body7
  br label %for.inc21

for.inc21:                                        ; preds = %if.end
  %43 = load i64, i64* %i, align 8
  %inc22 = add nsw i64 %43, 1
  store i64 %inc22, i64* %i, align 8
  br label %for.cond4

; centers->num = k; release the flag array.
for.end23:                                        ; preds = %for.cond4
  %44 = load i64, i64* %k, align 8
  %45 = load %struct.Points*, %struct.Points** %centers.addr, align 8
  %num24 = getelementptr inbounds %struct.Points, %struct.Points* %45, i32 0, i32 0
  store i64 %44, i64* %num24, align 8
  %46 = load i8*, i8** %is_a_median, align 8
  call void @free(i8* %46) #2
  ret void
}

; ---------------------------------------------------------------------------
; i8* localSearchSub(i8* %arg_)
;
; Reinterprets %arg_ as a %struct.pkmedian_arg_t*, loads its six fields
; (0: Points*, 1: kmin, 2: kmax, 3: kfinal pointer, 4: pid, 5: barrier
; pointer — names per the IR value names) and forwards them to pkmedian.
; The float returned by pkmedian is discarded.
;
; Returns: always null.
; ---------------------------------------------------------------------------
; Function Attrs: noinline optnone uwtable
define dso_local i8* @_Z14localSearchSubPv(i8* %arg_) #3 {
entry:
  %arg_.addr = alloca i8*, align 8
  %arg = alloca %struct.pkmedian_arg_t*, align 8
  store i8* %arg_, i8** %arg_.addr, align 8
  %0 = load i8*, i8** %arg_.addr, align 8
  %1 = bitcast i8* %0 to %struct.pkmedian_arg_t*
  store %struct.pkmedian_arg_t* %1, %struct.pkmedian_arg_t** %arg, align 8
  %2 = load %struct.pkmedian_arg_t*, %struct.pkmedian_arg_t** %arg, align 8
  %points = getelementptr inbounds %struct.pkmedian_arg_t, %struct.pkmedian_arg_t* %2, i32 0, i32 0
  %3 = load %struct.Points*, %struct.Points** %points, align 8
  %4 = load %struct.pkmedian_arg_t*, %struct.pkmedian_arg_t** %arg, align 8
  %kmin = getelementptr inbounds %struct.pkmedian_arg_t, %struct.pkmedian_arg_t* %4, i32 0, i32 1
  %5 = load i64, i64* %kmin, align 8
  %6 = load %struct.pkmedian_arg_t*, %struct.pkmedian_arg_t** %arg, align 8
  %kmax = getelementptr inbounds %struct.pkmedian_arg_t, %struct.pkmedian_arg_t* %6, i32 0, i32 2
  %7 = load i64, i64* %kmax, align 8
  %8 = load %struct.pkmedian_arg_t*, %struct.pkmedian_arg_t** %arg, align 8
  %kfinal = getelementptr inbounds %struct.pkmedian_arg_t, %struct.pkmedian_arg_t* %8, i32 0, i32 3
  %9 = load i64*, i64** %kfinal, align 8
  %10 = load %struct.pkmedian_arg_t*, %struct.pkmedian_arg_t** %arg, align 8
  %pid = getelementptr inbounds %struct.pkmedian_arg_t, %struct.pkmedian_arg_t* %10, i32 0, i32 4
  %11 = load i32, i32* %pid, align 8
  %12 = load %struct.pkmedian_arg_t*, %struct.pkmedian_arg_t** %arg, align 8
  %barrier = getelementptr inbounds %struct.pkmedian_arg_t, %struct.pkmedian_arg_t* %12, i32 0, i32 5
  %13 = load %union.pthread_barrier_t*, %union.pthread_barrier_t** %barrier, align 8
  %call = call float @_Z8pkmedianP6PointsllPliP17pthread_barrier_t(%struct.Points* %3, i64 %5, i64 %7, i64* %9, i32 %11, %union.pthread_barrier_t* %13)
  ret i8* null
}

; ---------------------------------------------------------------------------
; void localSearch(%struct.Points* %points, i64 %kmin, i64 %kmax, i64* %kfinal)
;
; 1. Records a start time from gettime().
; 2. Allocates two arrays with operator new[] (_Znam): nproc * 8 bytes
;    (%threads) and nproc * 48 bytes (%arg, an array of pkmedian_arg_t). When
;    the size multiplication overflows, the requested size is -1.
; 3. For i in [0, nproc): fills arg[i] with points, kmin, kmax, pid = i,
;    kfinal and the address of the stack slot %barrier, then calls
;    localSearchSub directly with the address of arg[0].
; 4. Runs a second loop over [0, nproc) whose body is empty.
; 5. Releases both arrays with operator delete[] (_ZdaPv) and adds the elapsed
;    time (second gettime() minus the first) to @time_local_search.
;
; NOTE(review): the call in step 3 passes element 0 rather than element i, so
; every iteration runs with the arguments stored in arg[0] — confirm this is
; the intended non-threaded behaviour.
; NOTE(review): %threads is only allocated and freed, and %barrier is never
; initialised in this function.
; ---------------------------------------------------------------------------
; Function Attrs: noinline optnone uwtable
define dso_local void @_Z11localSearchP6PointsllPl(%struct.Points* %points, i64 %kmin, i64 %kmax, i64* %kfinal) #3 {
entry:
  %points.addr = alloca %struct.Points*, align 8
  %kmin.addr = alloca i64, align 8
  %kmax.addr = alloca i64, align 8
  %kfinal.addr = alloca i64*, align 8
  %t1 = alloca double, align 8
  %barrier = alloca %union.pthread_barrier_t, align 8
  %threads = alloca i64*, align 8
  %arg = alloca %struct.pkmedian_arg_t*, align 8
  %i = alloca i32, align 4
  %i20 = alloca i32, align 4
  %t2 = alloca double, align 8
  store %struct.Points* %points, %struct.Points** %points.addr, align 8
  store i64 %kmin, i64* %kmin.addr, align 8
  store i64 %kmax, i64* %kmax.addr, align 8
  store i64* %kfinal, i64** %kfinal.addr, align 8
  %call = call double @_Z7gettimev()
  store double %call, double* %t1, align 8
  ; threads = new[] of nproc 8-byte elements (size -1 on multiply overflow)
  %0 = load i32, i32* @_ZL5nproc, align 4
  %1 = sext i32 %0 to i64
  %2 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %1, i64 8)
  %3 = extractvalue { i64, i1 } %2, 1
  %4 = extractvalue { i64, i1 } %2, 0
  %5 = select i1 %3, i64 -1, i64 %4
  %call1 = call i8* @_Znam(i64 %5) #16
  %6 = bitcast i8* %call1 to i64*
  store i64* %6, i64** %threads, align 8
  ; arg = new[] of nproc 48-byte pkmedian_arg_t elements (same overflow guard)
  %7 = load i32, i32* @_ZL5nproc, align 4
  %8 = sext i32 %7 to i64
  %9 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %8, i64 48)
  %10 = extractvalue { i64, i1 } %9, 1
  %11 = extractvalue { i64, i1 } %9, 0
  %12 = select i1 %10, i64 -1, i64 %11
  %call2 = call i8* @_Znam(i64 %12) #16
  %13 = bitcast i8* %call2 to %struct.pkmedian_arg_t*
  store %struct.pkmedian_arg_t* %13, %struct.pkmedian_arg_t** %arg, align 8
  store i32 0, i32* %i, align 4
  br label %for.cond

; First loop header: i < nproc.
for.cond:                                         ; preds = %for.inc, %entry
  %14 = load i32, i32* %i, align 4
  %15 = load i32, i32* @_ZL5nproc, align 4
  %cmp = icmp slt i32 %14, %15
  br i1 %cmp, label %for.body, label %for.end

; Populate arg[i] field by field, then run the worker on arg[0].
for.body:                                         ; preds = %for.cond
  ; arg[i].points = points
  %16 = load %struct.Points*, %struct.Points** %points.addr, align 8
  %17 = load %struct.pkmedian_arg_t*, %struct.pkmedian_arg_t** %arg, align 8
  %18 = load i32, i32* %i, align 4
  %idxprom = sext i32 %18 to i64
  %arrayidx = getelementptr inbounds %struct.pkmedian_arg_t, %struct.pkmedian_arg_t* %17, i64 %idxprom
  %points3 = getelementptr inbounds %struct.pkmedian_arg_t, %struct.pkmedian_arg_t* %arrayidx, i32 0, i32 0
  store %struct.Points* %16, %struct.Points** %points3, align 8
  ; arg[i].kmin = kmin
  %19 = load i64, i64* %kmin.addr, align 8
  %20 = load %struct.pkmedian_arg_t*, %struct.pkmedian_arg_t** %arg, align 8
  %21 = load i32, i32* %i, align 4
  %idxprom4 = sext i32 %21 to i64
  %arrayidx5 = getelementptr inbounds %struct.pkmedian_arg_t, %struct.pkmedian_arg_t* %20, i64 %idxprom4
  %kmin6 = getelementptr inbounds %struct.pkmedian_arg_t, %struct.pkmedian_arg_t* %arrayidx5, i32 0, i32 1
  store i64 %19, i64* %kmin6, align 8
  ; arg[i].kmax = kmax
  %22 = load i64, i64* %kmax.addr, align 8
  %23 = load %struct.pkmedian_arg_t*, %struct.pkmedian_arg_t** %arg, align 8
  %24 = load i32, i32* %i, align 4
  %idxprom7 = sext i32 %24 to i64
  %arrayidx8 = getelementptr inbounds %struct.pkmedian_arg_t, %struct.pkmedian_arg_t* %23, i64 %idxprom7
  %kmax9 = getelementptr inbounds %struct.pkmedian_arg_t, %struct.pkmedian_arg_t* %arrayidx8, i32 0, i32 2
  store i64 %22, i64* %kmax9, align 8
  ; arg[i].pid = i
  %25 = load i32, i32* %i, align 4
  %26 = load %struct.pkmedian_arg_t*, %struct.pkmedian_arg_t** %arg, align 8
  %27 = load i32, i32* %i, align 4
  %idxprom10 = sext i32 %27 to i64
  %arrayidx11 = getelementptr inbounds %struct.pkmedian_arg_t, %struct.pkmedian_arg_t* %26, i64 %idxprom10
  %pid = getelementptr inbounds %struct.pkmedian_arg_t, %struct.pkmedian_arg_t* %arrayidx11, i32 0, i32 4
  store i32 %25, i32* %pid, align 8
  ; arg[i].kfinal = kfinal
  %28 = load i64*, i64** %kfinal.addr, align 8
  %29 = load %struct.pkmedian_arg_t*, %struct.pkmedian_arg_t** %arg, align 8
  %30 = load i32, i32* %i, align 4
  %idxprom12 = sext i32 %30 to i64
  %arrayidx13 = getelementptr inbounds %struct.pkmedian_arg_t, %struct.pkmedian_arg_t* %29, i64 %idxprom12
  %kfinal14 = getelementptr inbounds %struct.pkmedian_arg_t, %struct.pkmedian_arg_t* %arrayidx13, i32 0, i32 3
  store i64* %28, i64** %kfinal14, align 8
  ; arg[i].barrier = &barrier (the local stack slot)
  %31 = load %struct.pkmedian_arg_t*, %struct.pkmedian_arg_t** %arg, align 8
  %32 = load i32, i32* %i, align 4
  %idxprom15 = sext i32 %32 to i64
  %arrayidx16 = getelementptr inbounds %struct.pkmedian_arg_t, %struct.pkmedian_arg_t* %31, i64 %idxprom15
  %barrier17 = getelementptr inbounds %struct.pkmedian_arg_t, %struct.pkmedian_arg_t* %arrayidx16, i32 0, i32 5
  store %union.pthread_barrier_t* %barrier, %union.pthread_barrier_t** %barrier17, align 8
  ; localSearchSub(&arg[0]) — constant index 0, independent of i
  %33 = load %struct.pkmedian_arg_t*, %struct.pkmedian_arg_t** %arg, align 8
  %arrayidx18 = getelementptr inbounds %struct.pkmedian_arg_t, %struct.pkmedian_arg_t* %33, i64 0
  %34 = bitcast %struct.pkmedian_arg_t* %arrayidx18 to i8*
  %call19 = call i8* @_Z14localSearchSubPv(i8* %34)
  br label %for.inc

for.inc:                                          ; preds = %for.body
  %35 = load i32, i32* %i, align 4
  %inc = add nsw i32 %35, 1
  store i32 %inc, i32* %i, align 4
  br label %for.cond

for.end:                                          ; preds = %for.cond
  store i32 0, i32* %i20, align 4
  br label %for.cond21

; Second loop over [0, nproc): the body performs no work.
for.cond21:                                       ; preds = %for.inc24, %for.end
  %36 = load i32, i32* %i20, align 4
  %37 = load i32, i32* @_ZL5nproc, align 4
  %cmp22 = icmp slt i32 %36, %37
  br i1 %cmp22, label %for.body23, label %for.end26

for.body23:                                       ; preds = %for.cond21
  br label %for.inc24

for.inc24:                                        ; preds = %for.body23
  %38 = load i32, i32* %i20, align 4
  %inc25 = add nsw i32 %38, 1
  store i32 %inc25, i32* %i20, align 4
  br label %for.cond21

; delete[] threads (guarded by a null test)
for.end26:                                        ; preds = %for.cond21
  %39 = load i64*, i64** %threads, align 8
  %isnull = icmp eq i64* %39, null
  br i1 %isnull, label %delete.end, label %delete.notnull

delete.notnull:                                   ; preds = %for.end26
  %40 = bitcast i64* %39 to i8*
  call void @_ZdaPv(i8* %40) #17
  br label %delete.end

; delete[] arg (guarded by a null test)
delete.end:                                       ; preds = %delete.notnull, %for.end26
  %41 = load %struct.pkmedian_arg_t*, %struct.pkmedian_arg_t** %arg, align 8
  %isnull27 = icmp eq %struct.pkmedian_arg_t* %41, null
  br i1 %isnull27, label %delete.end29, label %delete.notnull28

delete.notnull28:                                 ; preds = %delete.end
  %42 = bitcast %struct.pkmedian_arg_t* %41 to i8*
  call void @_ZdaPv(i8* %42) #17
  br label %delete.end29

; time_local_search += t2 - t1
delete.end29:                                     ; preds = %delete.notnull28, %delete.end
  %call30 = call double @_Z7gettimev()
  store double %call30, double* %t2, align 8
  %43 = load double, double* %t2, align 8
  %44 = load double, double* %t1, align 8
  %sub = fsub contract double %43, %44
  %45 = load double, double* @time_local_search, align 8
  %add = fadd contract double %45, %sub
  store double %add, double* @time_local_search, align 8
  ret void
}

; Function Attrs: nounwind readnone speculatable willreturn
declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64) #8

; Function Attrs: nobuiltin
declare dso_local noalias i8* @_Znam(i64) #9

; Function Attrs: nobuiltin nounwind
declare dso_local void @_ZdaPv(i8*) #10

; ---------------------------------------------------------------------------
; Head of a definition that continues past the end of this span. Reproduced
; unchanged; only line breaks were restored.
; ---------------------------------------------------------------------------
; Function Attrs: noinline optnone uwtable
define dso_local void @_Z12outcenterIDsP6PointsPlPc(%struct.Points* %centers, i64* %centerIDs, i8* %outfile) #3 {
entry:
  %centers.addr = alloca %struct.Points*, align 8
  %centerIDs.addr = alloca i64*, align 8
  %outfile.addr = alloca i8*, align 8
  %fp = alloca %struct._IO_FILE*, align 8
  %is_a_median = alloca i32*, align 8
  %i = alloca i32, align 4
  %i6 = alloca i32, align 4
  %k = alloca i32, align 4
  store %struct.Points* %centers, %struct.Points** %centers.addr, align 8
  store i64* %centerIDs, i64** %centerIDs.addr, align 8
  store i8* %outfile, i8** %outfile.addr, align 8
  %0 = load i8*, i8** %outfile.addr, align 8
  %call = call %struct._IO_FILE* @fopen(i8* %0, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.3, i64 0, i64 0))
  store %struct._IO_FILE* %call, %struct._IO_FILE** %fp, align 8
  %1 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8
  %cmp = icmp eq %struct._IO_FILE* %1, null
  br i1 %cmp, label %if.then, label %if.end

if.then:                                          ; preds = %entry
  %2 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
  %3 = load i8*, i8** %outfile.addr, align 8
  %call1 = call i32 (%struct._IO_FILE*, i8*, ...)
@fprintf(%struct._IO_FILE* %2, i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str.5, i64 0, i64 0), i8* %3) call void @exit(i32 1) #15 unreachable if.end: ; preds = %entry %4 = load %struct.Points*, %struct.Points** %centers.addr, align 8 %num = getelementptr inbounds %struct.Points, %struct.Points* %4, i32 0, i32 0 %5 = load i64, i64* %num, align 8 %call2 = call noalias i8* @calloc(i64 4, i64 %5) #2 %6 = bitcast i8* %call2 to i32* store i32* %6, i32** %is_a_median, align 8 store i32 0, i32* %i, align 4 br label %for.cond for.cond: ; preds = %for.inc, %if.end %7 = load i32, i32* %i, align 4 %conv = sext i32 %7 to i64 %8 = load %struct.Points*, %struct.Points** %centers.addr, align 8 %num3 = getelementptr inbounds %struct.Points, %struct.Points* %8, i32 0, i32 0 %9 = load i64, i64* %num3, align 8 %cmp4 = icmp slt i64 %conv, %9 br i1 %cmp4, label %for.body, label %for.end for.body: ; preds = %for.cond %10 = load i32*, i32** %is_a_median, align 8 %11 = load %struct.Points*, %struct.Points** %centers.addr, align 8 %p = getelementptr inbounds %struct.Points, %struct.Points* %11, i32 0, i32 2 %12 = load %struct.Point*, %struct.Point** %p, align 8 %13 = load i32, i32* %i, align 4 %idxprom = sext i32 %13 to i64 %arrayidx = getelementptr inbounds %struct.Point, %struct.Point* %12, i64 %idxprom %assign = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx, i32 0, i32 2 %14 = load i64, i64* %assign, align 8 %arrayidx5 = getelementptr inbounds i32, i32* %10, i64 %14 store i32 1, i32* %arrayidx5, align 4 br label %for.inc for.inc: ; preds = %for.body %15 = load i32, i32* %i, align 4 %inc = add nsw i32 %15, 1 store i32 %inc, i32* %i, align 4 br label %for.cond for.end: ; preds = %for.cond store i32 0, i32* %i6, align 4 br label %for.cond7 for.cond7: ; preds = %for.inc38, %for.end %16 = load i32, i32* %i6, align 4 %conv8 = sext i32 %16 to i64 %17 = load %struct.Points*, %struct.Points** %centers.addr, align 8 %num9 = getelementptr inbounds %struct.Points, 
%struct.Points* %17, i32 0, i32 0 %18 = load i64, i64* %num9, align 8 %cmp10 = icmp slt i64 %conv8, %18 br i1 %cmp10, label %for.body11, label %for.end40 for.body11: ; preds = %for.cond7 %19 = load i32*, i32** %is_a_median, align 8 %20 = load i32, i32* %i6, align 4 %idxprom12 = sext i32 %20 to i64 %arrayidx13 = getelementptr inbounds i32, i32* %19, i64 %idxprom12 %21 = load i32, i32* %arrayidx13, align 4 %tobool = icmp ne i32 %21, 0 br i1 %tobool, label %if.then14, label %if.end37 if.then14: ; preds = %for.body11 %22 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8 %23 = load i64*, i64** %centerIDs.addr, align 8 %24 = load i32, i32* %i6, align 4 %idxprom15 = sext i32 %24 to i64 %arrayidx16 = getelementptr inbounds i64, i64* %23, i64 %idxprom15 %25 = load i64, i64* %arrayidx16, align 8 %call17 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %22, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.6, i64 0, i64 0), i64 %25) %26 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8 %27 = load %struct.Points*, %struct.Points** %centers.addr, align 8 %p18 = getelementptr inbounds %struct.Points, %struct.Points* %27, i32 0, i32 2 %28 = load %struct.Point*, %struct.Point** %p18, align 8 %29 = load i32, i32* %i6, align 4 %idxprom19 = sext i32 %29 to i64 %arrayidx20 = getelementptr inbounds %struct.Point, %struct.Point* %28, i64 %idxprom19 %weight = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx20, i32 0, i32 0 %30 = load float, float* %weight, align 8 %conv21 = fpext float %30 to double %call22 = call i32 (%struct._IO_FILE*, i8*, ...) 
@fprintf(%struct._IO_FILE* %26, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.7, i64 0, i64 0), double %conv21) store i32 0, i32* %k, align 4 br label %for.cond23 for.cond23: ; preds = %for.inc33, %if.then14 %31 = load i32, i32* %k, align 4 %32 = load %struct.Points*, %struct.Points** %centers.addr, align 8 %dim = getelementptr inbounds %struct.Points, %struct.Points* %32, i32 0, i32 1 %33 = load i32, i32* %dim, align 8 %cmp24 = icmp slt i32 %31, %33 br i1 %cmp24, label %for.body25, label %for.end35 for.body25: ; preds = %for.cond23 %34 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8 %35 = load %struct.Points*, %struct.Points** %centers.addr, align 8 %p26 = getelementptr inbounds %struct.Points, %struct.Points* %35, i32 0, i32 2 %36 = load %struct.Point*, %struct.Point** %p26, align 8 %37 = load i32, i32* %i6, align 4 %idxprom27 = sext i32 %37 to i64 %arrayidx28 = getelementptr inbounds %struct.Point, %struct.Point* %36, i64 %idxprom27 %coord = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx28, i32 0, i32 1 %38 = load float*, float** %coord, align 8 %39 = load i32, i32* %k, align 4 %idxprom29 = sext i32 %39 to i64 %arrayidx30 = getelementptr inbounds float, float* %38, i64 %idxprom29 %40 = load float, float* %arrayidx30, align 4 %conv31 = fpext float %40 to double %call32 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %34, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.8, i64 0, i64 0), double %conv31) br label %for.inc33 for.inc33: ; preds = %for.body25 %41 = load i32, i32* %k, align 4 %inc34 = add nsw i32 %41, 1 store i32 %inc34, i32* %k, align 4 br label %for.cond23 for.end35: ; preds = %for.cond23 %42 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8 %call36 = call i32 (%struct._IO_FILE*, i8*, ...) 
@fprintf(%struct._IO_FILE* %42, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.9, i64 0, i64 0)) br label %if.end37 if.end37: ; preds = %for.end35, %for.body11 br label %for.inc38 for.inc38: ; preds = %if.end37 %43 = load i32, i32* %i6, align 4 %inc39 = add nsw i32 %43, 1 store i32 %inc39, i32* %i6, align 4 br label %for.cond7 for.end40: ; preds = %for.cond7 %44 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8 %call41 = call i32 @fclose(%struct._IO_FILE* %44) ret void } ; Function Attrs: noinline optnone uwtable define dso_local void @_Z13streamClusterP7PStreamllillPc(%class.PStream* %stream, i64 %kmin, i64 %kmax, i32 %dim, i64 %chunksize, i64 %centersize, i8* %outfile) #3 { entry: %stream.addr = alloca %class.PStream*, align 8 %kmin.addr = alloca i64, align 8 %kmax.addr = alloca i64, align 8 %dim.addr = alloca i32, align 4 %chunksize.addr = alloca i64, align 8 %centersize.addr = alloca i64, align 8 %outfile.addr = alloca i8*, align 8 %block = alloca float*, align 8 %centerBlock = alloca float*, align 8 %centerIDs = alloca i64*, align 8 %points = alloca %struct.Points, align 8 %i = alloca i32, align 4 %centers = alloca %struct.Points, align 8 %i25 = alloca i32, align 4 %IDoffset = alloca i64, align 8 %kfinal = alloca i64, align 8 %numRead = alloca i64, align 8 %i60 = alloca i32, align 4 store %class.PStream* %stream, %class.PStream** %stream.addr, align 8 store i64 %kmin, i64* %kmin.addr, align 8 store i64 %kmax, i64* %kmax.addr, align 8 store i32 %dim, i32* %dim.addr, align 4 store i64 %chunksize, i64* %chunksize.addr, align 8 store i64 %centersize, i64* %centersize.addr, align 8 store i8* %outfile, i8** %outfile.addr, align 8 %0 = load i64, i64* %chunksize.addr, align 8 %1 = load i32, i32* %dim.addr, align 4 %conv = sext i32 %1 to i64 %mul = mul nsw i64 %0, %conv %mul1 = mul i64 %mul, 4 %call = call noalias i8* @malloc(i64 %mul1) #2 %2 = bitcast i8* %call to float* store float* %2, float** %block, align 8 %3 = load i64, i64* %centersize.addr, 
align 8 %4 = load i32, i32* %dim.addr, align 4 %conv2 = sext i32 %4 to i64 %mul3 = mul nsw i64 %3, %conv2 %mul4 = mul i64 %mul3, 4 %call5 = call noalias i8* @malloc(i64 %mul4) #2 %5 = bitcast i8* %call5 to float* store float* %5, float** %centerBlock, align 8 %6 = load i64, i64* %centersize.addr, align 8 %7 = load i32, i32* %dim.addr, align 4 %conv6 = sext i32 %7 to i64 %mul7 = mul nsw i64 %6, %conv6 %mul8 = mul i64 %mul7, 8 %call9 = call noalias i8* @malloc(i64 %mul8) #2 %8 = bitcast i8* %call9 to i64* store i64* %8, i64** %centerIDs, align 8 %9 = load float*, float** %block, align 8 %cmp = icmp eq float* %9, null br i1 %cmp, label %if.then, label %if.end if.then: ; preds = %entry %10 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 %call10 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @.str.10, i64 0, i64 0)) call void @exit(i32 1) #15 unreachable if.end: ; preds = %entry %11 = load i32, i32* %dim.addr, align 4 %dim11 = getelementptr inbounds %struct.Points, %struct.Points* %points, i32 0, i32 1 store i32 %11, i32* %dim11, align 8 %12 = load i64, i64* %chunksize.addr, align 8 %num = getelementptr inbounds %struct.Points, %struct.Points* %points, i32 0, i32 0 store i64 %12, i64* %num, align 8 %13 = load i64, i64* %chunksize.addr, align 8 %mul12 = mul i64 %13, 32 %call13 = call noalias i8* @malloc(i64 %mul12) #2 %14 = bitcast i8* %call13 to %struct.Point* %p = getelementptr inbounds %struct.Points, %struct.Points* %points, i32 0, i32 2 store %struct.Point* %14, %struct.Point** %p, align 8 store i32 0, i32* %i, align 4 br label %for.cond for.cond: ; preds = %for.inc, %if.end %15 = load i32, i32* %i, align 4 %conv14 = sext i32 %15 to i64 %16 = load i64, i64* %chunksize.addr, align 8 %cmp15 = icmp slt i64 %conv14, %16 br i1 %cmp15, label %for.body, label %for.end for.body: ; preds = %for.cond %17 = load float*, float** %block, align 8 %18 = load i32, i32* %i, align 4 %19 = load 
i32, i32* %dim.addr, align 4 %mul16 = mul nsw i32 %18, %19 %idxprom = sext i32 %mul16 to i64 %arrayidx = getelementptr inbounds float, float* %17, i64 %idxprom %p17 = getelementptr inbounds %struct.Points, %struct.Points* %points, i32 0, i32 2 %20 = load %struct.Point*, %struct.Point** %p17, align 8 %21 = load i32, i32* %i, align 4 %idxprom18 = sext i32 %21 to i64 %arrayidx19 = getelementptr inbounds %struct.Point, %struct.Point* %20, i64 %idxprom18 %coord = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx19, i32 0, i32 1 store float* %arrayidx, float** %coord, align 8 br label %for.inc for.inc: ; preds = %for.body %22 = load i32, i32* %i, align 4 %inc = add nsw i32 %22, 1 store i32 %inc, i32* %i, align 4 br label %for.cond for.end: ; preds = %for.cond %23 = load i32, i32* %dim.addr, align 4 %dim20 = getelementptr inbounds %struct.Points, %struct.Points* %centers, i32 0, i32 1 store i32 %23, i32* %dim20, align 8 %24 = load i64, i64* %centersize.addr, align 8 %mul21 = mul i64 %24, 32 %call22 = call noalias i8* @malloc(i64 %mul21) #2 %25 = bitcast i8* %call22 to %struct.Point* %p23 = getelementptr inbounds %struct.Points, %struct.Points* %centers, i32 0, i32 2 store %struct.Point* %25, %struct.Point** %p23, align 8 %num24 = getelementptr inbounds %struct.Points, %struct.Points* %centers, i32 0, i32 0 store i64 0, i64* %num24, align 8 store i32 0, i32* %i25, align 4 br label %for.cond26 for.cond26: ; preds = %for.inc40, %for.end %26 = load i32, i32* %i25, align 4 %conv27 = sext i32 %26 to i64 %27 = load i64, i64* %centersize.addr, align 8 %cmp28 = icmp slt i64 %conv27, %27 br i1 %cmp28, label %for.body29, label %for.end42 for.body29: ; preds = %for.cond26 %28 = load float*, float** %centerBlock, align 8 %29 = load i32, i32* %i25, align 4 %30 = load i32, i32* %dim.addr, align 4 %mul30 = mul nsw i32 %29, %30 %idxprom31 = sext i32 %mul30 to i64 %arrayidx32 = getelementptr inbounds float, float* %28, i64 %idxprom31 %p33 = getelementptr inbounds 
%struct.Points, %struct.Points* %centers, i32 0, i32 2 %31 = load %struct.Point*, %struct.Point** %p33, align 8 %32 = load i32, i32* %i25, align 4 %idxprom34 = sext i32 %32 to i64 %arrayidx35 = getelementptr inbounds %struct.Point, %struct.Point* %31, i64 %idxprom34 %coord36 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx35, i32 0, i32 1 store float* %arrayidx32, float** %coord36, align 8 %p37 = getelementptr inbounds %struct.Points, %struct.Points* %centers, i32 0, i32 2 %33 = load %struct.Point*, %struct.Point** %p37, align 8 %34 = load i32, i32* %i25, align 4 %idxprom38 = sext i32 %34 to i64 %arrayidx39 = getelementptr inbounds %struct.Point, %struct.Point* %33, i64 %idxprom38 %weight = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx39, i32 0, i32 0 store float 1.000000e+00, float* %weight, align 8 br label %for.inc40 for.inc40: ; preds = %for.body29 %35 = load i32, i32* %i25, align 4 %inc41 = add nsw i32 %35, 1 store i32 %inc41, i32* %i25, align 4 br label %for.cond26 for.end42: ; preds = %for.cond26 store i64 0, i64* %IDoffset, align 8 br label %while.body while.body: ; preds = %for.end42, %if.end94 %36 = load %class.PStream*, %class.PStream** %stream.addr, align 8 %37 = load float*, float** %block, align 8 %38 = load i32, i32* %dim.addr, align 4 %39 = load i64, i64* %chunksize.addr, align 8 %conv43 = trunc i64 %39 to i32 %40 = bitcast %class.PStream* %36 to i64 (%class.PStream*, float*, i32, i32)*** %vtable = load i64 (%class.PStream*, float*, i32, i32)**, i64 (%class.PStream*, float*, i32, i32)*** %40, align 8 %vfn = getelementptr inbounds i64 (%class.PStream*, float*, i32, i32)*, i64 (%class.PStream*, float*, i32, i32)** %vtable, i64 0 %41 = load i64 (%class.PStream*, float*, i32, i32)*, i64 (%class.PStream*, float*, i32, i32)** %vfn, align 8 %call44 = call i64 %41(%class.PStream* %36, float* %37, i32 %38, i32 %conv43) store i64 %call44, i64* %numRead, align 8 %42 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 
%43 = load i64, i64* %numRead, align 8 %call45 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %42, i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.11, i64 0, i64 0), i64 %43) %44 = load %class.PStream*, %class.PStream** %stream.addr, align 8 %45 = bitcast %class.PStream* %44 to i32 (%class.PStream*)*** %vtable46 = load i32 (%class.PStream*)**, i32 (%class.PStream*)*** %45, align 8 %vfn47 = getelementptr inbounds i32 (%class.PStream*)*, i32 (%class.PStream*)** %vtable46, i64 1 %46 = load i32 (%class.PStream*)*, i32 (%class.PStream*)** %vfn47, align 8 %call48 = call i32 %46(%class.PStream* %44) %tobool = icmp ne i32 %call48, 0 br i1 %tobool, label %if.then56, label %lor.lhs.false lor.lhs.false: ; preds = %while.body %47 = load i64, i64* %numRead, align 8 %48 = load i64, i64* %chunksize.addr, align 8 %conv49 = trunc i64 %48 to i32 %conv50 = zext i32 %conv49 to i64 %cmp51 = icmp ult i64 %47, %conv50 br i1 %cmp51, label %land.lhs.true, label %if.end58 land.lhs.true: ; preds = %lor.lhs.false %49 = load %class.PStream*, %class.PStream** %stream.addr, align 8 %50 = bitcast %class.PStream* %49 to i32 (%class.PStream*)*** %vtable52 = load i32 (%class.PStream*)**, i32 (%class.PStream*)*** %50, align 8 %vfn53 = getelementptr inbounds i32 (%class.PStream*)*, i32 (%class.PStream*)** %vtable52, i64 2 %51 = load i32 (%class.PStream*)*, i32 (%class.PStream*)** %vfn53, align 8 %call54 = call i32 %51(%class.PStream* %49) %tobool55 = icmp ne i32 %call54, 0 br i1 %tobool55, label %if.end58, label %if.then56 if.then56: ; preds = %land.lhs.true, %while.body %52 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 %call57 = call i32 (%struct._IO_FILE*, i8*, ...) 
@fprintf(%struct._IO_FILE* %52, i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.12, i64 0, i64 0)) call void @exit(i32 1) #15 unreachable if.end58: ; preds = %land.lhs.true, %lor.lhs.false %53 = load i64, i64* %numRead, align 8 %num59 = getelementptr inbounds %struct.Points, %struct.Points* %points, i32 0, i32 0 store i64 %53, i64* %num59, align 8 store i32 0, i32* %i60, align 4 br label %for.cond61 for.cond61: ; preds = %for.inc70, %if.end58 %54 = load i32, i32* %i60, align 4 %conv62 = sext i32 %54 to i64 %num63 = getelementptr inbounds %struct.Points, %struct.Points* %points, i32 0, i32 0 %55 = load i64, i64* %num63, align 8 %cmp64 = icmp slt i64 %conv62, %55 br i1 %cmp64, label %for.body65, label %for.end72 for.body65: ; preds = %for.cond61 %p66 = getelementptr inbounds %struct.Points, %struct.Points* %points, i32 0, i32 2 %56 = load %struct.Point*, %struct.Point** %p66, align 8 %57 = load i32, i32* %i60, align 4 %idxprom67 = sext i32 %57 to i64 %arrayidx68 = getelementptr inbounds %struct.Point, %struct.Point* %56, i64 %idxprom67 %weight69 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx68, i32 0, i32 0 store float 1.000000e+00, float* %weight69, align 8 br label %for.inc70 for.inc70: ; preds = %for.body65 %58 = load i32, i32* %i60, align 4 %inc71 = add nsw i32 %58, 1 store i32 %inc71, i32* %i60, align 4 br label %for.cond61 for.end72: ; preds = %for.cond61 %num73 = getelementptr inbounds %struct.Points, %struct.Points* %points, i32 0, i32 0 %59 = load i64, i64* %num73, align 8 %mul74 = mul i64 %59, 1 %call75 = call noalias i8* @malloc(i64 %mul74) #2 store i8* %call75, i8** @_ZL17switch_membership, align 8 %num76 = getelementptr inbounds %struct.Points, %struct.Points* %points, i32 0, i32 0 %60 = load i64, i64* %num76, align 8 %call77 = call noalias i8* @calloc(i64 %60, i64 1) #2 store i8* %call77, i8** @_ZL9is_center, align 8 %num78 = getelementptr inbounds %struct.Points, %struct.Points* %points, i32 0, i32 0 %61 = load i64, i64* 
%num78, align 8 %mul79 = mul i64 %61, 4 %call80 = call noalias i8* @malloc(i64 %mul79) #2 %62 = bitcast i8* %call80 to i32* store i32* %62, i32** @_ZL12center_table, align 8 %63 = load i64, i64* %kmin.addr, align 8 %64 = load i64, i64* %kmax.addr, align 8 call void @_Z11localSearchP6PointsllPl(%struct.Points* %points, i64 %63, i64 %64, i64* %kfinal) %65 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 %call81 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %65, i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.13, i64 0, i64 0)) %call82 = call i32 @_Z11contcentersP6Points(%struct.Points* %points) store i8 1, i8* @isCoordChanged, align 1 %66 = load i64, i64* %kfinal, align 8 %num83 = getelementptr inbounds %struct.Points, %struct.Points* %centers, i32 0, i32 0 %67 = load i64, i64* %num83, align 8 %add = add nsw i64 %66, %67 %68 = load i64, i64* %centersize.addr, align 8 %cmp84 = icmp sgt i64 %add, %68 br i1 %cmp84, label %if.then85, label %if.end87 if.then85: ; preds = %for.end72 %69 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 %call86 = call i32 (%struct._IO_FILE*, i8*, ...) 
@fprintf(%struct._IO_FILE* %69, i8* getelementptr inbounds ([33 x i8], [33 x i8]* @.str.14, i64 0, i64 0)) call void @exit(i32 1) #15 unreachable if.end87: ; preds = %for.end72 %70 = load i64*, i64** %centerIDs, align 8 %71 = load i64, i64* %IDoffset, align 8 call void @_Z11copycentersP6PointsS0_Pll(%struct.Points* %points, %struct.Points* %centers, i64* %70, i64 %71) %72 = load i64, i64* %numRead, align 8 %73 = load i64, i64* %IDoffset, align 8 %add88 = add i64 %73, %72 store i64 %add88, i64* %IDoffset, align 8 %74 = load i8*, i8** @_ZL9is_center, align 8 call void @free(i8* %74) #2 %75 = load i8*, i8** @_ZL17switch_membership, align 8 call void @free(i8* %75) #2 %76 = load i32*, i32** @_ZL12center_table, align 8 %77 = bitcast i32* %76 to i8* call void @free(i8* %77) #2 %78 = load %class.PStream*, %class.PStream** %stream.addr, align 8 %79 = bitcast %class.PStream* %78 to i32 (%class.PStream*)*** %vtable89 = load i32 (%class.PStream*)**, i32 (%class.PStream*)*** %79, align 8 %vfn90 = getelementptr inbounds i32 (%class.PStream*)*, i32 (%class.PStream*)** %vtable89, i64 2 %80 = load i32 (%class.PStream*)*, i32 (%class.PStream*)** %vfn90, align 8 %call91 = call i32 %80(%class.PStream* %78) %tobool92 = icmp ne i32 %call91, 0 br i1 %tobool92, label %if.then93, label %if.end94 if.then93: ; preds = %if.end87 br label %while.end if.end94: ; preds = %if.end87 br label %while.body while.end: ; preds = %if.then93 %num95 = getelementptr inbounds %struct.Points, %struct.Points* %centers, i32 0, i32 0 %81 = load i64, i64* %num95, align 8 %mul96 = mul i64 %81, 1 %call97 = call noalias i8* @malloc(i64 %mul96) #2 store i8* %call97, i8** @_ZL17switch_membership, align 8 %num98 = getelementptr inbounds %struct.Points, %struct.Points* %centers, i32 0, i32 0 %82 = load i64, i64* %num98, align 8 %call99 = call noalias i8* @calloc(i64 %82, i64 1) #2 store i8* %call99, i8** @_ZL9is_center, align 8 %num100 = getelementptr inbounds %struct.Points, %struct.Points* %centers, i32 0, i32 0 %83 
= load i64, i64* %num100, align 8 %mul101 = mul i64 %83, 4 %call102 = call noalias i8* @malloc(i64 %mul101) #2 %84 = bitcast i8* %call102 to i32* store i32* %84, i32** @_ZL12center_table, align 8 %85 = load i64, i64* %kmin.addr, align 8 %86 = load i64, i64* %kmax.addr, align 8 call void @_Z11localSearchP6PointsllPl(%struct.Points* %centers, i64 %85, i64 %86, i64* %kfinal) %call103 = call i32 @_Z11contcentersP6Points(%struct.Points* %centers) %87 = load i64*, i64** %centerIDs, align 8 %88 = load i8*, i8** %outfile.addr, align 8 call void @_Z12outcenterIDsP6PointsPlPc(%struct.Points* %centers, i64* %87, i8* %88) ret void } ; Function Attrs: noinline norecurse optnone uwtable define dso_local i32 @main(i32 %argc, i8** %argv) #11 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { entry: %retval = alloca i32, align 4 %argc.addr = alloca i32, align 4 %argv.addr = alloca i8**, align 8 %outfilename = alloca i8*, align 8 %infilename = alloca i8*, align 8 %kmin = alloca i64, align 8 %kmax = alloca i64, align 8 %n = alloca i64, align 8 %chunksize = alloca i64, align 8 %clustersize = alloca i64, align 8 %dim = alloca i32, align 4 %stream = alloca %class.PStream*, align 8 %exn.slot = alloca i8* %ehselector.slot = alloca i32 %t1 = alloca double, align 8 %t2 = alloca double, align 8 store i32 0, i32* %retval, align 4 store i32 %argc, i32* %argc.addr, align 4 store i8** %argv, i8*** %argv.addr, align 8 %call = call i32 @cudaSetDevice(i32 0) %call1 = call i8* @_Znam(i64 1024) #16 store i8* %call1, i8** %outfilename, align 8 %call2 = call i8* @_Znam(i64 1024) #16 store i8* %call2, i8** %infilename, align 8 %call3 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.15, i64 0, i64 0)) %call4 = call i32 @fflush(%struct._IO_FILE* null) %0 = load i32, i32* %argc.addr, align 4 %cmp = icmp slt i32 %0, 10 br i1 %cmp, label %if.then, label %if.end if.then: ; preds = %entry %1 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 %2 = load i8**, i8*** %argv.addr, align 8 %arrayidx = getelementptr inbounds i8*, i8** %2, i64 0 %3 = load i8*, i8** %arrayidx, align 8 %call5 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @.str.16, i64 0, i64 0), i8* %3) %4 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 %call6 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %4, i8* getelementptr inbounds ([47 x i8], [47 x i8]* @.str.17, i64 0, i64 0)) %5 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 %call7 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %5, i8* getelementptr inbounds ([47 x i8], [47 x i8]* @.str.18, i64 0, i64 0)) %6 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 %call8 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %6, i8* getelementptr inbounds ([45 x i8], [45 x i8]* @.str.19, i64 0, i64 0)) %7 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 %call9 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %7, i8* getelementptr inbounds ([38 x i8], [38 x i8]* @.str.20, i64 0, i64 0)) %8 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 %call10 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %8, i8* getelementptr inbounds ([57 x i8], [57 x i8]* @.str.21, i64 0, i64 0)) %9 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 %call11 = call i32 (%struct._IO_FILE*, i8*, ...) 
@fprintf(%struct._IO_FILE* %9, i8* getelementptr inbounds ([55 x i8], [55 x i8]* @.str.22, i64 0, i64 0)) %10 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 %call12 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %10, i8* getelementptr inbounds ([37 x i8], [37 x i8]* @.str.23, i64 0, i64 0)) %11 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 %call13 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %11, i8* getelementptr inbounds ([28 x i8], [28 x i8]* @.str.24, i64 0, i64 0)) %12 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 %call14 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %12, i8* getelementptr inbounds ([41 x i8], [41 x i8]* @.str.25, i64 0, i64 0)) %13 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 %call15 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %13, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.26, i64 0, i64 0)) %14 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 %call16 = call i32 (%struct._IO_FILE*, i8*, ...) 
@fprintf(%struct._IO_FILE* %14, i8* getelementptr inbounds ([77 x i8], [77 x i8]* @.str.27, i64 0, i64 0)) call void @exit(i32 1) #15 unreachable if.end: ; preds = %entry %15 = load i8**, i8*** %argv.addr, align 8 %arrayidx17 = getelementptr inbounds i8*, i8** %15, i64 1 %16 = load i8*, i8** %arrayidx17, align 8 %call18 = call i32 @atoi(i8* %16) #18 %conv = sext i32 %call18 to i64 store i64 %conv, i64* %kmin, align 8 %17 = load i8**, i8*** %argv.addr, align 8 %arrayidx19 = getelementptr inbounds i8*, i8** %17, i64 2 %18 = load i8*, i8** %arrayidx19, align 8 %call20 = call i32 @atoi(i8* %18) #18 %conv21 = sext i32 %call20 to i64 store i64 %conv21, i64* %kmax, align 8 %19 = load i8**, i8*** %argv.addr, align 8 %arrayidx22 = getelementptr inbounds i8*, i8** %19, i64 3 %20 = load i8*, i8** %arrayidx22, align 8 %call23 = call i32 @atoi(i8* %20) #18 store i32 %call23, i32* %dim, align 4 %21 = load i8**, i8*** %argv.addr, align 8 %arrayidx24 = getelementptr inbounds i8*, i8** %21, i64 4 %22 = load i8*, i8** %arrayidx24, align 8 %call25 = call i32 @atoi(i8* %22) #18 %conv26 = sext i32 %call25 to i64 store i64 %conv26, i64* %n, align 8 %23 = load i8**, i8*** %argv.addr, align 8 %arrayidx27 = getelementptr inbounds i8*, i8** %23, i64 5 %24 = load i8*, i8** %arrayidx27, align 8 %call28 = call i32 @atoi(i8* %24) #18 %conv29 = sext i32 %call28 to i64 store i64 %conv29, i64* %chunksize, align 8 %25 = load i8**, i8*** %argv.addr, align 8 %arrayidx30 = getelementptr inbounds i8*, i8** %25, i64 6 %26 = load i8*, i8** %arrayidx30, align 8 %call31 = call i32 @atoi(i8* %26) #18 %conv32 = sext i32 %call31 to i64 store i64 %conv32, i64* %clustersize, align 8 %27 = load i8*, i8** %infilename, align 8 %28 = load i8**, i8*** %argv.addr, align 8 %arrayidx33 = getelementptr inbounds i8*, i8** %28, i64 7 %29 = load i8*, i8** %arrayidx33, align 8 %call34 = call i8* @strcpy(i8* %27, i8* %29) %30 = load i8*, i8** %outfilename, align 8 %31 = load i8**, i8*** %argv.addr, align 8 %arrayidx35 = 
getelementptr inbounds i8*, i8** %31, i64 8 %32 = load i8*, i8** %arrayidx35, align 8 %call36 = call i8* @strcpy(i8* %30, i8* %32) %33 = load i8**, i8*** %argv.addr, align 8 %arrayidx37 = getelementptr inbounds i8*, i8** %33, i64 9 %34 = load i8*, i8** %arrayidx37, align 8 %call38 = call i32 @atoi(i8* %34) #18 store i32 %call38, i32* @_ZL5nproc, align 4 call void @srand48(i64 1) #2 %35 = load i64, i64* %n, align 8 %cmp39 = icmp sgt i64 %35, 0 br i1 %cmp39, label %if.then40, label %if.else if.then40: ; preds = %if.end %call41 = call i8* @_Znwm(i64 16) #16 %36 = bitcast i8* %call41 to %class.SimStream* %37 = load i64, i64* %n, align 8 invoke void @_ZN9SimStreamC2El(%class.SimStream* %36, i64 %37) to label %invoke.cont unwind label %lpad invoke.cont: ; preds = %if.then40 %38 = bitcast %class.SimStream* %36 to %class.PStream* store %class.PStream* %38, %class.PStream** %stream, align 8 br label %if.end45 lpad: ; preds = %if.then40 %39 = landingpad { i8*, i32 } cleanup %40 = extractvalue { i8*, i32 } %39, 0 store i8* %40, i8** %exn.slot, align 8 %41 = extractvalue { i8*, i32 } %39, 1 store i32 %41, i32* %ehselector.slot, align 4 call void @_ZdlPv(i8* %call41) #17 br label %eh.resume if.else: ; preds = %if.end %call42 = call i8* @_Znwm(i64 16) #16 %42 = bitcast i8* %call42 to %class.FileStream* %43 = load i8*, i8** %infilename, align 8 invoke void @_ZN10FileStreamC2EPc(%class.FileStream* %42, i8* %43) to label %invoke.cont44 unwind label %lpad43 invoke.cont44: ; preds = %if.else %44 = bitcast %class.FileStream* %42 to %class.PStream* store %class.PStream* %44, %class.PStream** %stream, align 8 br label %if.end45 lpad43: ; preds = %if.else %45 = landingpad { i8*, i32 } cleanup %46 = extractvalue { i8*, i32 } %45, 0 store i8* %46, i8** %exn.slot, align 8 %47 = extractvalue { i8*, i32 } %45, 1 store i32 %47, i32* %ehselector.slot, align 4 call void @_ZdlPv(i8* %call42) #17 br label %eh.resume if.end45: ; preds = %invoke.cont44, %invoke.cont %call46 = call double 
@_Z7gettimev() store double %call46, double* %t1, align 8 store double 0.000000e+00, double* @serial_t, align 8 store double 0.000000e+00, double* @cpu_to_gpu_t, align 8 store double 0.000000e+00, double* @gpu_to_cpu_t, align 8 store double 0.000000e+00, double* @alloc_t, align 8 store double 0.000000e+00, double* @free_t, align 8 store double 0.000000e+00, double* @kernel_t, align 8 store i8 0, i8* @isCoordChanged, align 1 %48 = load %class.PStream*, %class.PStream** %stream, align 8 %49 = load i64, i64* %kmin, align 8 %50 = load i64, i64* %kmax, align 8 %51 = load i32, i32* %dim, align 4 %52 = load i64, i64* %chunksize, align 8 %53 = load i64, i64* %clustersize, align 8 %54 = load i8*, i8** %outfilename, align 8 call void @_Z13streamClusterP7PStreamllillPc(%class.PStream* %48, i64 %49, i64 %50, i32 %51, i64 %52, i64 %53, i8* %54) call void @_Z10freeDevMemv() call void @_Z11freeHostMemv() %call47 = call double @_Z7gettimev() store double %call47, double* %t2, align 8 %55 = load double, double* %t2, align 8 %56 = load double, double* %t1, align 8 %sub = fsub contract double %55, %56 %call48 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str.28, i64 0, i64 0), double %sub) %57 = load %class.PStream*, %class.PStream** %stream, align 8 %isnull = icmp eq %class.PStream* %57, null br i1 %isnull, label %delete.end, label %delete.notnull delete.notnull: ; preds = %if.end45 %58 = bitcast %class.PStream* %57 to void (%class.PStream*)*** %vtable = load void (%class.PStream*)**, void (%class.PStream*)*** %58, align 8 %vfn = getelementptr inbounds void (%class.PStream*)*, void (%class.PStream*)** %vtable, i64 4 %59 = load void (%class.PStream*)*, void (%class.PStream*)** %vfn, align 8 call void %59(%class.PStream* %57) br label %delete.end delete.end: ; preds = %delete.notnull, %if.end45 %60 = load double, double* @time_gain, align 8 %call49 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([19 x i8], [19 x i8]* @.str.29, i64 0, i64 0), double %60) %61 = load double, double* @time_gain_dist, align 8 %call50 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.30, i64 0, i64 0), double %61) %62 = load double, double* @time_gain_init, align 8 %call51 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.31, i64 0, i64 0), double %62) %63 = load double, double* @time_select_feasible, align 8 %call52 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.32, i64 0, i64 0), double %63) %64 = load double, double* @time_speedy, align 8 %call53 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.33, i64 0, i64 0), double %64) %65 = load double, double* @time_shuffle, align 8 %call54 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str.34, i64 0, i64 0), double %65) %66 = load double, double* @time_local_search, align 8 %call55 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([25 x i8], [25 x i8]* @.str.35, i64 0, i64 0), double %66) %call56 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.9, i64 0, i64 0)) %call57 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([34 x i8], [34 x i8]* @.str.36, i64 0, i64 0)) %67 = load double, double* @serial_t, align 8 %div = fdiv double %67, 1.000000e+03 %call58 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str.37, i64 0, i64 0), double %div) %68 = load double, double* @cpu_to_gpu_t, align 8 %div59 = fdiv double %68, 1.000000e+03 %call60 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str.38, i64 0, i64 0), double %div59) %69 = load double, double* @gpu_to_cpu_t, align 8 %div61 = fdiv double %69, 1.000000e+03 %call62 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([41 x i8], [41 x i8]* @.str.39, i64 0, i64 0), double %div61)
  ; NOTE(review): everything down to the first closing brace below is the tail
  ; of a function whose header lies above this excerpt. The visible part loads
  ; @alloc_t, @free_t and @kernel_t, divides each by 1.0e+03, passes each
  ; quotient to printf, and returns 0.
  %70 = load double, double* @alloc_t, align 8
  %div63 = fdiv double %70, 1.000000e+03
  %call64 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.40, i64 0, i64 0), double %div63)
  %71 = load double, double* @free_t, align 8
  %div65 = fdiv double %71, 1.000000e+03
  %call66 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str.41, i64 0, i64 0), double %div65)
  %72 = load double, double* @kernel_t, align 8
  %div67 = fdiv double %72, 1.000000e+03
  %call68 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str.42, i64 0, i64 0), double %div67)
  ret i32 0

; Rebuilds the { i8*, i32 } exception pair from the two stack slots and
; continues unwinding.
eh.resume: ; preds = %lpad43, %lpad
  %exn = load i8*, i8** %exn.slot, align 8
  %sel = load i32, i32* %ehselector.slot, align 4
  %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0
  %lpad.val69 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1
  resume { i8*, i32 } %lpad.val69
}

; External functions declared here; none of them is called in the lines that
; follow in this excerpt.
declare dso_local i32 @cudaSetDevice(i32) #1

declare dso_local i32 @fflush(%struct._IO_FILE*) #1

; Function Attrs: nounwind readonly
declare dso_local i32 @atoi(i8*) #12

declare dso_local i8* @strcpy(i8*, i8*) #1

; Function Attrs: nounwind
declare dso_local void @srand48(i64) #7

; Function Attrs: nobuiltin
declare dso_local noalias i8* @_Znwm(i64) #9

; SimStream::SimStream(long) -- base-object constructor (C2 mangling).
;   %this : object being constructed
;   %n_   : value stored into field 1 of %class.SimStream
; Runs the PStream constructor on the same object, then overwrites the vtable
; pointer with the address of slot 2 of @_ZTV9SimStream, then stores %n_.
; Function Attrs: noinline nounwind optnone uwtable
define linkonce_odr dso_local void @_ZN9SimStreamC2El(%class.SimStream* %this, i64 %n_) unnamed_addr #6 comdat align 2 {
entry:
  %this.addr = alloca %class.SimStream*, align 8
  %n_.addr = alloca i64, align 8
  store %class.SimStream* %this, %class.SimStream** %this.addr, align 8
  store i64 %n_, i64* %n_.addr, align 8
  %this1 = load %class.SimStream*, %class.SimStream** %this.addr, align 8
  ; Base-class construction first.
  %0 = bitcast %class.SimStream* %this1 to %class.PStream*
  call void @_ZN7PStreamC2Ev(%class.PStream* %0) #2
  ; Install SimStream's vtable pointer at offset 0 of the object.
  %1 = bitcast %class.SimStream* %this1 to i32 (...)***
  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [7 x i8*] }, { [7 x i8*] }* @_ZTV9SimStream, i32 0, inrange i32 0, i32 2) to i32 (...)**), i32 (...)*** %1, align 8
  ; this->n = n_
  %2 = load i64, i64* %n_.addr, align 8
  %n = getelementptr inbounds %class.SimStream, %class.SimStream* %this1, i32 0, i32 1
  store i64 %2, i64* %n, align 8
  ret void
}

; Personality routine named by the functions below that contain invoke/landingpad.
declare dso_local i32 @__gxx_personality_v0(...)

; Function Attrs: nobuiltin nounwind
declare dso_local void @_ZdlPv(i8*) #10

; FileStream::FileStream(char*) -- base-object constructor (C2 mangling).
;   %this     : object being constructed
;   %filename : path handed to fopen
; Runs the PStream constructor, installs the @_ZTV10FileStream vtable pointer,
; calls fopen(%filename, @.str.43) and stores the result in field 1. When the
; result is null it calls fprintf on @stderr with @.str.44 and %filename, then
; exit(1). The contents of @.str.43 / @.str.44 are defined outside this excerpt.
; If fopen or fprintf unwinds, the PStream destructor runs and unwinding resumes.
; Function Attrs: noinline optnone uwtable
define linkonce_odr dso_local void @_ZN10FileStreamC2EPc(%class.FileStream* %this, i8* %filename) unnamed_addr #3 comdat align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
entry:
  %this.addr = alloca %class.FileStream*, align 8
  %filename.addr = alloca i8*, align 8
  %exn.slot = alloca i8*
  %ehselector.slot = alloca i32
  store %class.FileStream* %this, %class.FileStream** %this.addr, align 8
  store i8* %filename, i8** %filename.addr, align 8
  %this1 = load %class.FileStream*, %class.FileStream** %this.addr, align 8
  ; Base-class construction, then FileStream's own vtable pointer.
  %0 = bitcast %class.FileStream* %this1 to %class.PStream*
  call void @_ZN7PStreamC2Ev(%class.PStream* %0) #2
  %1 = bitcast %class.FileStream* %this1 to i32 (...)***
  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [7 x i8*] }, { [7 x i8*] }* @_ZTV10FileStream, i32 0, inrange i32 0, i32 2) to i32 (...)**), i32 (...)*** %1, align 8
  %2 = load i8*, i8** %filename.addr, align 8
  %call = invoke %struct._IO_FILE* @fopen(i8* %2, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.43, i64 0, i64 0)) to label %invoke.cont unwind label %lpad

; Store the FILE* in field 1, reload it and test it against null.
invoke.cont: ; preds = %entry
  %fp = getelementptr inbounds %class.FileStream, %class.FileStream* %this1, i32 0, i32 1
  store %struct._IO_FILE* %call, %struct._IO_FILE** %fp, align 8
  %fp2 = getelementptr inbounds %class.FileStream, %class.FileStream* %this1, i32 0, i32 1
  %3 = load %struct._IO_FILE*, %struct._IO_FILE** %fp2, align 8
  %cmp = icmp eq %struct._IO_FILE* %3, null
  br i1 %cmp, label %if.then, label %if.end

; fopen returned null: report through stderr, then terminate the process.
if.then: ; preds = %invoke.cont
  %4 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
  %5 = load i8*, i8** %filename.addr, align 8
  %call4 = invoke i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %4, i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.44, i64 0, i64 0), i8* %5) to label %invoke.cont3 unwind label %lpad

invoke.cont3: ; preds = %if.then
  call void @exit(i32 1) #15
  unreachable

; Cleanup pad: save the in-flight exception, destroy the already-constructed
; PStream base, then continue unwinding.
lpad: ; preds = %if.then, %entry
  %6 = landingpad { i8*, i32 } cleanup
  %7 = extractvalue { i8*, i32 } %6, 0
  store i8* %7, i8** %exn.slot, align 8
  %8 = extractvalue { i8*, i32 } %6, 1
  store i32 %8, i32* %ehselector.slot, align 4
  %9 = bitcast %class.FileStream* %this1 to %class.PStream*
  invoke void @_ZN7PStreamD2Ev(%class.PStream* %9) to label %invoke.cont5 unwind label %terminate.lpad

; Normal exit: the file was opened.
if.end: ; preds = %invoke.cont
  ret void

invoke.cont5: ; preds = %lpad
  br label %eh.resume

eh.resume: ; preds = %invoke.cont5
  %exn = load i8*, i8** %exn.slot, align 8
  %sel = load i32, i32* %ehselector.slot, align 4
  %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0
  %lpad.val6 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1
  resume { i8*, i32 } %lpad.val6

; An exception escaping the cleanup itself ends in __clang_call_terminate.
terminate.lpad: ; preds = %lpad
  %10 = landingpad { i8*, i32 } catch i8* null
  %11 = extractvalue { i8*, i32 } %10, 0
  call void @__clang_call_terminate(i8* %11) #15
  unreachable
}

; Function Attrs: nounwind
declare dso_local float @logf(float) #7

; PStream::PStream() -- base-object constructor (C2 mangling).
; Its only effect is to store the address of slot 2 of @_ZTV7PStream as the
; object's vtable pointer.
; Function Attrs: noinline nounwind optnone uwtable
define linkonce_odr dso_local void @_ZN7PStreamC2Ev(%class.PStream* %this) unnamed_addr #6 comdat align 2 {
entry:
  %this.addr = alloca %class.PStream*, align 8
  store %class.PStream* %this, %class.PStream** %this.addr, align 8
  %this1 = load %class.PStream*, %class.PStream** %this.addr, align 8
  %0 = bitcast %class.PStream* %this1 to i32 (...)***
  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [7 x i8*] }, { [7 x i8*] }* @_ZTV7PStream, i32 0, inrange i32 0, i32 2) to i32 (...)**), i32 (...)*** %0, align 8
  ret void
}

; Function Attrs: noinline
nounwind optnone uwtable
; NOTE(review): the three words above look like the tail of a
; "; Function Attrs:" comment that starts before this line boundary -- confirm.
;
; SimStream::read(float*, int, int).
;   %dest : output buffer, written at index i*dim + k
;   %dim  : number of floats written per generated point
;   %num  : upper bound on the number of points generated by this call
; Loops while i < num AND field 1 (n) > 0. Each iteration writes %dim values of
; (float)lrand48() / 0x41E0000000000000 (2^31), decrements n and increments the
; i64 counter. Returns that counter.
define linkonce_odr dso_local i64 @_ZN9SimStream4readEPfii(%class.SimStream* %this, float* %dest, i32 %dim, i32 %num) unnamed_addr #6 comdat align 2 {
entry:
  %this.addr = alloca %class.SimStream*, align 8
  %dest.addr = alloca float*, align 8
  %dim.addr = alloca i32, align 4
  %num.addr = alloca i32, align 4
  %count = alloca i64, align 8
  %i = alloca i32, align 4
  %k = alloca i32, align 4
  store %class.SimStream* %this, %class.SimStream** %this.addr, align 8
  store float* %dest, float** %dest.addr, align 8
  store i32 %dim, i32* %dim.addr, align 4
  store i32 %num, i32* %num.addr, align 4
  %this1 = load %class.SimStream*, %class.SimStream** %this.addr, align 8
  ; count = 0; i = 0
  store i64 0, i64* %count, align 8
  store i32 0, i32* %i, align 4
  br label %for.cond

; Outer loop test, first half: i < num.
for.cond: ; preds = %for.inc8, %entry
  %0 = load i32, i32* %i, align 4
  %1 = load i32, i32* %num.addr, align 4
  %cmp = icmp slt i32 %0, %1
  br i1 %cmp, label %land.rhs, label %land.end

; Second half, evaluated only when the first holds: n > 0.
land.rhs: ; preds = %for.cond
  %n = getelementptr inbounds %class.SimStream, %class.SimStream* %this1, i32 0, i32 1
  %2 = load i64, i64* %n, align 8
  %cmp2 = icmp sgt i64 %2, 0
  br label %land.end

; Merge of the short-circuit AND.
land.end: ; preds = %land.rhs, %for.cond
  %3 = phi i1 [ false, %for.cond ], [ %cmp2, %land.rhs ]
  br i1 %3, label %for.body, label %for.end10

for.body: ; preds = %land.end
  store i32 0, i32* %k, align 4
  br label %for.cond3

; Inner loop test: k < dim.
for.cond3: ; preds = %for.inc, %for.body
  %4 = load i32, i32* %k, align 4
  %5 = load i32, i32* %dim.addr, align 4
  %cmp4 = icmp slt i32 %4, %5
  br i1 %cmp4, label %for.body5, label %for.end

; dest[i*dim + k] = (float)lrand48() / 2^31
for.body5: ; preds = %for.cond3
  %call = call i64 @lrand48() #2
  %conv = sitofp i64 %call to float
  %div = fdiv float %conv, 0x41E0000000000000
  %6 = load float*, float** %dest.addr, align 8
  %7 = load i32, i32* %i, align 4
  %8 = load i32, i32* %dim.addr, align 4
  %mul = mul nsw i32 %7, %8
  %9 = load i32, i32* %k, align 4
  %add = add nsw i32 %mul, %9
  %idxprom = sext i32 %add to i64
  %arrayidx = getelementptr inbounds float, float* %6, i64 %idxprom
  store float %div, float* %arrayidx, align 4
  br label %for.inc

for.inc: ; preds = %for.body5
  %10 = load i32, i32* %k, align 4
  %inc = add nsw i32 %10, 1
  store i32 %inc, i32* %k, align 4
  br label %for.cond3

; One point finished: n -= 1; count += 1.
for.end: ; preds = %for.cond3
  %n6 = getelementptr inbounds %class.SimStream, %class.SimStream* %this1, i32 0, i32 1
  %11 = load i64, i64* %n6, align 8
  %dec = add nsw i64 %11, -1
  store i64 %dec, i64* %n6, align 8
  %12 = load i64, i64* %count, align 8
  %inc7 = add i64 %12, 1
  store i64 %inc7, i64* %count, align 8
  br label %for.inc8

for.inc8: ; preds = %for.end
  %13 = load i32, i32* %i, align 4
  %inc9 = add nsw i32 %13, 1
  store i32 %inc9, i32* %i, align 4
  br label %for.cond

for.end10: ; preds = %land.end
  %14 = load i64, i64* %count, align 8
  ret i64 %14
}

; SimStream::ferror() -- ignores the object and always returns 0.
; Function Attrs: noinline nounwind optnone uwtable
define linkonce_odr dso_local i32 @_ZN9SimStream6ferrorEv(%class.SimStream* %this) unnamed_addr #6 comdat align 2 {
entry:
  %this.addr = alloca %class.SimStream*, align 8
  store %class.SimStream* %this, %class.SimStream** %this.addr, align 8
  %this1 = load %class.SimStream*, %class.SimStream** %this.addr, align 8
  ret i32 0
}

; SimStream::feof() -- returns 1 when field 1 (n) is <= 0, otherwise 0.
; Function Attrs: noinline nounwind optnone uwtable
define linkonce_odr dso_local i32 @_ZN9SimStream4feofEv(%class.SimStream* %this) unnamed_addr #6 comdat align 2 {
entry:
  %this.addr = alloca %class.SimStream*, align 8
  store %class.SimStream* %this, %class.SimStream** %this.addr, align 8
  %this1 = load %class.SimStream*, %class.SimStream** %this.addr, align 8
  %n = getelementptr inbounds %class.SimStream, %class.SimStream* %this1, i32 0, i32 1
  %0 = load i64, i64* %n, align 8
  %cmp = icmp sle i64 %0, 0
  %conv = zext i1 %cmp to i32
  ret i32 %conv
}

; SimStream::~SimStream() -- base-object destructor (D2 mangling).
; Only forwards to the PStream destructor.
; Function Attrs: noinline nounwind optnone uwtable
define linkonce_odr dso_local void @_ZN9SimStreamD2Ev(%class.SimStream* %this) unnamed_addr #6 comdat align 2 {
entry:
  %this.addr = alloca %class.SimStream*, align 8
  store %class.SimStream* %this, %class.SimStream** %this.addr, align 8
  %this1 = load %class.SimStream*, %class.SimStream** %this.addr, align 8
  %0 = bitcast %class.SimStream* %this1 to %class.PStream*
  call void @_ZN7PStreamD2Ev(%class.PStream* %0)
  ret void
}

; SimStream deleting destructor (D0 mangling).
; Runs the D2 destructor and then passes the object to @_ZdlPv. The same
; @_ZdlPv call is made on the unwind path before the exception is resumed.
; Function Attrs: noinline optnone uwtable
define linkonce_odr dso_local void @_ZN9SimStreamD0Ev(%class.SimStream* %this) unnamed_addr #3 comdat align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
entry:
  %this.addr = alloca %class.SimStream*, align 8
  %exn.slot = alloca i8*
  %ehselector.slot = alloca i32
  store %class.SimStream* %this, %class.SimStream** %this.addr, align 8
  %this1 = load %class.SimStream*, %class.SimStream** %this.addr, align 8
  invoke void @_ZN9SimStreamD2Ev(%class.SimStream* %this1) to label %invoke.cont unwind label %lpad

; Normal path: release the storage.
invoke.cont: ; preds = %entry
  %0 = bitcast %class.SimStream* %this1 to i8*
  call void @_ZdlPv(i8* %0) #17
  ret void

; Unwind path: save the exception, still release the storage, then resume.
lpad: ; preds = %entry
  %1 = landingpad { i8*, i32 } cleanup
  %2 = extractvalue { i8*, i32 } %1, 0
  store i8* %2, i8** %exn.slot, align 8
  %3 = extractvalue { i8*, i32 } %1, 1
  store i32 %3, i32* %ehselector.slot, align 4
  %4 = bitcast %class.SimStream* %this1 to i8*
  call void @_ZdlPv(i8* %4) #17
  br label %eh.resume

eh.resume: ; preds = %lpad
  %exn = load i8*, i8** %exn.slot, align 8
  %sel = load i32, i32* %ehselector.slot, align 4
  %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0
  %lpad.val2 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1
  resume { i8*, i32 } %lpad.val2
}

declare dso_local void @__cxa_pure_virtual() unnamed_addr

; PStream::~PStream() -- base-object destructor (D2 mangling).
; Performs no work beyond spilling and reloading %this.
; Function Attrs: noinline nounwind optnone uwtable
define linkonce_odr dso_local void @_ZN7PStreamD2Ev(%class.PStream* %this) unnamed_addr #6 comdat align 2 {
entry:
  %this.addr = alloca %class.PStream*, align 8
  store %class.PStream* %this, %class.PStream** %this.addr, align 8
  %this1 = load %class.PStream*, %class.PStream** %this.addr, align 8
  ret void
}

; PStream deleting destructor (D0 mangling).
; The body is a call to @llvm.trap followed by unreachable; it never returns.
; Function Attrs: noinline nounwind optnone uwtable
define linkonce_odr dso_local void @_ZN7PStreamD0Ev(%class.PStream* %this) unnamed_addr #6 comdat align 2 {
entry:
  %this.addr = alloca %class.PStream*, align 8
  store %class.PStream* %this, %class.PStream** %this.addr, align 8
  %this1 = load %class.PStream*, %class.PStream** %this.addr, align 8
  call void @llvm.trap() #15
  unreachable
}

; Function Attrs: cold noreturn nounwind
declare void @llvm.trap() #13

; Helper reached from the terminate.lpad blocks in this module: calls
; __cxa_begin_catch on the exception pointer and then std::terminate().
; Function Attrs: noinline noreturn nounwind
define linkonce_odr hidden void @__clang_call_terminate(i8* %0) #14 comdat {
  %2 = call i8* @__cxa_begin_catch(i8* %0) #2
  call void @_ZSt9terminatev() #15
  unreachable
}

declare dso_local i8* @__cxa_begin_catch(i8*)

declare dso_local void @_ZSt9terminatev()

; FileStream::read(float*, int, int).
;   %dest : destination buffer handed to fread
;   %dim  : sign-extended and multiplied by 4 to form fread's element size
;   %num  : sign-extended to form fread's element count
; Returns fread's result unchanged.
; Function Attrs: noinline optnone uwtable
define linkonce_odr dso_local i64 @_ZN10FileStream4readEPfii(%class.FileStream* %this, float* %dest, i32 %dim, i32 %num) unnamed_addr #3 comdat align 2 {
entry:
  %this.addr = alloca %class.FileStream*, align 8
  %dest.addr = alloca float*, align 8
  %dim.addr = alloca i32, align 4
  %num.addr = alloca i32, align 4
  store %class.FileStream* %this, %class.FileStream** %this.addr, align 8
  store float* %dest, float** %dest.addr, align 8
  store i32 %dim, i32* %dim.addr, align 4
  store i32 %num, i32* %num.addr, align 4
  %this1 = load %class.FileStream*, %class.FileStream** %this.addr, align 8
  %0 = load float*, float** %dest.addr, align 8
  %1 = bitcast float* %0 to i8*
  ; element size = 4 * dim
  %2 = load i32, i32* %dim.addr, align 4
  %conv = sext i32 %2 to i64
  %mul = mul i64 4, %conv
  %3 = load i32, i32* %num.addr, align 4
  %conv2 = sext i32 %3 to i64
  ; FILE* lives in field 1 of %class.FileStream.
  %fp = getelementptr inbounds %class.FileStream, %class.FileStream* %this1, i32 0, i32 1
  %4 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8
  %call = call i64 @fread(i8* %1, i64 %mul, i64 %conv2, %struct._IO_FILE* %4)
  ret i64 %call
}

; FileStream::ferror() -- returns the C ferror() result for the FILE* in field 1.
; Function Attrs: noinline nounwind optnone uwtable
define linkonce_odr dso_local i32 @_ZN10FileStream6ferrorEv(%class.FileStream* %this) unnamed_addr #6 comdat align 2 {
entry:
  %this.addr = alloca %class.FileStream*, align 8
  store %class.FileStream* %this, %class.FileStream** %this.addr, align 8
  %this1 = load %class.FileStream*, %class.FileStream** %this.addr, align 8
  %fp = getelementptr inbounds %class.FileStream, %class.FileStream* %this1, i32 0, i32 1
  %0 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8
  %call = call i32 @ferror(%struct._IO_FILE* %0) #2
  ret i32 %call
}

; FileStream::feof() -- returns the C feof() result for the FILE* in field 1.
; Function Attrs: noinline nounwind optnone uwtable
define linkonce_odr dso_local i32 @_ZN10FileStream4feofEv(%class.FileStream* %this) unnamed_addr #6 comdat align 2 {
entry:
  %this.addr = alloca %class.FileStream*, align 8
  store %class.FileStream* %this, %class.FileStream** %this.addr, align 8
  %this1 = load %class.FileStream*, %class.FileStream** %this.addr, align 8
  %fp = getelementptr inbounds %class.FileStream, %class.FileStream* %this1, i32 0, i32 1
  %0 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8
  %call = call i32 @feof(%struct._IO_FILE* %0) #2
  ret i32 %call
}

; FileStream::~FileStream() -- base-object destructor (D2 mangling).
; Re-installs the @_ZTV10FileStream vtable pointer, calls printf with @.str.45
; (contents defined outside this excerpt), calls fclose on field 1, then runs
; the PStream destructor. If printf or fclose unwinds, the PStream destructor
; still runs before the exception is resumed.
; Function Attrs: noinline optnone uwtable
define linkonce_odr dso_local void @_ZN10FileStreamD2Ev(%class.FileStream* %this) unnamed_addr #3 comdat align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
entry:
  %this.addr = alloca %class.FileStream*, align 8
  %exn.slot = alloca i8*
  %ehselector.slot = alloca i32
  store %class.FileStream* %this, %class.FileStream** %this.addr, align 8
  %this1 = load %class.FileStream*, %class.FileStream** %this.addr, align 8
  %0 = bitcast %class.FileStream* %this1 to i32 (...)***
  store i32 (...)** bitcast (i8** getelementptr inbounds ({ [7 x i8*] }, { [7 x i8*] }* @_ZTV10FileStream, i32 0, inrange i32 0, i32 2) to i32 (...)**), i32 (...)*** %0, align 8
  %call = invoke i32 (i8*, ...) @printf(i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.45, i64 0, i64 0)) to label %invoke.cont unwind label %lpad

; Close the FILE* held in field 1.
invoke.cont: ; preds = %entry
  %fp = getelementptr inbounds %class.FileStream, %class.FileStream* %this1, i32 0, i32 1
  %1 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8
  %call3 = invoke i32 @fclose(%struct._IO_FILE* %1) to label %invoke.cont2 unwind label %lpad

; Normal path: destroy the base sub-object and return.
invoke.cont2: ; preds = %invoke.cont
  %2 = bitcast %class.FileStream* %this1 to %class.PStream*
  call void @_ZN7PStreamD2Ev(%class.PStream* %2)
  ret void

; Cleanup pad shared by both invokes above.
lpad: ; preds = %invoke.cont, %entry
  %3 = landingpad { i8*, i32 } cleanup
  %4 = extractvalue { i8*, i32 } %3, 0
  store i8* %4, i8** %exn.slot, align 8
  %5 = extractvalue { i8*, i32 } %3, 1
  store i32 %5, i32* %ehselector.slot, align 4
  %6 = bitcast %class.FileStream* %this1 to %class.PStream*
  invoke void @_ZN7PStreamD2Ev(%class.PStream* %6) to label %invoke.cont4 unwind label %terminate.lpad

invoke.cont4: ; preds = %lpad
  br label %eh.resume

eh.resume: ; preds = %invoke.cont4
  %exn = load i8*, i8** %exn.slot, align 8
  %sel = load i32, i32* %ehselector.slot, align 4
  %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0
  %lpad.val5 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1
  resume { i8*, i32 } %lpad.val5

; An exception escaping the cleanup itself ends in __clang_call_terminate.
terminate.lpad: ; preds = %lpad
  %7 = landingpad { i8*, i32 } catch i8* null
  %8 = extractvalue { i8*, i32 } %7, 0
  call void @__clang_call_terminate(i8* %8) #15
  unreachable
}

; FileStream deleting destructor (D0 mangling).
; Runs the D2 destructor and then passes the object to @_ZdlPv. The same
; @_ZdlPv call is made on the unwind path before the exception is resumed.
; Function Attrs: noinline optnone uwtable
define linkonce_odr dso_local void @_ZN10FileStreamD0Ev(%class.FileStream* %this) unnamed_addr #3 comdat align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
entry:
  %this.addr = alloca %class.FileStream*, align 8
  %exn.slot = alloca i8*
  %ehselector.slot = alloca i32
  store %class.FileStream* %this, %class.FileStream** %this.addr, align 8
  %this1 = load %class.FileStream*, %class.FileStream** %this.addr, align 8
  invoke void @_ZN10FileStreamD2Ev(%class.FileStream* %this1) to label %invoke.cont unwind label %lpad

invoke.cont: ; preds = %entry
  %0 = bitcast %class.FileStream* %this1 to i8*
  call void @_ZdlPv(i8* %0) #17
  ret void

lpad: ; preds = %entry
  %1 = landingpad { i8*, i32 } cleanup
  %2 = extractvalue { i8*, i32 } %1, 0
  store i8* %2, i8** %exn.slot, align 8
  %3 = extractvalue { i8*, i32 } %1, 1
  store i32 %3, i32* %ehselector.slot, align 4
  %4 = bitcast %class.FileStream* %this1 to i8*
  call void @_ZdlPv(i8* %4) #17
  br label %eh.resume

eh.resume: ; preds = %lpad
  %exn = load i8*, i8** %exn.slot, align 8
  %sel = load i32, i32* %ehselector.slot, align 4
  %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0
  %lpad.val2 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1
  resume { i8*, i32 } %lpad.val2
}

declare dso_local i64 @fread(i8*, i64, i64, %struct._IO_FILE*) #1

; Function Attrs: nounwind
declare dso_local i32 @ferror(%struct._IO_FILE*) #7

; Function Attrs: nounwind
declare dso_local i32 @feof(%struct._IO_FILE*) #7

; Internal function placed in section ".text.startup"; its whole body is a call
; to @__cxx_global_var_init, which is defined outside this excerpt.
; Function Attrs: noinline uwtable
define internal void @_GLOBAL__sub_I_streamcluster_cuda_cpu.cu() #0 section ".text.startup" {
entry:
  call void @__cxx_global_var_init()
  ret void
}

; Makes one __cudaRegisterFunction call on the handle %0, passing the address
; of @_Z19kernel_compute_costiilP5PointiiPfS1_PiPb, the constant @0 for both
; following i8* arguments, -1, and null for the remaining arguments. The i32
; result is not used. @0 is defined outside this excerpt.
define internal void @__cuda_register_globals(i8** %0) {
entry:
  %1 = call i32 @__cudaRegisterFunction(i8** %0, i8* bitcast (void (i32, i32, i64, %struct.Point*, i32, i32, float*, float*, i32*, i8*)* @_Z19kernel_compute_costiilP5PointiiPfS1_PiPb to i8*), i8* getelementptr inbounds ([45 x i8], [45 x i8]* @0, i64 0, i64 0), i8* getelementptr inbounds ([45 x i8], [45 x i8]* @0, i64 0, i64 0), i32 -1, i8* null, i8* null, i8* null, i8* null, i32* null)
  ret void
}

declare dso_local i32 @__cudaRegisterFunction(i8**, i8*, i8*, i8*, i32, i8*, i8*, i8*, i8*, i32*)

declare dso_local i32 @__cudaRegisterVar(i8**, i8*, i8*, i8*, i32, i32, i32, i32)

declare dso_local i8** @__cudaRegisterFatBinary(i8*)

; Module constructor for the embedded GPU binary. In order: calls
; __cudaRegisterFatBinary on @__cuda_fatbin_wrapper, stores the returned handle
; in @__cuda_gpubin_handle, calls __cuda_register_globals and
; __cudaRegisterFatBinaryEnd with that handle, and passes @__cuda_module_dtor
; to atexit. The i8* parameter is not read.
define internal void @__cuda_module_ctor(i8* %0) {
entry:
  %1 = call i8** @__cudaRegisterFatBinary(i8* bitcast ({ i32, i32, i8*, i8* }* @__cuda_fatbin_wrapper to i8*))
  store i8** %1, i8*** @__cuda_gpubin_handle, align 8
  call void @__cuda_register_globals(i8** %1)
  call void @__cudaRegisterFatBinaryEnd(i8** %1)
  %2 = call i32 @atexit(void (i8*)* @__cuda_module_dtor)
  ret void
}

declare dso_local void @__cudaRegisterFatBinaryEnd(i8**)

declare dso_local void @__cudaUnregisterFatBinary(i8**)

; Counterpart of the constructor above: reloads the handle from
; @__cuda_gpubin_handle and passes it to __cudaUnregisterFatBinary. The i8*
; parameter is not read.
define internal void @__cuda_module_dtor(i8* %0) {
entry:
  %1 = load i8**, i8*** @__cuda_gpubin_handle, align 8
  call void @__cudaUnregisterFatBinary(i8** %1)
  ret void
}

declare dso_local i32 @atexit(void (i8*)*)

; Attribute groups; the "#N" suffixes on definitions, declarations and call
; sites in this module refer to these by number.
attributes #0 = { noinline uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind }
attributes #3 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #4 = { argmemonly nounwind willreturn }
attributes #5 = { noreturn nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #6 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #7 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #8 = { nounwind readnone speculatable willreturn } attributes #9 = { nobuiltin "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #10 = { nobuiltin nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" 
"less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #11 = { noinline norecurse optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #12 = { nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #13 = { cold noreturn nounwind } attributes #14 = { noinline noreturn nounwind } attributes #15 = { noreturn nounwind } attributes #16 = { builtin } attributes #17 = { builtin nounwind } attributes #18 = { nounwind readonly } !llvm.module.flags = !{!0, !1} !llvm.ident = !{!2} !0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]} !1 = !{i32 1, !"wchar_size", i32 4} !2 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"}