From 9152feb24f8a5ce29f31d168d41e882f69e23a73 Mon Sep 17 00:00:00 2001 From: Ruobing Han Date: Thu, 15 Sep 2022 11:31:58 -0400 Subject: [PATCH] remove useless examples --- examples/backprop/backprop.c | 454 - examples/backprop/backprop.h | 50 - ...rop_cuda-cuda-nvptx64-nvidia-cuda-sm_61.ll | 615 - ...prop_cuda-host-x86_64-unknown-linux-gnu.ll | 894 -- examples/backprop/backprop_cuda.cu | 195 - examples/backprop/backprop_cuda_kernel.cu | 96 - examples/backprop/facetrain.c | 48 - examples/backprop/imagenet.c | 22 - examples/backprop/run.sh | 28 - .../bfs/bfs-cuda-nvptx64-nvidia-cuda-sm_61.ll | 307 - .../bfs/bfs-host-x86_64-unknown-linux-gnu.ll | 825 -- examples/bfs/bfs.cu | 213 - examples/bfs/kernel.cu | 23 - examples/bfs/kernel2.cu | 18 - examples/bfs/run.sh | 21 - examples/btree/common.h | 343 - examples/btree/kernel/kernel_gpu_cuda.cu | 54 - examples/btree/kernel/kernel_gpu_cuda_2.cu | 70 - .../btree/kernel/kernel_gpu_cuda_wrapper.cu | 292 - .../btree/kernel/kernel_gpu_cuda_wrapper.h | 23 - .../btree/kernel/kernel_gpu_cuda_wrapper_2.cu | 347 - .../btree/kernel/kernel_gpu_cuda_wrapper_2.h | 23 - ..._wrapper-cuda-nvptx64-nvidia-cuda-sm_61.ll | 332 - ...a_wrapper-host-x86_64-unknown-linux-gnu.ll | 553 - ...rapper_2-cuda-nvptx64-nvidia-cuda-sm_61.ll | 475 - ...wrapper_2-host-x86_64-unknown-linux-gnu.ll | 651 - examples/btree/main.c | 2192 --- examples/btree/run.sh | 40 - examples/btree/util/cuda/cuda.cu | 75 - examples/btree/util/cuda/cuda.h | 37 - examples/btree/util/num/num.c | 55 - examples/btree/util/num/num.h | 21 - examples/btree/util/timer/timer.c | 36 - examples/btree/util/timer/timer.h | 21 - examples/cfd/euler3d.cu | 662 - examples/cfd/run.sh | 15 - examples/dwt2d/common.h | 64 - examples/dwt2d/components.cu | 193 - examples/dwt2d/components.h | 39 - examples/dwt2d/dwt.cu | 385 - examples/dwt2d/dwt.h | 41 - examples/dwt2d/dwt_cuda/common.cu | 35 - examples/dwt2d/dwt_cuda/common.h | 232 - examples/dwt2d/dwt_cuda/dwt.h | 103 - examples/dwt2d/dwt_cuda/fdwt53.cu | 400 - examples/dwt2d/dwt_cuda/fdwt97.cu | 383 - examples/dwt2d/dwt_cuda/io.h | 440 - examples/dwt2d/dwt_cuda/rdwt53.cu | 360 - examples/dwt2d/dwt_cuda/rdwt97.cu | 363 - examples/dwt2d/dwt_cuda/transform_buffer.h | 338 - examples/dwt2d/main.cu | 401 - examples/dwt2d/run.sh | 8 - examples/dwt2d/run_cpu.sh | 7 - examples/dwt2d/run_nvcc.sh | 14 - examples/dwt2d/test_compile_cpu.sh | 51 - examples/dwt2d/test_compile_nvcc.sh | 9 - ...gaussian-cuda-nvptx64-nvidia-cuda-sm_61.ll | 396 - .../gaussian-host-x86_64-unknown-linux-gnu.ll | 1551 -- examples/gauss/gaussian.cu | 522 - examples/gauss/run.sh | 23 - examples/heartwall/AVI/avilib.c | 1829 --- examples/heartwall/AVI/avilib.h | 317 - examples/heartwall/AVI/avimod.c | 130 - examples/heartwall/AVI/avimod.h | 24 - examples/heartwall/define.c | 396 - examples/heartwall/kernel.cu | 1239 -- examples/heartwall/main.cu | 795 - examples/heartwall/run.sh | 17 - examples/heartwall/setdevice.cu | 5 - .../hotspot-cuda-nvptx64-nvidia-cuda-sm_61.ll | 719 - .../hotspot-host-x86_64-unknown-linux-gnu.ll | 1022 -- examples/hotspot/hotspot.cu | 353 - examples/hotspot/run.sh | 21 - .../3D-cuda-nvptx64-nvidia-cuda-sm_61.ll | 587 - .../3D-host-x86_64-unknown-linux-gnu.ll | 1507 -- examples/hotspot3D/3D.cu | 205 - examples/hotspot3D/run.sh | 22 - examples/huffman/comparison_helpers.h | 24 - examples/huffman/cpuencode.cpp | 116 - examples/huffman/cpuencode.h | 8 - examples/huffman/cuda_helpers.h | 20 - examples/huffman/cutil.h | 931 -- examples/huffman/hist.cu | 104 - examples/huffman/huffTree.h | 90 - examples/huffman/load_data.h | 65 - ..._test_cu-cuda-nvptx64-nvidia-cuda-sm_61.ll | 1933 --- ...n_test_cu-host-x86_64-unknown-linux-gnu.ll | 12230 ---------------- examples/huffman/main_test_cu.cu | 225 - examples/huffman/pabio_kernels_v2.cu | 62 - examples/huffman/pack_kernels.cu | 43 - examples/huffman/parameters.h | 27 - examples/huffman/print_helpers.h | 217 - examples/huffman/run.sh | 20 - examples/huffman/scan.cu | 216 - examples/huffman/scanLargeArray_kernel.cu | 237 - examples/huffman/stats_logger.cpp | 43 - examples/huffman/stats_logger.h | 45 - examples/huffman/stdafx.h | 11 - examples/huffman/testdatagen.h | 83 - examples/huffman/vlc_kernel_sm64huff.cu | 160 - .../common-host-x86_64-unknown-linux-gnu.ll | 1291 -- .../lud/lud-host-x86_64-unknown-linux-gnu.ll | 326 - ...d_kernel-cuda-nvptx64-nvidia-cuda-sm_50.ll | 1001 -- ...ud_kernel-host-x86_64-unknown-linux-gnu.ll | 452 - examples/lud/run.sh | 25 - examples/microbench/cudamemcpy_test.cc | 40 - examples/microbench/dummy_kernel.cc | 39 - examples/microbench/kerne_arg.cc | 36 - examples/microbench/one_thread_kernel.cc | 36 - examples/myocyte/run.sh | 21 - examples/nn/filelist_4 | 4 - .../nn_cuda-cuda-nvptx64-nvidia-cuda-sm_61.ll | 271 - .../nn_cuda-host-x86_64-unknown-linux-gnu.ll | 3691 ----- examples/nn/nn_cuda.cu | 328 - examples/nn/run.sh | 21 - .../needle-cuda-nvptx64-nvidia-cuda-sm_61.ll | 923 -- .../needle-host-x86_64-unknown-linux-gnu.ll | 1218 -- examples/nw/needle.cu | 301 - examples/nw/needle.h | 10 - examples/nw/needle_kernel.cu | 165 - examples/nw/run.sh | 20 - ...aive_seq-cuda-nvptx64-nvidia-cuda-sm_61.ll | 482 - ...naive_seq-host-x86_64-unknown-linux-gnu.ll | 2920 ---- examples/particlefilter/run.sh | 20 - ...thfinder-cuda-nvptx64-nvidia-cuda-sm_61.ll | 462 - ...athfinder-host-x86_64-unknown-linux-gnu.ll | 745 - examples/pathfinder/pathfinder.cu | 238 - examples/pathfinder/run.sh | 20 - examples/srad_v2/run.sh | 21 - .../srad-cuda-nvptx64-nvidia-cuda-sm_61.ll | 1551 -- .../srad-host-x86_64-unknown-linux-gnu.ll | 962 -- examples/srad_v2/srad.cu | 279 - examples/srad_v2/srad.h | 15 - examples/srad_v2/srad_kernel.cu | 257 - examples/streamcluster/run.sh | 18 - examples/streamcluster/streamcluster_cuda.cu | 363 - ...cuda_cpu-cuda-nvptx64-nvidia-cuda-sm_61.ll | 366 - ..._cuda_cpu-host-x86_64-unknown-linux-gnu.ll | 5115 ------- .../streamcluster/streamcluster_cuda_cpu.cu | 963 -- examples/streamcluster/streamcluster_header.h | 143 - 140 files changed, 67741 deletions(-) delete mode 100644 examples/backprop/backprop.c delete mode 100644 examples/backprop/backprop.h delete mode 100644 examples/backprop/backprop_cuda-cuda-nvptx64-nvidia-cuda-sm_61.ll delete mode 100644 examples/backprop/backprop_cuda-host-x86_64-unknown-linux-gnu.ll delete mode 100644 examples/backprop/backprop_cuda.cu delete mode 100644 examples/backprop/backprop_cuda_kernel.cu delete mode 100644 examples/backprop/facetrain.c delete mode 100644 examples/backprop/imagenet.c delete mode 100644 examples/backprop/run.sh delete mode 100644 examples/bfs/bfs-cuda-nvptx64-nvidia-cuda-sm_61.ll delete mode 100644 examples/bfs/bfs-host-x86_64-unknown-linux-gnu.ll delete mode 100644 examples/bfs/bfs.cu delete mode 100644 examples/bfs/kernel.cu delete mode 100644 examples/bfs/kernel2.cu delete mode 100644 examples/bfs/run.sh delete mode 100644 examples/btree/common.h delete mode 100755 examples/btree/kernel/kernel_gpu_cuda.cu delete mode 100755 examples/btree/kernel/kernel_gpu_cuda_2.cu delete mode 100755 examples/btree/kernel/kernel_gpu_cuda_wrapper.cu delete mode 100644 examples/btree/kernel/kernel_gpu_cuda_wrapper.h delete mode 100755 examples/btree/kernel/kernel_gpu_cuda_wrapper_2.cu delete mode 100644 examples/btree/kernel/kernel_gpu_cuda_wrapper_2.h delete mode 100644 examples/btree/kernel_gpu_cuda_wrapper-cuda-nvptx64-nvidia-cuda-sm_61.ll delete mode 100644 examples/btree/kernel_gpu_cuda_wrapper-host-x86_64-unknown-linux-gnu.ll delete mode 100644 examples/btree/kernel_gpu_cuda_wrapper_2-cuda-nvptx64-nvidia-cuda-sm_61.ll delete mode 100644 examples/btree/kernel_gpu_cuda_wrapper_2-host-x86_64-unknown-linux-gnu.ll delete mode 100644 examples/btree/main.c delete mode 100755 examples/btree/run.sh delete mode 100755 examples/btree/util/cuda/cuda.cu delete mode 100644 examples/btree/util/cuda/cuda.h delete mode 100644 examples/btree/util/num/num.c delete mode 100755 examples/btree/util/num/num.h delete mode 100644 examples/btree/util/timer/timer.c delete mode 100644 examples/btree/util/timer/timer.h delete mode 100755 examples/cfd/euler3d.cu delete mode 100644 examples/cfd/run.sh delete mode 100644 examples/dwt2d/common.h delete mode 100755 examples/dwt2d/components.cu delete mode 100644 examples/dwt2d/components.h delete mode 100755 examples/dwt2d/dwt.cu delete mode 100644 examples/dwt2d/dwt.h delete mode 100755 examples/dwt2d/dwt_cuda/common.cu delete mode 100644 examples/dwt2d/dwt_cuda/common.h delete mode 100644 examples/dwt2d/dwt_cuda/dwt.h delete mode 100755 examples/dwt2d/dwt_cuda/fdwt53.cu delete mode 100755 examples/dwt2d/dwt_cuda/fdwt97.cu delete mode 100644 examples/dwt2d/dwt_cuda/io.h delete mode 100755 examples/dwt2d/dwt_cuda/rdwt53.cu delete mode 100755 examples/dwt2d/dwt_cuda/rdwt97.cu delete mode 100644 examples/dwt2d/dwt_cuda/transform_buffer.h delete mode 100755 examples/dwt2d/main.cu delete mode 100755 examples/dwt2d/run.sh delete mode 100755 examples/dwt2d/run_cpu.sh delete mode 100644 examples/dwt2d/run_nvcc.sh delete mode 100644 examples/dwt2d/test_compile_cpu.sh delete mode 100755 examples/dwt2d/test_compile_nvcc.sh delete mode 100644 examples/gauss/gaussian-cuda-nvptx64-nvidia-cuda-sm_61.ll delete mode 100644 examples/gauss/gaussian-host-x86_64-unknown-linux-gnu.ll delete mode 100644 examples/gauss/gaussian.cu delete mode 100755 examples/gauss/run.sh delete mode 100644 examples/heartwall/AVI/avilib.c delete mode 100644 examples/heartwall/AVI/avilib.h delete mode 100644 examples/heartwall/AVI/avimod.c delete mode 100644 examples/heartwall/AVI/avimod.h delete mode 100644 examples/heartwall/define.c delete mode 100755 examples/heartwall/kernel.cu delete mode 100644 examples/heartwall/main.cu delete mode 100644 examples/heartwall/run.sh delete mode 100755 examples/heartwall/setdevice.cu delete mode 100644 examples/hotspot/hotspot-cuda-nvptx64-nvidia-cuda-sm_61.ll delete mode 100644 examples/hotspot/hotspot-host-x86_64-unknown-linux-gnu.ll delete mode 100644 examples/hotspot/hotspot.cu delete mode 100644 examples/hotspot/run.sh delete mode 100644 examples/hotspot3D/3D-cuda-nvptx64-nvidia-cuda-sm_61.ll delete mode 100644 examples/hotspot3D/3D-host-x86_64-unknown-linux-gnu.ll delete mode 100644 examples/hotspot3D/3D.cu delete mode 100644 examples/hotspot3D/run.sh delete mode 100644 examples/huffman/comparison_helpers.h delete mode 100644 examples/huffman/cpuencode.cpp delete mode 100644 examples/huffman/cpuencode.h delete mode 100644 examples/huffman/cuda_helpers.h delete mode 100644 examples/huffman/cutil.h delete mode 100644 examples/huffman/hist.cu delete mode 100644 examples/huffman/huffTree.h delete mode 100644 examples/huffman/load_data.h delete mode 100644 examples/huffman/main_test_cu-cuda-nvptx64-nvidia-cuda-sm_61.ll delete mode 100644 examples/huffman/main_test_cu-host-x86_64-unknown-linux-gnu.ll delete mode 100755 examples/huffman/main_test_cu.cu delete mode 100644 examples/huffman/pabio_kernels_v2.cu delete mode 100755 examples/huffman/pack_kernels.cu delete mode 100644 examples/huffman/parameters.h delete mode 100644 examples/huffman/print_helpers.h delete mode 100644 examples/huffman/run.sh delete mode 100755 examples/huffman/scan.cu delete mode 100644 examples/huffman/scanLargeArray_kernel.cu delete mode 100644 examples/huffman/stats_logger.cpp delete mode 100644 examples/huffman/stats_logger.h delete mode 100644 examples/huffman/stdafx.h delete mode 100644 examples/huffman/testdatagen.h delete mode 100755 examples/huffman/vlc_kernel_sm64huff.cu delete mode 100644 examples/lud/common-host-x86_64-unknown-linux-gnu.ll delete mode 100644 examples/lud/lud-host-x86_64-unknown-linux-gnu.ll delete mode 100644 examples/lud/lud_kernel-cuda-nvptx64-nvidia-cuda-sm_50.ll delete mode 100644 examples/lud/lud_kernel-host-x86_64-unknown-linux-gnu.ll delete mode 100644 examples/lud/run.sh delete mode 100644 examples/microbench/cudamemcpy_test.cc delete mode 100644 examples/microbench/dummy_kernel.cc delete mode 100644 examples/microbench/kerne_arg.cc delete mode 100644 examples/microbench/one_thread_kernel.cc delete mode 100644 examples/myocyte/run.sh delete mode 100644 examples/nn/filelist_4 delete mode 100644 examples/nn/nn_cuda-cuda-nvptx64-nvidia-cuda-sm_61.ll delete mode 100644 examples/nn/nn_cuda-host-x86_64-unknown-linux-gnu.ll delete mode 100644 examples/nn/nn_cuda.cu delete mode 100644 examples/nn/run.sh delete mode 100644 examples/nw/needle-cuda-nvptx64-nvidia-cuda-sm_61.ll delete mode 100644 examples/nw/needle-host-x86_64-unknown-linux-gnu.ll delete mode 100644 examples/nw/needle.cu delete mode 100644 examples/nw/needle.h delete mode 100644 examples/nw/needle_kernel.cu delete mode 100644 examples/nw/run.sh delete mode 100644 examples/particlefilter/ex_particle_CUDA_naive_seq-cuda-nvptx64-nvidia-cuda-sm_61.ll delete mode 100644 examples/particlefilter/ex_particle_CUDA_naive_seq-host-x86_64-unknown-linux-gnu.ll delete mode 100644 examples/particlefilter/run.sh delete mode 100644 examples/pathfinder/pathfinder-cuda-nvptx64-nvidia-cuda-sm_61.ll delete mode 100644 examples/pathfinder/pathfinder-host-x86_64-unknown-linux-gnu.ll delete mode 100644 examples/pathfinder/pathfinder.cu delete mode 100644 examples/pathfinder/run.sh delete mode 100644 examples/srad_v2/run.sh delete mode 100644 examples/srad_v2/srad-cuda-nvptx64-nvidia-cuda-sm_61.ll delete mode 100644 examples/srad_v2/srad-host-x86_64-unknown-linux-gnu.ll delete mode 100644 examples/srad_v2/srad.cu delete mode 100644 examples/srad_v2/srad.h delete mode 100644 examples/srad_v2/srad_kernel.cu delete mode 100644 examples/streamcluster/run.sh delete mode 100644 examples/streamcluster/streamcluster_cuda.cu delete mode 100644 examples/streamcluster/streamcluster_cuda_cpu-cuda-nvptx64-nvidia-cuda-sm_61.ll delete mode 100644 examples/streamcluster/streamcluster_cuda_cpu-host-x86_64-unknown-linux-gnu.ll delete mode 100644 examples/streamcluster/streamcluster_cuda_cpu.cu delete mode 100644 examples/streamcluster/streamcluster_header.h diff --git a/examples/backprop/backprop.c b/examples/backprop/backprop.c deleted file mode 100644 index e6f8f5f..0000000 --- a/examples/backprop/backprop.c +++ /dev/null @@ -1,454 +0,0 @@ -#include "backprop.h" -#include -#include -#include - -//#define OPEN - -#define ABS(x) (((x) > 0.0) ? (x) : (-(x))) - -#define fastcopy(to, from, len) \ - { \ - register char *_to, *_from; \ - register int _i, _l; \ - _to = (char *)(to); \ - _from = (char *)(from); \ - _l = (len); \ - for (_i = 0; _i < _l; _i++) \ - *_to++ = *_from++; \ - } - -/*** Return random number between 0.0 and 1.0 ***/ -float drnd() { return ((float)rand() / (float)BIGRND); } - -/*** Return random number between -1.0 and 1.0 ***/ -float dpn1() { return ((drnd() * 2.0) - 1.0); } - -/*** The squashing function. Currently, it's a sigmoid. ***/ - -float squash(x) -float x; -{ - float m; - // x = -x; - // m = 1 + x + x*x/2 + x*x*x/6 + x*x*x*x/24 + x*x*x*x*x/120; - // return(1.0 / (1.0 + m)); - return (1.0 / (1.0 + exp(-x))); -} - -/*** Allocate 1d array of floats ***/ - -float *alloc_1d_dbl(n) -int n; -{ - float *new; - - new = (float *)malloc((unsigned)(n * sizeof(float))); - if (new == NULL) { - printf("ALLOC_1D_DBL: Couldn't allocate array of floats\n"); - return (NULL); - } - return (new); -} - -/*** Allocate 2d array of floats ***/ - -float **alloc_2d_dbl(m, n) -int m, n; -{ - int i; - float **new; - - new = (float **)malloc((unsigned)(m * sizeof(float *))); - if (new == NULL) { - printf("ALLOC_2D_DBL: Couldn't allocate array of dbl ptrs\n"); - return (NULL); - } - - for (i = 0; i < m; i++) { - new[i] = alloc_1d_dbl(n); - } - - return (new); -} - -bpnn_randomize_weights(w, m, n) float **w; -int m, n; -{ - int i, j; - - for (i = 0; i <= m; i++) { - for (j = 0; j <= n; j++) { - w[i][j] = (float)rand() / RAND_MAX; - // w[i][j] = dpn1(); - } - } -} - -bpnn_randomize_row(w, m) float *w; -int m; -{ - int i; - for (i = 0; i <= m; i++) { - // w[i] = (float) rand()/RAND_MAX; - w[i] = 0.1; - } -} - -bpnn_zero_weights(w, m, n) float **w; -int m, n; -{ - int i, j; - - for (i = 0; i <= m; i++) { - for (j = 0; j <= n; j++) { - w[i][j] = 0.0; - } - } -} - -void bpnn_initialize(seed) { - printf("Random number generator seed: %d\n", seed); - srand(seed); -} - -BPNN *bpnn_internal_create(n_in, n_hidden, n_out) -int n_in, n_hidden, n_out; -{ - BPNN *newnet; - - newnet = (BPNN *)malloc(sizeof(BPNN)); - if (newnet == NULL) { - printf("BPNN_CREATE: Couldn't allocate neural network\n"); - return (NULL); - } - - newnet->input_n = n_in; - newnet->hidden_n = n_hidden; - newnet->output_n = n_out; - newnet->input_units = alloc_1d_dbl(n_in + 1); - newnet->hidden_units = alloc_1d_dbl(n_hidden + 1); - newnet->output_units = alloc_1d_dbl(n_out + 1); - - newnet->hidden_delta = alloc_1d_dbl(n_hidden + 1); - newnet->output_delta = alloc_1d_dbl(n_out + 1); - newnet->target = alloc_1d_dbl(n_out + 1); - - newnet->input_weights = alloc_2d_dbl(n_in + 1, n_hidden + 1); - newnet->hidden_weights = alloc_2d_dbl(n_hidden + 1, n_out + 1); - - newnet->input_prev_weights = alloc_2d_dbl(n_in + 1, n_hidden + 1); - newnet->hidden_prev_weights = alloc_2d_dbl(n_hidden + 1, n_out + 1); - - return (newnet); -} - -void bpnn_free(net) BPNN *net; -{ - int n1, n2, i; - - n1 = net->input_n; - n2 = net->hidden_n; - - free((char *)net->input_units); - free((char *)net->hidden_units); - free((char *)net->output_units); - - free((char *)net->hidden_delta); - free((char *)net->output_delta); - free((char *)net->target); - - for (i = 0; i <= n1; i++) { - free((char *)net->input_weights[i]); - free((char *)net->input_prev_weights[i]); - } - free((char *)net->input_weights); - free((char *)net->input_prev_weights); - - for (i = 0; i <= n2; i++) { - free((char *)net->hidden_weights[i]); - free((char *)net->hidden_prev_weights[i]); - } - free((char *)net->hidden_weights); - free((char *)net->hidden_prev_weights); - - free((char *)net); -} - -/*** Creates a new fully-connected network from scratch, - with the given numbers of input, hidden, and output units. - Threshold units are automatically included. All weights are - randomly initialized. - Space is also allocated for temporary storage (momentum weights, - error computations, etc). -***/ - -BPNN *bpnn_create(n_in, n_hidden, n_out) -int n_in, n_hidden, n_out; -{ - - BPNN *newnet; - - newnet = bpnn_internal_create(n_in, n_hidden, n_out); - -#ifdef INITZERO - bpnn_zero_weights(newnet->input_weights, n_in, n_hidden); -#else - bpnn_randomize_weights(newnet->input_weights, n_in, n_hidden); -#endif - bpnn_randomize_weights(newnet->hidden_weights, n_hidden, n_out); - bpnn_zero_weights(newnet->input_prev_weights, n_in, n_hidden); - bpnn_zero_weights(newnet->hidden_prev_weights, n_hidden, n_out); - bpnn_randomize_row(newnet->target, n_out); - return (newnet); -} - -void bpnn_layerforward(l1, l2, conn, n1, n2) float *l1, *l2, **conn; -int n1, n2; -{ - float sum; - int j, k; - - /*** Set up thresholding unit ***/ - l1[0] = 1.0; -#ifdef OPEN - omp_set_num_threads(NUM_THREAD); -#pragma omp parallel for shared(conn, n1, n2, l1) private(k, j) reduction(+: sum) schedule(static) -#endif - /*** For each unit in second layer ***/ - for (j = 1; j <= n2; j++) { - - /*** Compute weighted sum of its inputs ***/ - sum = 0.0; - for (k = 0; k <= n1; k++) { - sum += conn[k][j] * l1[k]; - } - l2[j] = squash(sum); - } -} - -// extern "C" -void bpnn_output_error(delta, target, output, nj, err) float *delta, *target, - *output, *err; -int nj; -{ - int j; - float o, t, errsum; - errsum = 0.0; - for (j = 1; j <= nj; j++) { - o = output[j]; - t = target[j]; - delta[j] = o * (1.0 - o) * (t - o); - errsum += ABS(delta[j]); - } - *err = errsum; -} - -void bpnn_hidden_error(delta_h, nh, delta_o, no, who, hidden, - err) float *delta_h, - *delta_o, *hidden, **who, *err; -int nh, no; -{ - int j, k; - float h, sum, errsum; - - errsum = 0.0; - for (j = 1; j <= nh; j++) { - h = hidden[j]; - sum = 0.0; - for (k = 1; k <= no; k++) { - sum += delta_o[k] * who[j][k]; - } - delta_h[j] = h * (1.0 - h) * sum; - errsum += ABS(delta_h[j]); - } - *err = errsum; -} - -void bpnn_adjust_weights(delta, ndelta, ly, nly, w, oldw) float *delta, *ly, - **w, **oldw; -{ - float new_dw; - int k, j; - ly[0] = 1.0; - // eta = 0.3; - // momentum = 0.3; - -#ifdef OPEN - omp_set_num_threads(NUM_THREAD); -#pragma omp parallel for shared(oldw, w, delta) private(j, k, new_dw) \ - firstprivate(ndelta, nly, momentum) -#endif - for (j = 1; j <= ndelta; j++) { - for (k = 0; k <= nly; k++) { - new_dw = ((ETA * delta[j] * ly[k]) + (MOMENTUM * oldw[k][j])); - w[k][j] += new_dw; - oldw[k][j] = new_dw; - } - } -} - -void bpnn_feedforward(net) BPNN *net; -{ - int in, hid, out; - - in = net->input_n; - hid = net->hidden_n; - out = net->output_n; - - /*** Feed forward input activations. ***/ - bpnn_layerforward(net->input_units, net->hidden_units, net->input_weights, in, - hid); - bpnn_layerforward(net->hidden_units, net->output_units, net->hidden_weights, - hid, out); -} - -void bpnn_train(net, eo, eh) BPNN *net; -float *eo, *eh; -{ - int in, hid, out; - float out_err, hid_err; - - in = net->input_n; - hid = net->hidden_n; - out = net->output_n; - - /*** Feed forward input activations. ***/ - bpnn_layerforward(net->input_units, net->hidden_units, net->input_weights, in, - hid); - bpnn_layerforward(net->hidden_units, net->output_units, net->hidden_weights, - hid, out); - - /*** Compute error on output and hidden units. ***/ - bpnn_output_error(net->output_delta, net->target, net->output_units, out, - &out_err); - bpnn_hidden_error(net->hidden_delta, hid, net->output_delta, out, - net->hidden_weights, net->hidden_units, &hid_err); - *eo = out_err; - *eh = hid_err; - - /*** Adjust input and hidden weights. ***/ - bpnn_adjust_weights(net->output_delta, out, net->hidden_units, hid, - net->hidden_weights, net->hidden_prev_weights); - bpnn_adjust_weights(net->hidden_delta, hid, net->input_units, in, - net->input_weights, net->input_prev_weights); -} - -void bpnn_save(net, filename) BPNN *net; -char *filename; -{ - int n1, n2, n3, i, j, memcnt; - float dvalue, **w; - char *mem; - /// add// - FILE *pFile; - pFile = fopen(filename, "w+"); - /////// - /* - if ((fd = creat(filename, 0644)) == -1) { - printf("BPNN_SAVE: Cannot create '%s'\n", filename); - return; - } - */ - - n1 = net->input_n; - n2 = net->hidden_n; - n3 = net->output_n; - printf("Saving %dx%dx%d network to '%s'\n", n1, n2, n3, filename); - // fflush(stdout); - - // write(fd, (char *) &n1, sizeof(int)); - // write(fd, (char *) &n2, sizeof(int)); - // write(fd, (char *) &n3, sizeof(int)); - - fwrite((char *)&n1, sizeof(char), sizeof(char), pFile); - fwrite((char *)&n2, sizeof(char), sizeof(char), pFile); - fwrite((char *)&n3, sizeof(char), sizeof(char), pFile); - - memcnt = 0; - w = net->input_weights; - mem = (char *)malloc((unsigned)((n1 + 1) * (n2 + 1) * sizeof(float))); - for (i = 0; i <= n1; i++) { - for (j = 0; j <= n2; j++) { - dvalue = w[i][j]; - fastcopy(&mem[memcnt], &dvalue, sizeof(float)); - memcnt += sizeof(float); - } - } - // write(fd, mem, (n1+1) * (n2+1) * sizeof(float)); - fwrite(mem, (unsigned)(sizeof(float)), - (unsigned)((n1 + 1) * (n2 + 1) * sizeof(float)), pFile); - free(mem); - - memcnt = 0; - w = net->hidden_weights; - mem = (char *)malloc((unsigned)((n2 + 1) * (n3 + 1) * sizeof(float))); - for (i = 0; i <= n2; i++) { - for (j = 0; j <= n3; j++) { - dvalue = w[i][j]; - fastcopy(&mem[memcnt], &dvalue, sizeof(float)); - memcnt += sizeof(float); - } - } - // write(fd, mem, (n2+1) * (n3+1) * sizeof(float)); - fwrite(mem, sizeof(float), (unsigned)((n2 + 1) * (n3 + 1) * sizeof(float)), - pFile); - free(mem); - - fclose(pFile); - return; -} - -BPNN *bpnn_read(filename) -char *filename; -{ - char *mem; - BPNN *new; - int fd, n1, n2, n3, i, j, memcnt; - - if ((fd = open(filename, 0, 0644)) == -1) { - return (NULL); - } - - printf("Reading '%s'\n", filename); // fflush(stdout); - - read(fd, (char *)&n1, sizeof(int)); - read(fd, (char *)&n2, sizeof(int)); - read(fd, (char *)&n3, sizeof(int)); - new = bpnn_internal_create(n1, n2, n3); - - printf("'%s' contains a %dx%dx%d network\n", filename, n1, n2, n3); - printf("Reading input weights..."); // fflush(stdout); - - memcnt = 0; - mem = (char *)malloc((unsigned)((n1 + 1) * (n2 + 1) * sizeof(float))); - read(fd, mem, (n1 + 1) * (n2 + 1) * sizeof(float)); - for (i = 0; i <= n1; i++) { - for (j = 0; j <= n2; j++) { - fastcopy(&(new->input_weights[i][j]), &mem[memcnt], sizeof(float)); - memcnt += sizeof(float); - } - } - free(mem); - - printf("Done\nReading hidden weights..."); // fflush(stdout); - - memcnt = 0; - mem = (char *)malloc((unsigned)((n2 + 1) * (n3 + 1) * sizeof(float))); - read(fd, mem, (n2 + 1) * (n3 + 1) * sizeof(float)); - for (i = 0; i <= n2; i++) { - for (j = 0; j <= n3; j++) { - fastcopy(&(new->hidden_weights[i][j]), &mem[memcnt], sizeof(float)); - memcnt += sizeof(float); - } - } - free(mem); - close(fd); - - printf("Done\n"); // fflush(stdout); - - bpnn_zero_weights(new->input_prev_weights, n1, n2); - bpnn_zero_weights(new->hidden_prev_weights, n2, n3); - - return (new); -} diff --git a/examples/backprop/backprop.h b/examples/backprop/backprop.h deleted file mode 100644 index a6a753c..0000000 --- a/examples/backprop/backprop.h +++ /dev/null @@ -1,50 +0,0 @@ -#ifndef _BACKPROP_H_ -#define _BACKPROP_H_ - -#define BIGRND 0x7fffffff - -#define GPU -#define THREADS 256 -#define WIDTH 16 // shared memory width -#define HEIGHT 16 // shared memory height - -#define ETA 0.3 // eta value -#define MOMENTUM 0.3 // momentum value -#define NUM_THREAD 4 // OpenMP threads - -typedef struct { - int input_n; /* number of input units */ - int hidden_n; /* number of hidden units */ - int output_n; /* number of output units */ - - float *input_units; /* the input units */ - float *hidden_units; /* the hidden units */ - float *output_units; /* the output units */ - - float *hidden_delta; /* storage for hidden unit error */ - float *output_delta; /* storage for output unit error */ - - float *target; /* storage for target vector */ - - float **input_weights; /* weights from input to hidden layer */ - float **hidden_weights; /* weights from hidden to output layer */ - - /*** The next two are for momentum ***/ - float **input_prev_weights; /* previous change on input to hidden wgt */ - float **hidden_prev_weights; /* previous change on hidden to output wgt */ -} BPNN; - -/*** User-level functions ***/ - -void bpnn_initialize(); - -BPNN *bpnn_create(); -void bpnn_free(); - -void bpnn_train(); -void bpnn_feedforward(); - -void bpnn_save(); -BPNN *bpnn_read(); - -#endif diff --git a/examples/backprop/backprop_cuda-cuda-nvptx64-nvidia-cuda-sm_61.ll b/examples/backprop/backprop_cuda-cuda-nvptx64-nvidia-cuda-sm_61.ll deleted file mode 100644 index 321377b..0000000 --- a/examples/backprop/backprop_cuda-cuda-nvptx64-nvidia-cuda-sm_61.ll +++ /dev/null @@ -1,615 +0,0 @@ -; ModuleID = 'backprop_cuda-cuda-nvptx64-nvidia-cuda-sm_61.bc' -source_filename = "backprop_cuda.cu" -target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" -target triple = "nvptx64-nvidia-cuda" - -%struct.__cuda_builtin_blockIdx_t = type { i8 } -%struct.__cuda_builtin_threadIdx_t = type { i8 } -%struct.cudaFuncAttributes = type { i64, i64, i64, i32, i32, i32, i32, i32, i32, i32 } - -$_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_yEv = comdat any - -$_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv = comdat any - -$_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_yEv = comdat any - -@blockIdx = extern_weak dso_local addrspace(1) global %struct.__cuda_builtin_blockIdx_t, align 1 -@threadIdx = extern_weak dso_local addrspace(1) global %struct.__cuda_builtin_threadIdx_t, align 1 -@_ZZ22bpnn_layerforward_CUDAPfS_S_S_iiE10input_node = internal addrspace(3) global [16 x float] undef, align 4 -@_ZZ22bpnn_layerforward_CUDAPfS_S_S_iiE13weight_matrix = internal addrspace(3) global [16 x [16 x float]] undef, align 4 -@"$str" = private addrspace(1) constant [11 x i8] c"__CUDA_FTZ\00" - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaMalloc(i8** %p, i64 %s) #0 { -entry: - %p.addr = alloca i8**, align 8 - %s.addr = alloca i64, align 8 - store i8** %p, i8*** %p.addr, align 8 - store i64 %s, i64* %s.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaFuncGetAttributes(%struct.cudaFuncAttributes* %p, i8* %c) #0 { -entry: - %p.addr = alloca %struct.cudaFuncAttributes*, align 8 - %c.addr = alloca i8*, align 8 - store %struct.cudaFuncAttributes* %p, %struct.cudaFuncAttributes** %p.addr, align 8 - store i8* %c, i8** %c.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaDeviceGetAttribute(i32* %value, i32 %attr, i32 %device) #0 { -entry: - %value.addr = alloca i32*, align 8 - %attr.addr = alloca i32, align 4 - %device.addr = alloca i32, align 4 - store i32* %value, i32** %value.addr, align 8 - store i32 %attr, i32* %attr.addr, align 4 - store i32 %device, i32* %device.addr, align 4 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaGetDevice(i32* %device) #0 { -entry: - %device.addr = alloca i32*, align 8 - store i32* %device, i32** %device.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaOccupancyMaxActiveBlocksPerMultiprocessor(i32* %numBlocks, i8* %func, i32 %blockSize, i64 %dynamicSmemSize) #0 { -entry: - %numBlocks.addr = alloca i32*, align 8 - %func.addr = alloca i8*, align 8 - %blockSize.addr = alloca i32, align 4 - %dynamicSmemSize.addr = alloca i64, align 8 - store i32* %numBlocks, i32** %numBlocks.addr, align 8 - store i8* %func, i8** %func.addr, align 8 - store i32 %blockSize, i32* %blockSize.addr, align 4 - store i64 %dynamicSmemSize, i64* %dynamicSmemSize.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(i32* %numBlocks, i8* %func, i32 %blockSize, i64 %dynamicSmemSize, i32 %flags) #0 { -entry: - %numBlocks.addr = alloca i32*, align 8 - %func.addr = alloca i8*, align 8 - %blockSize.addr = alloca i32, align 4 - %dynamicSmemSize.addr = alloca i64, align 8 - %flags.addr = alloca i32, align 4 - store i32* %numBlocks, i32** %numBlocks.addr, align 8 - store i8* %func, i8** %func.addr, align 8 - store i32 %blockSize, i32* %blockSize.addr, align 4 - store i64 %dynamicSmemSize, i64* %dynamicSmemSize.addr, align 8 - store i32 %flags, i32* %flags.addr, align 4 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define dso_local void @_Z22bpnn_layerforward_CUDAPfS_S_S_ii(float* %input_cuda, float* %output_hidden_cuda, float* %input_hidden_cuda, float* %hidden_partial_sum, i32 %in, i32 %hid) #0 { -entry: - %input_cuda.addr = alloca float*, align 8 - %output_hidden_cuda.addr = alloca float*, align 8 - %input_hidden_cuda.addr = alloca float*, align 8 - %hidden_partial_sum.addr = alloca float*, align 8 - %in.addr = alloca i32, align 4 - %hid.addr = alloca i32, align 4 - %by = alloca i32, align 4 - %tx = alloca i32, align 4 - %ty = alloca i32, align 4 - %index = alloca i32, align 4 - %index_in = alloca i32, align 4 - %i = alloca i32, align 4 - %power_two = alloca i32, align 4 - store float* %input_cuda, float** %input_cuda.addr, align 8 - store float* %output_hidden_cuda, float** %output_hidden_cuda.addr, align 8 - store float* %input_hidden_cuda, float** %input_hidden_cuda.addr, align 8 - store float* %hidden_partial_sum, float** %hidden_partial_sum.addr, align 8 - store i32 %in, i32* %in.addr, align 4 - store i32 %hid, i32* %hid.addr, align 4 - %call = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_yEv() #2 - store i32 %call, i32* %by, align 4 - %call1 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - store i32 %call1, i32* %tx, align 4 - %call2 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_yEv() #2 - store i32 %call2, i32* %ty, align 4 - %0 = load i32, i32* %hid.addr, align 4 - %add = add nsw i32 %0, 1 - %mul = mul nsw i32 %add, 16 - %1 = load i32, i32* %by, align 4 - %mul3 = mul nsw i32 %mul, %1 - %2 = load i32, i32* %hid.addr, align 4 - %add4 = add nsw i32 %2, 1 - %3 = load i32, i32* %ty, align 4 - %mul5 = mul nsw i32 %add4, %3 - %add6 = add nsw i32 %mul3, %mul5 - %4 = load i32, i32* %tx, align 4 - %add7 = add nsw i32 %add6, %4 - %add8 = add nsw i32 %add7, 1 - %5 = load i32, i32* %hid.addr, align 4 - %add9 = add nsw i32 %5, 1 - %add10 = add nsw i32 %add8, %add9 - store i32 %add10, i32* %index, align 4 - %6 = load i32, i32* %by, align 4 - %mul11 = mul nsw i32 16, %6 - %7 = load i32, i32* %ty, align 4 - %add12 = add nsw i32 %mul11, %7 - %add13 = add nsw i32 %add12, 1 - store i32 %add13, i32* %index_in, align 4 - %8 = load i32, i32* %tx, align 4 - %cmp = icmp eq i32 %8, 0 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - %9 = load float*, float** %input_cuda.addr, align 8 - %10 = load i32, i32* %index_in, align 4 - %idxprom = sext i32 %10 to i64 - %arrayidx = getelementptr inbounds float, float* %9, i64 %idxprom - %11 = load float, float* %arrayidx, align 4 - %12 = load i32, i32* %ty, align 4 - %idxprom14 = sext i32 %12 to i64 - %arrayidx15 = getelementptr inbounds [16 x float], [16 x float]* addrspacecast ([16 x float] addrspace(3)* @_ZZ22bpnn_layerforward_CUDAPfS_S_S_iiE10input_node to [16 x float]*), i64 0, i64 %idxprom14 - store float %11, float* %arrayidx15, align 4 - br label %if.end - -if.end: ; preds = %if.then, %entry - call void @llvm.nvvm.barrier0() - %13 = load float*, float** %input_hidden_cuda.addr, align 8 - %14 = load i32, i32* %index, align 4 - %idxprom16 = sext i32 %14 to i64 - %arrayidx17 = getelementptr inbounds float, float* %13, i64 %idxprom16 - %15 = load float, float* %arrayidx17, align 4 - %16 = load i32, i32* %ty, align 4 - %idxprom18 = sext i32 %16 to i64 - %arrayidx19 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ22bpnn_layerforward_CUDAPfS_S_S_iiE13weight_matrix to [16 x [16 x float]]*), i64 0, i64 %idxprom18 - %17 = load i32, i32* %tx, align 4 - %idxprom20 = sext i32 %17 to i64 - %arrayidx21 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx19, i64 0, i64 %idxprom20 - store float %15, float* %arrayidx21, align 4 - call void @llvm.nvvm.barrier0() - %18 = load i32, i32* %ty, align 4 - %idxprom22 = sext i32 %18 to i64 - %arrayidx23 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ22bpnn_layerforward_CUDAPfS_S_S_iiE13weight_matrix to [16 x [16 x float]]*), i64 0, i64 %idxprom22 - %19 = load i32, i32* %tx, align 4 - %idxprom24 = sext i32 %19 to i64 - %arrayidx25 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx23, i64 0, i64 %idxprom24 - %20 = load float, float* %arrayidx25, align 4 - %21 = load i32, i32* %ty, align 4 - %idxprom26 = sext i32 %21 to i64 - %arrayidx27 = getelementptr inbounds [16 x float], [16 x float]* addrspacecast ([16 x float] addrspace(3)* @_ZZ22bpnn_layerforward_CUDAPfS_S_S_iiE10input_node to [16 x float]*), i64 0, i64 %idxprom26 - %22 = load float, float* %arrayidx27, align 4 - %mul28 = fmul contract float %20, %22 - %23 = load i32, i32* %ty, align 4 - %idxprom29 = sext i32 %23 to i64 - %arrayidx30 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ22bpnn_layerforward_CUDAPfS_S_S_iiE13weight_matrix to [16 x [16 x float]]*), i64 0, i64 %idxprom29 - %24 = load i32, i32* %tx, align 4 - %idxprom31 = sext i32 %24 to i64 - %arrayidx32 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx30, i64 0, i64 %idxprom31 - store float %mul28, float* %arrayidx32, align 4 - call void @llvm.nvvm.barrier0() - store i32 1, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %if.end - %25 = load i32, i32* %i, align 4 - %conv = sitofp i32 %25 to float - %call33 = call float @_ZL7__log2ff(float 1.600000e+01) #2 - %cmp34 = fcmp ole float %conv, %call33 - br i1 %cmp34, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %26 = load i32, i32* %i, align 4 - %conv35 = sitofp i32 %26 to float - %call36 = call float @_ZL6__powfff(float 2.000000e+00, float %conv35) #2 - %conv37 = fptosi float %call36 to i32 - store i32 %conv37, i32* %power_two, align 4 - %27 = load i32, i32* %ty, align 4 - %28 = load i32, i32* %power_two, align 4 - %rem = srem i32 %27, %28 - %cmp38 = icmp eq i32 %rem, 0 - br i1 %cmp38, label %if.then39, label %if.end54 - -if.then39: ; preds = %for.body - %29 = load i32, i32* %ty, align 4 - %idxprom40 = sext i32 %29 to i64 - %arrayidx41 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ22bpnn_layerforward_CUDAPfS_S_S_iiE13weight_matrix to [16 x [16 x float]]*), i64 0, i64 %idxprom40 - %30 = load i32, i32* %tx, align 4 - %idxprom42 = sext i32 %30 to i64 - %arrayidx43 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx41, i64 0, i64 %idxprom42 - %31 = load float, float* %arrayidx43, align 4 - %32 = load i32, i32* %ty, align 4 - %33 = load i32, i32* %power_two, align 4 - %div = sdiv i32 %33, 2 - %add44 = add nsw i32 %32, %div - %idxprom45 = sext i32 %add44 to i64 - %arrayidx46 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ22bpnn_layerforward_CUDAPfS_S_S_iiE13weight_matrix to [16 x [16 x float]]*), i64 0, i64 %idxprom45 - %34 = load i32, i32* %tx, align 4 - %idxprom47 = sext i32 %34 to i64 - %arrayidx48 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx46, i64 0, i64 %idxprom47 - %35 = load float, float* %arrayidx48, align 4 - %add49 = fadd contract float %31, %35 - %36 = load i32, i32* %ty, align 4 - %idxprom50 = sext i32 %36 to i64 - %arrayidx51 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ22bpnn_layerforward_CUDAPfS_S_S_iiE13weight_matrix to [16 x [16 x float]]*), i64 0, i64 %idxprom50 - %37 = load i32, i32* %tx, align 4 - %idxprom52 = sext i32 %37 to i64 - %arrayidx53 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx51, i64 0, i64 %idxprom52 - store float %add49, float* %arrayidx53, align 4 - br label %if.end54 - -if.end54: ; preds = %if.then39, %for.body - call void @llvm.nvvm.barrier0() - br label %for.inc - -for.inc: ; preds = %if.end54 - %38 = load i32, i32* %i, align 4 - %inc = add nsw i32 %38, 1 - store i32 %inc, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %39 = load i32, i32* %ty, align 4 - %idxprom55 = sext i32 %39 to i64 - %arrayidx56 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ22bpnn_layerforward_CUDAPfS_S_S_iiE13weight_matrix to [16 x [16 x float]]*), i64 0, i64 %idxprom55 - %40 = load i32, i32* %tx, align 4 - %idxprom57 = sext i32 %40 to i64 - %arrayidx58 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx56, i64 0, i64 %idxprom57 - %41 = load float, float* %arrayidx58, align 4 - %42 = load float*, float** %input_hidden_cuda.addr, align 8 - %43 = load i32, i32* %index, align 4 - %idxprom59 = sext i32 %43 to i64 - %arrayidx60 = getelementptr inbounds float, float* %42, i64 %idxprom59 - store float %41, float* %arrayidx60, align 4 - call void @llvm.nvvm.barrier0() - %44 = load i32, i32* %tx, align 4 - %cmp61 = icmp eq i32 %44, 0 - br i1 %cmp61, label %if.then62, label %if.end71 - -if.then62: ; preds = %for.end - %45 = load i32, i32* %tx, align 4 - %idxprom63 = sext i32 %45 to i64 - %arrayidx64 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ22bpnn_layerforward_CUDAPfS_S_S_iiE13weight_matrix to [16 x [16 x float]]*), i64 0, i64 %idxprom63 - %46 = load i32, i32* %ty, align 4 - %idxprom65 = sext i32 %46 to i64 - %arrayidx66 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx64, i64 0, i64 %idxprom65 - %47 = load float, float* %arrayidx66, align 4 - %48 = load float*, float** %hidden_partial_sum.addr, align 8 - %49 = load i32, i32* %by, align 4 - %50 = load i32, i32* %hid.addr, align 4 - %mul67 = mul nsw i32 %49, %50 - %51 = load i32, i32* %ty, align 4 - %add68 = add nsw i32 %mul67, %51 - %idxprom69 = sext i32 %add68 to i64 - %arrayidx70 = getelementptr inbounds float, float* %48, i64 %idxprom69 - store float %47, float* %arrayidx70, align 4 - br label %if.end71 - -if.end71: ; preds = %if.then62, %for.end - ret void -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_yEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y() - ret i32 %0 -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() - ret i32 %0 -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_yEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.y() - ret i32 %0 -} - -; Function Attrs: convergent nounwind -declare void @llvm.nvvm.barrier0() #2 - -; Function Attrs: alwaysinline convergent nounwind -define internal float @_ZL7__log2ff(float %__a) #1 { -entry: - %__a.addr = alloca float, align 4 - store float %__a, float* %__a.addr, align 4 - %0 = load float, float* %__a.addr, align 4 - %call = call float @__nv_fast_log2f(float %0) #2 - ret float %call -} - -; Function Attrs: alwaysinline convergent nounwind -define internal float @_ZL6__powfff(float %__a, float %__b) #1 { -entry: - %__a.addr = alloca float, align 4 - %__b.addr = alloca float, align 4 - store float %__a, float* %__a.addr, align 4 - store float %__b, float* %__b.addr, align 4 - %0 = load float, float* %__a.addr, align 4 - %1 = load float, float* %__b.addr, align 4 - %call = call float @__nv_fast_powf(float %0, float %1) #2 - ret float %call -} - -; Function Attrs: convergent noinline nounwind optnone -define dso_local void @_Z24bpnn_adjust_weights_cudaPfiS_iS_S_(float* %delta, i32 %hid, float* %ly, i32 %in, float* %w, float* %oldw) #0 { -entry: - %delta.addr = alloca float*, align 8 - %hid.addr = alloca i32, align 4 - %ly.addr = alloca float*, align 8 - %in.addr = alloca i32, align 4 - %w.addr = alloca float*, align 8 - %oldw.addr = alloca float*, align 8 - %by = alloca i32, align 4 - %tx = alloca i32, align 4 - %ty = alloca i32, align 4 - %index = alloca i32, align 4 - %index_y = alloca i32, align 4 - %index_x = alloca i32, align 4 - store float* %delta, float** %delta.addr, align 8 - store i32 %hid, i32* %hid.addr, align 4 - store float* %ly, float** %ly.addr, align 8 - store i32 %in, i32* %in.addr, align 4 - store float* %w, float** %w.addr, align 8 - store float* %oldw, float** %oldw.addr, align 8 - %call = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_yEv() #2 - store i32 %call, i32* %by, align 4 - %call1 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - store i32 %call1, i32* %tx, align 4 - %call2 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_yEv() #2 - store i32 %call2, i32* %ty, align 4 - %0 = load i32, i32* %hid.addr, align 4 - %add = add nsw i32 %0, 1 - %mul = mul nsw i32 %add, 16 - %1 = load i32, i32* %by, align 4 - %mul3 = mul nsw i32 %mul, %1 - %2 = load i32, i32* %hid.addr, align 4 - %add4 = add nsw i32 %2, 1 - %3 = load i32, i32* %ty, align 4 - %mul5 = mul nsw i32 %add4, %3 - %add6 = add nsw i32 %mul3, %mul5 - %4 = load i32, i32* %tx, align 4 - %add7 = add nsw i32 %add6, %4 - %add8 = add nsw i32 %add7, 1 - %5 = load i32, i32* %hid.addr, align 4 - %add9 = add nsw i32 %5, 1 - %add10 = add nsw i32 %add8, %add9 - store i32 %add10, i32* %index, align 4 - %6 = load i32, i32* %by, align 4 - %mul11 = mul nsw i32 16, %6 - %7 = load i32, i32* %ty, align 4 - %add12 = add nsw i32 %mul11, %7 - %add13 = add nsw i32 %add12, 1 - store i32 %add13, i32* %index_y, align 4 - %8 = load i32, i32* %tx, align 4 - %add14 = add nsw i32 %8, 1 - store i32 %add14, i32* %index_x, align 4 - %9 = load float*, float** %delta.addr, align 8 - %10 = load i32, i32* %index_x, align 4 - %idxprom = sext i32 %10 to i64 - %arrayidx = getelementptr inbounds float, float* %9, i64 %idxprom - %11 = load float, float* %arrayidx, align 4 - %conv = fpext float %11 to double - %mul15 = fmul contract double 3.000000e-01, %conv - %12 = load float*, float** %ly.addr, align 8 - %13 = load i32, i32* %index_y, align 4 - %idxprom16 = sext i32 %13 to i64 - %arrayidx17 = getelementptr inbounds float, float* %12, i64 %idxprom16 - %14 = load float, float* %arrayidx17, align 4 - %conv18 = fpext float %14 to double - %mul19 = fmul contract double %mul15, %conv18 - %15 = load float*, float** %oldw.addr, align 8 - %16 = load i32, i32* %index, align 4 - %idxprom20 = sext i32 %16 to i64 - %arrayidx21 = getelementptr inbounds float, float* %15, i64 %idxprom20 - %17 = load float, float* %arrayidx21, align 4 - %conv22 = fpext float %17 to double - %mul23 = fmul contract double 3.000000e-01, %conv22 - %add24 = fadd contract double %mul19, %mul23 - %18 = load float*, float** %w.addr, align 8 - %19 = load i32, i32* %index, align 4 - %idxprom25 = sext i32 %19 to i64 - %arrayidx26 = getelementptr inbounds float, float* %18, i64 %idxprom25 - %20 = load float, float* %arrayidx26, align 4 - %conv27 = fpext float %20 to double - %add28 = fadd contract double %conv27, %add24 - %conv29 = fptrunc double %add28 to float - store float %conv29, float* %arrayidx26, align 4 - %21 = load float*, float** %delta.addr, align 8 - %22 = load i32, i32* %index_x, align 4 - %idxprom30 = sext i32 %22 to i64 - %arrayidx31 = getelementptr inbounds float, float* %21, i64 %idxprom30 - %23 = load float, float* %arrayidx31, align 4 - %conv32 = fpext float %23 to double - %mul33 = fmul contract double 3.000000e-01, %conv32 - %24 = load float*, float** %ly.addr, align 8 - %25 = load i32, i32* %index_y, align 4 - %idxprom34 = sext i32 %25 to i64 - %arrayidx35 = getelementptr inbounds float, float* %24, i64 %idxprom34 - %26 = load float, float* %arrayidx35, align 4 - %conv36 = fpext float %26 to double - %mul37 = fmul contract double %mul33, %conv36 - %27 = load float*, float** %oldw.addr, align 8 - %28 = load i32, i32* %index, align 4 - %idxprom38 = sext i32 %28 to i64 - %arrayidx39 = getelementptr inbounds float, float* %27, i64 %idxprom38 - %29 = load float, float* %arrayidx39, align 4 - %conv40 = fpext float %29 to double - %mul41 = fmul contract double 3.000000e-01, %conv40 - %add42 = fadd contract double %mul37, %mul41 - %conv43 = fptrunc double %add42 to float - %30 = load float*, float** %oldw.addr, align 8 - %31 = load i32, i32* %index, align 4 - %idxprom44 = sext i32 %31 to i64 - %arrayidx45 = getelementptr inbounds float, float* %30, i64 %idxprom44 - store float %conv43, float* %arrayidx45, align 4 - call void @llvm.nvvm.barrier0() - %32 = load i32, i32* %ty, align 4 - %cmp = icmp eq i32 %32, 0 - br i1 %cmp, label %land.lhs.true, label %if.end - -land.lhs.true: ; preds = %entry - %33 = load i32, i32* %by, align 4 - %cmp46 = icmp eq i32 %33, 0 - br i1 %cmp46, label %if.then, label %if.end - -if.then: ; preds = %land.lhs.true - %34 = load float*, float** %delta.addr, align 8 - %35 = load i32, i32* %index_x, align 4 - %idxprom47 = sext i32 %35 to i64 - %arrayidx48 = getelementptr inbounds float, float* %34, i64 %idxprom47 - %36 = load float, float* %arrayidx48, align 4 - %conv49 = fpext float %36 to double - %mul50 = fmul contract double 3.000000e-01, %conv49 - %37 = load float*, float** %oldw.addr, align 8 - %38 = load i32, i32* %index_x, align 4 - %idxprom51 = sext i32 %38 to i64 - %arrayidx52 = getelementptr inbounds float, float* %37, i64 %idxprom51 - %39 = load float, float* %arrayidx52, align 4 - %conv53 = fpext float %39 to double - %mul54 = fmul contract double 3.000000e-01, %conv53 - %add55 = fadd contract double %mul50, %mul54 - %40 = load float*, float** %w.addr, align 8 - %41 = load i32, i32* %index_x, align 4 - %idxprom56 = sext i32 %41 to i64 - %arrayidx57 = getelementptr inbounds float, float* %40, i64 %idxprom56 - %42 = load float, float* %arrayidx57, align 4 - %conv58 = fpext float %42 to double - %add59 = fadd contract double %conv58, %add55 - %conv60 = fptrunc double %add59 to float - store float %conv60, float* %arrayidx57, align 4 - %43 = load float*, float** %delta.addr, align 8 - %44 = load i32, i32* %index_x, align 4 - %idxprom61 = sext i32 %44 to i64 - %arrayidx62 = getelementptr inbounds float, float* %43, i64 %idxprom61 - %45 = load float, float* %arrayidx62, align 4 - %conv63 = fpext float %45 to double - %mul64 = fmul contract double 3.000000e-01, %conv63 - %46 = load float*, float** %oldw.addr, align 8 - %47 = load i32, i32* %index_x, align 4 - %idxprom65 = sext i32 %47 to i64 - %arrayidx66 = getelementptr inbounds float, float* %46, i64 %idxprom65 - %48 = load float, float* %arrayidx66, align 4 - %conv67 = fpext float %48 to double - %mul68 = fmul contract double 3.000000e-01, %conv67 - %add69 = fadd contract double %mul64, %mul68 - %conv70 = fptrunc double %add69 to float - %49 = load float*, float** %oldw.addr, align 8 - %50 = load i32, i32* %index_x, align 4 - %idxprom71 = sext i32 %50 to i64 - %arrayidx72 = getelementptr inbounds float, float* %49, i64 %idxprom71 - store float %conv70, float* %arrayidx72, align 4 - br label %if.end - -if.end: ; preds = %if.then, %land.lhs.true, %entry - ret void -} - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.y() #3 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() #3 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.tid.y() #3 - -; Function Attrs: alwaysinline convergent inlinehint nounwind -define internal float @__nv_fast_log2f(float %a) #4 { - %call.i = call i32 @__nvvm_reflect(i8* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([11 x i8], [11 x i8] addrspace(1)* @"$str", i32 0, i32 0) to i8*)) - %1 = icmp ne i32 %call.i, 0 - br i1 %1, label %2, label %4 - -2: ; preds = %0 - %3 = call float @llvm.nvvm.lg2.approx.ftz.f(float %a) - br label %__nvvm_builtin_log2f.exit - -4: ; preds = %0 - %5 = call float @llvm.nvvm.lg2.approx.f(float %a) - br label %__nvvm_builtin_log2f.exit - -__nvvm_builtin_log2f.exit: ; preds = %4, %2 - %retval.0.i = phi float [ %3, %2 ], [ %5, %4 ] - ret float %retval.0.i -} - -; Function Attrs: convergent nounwind -declare i32 @__nvvm_reflect(i8*) #5 - -; Function Attrs: nounwind readnone -declare float @llvm.nvvm.lg2.approx.ftz.f(float) #3 - -; Function Attrs: nounwind readnone -declare float @llvm.nvvm.lg2.approx.f(float) #3 - -; Function Attrs: alwaysinline convergent inlinehint nounwind -define internal float @__nv_fast_powf(float %a, float %b) #4 { - %call.i.i = call i32 @__nvvm_reflect(i8* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([11 x i8], [11 x i8] addrspace(1)* @"$str", i32 0, i32 0) to i8*)) - %1 = icmp ne i32 %call.i.i, 0 - br i1 %1, label %2, label %4 - -2: ; preds = %0 - %3 = call float @llvm.nvvm.lg2.approx.ftz.f(float %a) - br label %__nv_fast_log2f.exit - -4: ; preds = %0 - %5 = call float @llvm.nvvm.lg2.approx.f(float %a) - br label %__nv_fast_log2f.exit - -__nv_fast_log2f.exit: ; preds = %4, %2 - %retval.0.i.i = phi float [ %3, %2 ], [ %5, %4 ] - %6 = fmul float %b, %retval.0.i.i - %call.i.i1 = call i32 @__nvvm_reflect(i8* addrspacecast (i8 addrspace(1)* getelementptr inbounds ([11 x i8], [11 x i8] addrspace(1)* @"$str", i32 0, i32 0) to i8*)) - %7 = icmp ne i32 %call.i.i1, 0 - br i1 %7, label %8, label %10 - -8: ; preds = %__nv_fast_log2f.exit - %9 = call float @llvm.nvvm.ex2.approx.ftz.f(float %6) - br label %__nv_exp2f.exit - -10: ; preds = %__nv_fast_log2f.exit - %11 = call float @llvm.nvvm.ex2.approx.f(float %6) - br label %__nv_exp2f.exit - -__nv_exp2f.exit: ; preds = %10, %8 - %retval.0.i.i2 = phi float [ %9, %8 ], [ %11, %10 ] - ret float %retval.0.i.i2 -} - -; Function Attrs: nounwind readnone -declare float @llvm.nvvm.ex2.approx.ftz.f(float) #3 - -; Function Attrs: nounwind readnone -declare float @llvm.nvvm.ex2.approx.f(float) #3 - -attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_61" "target-features"="+ptx64,+sm_61" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { alwaysinline convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_61" "target-features"="+ptx64,+sm_61" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { convergent nounwind } -attributes #3 = { nounwind readnone } -attributes #4 = { alwaysinline convergent inlinehint nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #5 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } - -!llvm.module.flags = !{!0, !1, !2} -!nvvm.annotations = !{!3, !4, !5, !6, !5, !7, !7, !7, !7, !8, !8, !7} -!llvm.ident = !{!9} -!nvvmir.version = !{!10} - -!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]} -!1 = !{i32 1, !"wchar_size", i32 4} -!2 = !{i32 4, !"nvvm-reflect-ftz", i32 0} -!3 = !{void (float*, float*, float*, float*, i32, i32)* @_Z22bpnn_layerforward_CUDAPfS_S_S_ii, !"kernel", i32 1} -!4 = !{void (float*, i32, float*, i32, float*, float*)* @_Z24bpnn_adjust_weights_cudaPfiS_iS_S_, !"kernel", i32 1} -!5 = !{null, !"align", i32 8} -!6 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080} -!7 = !{null, !"align", i32 16} -!8 = !{null, !"align", i32 16, !"align", i32 65552, !"align", i32 131088} -!9 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"} -!10 = !{i32 1, i32 4} diff --git a/examples/backprop/backprop_cuda-host-x86_64-unknown-linux-gnu.ll b/examples/backprop/backprop_cuda-host-x86_64-unknown-linux-gnu.ll deleted file mode 100644 index 6c7daea..0000000 --- a/examples/backprop/backprop_cuda-host-x86_64-unknown-linux-gnu.ll +++ /dev/null @@ -1,894 +0,0 @@ -; ModuleID = 'backprop_cuda-host-x86_64-unknown-linux-gnu.bc' -source_filename = "backprop_cuda.cu" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -%struct.dim3 = type { i32, i32, i32 } -%struct.CUstream_st = type opaque -%struct.timeval = type { i64, i64 } -%struct.timezone = type { i32, i32 } -%struct.BPNN = type { i32, i32, i32, float*, float*, float*, float*, float*, float*, float**, float**, float**, float** } - -$_ZN4dim3C2Ejjj = comdat any - -$_ZSt3expf = comdat any - -@num_threads = dso_local global i32 0, align 4 -@num_blocks = dso_local global i32 0, align 4 -@.str = private unnamed_addr constant [28 x i8] c"Performing GPU computation\0A\00", align 1 -@.str.1 = private unnamed_addr constant [23 x i8] c"bpnn kernel error: %s\0A\00", align 1 -@.str.2 = private unnamed_addr constant [4 x i8] c"%f \00", align 1 -@.str.3 = private unnamed_addr constant [2 x i8] c"\0A\00", align 1 -@0 = private unnamed_addr constant [37 x i8] c"_Z22bpnn_layerforward_CUDAPfS_S_S_ii\00", align 1 -@1 = private unnamed_addr constant [39 x i8] c"_Z24bpnn_adjust_weights_cudaPfiS_iS_S_\00", align 1 -@2 = private constant [26889 x i8] c"P\EDU\BA\01\00\10\00\F8h\00\00\00\00\00\00\02\00\01\01@\00\00\00xY\00\00\00\00\00\00\00\00\00\00\00\00\00\00\07\00\01\00=\00\00\00\00\00\00\00\00\00\00\00\11\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\7FELF\02\01\013\07\00\00\00\00\00\00\00\02\00\BE\00e\00\00\00\00\00\00\00\00\00\00\00\D0X\00\00\00\00\00\00\10U\00\00\00\00\00\00=\05=\00@\008\00\03\00@\00\0F\00\01\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00.text._Z24bpnn_adjust_weights_cudaPfiS_iS_S_\00.nv.info._Z24bpnn_adjust_weights_cudaPfiS_iS_S_\00.nv.shared._Z24bpnn_adjust_weights_cudaPfiS_iS_S_\00.nv.global\00.nv.global.init\00.nv.constant2._Z24bpnn_adjust_weights_cudaPfiS_iS_S_\00.nv.constant0._Z24bpnn_adjust_weights_cudaPfiS_iS_S_\00.text._Z22bpnn_layerforward_CUDAPfS_S_S_ii\00.nv.info._Z22bpnn_layerforward_CUDAPfS_S_S_ii\00.nv.shared._Z22bpnn_layerforward_CUDAPfS_S_S_ii\00.nv.constant0._Z22bpnn_layerforward_CUDAPfS_S_S_ii\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00_Z24bpnn_adjust_weights_cudaPfiS_iS_S_\00.text._Z24bpnn_adjust_weights_cudaPfiS_iS_S_\00.nv.info._Z24bpnn_adjust_weights_cudaPfiS_iS_S_\00.nv.shared._Z24bpnn_adjust_weights_cudaPfiS_iS_S_\00.nv.global\00blockIdx\00threadIdx\00.nv.global.init\00$str\00.nv.constant2._Z24bpnn_adjust_weights_cudaPfiS_iS_S_\00__ocg_const\00.nv.constant0._Z24bpnn_adjust_weights_cudaPfiS_iS_S_\00_param\00_Z22bpnn_layerforward_CUDAPfS_S_S_ii\00.text._Z22bpnn_layerforward_CUDAPfS_S_S_ii\00.nv.info._Z22bpnn_layerforward_CUDAPfS_S_S_ii\00.nv.shared._Z22bpnn_layerforward_CUDAPfS_S_S_ii\00$___ZZ22bpnn_layerforward_CUDAPfS_S_S_iiE10input_node__186\00$___ZZ22bpnn_layerforward_CUDAPfS_S_S_iiE13weight_matrix__188\00.nv.constant0._Z22bpnn_layerforward_CUDAPfS_S_S_ii\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00Y\00\00\00\03\00\0A\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\E8\00\00\00\03\00\0D\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\F3\00\00\00\01\00\0D\00\00\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\FC\00\00\00\01\00\0D\00\01\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\06\01\00\00\03\00\0C\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\16\01\00\00\01\00\0C\00\00\00\00\00\00\00\00\00\0B\00\00\00\00\00\00\00\1B\01\00\00\03\00\07\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\\\01\00\00\03\00\08\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\BD\01\00\00\03\00\0B\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\16\02\00\00\03\00\0E\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\BF\02\00\00\03\00\09\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\002\00\00\00\12\10\0A\00\00\00\00\00\00\00\00\00\80\1D\00\00\00\00\00\00\98\01\00\00\12\10\0B\00\00\00\00\00\00\00\00\00\80,\00\00\00\00\00\00\04/\08\00\0D\00\00\00\10\00\00\00\04#\08\00\0D\00\00\00\00\00\00\00\04\12\08\00\0D\00\00\00X\00\00\00\04\11\08\00\0D\00\00\00X\00\00\00\04/\08\00\0C\00\00\00\10\00\00\00\04#\08\00\0C\00\00\00\00\00\00\00\04\12\08\00\0C\00\00\00H\00\00\00\04\11\08\00\0C\00\00\00H\00\00\00\010\00\00\01*\00\00\04\0A\08\00\08\00\00\00@\010\00\03\190\00\04\17\0C\00\00\00\00\00\05\00(\00\00\F0!\00\04\17\0C\00\00\00\00\00\04\00 \00\00\F0!\00\04\17\0C\00\00\00\00\00\03\00\18\00\00\F0\11\00\04\17\0C\00\00\00\00\00\02\00\10\00\00\F0!\00\04\17\0C\00\00\00\00\00\01\00\08\00\00\F0\11\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0!\00\03\1B\FF\00\04\1D\04\00X\05\00\00\04\1C\04\00p\1D\00\00\04\1E\04\000\00\00\00\010\00\00\01*\00\00\04\0A\08\00\0B\00\00\00@\01(\00\03\19(\00\04\17\0C\00\00\00\00\00\05\00$\00\00\F0\11\00\04\17\0C\00\00\00\00\00\04\00 \00\00\F0\11\00\04\17\0C\00\00\00\00\00\03\00\18\00\00\F0!\00\04\17\0C\00\00\00\00\00\02\00\10\00\00\F0!\00\04\17\0C\00\00\00\00\00\01\00\08\00\00\F0!\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0!\00\03\1B\FF\00\04\1D\04\00H\05\00\00\04\1C\04\008,\00\00\04\1E\04\00@\00\00\00333333\D3?\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\EF\1F\E0\FD\03!\00\D3rd<3>;\0A\0Amov.u2\00\1B,e\00b;\0Acvta\8D\00\04%\00\13,\\\00\22ld\C8\00\02\18\00nrd2, [\CE\00\1E])\00\1F1)\00\01a0];\0Ast#\00\81[%SP+0],,\00\0A\16\00\128\16\00\222;\B6\00\01\D8\00a1, 999(\00\02g\00\00\EF\00\18[\9E\01\03M\00\AF1;\0Aret;\0A\0A}\D6\01\1A\FE\02FuncGetAttributes\E1\01\0D#\00\0E\EC\01\0F+\00\06\0F\F7\01\1B\1F1\F7\01Q\1F1\F7\01!\0E\D9\00\0F\02\02\0F\0E8\01\0F\0D\02\8DhDevice\B4\00\0E\0E\02\0E$\00\0F\0F\02\00/32,\00\0B\1F1,\00\18\1F2<\02\13\1F2<\02\1F\1D4<\02\1F2<\02\0C\1F2<\02\13\01_\00\04;\02\0F\D9\00\07\1D]4\00\1F14\00\06\0Fp\02\10\0E\9A\01\0Fq\02\12(32q\02\0B\15\00!12\16\00\09\86\02\1F3\86\02\15\1F3\86\02#2Get\CB\00\0E}\02\05\1B\00\04\DA\00\0F\1C\02\13?3[8W\04.\0F\1B\02\0D\1F3W\04\19\04\B3\01\0D\D0\00\0F\AA\01\06\0F\05\04W\F0\04OccupancyMaxActiveB\A5\08\FE\03sPerMultiprocessor\9F\01\0F;\00\16\0EB\06\0FC\00%\0EJ\04\0FC\00\1E\1F2\86\00/\1F3\88\02\13O4[32\89\02\1C\1D3\89\02\1F4\89\02\0C\1F4\89\02\19\133\89\02\0F\F1\00\1E\0F\BC\04\00\0FK\00$/2]w\07\00\0FL\00$\0F\1F\05\01\0F\98\00%\0F\A7\07\1D\097\05\186M\05\04,\00\2224-\00\183\CF\03\1F2\CF\03\15\1F2\CF\03L\9FWithFlags\D8\03(\05D\00\0E\E1\03\0FL\00'\0F\EA\030\0CL\00\1F2\98\008\1F3\98\008\1F4H\04\13O5[40\EC\08.\0FH\04\0D\1F5\EC\08\1C\0F\F9\00+\1F]\9C\040\0D\9A\01\0F\A5\040\0D:\02\0F\AE\041\0D\DB\02\0F\B7\041\0D|\03\0F\C0\04I\08-\00\1F3$\0A6\F0\1Evisible .entry _Z22bpnn_layerforward_CUDAPfS_\02\00&ii\AD\04\00\A7\00\0F2\00\11\0E\9B\04\0F:\00\1C\1F1:\00&\1F2:\00&\07e\04\00a\01\0F:\00\18\1F4:\00&\1F5\8D\04\13?6[8^\0B\16\95pred %p<5\8E\04\10f/\02\\f<20>\B1\04\1D7\B2\04\108%\00`\0A\09.shaH\00\03\93\00\124\93\00\1FZ\D9\00\10\FF\02E10input_node[64]O\00-\F0\033weight_matrix[102T\00\0FU\05\08\1F6U\05\1C\0F\AF\01\19\0F\EE\04\00\0FB\00\1B\0F\85\05\01\1F4C\00\1B\1F3\C8\05\01\0FC\00\1B\0Fb\05\01\0F\0B\01\1C\0FP\05\01\0F\0C\01\1C#0]\AA\01#tor\15\04I\00\115\04\05\04f\0B\0A\1C\00\116\1C\00\1F5;\00\05\147?\05\0F;\00\00\118\1C\00\1F7;\00\05\149\A7\05\0F;\00\00!10\1D\00\1F9<\00\05$11\FA\05\0F=\00\01\122\1D\00\0B\EC\05\03\02\06?d12\04\06\03*10\18\00\03\05\06*d8\17\00\134v\06\1A63\06\1F4m\10\02\1F5I\06\03\8B%ctaid.y-\00\02\A1\00\09L\0B\9A4, %tid.x+\00\126\85\00\184+\00\135+\00\0BV\00\126\DF\00\115@\02\02*\00%6,\9E\00q;\0Aadd.s\17\00\227,\1C\00\171+\00%8,\9C\00\83;\0Amul.lo.\00$9,3\00\00!\01#hl\E6\04\02\BA\01G9, 4F\00\00\AF\01\04\8D\00\0BG\00%12H\00(11\8E\00513,O\00(12M\00%4,\05\01\091\00&5,7\00\194\1A\00%6, \00\197\19\00#7,\1F\00\0B\B8\01\136\CE\01\187x\00\09\0C\01\07\F2\00#9,\1E\00\08\F3\00(20\F3\00\06\A6\00521,4\00)20\1A\00#2, \00\0B\8D\00\137E\02\08\05\01(23\05\01rsetp.neI\003p1,!\00\F2\0C0;\0A@%p1 bra LBB6_2;\0Abra.uni\10\0021;\0A\08\00\11:Z\00\03?\03%3,1\03\01r\00\02\9F\00\15dx\01$72\00\01\031\00$5, \00\132\D1\00\03\19\00$6,Q\00\01'\00\01N\00\02\1F\07\01\11\02,rdc\00\1875\01\08c\00$8, \00\172\8E\06 rdI\01\0F)\07\1F\037\04\02'\07\05\92\05\02t\04)19\C4\00\02\B0\01\05\1D\00\02\0D\04\11f\9B\01\00\1D\00\00\9A\01+f1W\01\132W\01\B02:\0Abar.sync\8C\01\06c\01\00\EB\01\04d\04\08\00\01\06\D6\01\1A8\00\01424, \00\0Ac\01425,Q\00\01'\00\07c\01\222,\A4\00\1A5c\00\196\8B\03\06J\01?27,$\08#\0FM\01\03\02\B1\01*27\C7\00$9,\84\00\196\C7\00830,6\00\189\B2\00)31\EB\02\06\15\01432, \00\0A\15\01733,U\00)32\B4\01!33\B4\01,2;\9C\01\04o\00\1F4\84\02\04435, \00\0B\BD\00\186\BD\00)35N\00\1F7\BD\00\05\03.\00\1D7\BD\00'9,U\00'38\D2\01\223,\C2\00\1A9\BA\01/40\04\036\124\EE\02)40\AC\00\134i\01\0D\E1\03843,6\00\172\AC\00\124\AC\00#43\F1\05#rn\19\00\225,\CA\00:%f4\98\01\119\98\01\1A5\98\01\07V\12\1E4\19\05\021\07+24\80\03\133\80\03'3:1\05%5,5\00\01\FB\00\03\92\00\02\A9\01\11fn\01)25q\00\00\17\06\9A098907648z\00\03\C0\12\176\F1\00\04\E9\01\00\E8\01\A3lg2.approx\1D\00\00\ED\01#f7\A9\052gtu\17\003p2,{\00!f8\AB\05\162\AB\05\1B8\D4\00\134\D4\00\174\D4\00/33\D4\00\0A\02%\09\193P\08\00\C4\01\AB1073741824\C3\08\02\7F\08\08\97\01\04E\198f11\BD\05\04\BF\04(4]\16\00\04\8E\00\1F8\02\01\01!14G\00\1B2\0E\02\01<\06\01;\00\00#\00L;\0Aex8\00\01\1F\01#15\E0\00 zi\DD\00\02\1C\00\22r3=\00\0B\8A\09\138\87\0F\08\F2\08\193\F6\04\06\17\00%7,4\00\00\B6\0F\13m\B7\03\02\9D\03\02\83\03,37`\07#4,#\00\02`\07\164\B5\01\1B6\B5\01\135\B5\01\185\B5\01\189\8B\00\00\D5\004s64/\04\126y\00\199\E0\03/69\9A\059\127\16\05)69\E3\03471,\80\00\0A\DD\04772,6\00(71\DD\04/73\DD\04\04\127\AF\06\1D7\AF\06875,U\00\08\AF\06\02\FC\07\00#\00\08\A2\01(40\A2\014shr\18\00#1,\1E\00\1936\0A$42\18\00\00$\00\022\00\03\1A\00#3, \00\0A1\00#4,\95\01\00#\00\0D\91\01\117\04\02*44\DF\00$7,\1C\00\0B-\01\198-\01\1A7\1D\00(9,$\00\09\FC\00\138\FC\00\139,\0A\06 \03#9,\1C\01+%f}\08\2275\9A\03\1B9u\02\136u\02\1A6~\08\09$\00\137$\00\177\99\02\194\22\05\07\22\01\02\03\01\1F5\7F\05\05/46\7F\05\04'8:,\02\1F4W\07\05\00Y\00\03 \00\1A6\D7\06\1F6\F7\029\124\B4\01\1A4\B1\01848,\1D\00\09\BB\07/49\DE\02\04450, \00\0A\DE\02751,U\00'50\E2\01\129\E1\01851]\08\0A\1F5\08\0A\04\1F5\08\0A\05454, \00\0B{\00$5,Q\00\01'\00\09*\02\125*\02\1D9\F3\08\03\E9\06\1F7L\0C\07#3,!\00\02\EC\04\163\EC\04\1C1T\02\139\F7\01\189\F7\01/56E\01\05$7, \00\0A\F7\01/58\F7\019\125u\03)58.\01860,\1D\00\08\E6\0D/28\9F\05\09\02[\0D\198\CA\00\126t\01\1D6\87\0A763,m\00\186\1E\09\2210\10\02)63\10\02564,e\10\08\94\00\189z\0E\06\BE\06\180\C8\0F\09V\0F\00\14\0B\028\00\00'\00\08\CE\03\025\0B\153\C9\00\0C\E2\00\01H\02*32\E2\00$6,\1C\00\0B\E2\00$7,\B7\00\01'\00\09v\02\2267\A0\04\0C(\02$10v\0E/0:*\17\0A\114\02\02radjust_\EB\01 s_A\18`PfiS_i\06\02\0D,\17\0F4\00\10\0F.\17\00/32<\00\1B/1,x\00'\1F2x\00(\1F3x\00(\1F4<\00(\0F8\17\14O7[728\17\1D\1C38\17,178\17\162\12\00\10fF\00Nfd<2E \1F5&'\0D\1F7F \19\03%\16\0F#\01\18\0F'\16\01\0FE\00\1D.4]3\17\0FD\00\1D\0F\B0\16\01\0FE\00\1E\0F\C9%\01\0FD\00\1D\0F\B3\16\01\0FE\00\17\0F\B5\16\F4\1A0\B4\16\0E\DA&\0F\E1\16\01\1A0-\00\03\E1\16\1F2\DF\16\1D\0F\B3\16\10/48\B3\16\16/52\B3\16\16/56\B3\16\06\1F8\B2\16\16/48\B2\162/56\B2\168/52\B2\16G\1F0\B2\16\08/48\B2\16\19/56\B2\16-/64\B2\16\08\0B\05\01$24\AF\16\0F\84\17\03\03\D4\14\0C\9D\16\1F0\9C\16\03/68\9C\16;\00\1F\093f64\1A\00#d1\EF\15\04\C4\10\02\8F\06#2,\1C\00h0d3FD3\01\00\09\BC\00\1F7Y\17\04\1F8\F6\16\05$9, \00\0B\F6\15\150\0F\17\03\B2\16\0D\F6\15\1E0\BD\00\133\F8\14\07\88\16\1B1p\0A\04%\16\1F2\B6\0B\04%23o\16\0B\92\00(4,.\17\1E3\B6\14.24\92\00\01\14\12\09$\01(25\12\0D\08a\00\03e\15\1E5a\00\03k\14\1F2\B0\01\00\135h\147fma\B0\01#6,}\00\0E\B0\01\01\1A\00\1C51\00&7,\E7\01\02#\00\196p\13\02'\00\01\DA\14*d7\22\18\146\D6\14\07\E4\00\1F7\C4\02\02/28\FD\17\05\03u\06\0D\07\02\046\17\1876\17\14f\D0\14\01U\15\0F\07\02\00\01\8C\14\1C6\C4\02#9,\1C\00\0F\C4\02\0B/31\C4\02\03/326\17\05\03\8C\17\1D2\BD\00\154\BE\17\01'\00\0CK\15\00\22\00\0F\81\03\00\130J\15\07\93\00\1F5\C5\02\03\1F3{\0E\05%37b\17\0B\93\00(8,\E9\17\177\93\00\03\A1\11/38\93\00\00\131\C1\15\0AQ\01\01'\06\02\1E\00\0F\17\04\01\0Aa\02\02\01\15\03\85\01\02\E4\00\01\F4\07\0Dd\02\03\22\00\1A3;\17\1F8\E0\0F\0B\195\B9\1C\0F,\1C\00\1D5,\1C97_3\B7\0D\137,\1C87_1U\17\196a\06\09Z\00\02\1C\06\146Z\00\1F2Z\00\07\132Z\00\182\86\1C/39%\03\02/40%\03\04\05\BA\18\0D\A4\18'2,W\19/41\86\0F\00/42\D6\01\00\02\B0\16\09\B7\1B(43j\02\08d\00)4,\0F\19\08d\00\03N\06\1F4\CD\02\01\175[\17\04d\00\0F\01\05\05\02\87\13\05/\13\0E\90\17\00#\00\0Fd\00\00\146x\17\0Br\02$7,\83\00\0F\06\05\04-164\00$8,\1B\01\0F4\00\05\1F7\B4\02\00\03\DD\02\0B\BF\14\124\1A\05)13\FE\00\0F\1B\05\03/48\F6\01\05\03\D6\01\0D\1B\05)50\D9\13\08\1B\05\1310\06\1F5\1C\05\00\03o\15\09s\08/51\F6\01\04\04m\13\1F1d\00\00\135d\00\0FZ\02\00$20\D2\18\0BF\08$1,\1F\00\0F1\04\0F\01{\09\02\B0\00\0F\8B\01\04/21\8B\01\01\02\FB\06*22 \14$2]I\19\09\99\03\133\99\03\B03:\0Aret;\0A\0A}\0A\00\00\00\00\00\00\00\00\00", section ".nv_fatbin", align 8 -@__cuda_fatbin_wrapper = internal constant { i32, i32, i8*, i8* } { i32 1180844977, i32 1, i8* getelementptr inbounds ([26889 x i8], [26889 x i8]* @2, i64 0, i64 0), i8* null }, section ".nvFatBinSegment", align 8 -@__cuda_gpubin_handle = internal global i8** null, align 8 -@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* bitcast (void (i8*)* @__cuda_module_ctor to void ()*), i8* null }] - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z22bpnn_layerforward_CUDAPfS_S_S_ii(float* %input_cuda, float* %output_hidden_cuda, float* %input_hidden_cuda, float* %hidden_partial_sum, i32 %in, i32 %hid) #0 { -entry: - %input_cuda.addr = alloca float*, align 8 - %output_hidden_cuda.addr = alloca float*, align 8 - %input_hidden_cuda.addr = alloca float*, align 8 - %hidden_partial_sum.addr = alloca float*, align 8 - %in.addr = alloca i32, align 4 - %hid.addr = alloca i32, align 4 - %grid_dim = alloca %struct.dim3, align 8 - %block_dim = alloca %struct.dim3, align 8 - %shmem_size = alloca i64, align 8 - %stream = alloca i8*, align 8 - %grid_dim.coerce = alloca { i64, i32 }, align 8 - %block_dim.coerce = alloca { i64, i32 }, align 8 - store float* %input_cuda, float** %input_cuda.addr, align 8 - store float* %output_hidden_cuda, float** %output_hidden_cuda.addr, align 8 - store float* %input_hidden_cuda, float** %input_hidden_cuda.addr, align 8 - store float* %hidden_partial_sum, float** %hidden_partial_sum.addr, align 8 - store i32 %in, i32* %in.addr, align 4 - store i32 %hid, i32* %hid.addr, align 4 - %kernel_args = alloca i8*, i64 6, align 16 - %0 = bitcast float** %input_cuda.addr to i8* - %1 = getelementptr i8*, i8** %kernel_args, i32 0 - store i8* %0, i8** %1 - %2 = bitcast float** %output_hidden_cuda.addr to i8* - %3 = getelementptr i8*, i8** %kernel_args, i32 1 - store i8* %2, i8** %3 - %4 = bitcast float** %input_hidden_cuda.addr to i8* - %5 = getelementptr i8*, i8** %kernel_args, i32 2 - store i8* %4, i8** %5 - %6 = bitcast float** %hidden_partial_sum.addr to i8* - %7 = getelementptr i8*, i8** %kernel_args, i32 3 - store i8* %6, i8** %7 - %8 = bitcast i32* %in.addr to i8* - %9 = getelementptr i8*, i8** %kernel_args, i32 4 - store i8* %8, i8** %9 - %10 = bitcast i32* %hid.addr to i8* - %11 = getelementptr i8*, i8** %kernel_args, i32 5 - store i8* %10, i8** %11 - %12 = call i32 @__cudaPopCallConfiguration(%struct.dim3* %grid_dim, %struct.dim3* %block_dim, i64* %shmem_size, i8** %stream) - %13 = load i64, i64* %shmem_size, align 8 - %14 = load i8*, i8** %stream, align 8 - %15 = bitcast { i64, i32 }* %grid_dim.coerce to i8* - %16 = bitcast %struct.dim3* %grid_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %15, i8* align 8 %16, i64 12, i1 false) - %17 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 0 - %18 = load i64, i64* %17, align 8 - %19 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 1 - %20 = load i32, i32* %19, align 8 - %21 = bitcast { i64, i32 }* %block_dim.coerce to i8* - %22 = bitcast %struct.dim3* %block_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %21, i8* align 8 %22, i64 12, i1 false) - %23 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 0 - %24 = load i64, i64* %23, align 8 - %25 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 1 - %26 = load i32, i32* %25, align 8 - %27 = bitcast i8* %14 to %struct.CUstream_st* - %call = call i32 @cudaLaunchKernel(i8* bitcast (void (float*, float*, float*, float*, i32, i32)* @_Z22bpnn_layerforward_CUDAPfS_S_S_ii to i8*), i64 %18, i32 %20, i64 %24, i32 %26, i8** %kernel_args, i64 %13, %struct.CUstream_st* %27) - br label %setup.end - -setup.end: ; preds = %entry - ret void -} - -declare dso_local i32 @__cudaPopCallConfiguration(%struct.dim3*, %struct.dim3*, i64*, i8**) - -declare dso_local i32 @cudaLaunchKernel(i8*, i64, i32, i64, i32, i8**, i64, %struct.CUstream_st*) - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1 - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z24bpnn_adjust_weights_cudaPfiS_iS_S_(float* %delta, i32 %hid, float* %ly, i32 %in, float* %w, float* %oldw) #0 { -entry: - %delta.addr = alloca float*, align 8 - %hid.addr = alloca i32, align 4 - %ly.addr = alloca float*, align 8 - %in.addr = alloca i32, align 4 - %w.addr = alloca float*, align 8 - %oldw.addr = alloca float*, align 8 - %grid_dim = alloca %struct.dim3, align 8 - %block_dim = alloca %struct.dim3, align 8 - %shmem_size = alloca i64, align 8 - %stream = alloca i8*, align 8 - %grid_dim.coerce = alloca { i64, i32 }, align 8 - %block_dim.coerce = alloca { i64, i32 }, align 8 - store float* %delta, float** %delta.addr, align 8 - store i32 %hid, i32* %hid.addr, align 4 - store float* %ly, float** %ly.addr, align 8 - store i32 %in, i32* %in.addr, align 4 - store float* %w, float** %w.addr, align 8 - store float* %oldw, float** %oldw.addr, align 8 - %kernel_args = alloca i8*, i64 6, align 16 - %0 = bitcast float** %delta.addr to i8* - %1 = getelementptr i8*, i8** %kernel_args, i32 0 - store i8* %0, i8** %1 - %2 = bitcast i32* %hid.addr to i8* - %3 = getelementptr i8*, i8** %kernel_args, i32 1 - store i8* %2, i8** %3 - %4 = bitcast float** %ly.addr to i8* - %5 = getelementptr i8*, i8** %kernel_args, i32 2 - store i8* %4, i8** %5 - %6 = bitcast i32* %in.addr to i8* - %7 = getelementptr i8*, i8** %kernel_args, i32 3 - store i8* %6, i8** %7 - %8 = bitcast float** %w.addr to i8* - %9 = getelementptr i8*, i8** %kernel_args, i32 4 - store i8* %8, i8** %9 - %10 = bitcast float** %oldw.addr to i8* - %11 = getelementptr i8*, i8** %kernel_args, i32 5 - store i8* %10, i8** %11 - %12 = call i32 @__cudaPopCallConfiguration(%struct.dim3* %grid_dim, %struct.dim3* %block_dim, i64* %shmem_size, i8** %stream) - %13 = load i64, i64* %shmem_size, align 8 - %14 = load i8*, i8** %stream, align 8 - %15 = bitcast { i64, i32 }* %grid_dim.coerce to i8* - %16 = bitcast %struct.dim3* %grid_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %15, i8* align 8 %16, i64 12, i1 false) - %17 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 0 - %18 = load i64, i64* %17, align 8 - %19 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 1 - %20 = load i32, i32* %19, align 8 - %21 = bitcast { i64, i32 }* %block_dim.coerce to i8* - %22 = bitcast %struct.dim3* %block_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %21, i8* align 8 %22, i64 12, i1 false) - %23 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 0 - %24 = load i64, i64* %23, align 8 - %25 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 1 - %26 = load i32, i32* %25, align 8 - %27 = bitcast i8* %14 to %struct.CUstream_st* - %call = call i32 @cudaLaunchKernel(i8* bitcast (void (float*, i32, float*, i32, float*, float*)* @_Z24bpnn_adjust_weights_cudaPfiS_iS_S_ to i8*), i64 %18, i32 %20, i64 %24, i32 %26, i8** %kernel_args, i64 %13, %struct.CUstream_st* %27) - br label %setup.end - -setup.end: ; preds = %entry - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define dso_local double @_Z7gettimev() #2 { -entry: - %t = alloca %struct.timeval, align 8 - %call = call i32 @gettimeofday(%struct.timeval* %t, %struct.timezone* null) #7 - %tv_sec = getelementptr inbounds %struct.timeval, %struct.timeval* %t, i32 0, i32 0 - %0 = load i64, i64* %tv_sec, align 8 - %conv = sitofp i64 %0 to double - %tv_usec = getelementptr inbounds %struct.timeval, %struct.timeval* %t, i32 0, i32 1 - %1 = load i64, i64* %tv_usec, align 8 - %conv1 = sitofp i64 %1 to double - %mul = fmul contract double %conv1, 0x3EB0C6F7A0B5ED8D - %add = fadd contract double %conv, %mul - ret double %add -} - -; Function Attrs: nounwind -declare dso_local i32 @gettimeofday(%struct.timeval*, %struct.timezone*) #3 - -; Function Attrs: noinline norecurse optnone uwtable -define dso_local i32 @main(i32 %argc, i8** %argv) #4 { -entry: - %argc.addr = alloca i32, align 4 - %argv.addr = alloca i8**, align 8 - store i32 %argc, i32* %argc.addr, align 4 - store i8** %argv, i8*** %argv.addr, align 8 - %call = call i32 @cudaSetDevice(i32 0) - %0 = load i32, i32* %argc.addr, align 4 - %1 = load i8**, i8*** %argv.addr, align 8 - %call1 = call i32 @setup(i32 %0, i8** %1) - ret i32 0 -} - -declare dso_local i32 @cudaSetDevice(i32) #5 - -declare dso_local i32 @setup(i32, i8**) #5 - -; Function Attrs: noinline optnone uwtable -define dso_local void @bpnn_train_cuda(%struct.BPNN* %net, float* %eo, float* %eh) #0 { -entry: - %net.addr = alloca %struct.BPNN*, align 8 - %eo.addr = alloca float*, align 8 - %eh.addr = alloca float*, align 8 - %in = alloca i32, align 4 - %hid = alloca i32, align 4 - %out = alloca i32, align 4 - %out_err = alloca float, align 4 - %hid_err = alloca float, align 4 - %m = alloca i32, align 4 - %input_hidden_cuda = alloca float*, align 8 - %input_cuda = alloca float*, align 8 - %output_hidden_cuda = alloca float*, align 8 - %partial_sum = alloca float*, align 8 - %hidden_partial_sum = alloca float*, align 8 - %hidden_delta_cuda = alloca float*, align 8 - %input_prev_weights_cuda = alloca float*, align 8 - %sum = alloca float, align 4 - %input_weights_one_dim = alloca float*, align 8 - %input_weights_prev_one_dim = alloca float*, align 8 - %grid = alloca %struct.dim3, align 4 - %threads = alloca %struct.dim3, align 4 - %k = alloca i32, align 4 - %j = alloca i32, align 4 - %agg.tmp = alloca %struct.dim3, align 4 - %agg.tmp59 = alloca %struct.dim3, align 4 - %agg.tmp.coerce = alloca { i64, i32 }, align 4 - %agg.tmp59.coerce = alloca { i64, i32 }, align 4 - %error = alloca i32, align 4 - %j70 = alloca i32, align 4 - %k74 = alloca i32, align 4 - %agg.tmp136 = alloca %struct.dim3, align 4 - %agg.tmp137 = alloca %struct.dim3, align 4 - %agg.tmp136.coerce = alloca { i64, i32 }, align 4 - %agg.tmp137.coerce = alloca { i64, i32 }, align 4 - %i = alloca i32, align 4 - store %struct.BPNN* %net, %struct.BPNN** %net.addr, align 8 - store float* %eo, float** %eo.addr, align 8 - store float* %eh, float** %eh.addr, align 8 - %0 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 - %input_n = getelementptr inbounds %struct.BPNN, %struct.BPNN* %0, i32 0, i32 0 - %1 = load i32, i32* %input_n, align 8 - store i32 %1, i32* %in, align 4 - %2 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 - %hidden_n = getelementptr inbounds %struct.BPNN, %struct.BPNN* %2, i32 0, i32 1 - %3 = load i32, i32* %hidden_n, align 4 - store i32 %3, i32* %hid, align 4 - %4 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 - %output_n = getelementptr inbounds %struct.BPNN, %struct.BPNN* %4, i32 0, i32 2 - %5 = load i32, i32* %output_n, align 8 - store i32 %5, i32* %out, align 4 - store i32 0, i32* %m, align 4 - %6 = load i32, i32* %in, align 4 - %div = sdiv i32 %6, 16 - store i32 %div, i32* @num_blocks, align 4 - %7 = load i32, i32* @num_blocks, align 4 - call void @_ZN4dim3C2Ejjj(%struct.dim3* %grid, i32 1, i32 %7, i32 1) - call void @_ZN4dim3C2Ejjj(%struct.dim3* %threads, i32 16, i32 16, i32 1) - %8 = load i32, i32* %in, align 4 - %add = add nsw i32 %8, 1 - %9 = load i32, i32* %hid, align 4 - %add1 = add nsw i32 %9, 1 - %mul = mul nsw i32 %add, %add1 - %conv = sext i32 %mul to i64 - %mul2 = mul i64 %conv, 4 - %call = call noalias i8* @malloc(i64 %mul2) #7 - %10 = bitcast i8* %call to float* - store float* %10, float** %input_weights_one_dim, align 8 - %11 = load i32, i32* %in, align 4 - %add3 = add nsw i32 %11, 1 - %12 = load i32, i32* %hid, align 4 - %add4 = add nsw i32 %12, 1 - %mul5 = mul nsw i32 %add3, %add4 - %conv6 = sext i32 %mul5 to i64 - %mul7 = mul i64 %conv6, 4 - %call8 = call noalias i8* @malloc(i64 %mul7) #7 - %13 = bitcast i8* %call8 to float* - store float* %13, float** %input_weights_prev_one_dim, align 8 - %14 = load i32, i32* @num_blocks, align 4 - %mul9 = mul i32 %14, 16 - %conv10 = zext i32 %mul9 to i64 - %mul11 = mul i64 %conv10, 4 - %call12 = call noalias i8* @malloc(i64 %mul11) #7 - %15 = bitcast i8* %call12 to float* - store float* %15, float** %partial_sum, align 8 - store i32 0, i32* %k, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc27, %entry - %16 = load i32, i32* %k, align 4 - %17 = load i32, i32* %in, align 4 - %cmp = icmp sle i32 %16, %17 - br i1 %cmp, label %for.body, label %for.end29 - -for.body: ; preds = %for.cond - store i32 0, i32* %j, align 4 - br label %for.cond13 - -for.cond13: ; preds = %for.inc, %for.body - %18 = load i32, i32* %j, align 4 - %19 = load i32, i32* %hid, align 4 - %cmp14 = icmp sle i32 %18, %19 - br i1 %cmp14, label %for.body15, label %for.end - -for.body15: ; preds = %for.cond13 - %20 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 - %input_weights = getelementptr inbounds %struct.BPNN, %struct.BPNN* %20, i32 0, i32 9 - %21 = load float**, float*** %input_weights, align 8 - %22 = load i32, i32* %k, align 4 - %idxprom = sext i32 %22 to i64 - %arrayidx = getelementptr inbounds float*, float** %21, i64 %idxprom - %23 = load float*, float** %arrayidx, align 8 - %24 = load i32, i32* %j, align 4 - %idxprom16 = sext i32 %24 to i64 - %arrayidx17 = getelementptr inbounds float, float* %23, i64 %idxprom16 - %25 = load float, float* %arrayidx17, align 4 - %26 = load float*, float** %input_weights_one_dim, align 8 - %27 = load i32, i32* %m, align 4 - %idxprom18 = sext i32 %27 to i64 - %arrayidx19 = getelementptr inbounds float, float* %26, i64 %idxprom18 - store float %25, float* %arrayidx19, align 4 - %28 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 - %input_prev_weights = getelementptr inbounds %struct.BPNN, %struct.BPNN* %28, i32 0, i32 11 - %29 = load float**, float*** %input_prev_weights, align 8 - %30 = load i32, i32* %k, align 4 - %idxprom20 = sext i32 %30 to i64 - %arrayidx21 = getelementptr inbounds float*, float** %29, i64 %idxprom20 - %31 = load float*, float** %arrayidx21, align 8 - %32 = load i32, i32* %j, align 4 - %idxprom22 = sext i32 %32 to i64 - %arrayidx23 = getelementptr inbounds float, float* %31, i64 %idxprom22 - %33 = load float, float* %arrayidx23, align 4 - %34 = load float*, float** %input_weights_prev_one_dim, align 8 - %35 = load i32, i32* %m, align 4 - %idxprom24 = sext i32 %35 to i64 - %arrayidx25 = getelementptr inbounds float, float* %34, i64 %idxprom24 - store float %33, float* %arrayidx25, align 4 - %36 = load i32, i32* %m, align 4 - %inc = add nsw i32 %36, 1 - store i32 %inc, i32* %m, align 4 - br label %for.inc - -for.inc: ; preds = %for.body15 - %37 = load i32, i32* %j, align 4 - %inc26 = add nsw i32 %37, 1 - store i32 %inc26, i32* %j, align 4 - br label %for.cond13 - -for.end: ; preds = %for.cond13 - br label %for.inc27 - -for.inc27: ; preds = %for.end - %38 = load i32, i32* %k, align 4 - %inc28 = add nsw i32 %38, 1 - store i32 %inc28, i32* %k, align 4 - br label %for.cond - -for.end29: ; preds = %for.cond - %39 = bitcast float** %input_cuda to i8** - %40 = load i32, i32* %in, align 4 - %add30 = add nsw i32 %40, 1 - %conv31 = sext i32 %add30 to i64 - %mul32 = mul i64 %conv31, 4 - %call33 = call i32 @cudaMalloc(i8** %39, i64 %mul32) - %41 = bitcast float** %output_hidden_cuda to i8** - %42 = load i32, i32* %hid, align 4 - %add34 = add nsw i32 %42, 1 - %conv35 = sext i32 %add34 to i64 - %mul36 = mul i64 %conv35, 4 - %call37 = call i32 @cudaMalloc(i8** %41, i64 %mul36) - %43 = bitcast float** %input_hidden_cuda to i8** - %44 = load i32, i32* %in, align 4 - %add38 = add nsw i32 %44, 1 - %45 = load i32, i32* %hid, align 4 - %add39 = add nsw i32 %45, 1 - %mul40 = mul nsw i32 %add38, %add39 - %conv41 = sext i32 %mul40 to i64 - %mul42 = mul i64 %conv41, 4 - %call43 = call i32 @cudaMalloc(i8** %43, i64 %mul42) - %46 = bitcast float** %hidden_partial_sum to i8** - %47 = load i32, i32* @num_blocks, align 4 - %mul44 = mul i32 %47, 16 - %conv45 = zext i32 %mul44 to i64 - %mul46 = mul i64 %conv45, 4 - %call47 = call i32 @cudaMalloc(i8** %46, i64 %mul46) - %call48 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([28 x i8], [28 x i8]* @.str, i64 0, i64 0)) - %48 = load float*, float** %input_cuda, align 8 - %49 = bitcast float* %48 to i8* - %50 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 - %input_units = getelementptr inbounds %struct.BPNN, %struct.BPNN* %50, i32 0, i32 3 - %51 = load float*, float** %input_units, align 8 - %52 = bitcast float* %51 to i8* - %53 = load i32, i32* %in, align 4 - %add49 = add nsw i32 %53, 1 - %conv50 = sext i32 %add49 to i64 - %mul51 = mul i64 %conv50, 4 - %call52 = call i32 @cudaMemcpy(i8* %49, i8* %52, i64 %mul51, i32 1) - %54 = load float*, float** %input_hidden_cuda, align 8 - %55 = bitcast float* %54 to i8* - %56 = load float*, float** %input_weights_one_dim, align 8 - %57 = bitcast float* %56 to i8* - %58 = load i32, i32* %in, align 4 - %add53 = add nsw i32 %58, 1 - %59 = load i32, i32* %hid, align 4 - %add54 = add nsw i32 %59, 1 - %mul55 = mul nsw i32 %add53, %add54 - %conv56 = sext i32 %mul55 to i64 - %mul57 = mul i64 %conv56, 4 - %call58 = call i32 @cudaMemcpy(i8* %55, i8* %57, i64 %mul57, i32 1) - %60 = bitcast %struct.dim3* %agg.tmp to i8* - %61 = bitcast %struct.dim3* %grid to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %60, i8* align 4 %61, i64 12, i1 false) - %62 = bitcast %struct.dim3* %agg.tmp59 to i8* - %63 = bitcast %struct.dim3* %threads to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %62, i8* align 4 %63, i64 12, i1 false) - %64 = bitcast { i64, i32 }* %agg.tmp.coerce to i8* - %65 = bitcast %struct.dim3* %agg.tmp to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %64, i8* align 4 %65, i64 12, i1 false) - %66 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 0 - %67 = load i64, i64* %66, align 4 - %68 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 1 - %69 = load i32, i32* %68, align 4 - %70 = bitcast { i64, i32 }* %agg.tmp59.coerce to i8* - %71 = bitcast %struct.dim3* %agg.tmp59 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %70, i8* align 4 %71, i64 12, i1 false) - %72 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp59.coerce, i32 0, i32 0 - %73 = load i64, i64* %72, align 4 - %74 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp59.coerce, i32 0, i32 1 - %75 = load i32, i32* %74, align 4 - %call60 = call i32 @__cudaPushCallConfiguration(i64 %67, i32 %69, i64 %73, i32 %75, i64 0, i8* null) - %tobool = icmp ne i32 %call60, 0 - br i1 %tobool, label %kcall.end, label %kcall.configok - -kcall.configok: ; preds = %for.end29 - %76 = load float*, float** %input_cuda, align 8 - %77 = load float*, float** %output_hidden_cuda, align 8 - %78 = load float*, float** %input_hidden_cuda, align 8 - %79 = load float*, float** %hidden_partial_sum, align 8 - %80 = load i32, i32* %in, align 4 - %81 = load i32, i32* %hid, align 4 - call void @_Z22bpnn_layerforward_CUDAPfS_S_S_ii(float* %76, float* %77, float* %78, float* %79, i32 %80, i32 %81) - br label %kcall.end - -kcall.end: ; preds = %kcall.configok, %for.end29 - %call61 = call i32 @cudaThreadSynchronize() - %call62 = call i32 @cudaGetLastError() - store i32 %call62, i32* %error, align 4 - %82 = load i32, i32* %error, align 4 - %cmp63 = icmp ne i32 %82, 0 - br i1 %cmp63, label %if.then, label %if.end - -if.then: ; preds = %kcall.end - %83 = load i32, i32* %error, align 4 - %call64 = call i8* @cudaGetErrorString(i32 %83) - %call65 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str.1, i64 0, i64 0), i8* %call64) - call void @exit(i32 1) #8 - unreachable - -if.end: ; preds = %kcall.end - %84 = load float*, float** %partial_sum, align 8 - %85 = bitcast float* %84 to i8* - %86 = load float*, float** %hidden_partial_sum, align 8 - %87 = bitcast float* %86 to i8* - %88 = load i32, i32* @num_blocks, align 4 - %mul66 = mul i32 %88, 16 - %conv67 = zext i32 %mul66 to i64 - %mul68 = mul i64 %conv67, 4 - %call69 = call i32 @cudaMemcpy(i8* %85, i8* %87, i64 %mul68, i32 2) - store i32 1, i32* %j70, align 4 - br label %for.cond71 - -for.cond71: ; preds = %for.inc98, %if.end - %89 = load i32, i32* %j70, align 4 - %90 = load i32, i32* %hid, align 4 - %cmp72 = icmp sle i32 %89, %90 - br i1 %cmp72, label %for.body73, label %for.end100 - -for.body73: ; preds = %for.cond71 - store float 0.000000e+00, float* %sum, align 4 - store i32 0, i32* %k74, align 4 - br label %for.cond75 - -for.cond75: ; preds = %for.inc83, %for.body73 - %91 = load i32, i32* %k74, align 4 - %92 = load i32, i32* @num_blocks, align 4 - %cmp76 = icmp ult i32 %91, %92 - br i1 %cmp76, label %for.body77, label %for.end85 - -for.body77: ; preds = %for.cond75 - %93 = load float*, float** %partial_sum, align 8 - %94 = load i32, i32* %k74, align 4 - %95 = load i32, i32* %hid, align 4 - %mul78 = mul nsw i32 %94, %95 - %96 = load i32, i32* %j70, align 4 - %add79 = add nsw i32 %mul78, %96 - %sub = sub nsw i32 %add79, 1 - %idxprom80 = sext i32 %sub to i64 - %arrayidx81 = getelementptr inbounds float, float* %93, i64 %idxprom80 - %97 = load float, float* %arrayidx81, align 4 - %98 = load float, float* %sum, align 4 - %add82 = fadd contract float %98, %97 - store float %add82, float* %sum, align 4 - br label %for.inc83 - -for.inc83: ; preds = %for.body77 - %99 = load i32, i32* %k74, align 4 - %inc84 = add nsw i32 %99, 1 - store i32 %inc84, i32* %k74, align 4 - br label %for.cond75 - -for.end85: ; preds = %for.cond75 - %100 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 - %input_weights86 = getelementptr inbounds %struct.BPNN, %struct.BPNN* %100, i32 0, i32 9 - %101 = load float**, float*** %input_weights86, align 8 - %arrayidx87 = getelementptr inbounds float*, float** %101, i64 0 - %102 = load float*, float** %arrayidx87, align 8 - %103 = load i32, i32* %j70, align 4 - %idxprom88 = sext i32 %103 to i64 - %arrayidx89 = getelementptr inbounds float, float* %102, i64 %idxprom88 - %104 = load float, float* %arrayidx89, align 4 - %105 = load float, float* %sum, align 4 - %add90 = fadd contract float %105, %104 - store float %add90, float* %sum, align 4 - %106 = load float, float* %sum, align 4 - %fneg = fneg float %106 - %call91 = call float @_ZSt3expf(float %fneg) - %conv92 = fpext float %call91 to double - %add93 = fadd contract double 1.000000e+00, %conv92 - %div94 = fdiv double 1.000000e+00, %add93 - %conv95 = fptrunc double %div94 to float - %107 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 - %hidden_units = getelementptr inbounds %struct.BPNN, %struct.BPNN* %107, i32 0, i32 4 - %108 = load float*, float** %hidden_units, align 8 - %109 = load i32, i32* %j70, align 4 - %idxprom96 = sext i32 %109 to i64 - %arrayidx97 = getelementptr inbounds float, float* %108, i64 %idxprom96 - store float %conv95, float* %arrayidx97, align 4 - br label %for.inc98 - -for.inc98: ; preds = %for.end85 - %110 = load i32, i32* %j70, align 4 - %inc99 = add nsw i32 %110, 1 - store i32 %inc99, i32* %j70, align 4 - br label %for.cond71 - -for.end100: ; preds = %for.cond71 - %111 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 - %hidden_units101 = getelementptr inbounds %struct.BPNN, %struct.BPNN* %111, i32 0, i32 4 - %112 = load float*, float** %hidden_units101, align 8 - %113 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 - %output_units = getelementptr inbounds %struct.BPNN, %struct.BPNN* %113, i32 0, i32 5 - %114 = load float*, float** %output_units, align 8 - %115 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 - %hidden_weights = getelementptr inbounds %struct.BPNN, %struct.BPNN* %115, i32 0, i32 10 - %116 = load float**, float*** %hidden_weights, align 8 - %117 = load i32, i32* %hid, align 4 - %118 = load i32, i32* %out, align 4 - call void @bpnn_layerforward(float* %112, float* %114, float** %116, i32 %117, i32 %118) - %119 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 - %output_delta = getelementptr inbounds %struct.BPNN, %struct.BPNN* %119, i32 0, i32 7 - %120 = load float*, float** %output_delta, align 8 - %121 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 - %target = getelementptr inbounds %struct.BPNN, %struct.BPNN* %121, i32 0, i32 8 - %122 = load float*, float** %target, align 8 - %123 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 - %output_units102 = getelementptr inbounds %struct.BPNN, %struct.BPNN* %123, i32 0, i32 5 - %124 = load float*, float** %output_units102, align 8 - %125 = load i32, i32* %out, align 4 - call void @bpnn_output_error(float* %120, float* %122, float* %124, i32 %125, float* %out_err) - %126 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 - %hidden_delta = getelementptr inbounds %struct.BPNN, %struct.BPNN* %126, i32 0, i32 6 - %127 = load float*, float** %hidden_delta, align 8 - %128 = load i32, i32* %hid, align 4 - %129 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 - %output_delta103 = getelementptr inbounds %struct.BPNN, %struct.BPNN* %129, i32 0, i32 7 - %130 = load float*, float** %output_delta103, align 8 - %131 = load i32, i32* %out, align 4 - %132 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 - %hidden_weights104 = getelementptr inbounds %struct.BPNN, %struct.BPNN* %132, i32 0, i32 10 - %133 = load float**, float*** %hidden_weights104, align 8 - %134 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 - %hidden_units105 = getelementptr inbounds %struct.BPNN, %struct.BPNN* %134, i32 0, i32 4 - %135 = load float*, float** %hidden_units105, align 8 - call void @bpnn_hidden_error(float* %127, i32 %128, float* %130, i32 %131, float** %133, float* %135, float* %hid_err) - %136 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 - %output_delta106 = getelementptr inbounds %struct.BPNN, %struct.BPNN* %136, i32 0, i32 7 - %137 = load float*, float** %output_delta106, align 8 - %138 = load i32, i32* %out, align 4 - %139 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 - %hidden_units107 = getelementptr inbounds %struct.BPNN, %struct.BPNN* %139, i32 0, i32 4 - %140 = load float*, float** %hidden_units107, align 8 - %141 = load i32, i32* %hid, align 4 - %142 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 - %hidden_weights108 = getelementptr inbounds %struct.BPNN, %struct.BPNN* %142, i32 0, i32 10 - %143 = load float**, float*** %hidden_weights108, align 8 - %144 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 - %hidden_prev_weights = getelementptr inbounds %struct.BPNN, %struct.BPNN* %144, i32 0, i32 12 - %145 = load float**, float*** %hidden_prev_weights, align 8 - call void @bpnn_adjust_weights(float* %137, i32 %138, float* %140, i32 %141, float** %143, float** %145) - %146 = bitcast float** %hidden_delta_cuda to i8** - %147 = load i32, i32* %hid, align 4 - %add109 = add nsw i32 %147, 1 - %conv110 = sext i32 %add109 to i64 - %mul111 = mul i64 %conv110, 4 - %call112 = call i32 @cudaMalloc(i8** %146, i64 %mul111) - %148 = bitcast float** %input_prev_weights_cuda to i8** - %149 = load i32, i32* %in, align 4 - %add113 = add nsw i32 %149, 1 - %150 = load i32, i32* %hid, align 4 - %add114 = add nsw i32 %150, 1 - %mul115 = mul nsw i32 %add113, %add114 - %conv116 = sext i32 %mul115 to i64 - %mul117 = mul i64 %conv116, 4 - %call118 = call i32 @cudaMalloc(i8** %148, i64 %mul117) - %151 = load float*, float** %hidden_delta_cuda, align 8 - %152 = bitcast float* %151 to i8* - %153 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 - %hidden_delta119 = getelementptr inbounds %struct.BPNN, %struct.BPNN* %153, i32 0, i32 6 - %154 = load float*, float** %hidden_delta119, align 8 - %155 = bitcast float* %154 to i8* - %156 = load i32, i32* %hid, align 4 - %add120 = add nsw i32 %156, 1 - %conv121 = sext i32 %add120 to i64 - %mul122 = mul i64 %conv121, 4 - %call123 = call i32 @cudaMemcpy(i8* %152, i8* %155, i64 %mul122, i32 1) - %157 = load float*, float** %input_prev_weights_cuda, align 8 - %158 = bitcast float* %157 to i8* - %159 = load float*, float** %input_weights_prev_one_dim, align 8 - %160 = bitcast float* %159 to i8* - %161 = load i32, i32* %in, align 4 - %add124 = add nsw i32 %161, 1 - %162 = load i32, i32* %hid, align 4 - %add125 = add nsw i32 %162, 1 - %mul126 = mul nsw i32 %add124, %add125 - %conv127 = sext i32 %mul126 to i64 - %mul128 = mul i64 %conv127, 4 - %call129 = call i32 @cudaMemcpy(i8* %158, i8* %160, i64 %mul128, i32 1) - %163 = load float*, float** %input_hidden_cuda, align 8 - %164 = bitcast float* %163 to i8* - %165 = load float*, float** %input_weights_one_dim, align 8 - %166 = bitcast float* %165 to i8* - %167 = load i32, i32* %in, align 4 - %add130 = add nsw i32 %167, 1 - %168 = load i32, i32* %hid, align 4 - %add131 = add nsw i32 %168, 1 - %mul132 = mul nsw i32 %add130, %add131 - %conv133 = sext i32 %mul132 to i64 - %mul134 = mul i64 %conv133, 4 - %call135 = call i32 @cudaMemcpy(i8* %164, i8* %166, i64 %mul134, i32 1) - %169 = bitcast %struct.dim3* %agg.tmp136 to i8* - %170 = bitcast %struct.dim3* %grid to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %169, i8* align 4 %170, i64 12, i1 false) - %171 = bitcast %struct.dim3* %agg.tmp137 to i8* - %172 = bitcast %struct.dim3* %threads to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %171, i8* align 4 %172, i64 12, i1 false) - %173 = bitcast { i64, i32 }* %agg.tmp136.coerce to i8* - %174 = bitcast %struct.dim3* %agg.tmp136 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %173, i8* align 4 %174, i64 12, i1 false) - %175 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp136.coerce, i32 0, i32 0 - %176 = load i64, i64* %175, align 4 - %177 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp136.coerce, i32 0, i32 1 - %178 = load i32, i32* %177, align 4 - %179 = bitcast { i64, i32 }* %agg.tmp137.coerce to i8* - %180 = bitcast %struct.dim3* %agg.tmp137 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %179, i8* align 4 %180, i64 12, i1 false) - %181 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp137.coerce, i32 0, i32 0 - %182 = load i64, i64* %181, align 4 - %183 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp137.coerce, i32 0, i32 1 - %184 = load i32, i32* %183, align 4 - %call138 = call i32 @__cudaPushCallConfiguration(i64 %176, i32 %178, i64 %182, i32 %184, i64 0, i8* null) - %tobool139 = icmp ne i32 %call138, 0 - br i1 %tobool139, label %kcall.end141, label %kcall.configok140 - -kcall.configok140: ; preds = %for.end100 - %185 = load float*, float** %hidden_delta_cuda, align 8 - %186 = load i32, i32* %hid, align 4 - %187 = load float*, float** %input_cuda, align 8 - %188 = load i32, i32* %in, align 4 - %189 = load float*, float** %input_hidden_cuda, align 8 - %190 = load float*, float** %input_prev_weights_cuda, align 8 - call void @_Z24bpnn_adjust_weights_cudaPfiS_iS_S_(float* %185, i32 %186, float* %187, i32 %188, float* %189, float* %190) - br label %kcall.end141 - -kcall.end141: ; preds = %kcall.configok140, %for.end100 - %191 = load %struct.BPNN*, %struct.BPNN** %net.addr, align 8 - %input_units142 = getelementptr inbounds %struct.BPNN, %struct.BPNN* %191, i32 0, i32 3 - %192 = load float*, float** %input_units142, align 8 - %193 = bitcast float* %192 to i8* - %194 = load float*, float** %input_cuda, align 8 - %195 = bitcast float* %194 to i8* - %196 = load i32, i32* %in, align 4 - %add143 = add nsw i32 %196, 1 - %conv144 = sext i32 %add143 to i64 - %mul145 = mul i64 %conv144, 4 - %call146 = call i32 @cudaMemcpy(i8* %193, i8* %195, i64 %mul145, i32 2) - %197 = load float*, float** %input_weights_one_dim, align 8 - %198 = bitcast float* %197 to i8* - %199 = load float*, float** %input_hidden_cuda, align 8 - %200 = bitcast float* %199 to i8* - %201 = load i32, i32* %in, align 4 - %add147 = add nsw i32 %201, 1 - %202 = load i32, i32* %hid, align 4 - %add148 = add nsw i32 %202, 1 - %mul149 = mul nsw i32 %add147, %add148 - %conv150 = sext i32 %mul149 to i64 - %mul151 = mul i64 %conv150, 4 - %call152 = call i32 @cudaMemcpy(i8* %198, i8* %200, i64 %mul151, i32 2) - store i32 0, i32* %i, align 4 - br label %for.cond153 - -for.cond153: ; preds = %for.inc163, %kcall.end141 - %203 = load i32, i32* %i, align 4 - %204 = load i32, i32* %in, align 4 - %add154 = add nsw i32 %204, 1 - %205 = load i32, i32* %hid, align 4 - %add155 = add nsw i32 %205, 1 - %mul156 = mul nsw i32 %add154, %add155 - %cmp157 = icmp slt i32 %203, %mul156 - br i1 %cmp157, label %for.body158, label %for.end165 - -for.body158: ; preds = %for.cond153 - %206 = load float*, float** %input_weights_one_dim, align 8 - %207 = load i32, i32* %i, align 4 - %idxprom159 = sext i32 %207 to i64 - %arrayidx160 = getelementptr inbounds float, float* %206, i64 %idxprom159 - %208 = load float, float* %arrayidx160, align 4 - %conv161 = fpext float %208 to double - %call162 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.2, i64 0, i64 0), double %conv161) - br label %for.inc163 - -for.inc163: ; preds = %for.body158 - %209 = load i32, i32* %i, align 4 - %inc164 = add nsw i32 %209, 1 - store i32 %inc164, i32* %i, align 4 - br label %for.cond153 - -for.end165: ; preds = %for.cond153 - %call166 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.3, i64 0, i64 0)) - %210 = load float*, float** %input_cuda, align 8 - %211 = bitcast float* %210 to i8* - %call167 = call i32 @cudaFree(i8* %211) - %212 = load float*, float** %output_hidden_cuda, align 8 - %213 = bitcast float* %212 to i8* - %call168 = call i32 @cudaFree(i8* %213) - %214 = load float*, float** %input_hidden_cuda, align 8 - %215 = bitcast float* %214 to i8* - %call169 = call i32 @cudaFree(i8* %215) - %216 = load float*, float** %hidden_partial_sum, align 8 - %217 = bitcast float* %216 to i8* - %call170 = call i32 @cudaFree(i8* %217) - %218 = load float*, float** %input_prev_weights_cuda, align 8 - %219 = bitcast float* %218 to i8* - %call171 = call i32 @cudaFree(i8* %219) - %220 = load float*, float** %hidden_delta_cuda, align 8 - %221 = bitcast float* %220 to i8* - %call172 = call i32 @cudaFree(i8* %221) - %222 = load float*, float** %partial_sum, align 8 - %223 = bitcast float* %222 to i8* - call void @free(i8* %223) #7 - %224 = load float*, float** %input_weights_one_dim, align 8 - %225 = bitcast float* %224 to i8* - call void @free(i8* %225) #7 - %226 = load float*, float** %input_weights_prev_one_dim, align 8 - %227 = bitcast float* %226 to i8* - call void @free(i8* %227) #7 - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN4dim3C2Ejjj(%struct.dim3* %this, i32 %vx, i32 %vy, i32 %vz) unnamed_addr #2 comdat align 2 { -entry: - %this.addr = alloca %struct.dim3*, align 8 - %vx.addr = alloca i32, align 4 - %vy.addr = alloca i32, align 4 - %vz.addr = alloca i32, align 4 - store %struct.dim3* %this, %struct.dim3** %this.addr, align 8 - store i32 %vx, i32* %vx.addr, align 4 - store i32 %vy, i32* %vy.addr, align 4 - store i32 %vz, i32* %vz.addr, align 4 - %this1 = load %struct.dim3*, %struct.dim3** %this.addr, align 8 - %x = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 0 - %0 = load i32, i32* %vx.addr, align 4 - store i32 %0, i32* %x, align 4 - %y = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 1 - %1 = load i32, i32* %vy.addr, align 4 - store i32 %1, i32* %y, align 4 - %z = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 2 - %2 = load i32, i32* %vz.addr, align 4 - store i32 %2, i32* %z, align 4 - ret void -} - -; Function Attrs: nounwind -declare dso_local noalias i8* @malloc(i64) #3 - -declare dso_local i32 @cudaMalloc(i8**, i64) #5 - -declare dso_local i32 @printf(i8*, ...) #5 - -declare dso_local i32 @cudaMemcpy(i8*, i8*, i64, i32) #5 - -declare dso_local i32 @__cudaPushCallConfiguration(i64, i32, i64, i32, i64, i8*) #5 - -declare dso_local i32 @cudaThreadSynchronize() #5 - -declare dso_local i32 @cudaGetLastError() #5 - -declare dso_local i8* @cudaGetErrorString(i32) #5 - -; Function Attrs: noreturn nounwind -declare dso_local void @exit(i32) #6 - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local float @_ZSt3expf(float %__x) #2 comdat { -entry: - %__x.addr = alloca float, align 4 - store float %__x, float* %__x.addr, align 4 - %0 = load float, float* %__x.addr, align 4 - %call = call float @expf(float %0) #7 - ret float %call -} - -declare dso_local void @bpnn_layerforward(float*, float*, float**, i32, i32) #5 - -declare dso_local void @bpnn_output_error(float*, float*, float*, i32, float*) #5 - -declare dso_local void @bpnn_hidden_error(float*, i32, float*, i32, float**, float*, float*) #5 - -declare dso_local void @bpnn_adjust_weights(float*, i32, float*, i32, float**, float**) #5 - -declare dso_local i32 @cudaFree(i8*) #5 - -; Function Attrs: nounwind -declare dso_local void @free(i8*) #3 - -; Function Attrs: nounwind -declare dso_local float @expf(float) #3 - -define internal void @__cuda_register_globals(i8** %0) { -entry: - %1 = call i32 @__cudaRegisterFunction(i8** %0, i8* bitcast (void (float*, float*, float*, float*, i32, i32)* @_Z22bpnn_layerforward_CUDAPfS_S_S_ii to i8*), i8* getelementptr inbounds ([37 x i8], [37 x i8]* @0, i64 0, i64 0), i8* getelementptr inbounds ([37 x i8], [37 x i8]* @0, i64 0, i64 0), i32 -1, i8* null, i8* null, i8* null, i8* null, i32* null) - %2 = call i32 @__cudaRegisterFunction(i8** %0, i8* bitcast (void (float*, i32, float*, i32, float*, float*)* @_Z24bpnn_adjust_weights_cudaPfiS_iS_S_ to i8*), i8* getelementptr inbounds ([39 x i8], [39 x i8]* @1, i64 0, i64 0), i8* getelementptr inbounds ([39 x i8], [39 x i8]* @1, i64 0, i64 0), i32 -1, i8* null, i8* null, i8* null, i8* null, i32* null) - ret void -} - -declare dso_local i32 @__cudaRegisterFunction(i8**, i8*, i8*, i8*, i32, i8*, i8*, i8*, i8*, i32*) - -declare dso_local i32 @__cudaRegisterVar(i8**, i8*, i8*, i8*, i32, i32, i32, i32) - -declare dso_local i8** @__cudaRegisterFatBinary(i8*) - -define internal void @__cuda_module_ctor(i8* %0) { -entry: - %1 = call i8** @__cudaRegisterFatBinary(i8* bitcast ({ i32, i32, i8*, i8* }* @__cuda_fatbin_wrapper to i8*)) - store i8** %1, i8*** @__cuda_gpubin_handle, align 8 - call void @__cuda_register_globals(i8** %1) - call void @__cudaRegisterFatBinaryEnd(i8** %1) - %2 = call i32 @atexit(void (i8*)* @__cuda_module_dtor) - ret void -} - -declare dso_local void @__cudaRegisterFatBinaryEnd(i8**) - -declare dso_local void @__cudaUnregisterFatBinary(i8**) - -define internal void @__cuda_module_dtor(i8* %0) { -entry: - %1 = load i8**, i8*** @__cuda_gpubin_handle, align 8 - call void @__cudaUnregisterFatBinary(i8** %1) - ret void -} - -declare dso_local i32 @atexit(void (i8*)*) - -attributes #0 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { argmemonly nounwind willreturn } -attributes #2 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #3 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #4 = { noinline norecurse optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #5 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #6 = { noreturn nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #7 = { nounwind } -attributes #8 = { noreturn nounwind } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]} -!1 = !{i32 1, !"wchar_size", i32 4} -!2 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"} diff --git a/examples/backprop/backprop_cuda.cu b/examples/backprop/backprop_cuda.cu deleted file mode 100644 index 9641fe6..0000000 --- a/examples/backprop/backprop_cuda.cu +++ /dev/null @@ -1,195 +0,0 @@ -#include -#include -#include -#include -#include -#include - -// includes, kernels -#include "backprop.h" -#include "backprop_cuda_kernel.cu" - -//////////////////////////////////////////////////////////////////////////////// - -extern "C" void bpnn_layerforward(float *l1, float *l2, float **conn, int n1, - int n2); - -extern "C" void bpnn_output_error(float *delta, float *target, float *output, - int nj, float *err); - -extern "C" void bpnn_hidden_error(float *delta_h, int nh, float *delta_o, - int no, float **who, float *hidden, - float *err); - -extern "C" void bpnn_adjust_weights(float *delta, int ndelta, float *ly, - int nly, float **w, float **oldw); - -extern "C" int setup(int argc, char **argv); - -extern "C" float **alloc_2d_dbl(int m, int n); - -extern "C" float squash(float x); - -double gettime() { - struct timeval t; - gettimeofday(&t, NULL); - return t.tv_sec + t.tv_usec * 1e-6; -} - -unsigned int num_threads = 0; -unsigned int num_blocks = 0; - -//////////////////////////////////////////////////////////////////////////////// -// Program main -//////////////////////////////////////////////////////////////////////////////// -int main(int argc, char **argv) { - cudaSetDevice(0); - setup(argc, argv); -} - -extern "C" void bpnn_train_cuda(BPNN *net, float *eo, float *eh) { - int in, hid, out; - float out_err, hid_err; - - in = net->input_n; - hid = net->hidden_n; - out = net->output_n; - -#ifdef GPU - int m = 0; - float *input_hidden_cuda; - float *input_cuda; - float *output_hidden_cuda; - float *partial_sum; - float *hidden_partial_sum; - float *hidden_delta_cuda; - float *input_prev_weights_cuda; - float sum; - float *input_weights_one_dim; - float *input_weights_prev_one_dim; - num_blocks = in / 16; - dim3 grid(1, num_blocks); - dim3 threads(16, 16); - - input_weights_one_dim = (float *)malloc((in + 1) * (hid + 1) * sizeof(float)); - input_weights_prev_one_dim = - (float *)malloc((in + 1) * (hid + 1) * sizeof(float)); - partial_sum = (float *)malloc(num_blocks * WIDTH * sizeof(float)); - - // this preprocessing stage is added to correct the bugs of wrong memcopy - // using two-dimensional net->inputweights - for (int k = 0; k <= in; k++) { - for (int j = 0; j <= hid; j++) { - input_weights_one_dim[m] = net->input_weights[k][j]; - input_weights_prev_one_dim[m] = net->input_prev_weights[k][j]; - m++; - } - } - - cudaMalloc((void **)&input_cuda, (in + 1) * sizeof(float)); - cudaMalloc((void **)&output_hidden_cuda, (hid + 1) * sizeof(float)); - cudaMalloc((void **)&input_hidden_cuda, (in + 1) * (hid + 1) * sizeof(float)); - cudaMalloc((void **)&hidden_partial_sum, num_blocks * WIDTH * sizeof(float)); - -#endif - -#ifdef CPU - - printf("Performing CPU computation\n"); - bpnn_layerforward(net->input_units, net->hidden_units, net->input_weights, in, - hid); - -#endif - -#ifdef GPU - - printf("Performing GPU computation\n"); - - // printf("in= %d, hid = %d, numblocks = %d\n", in, hid, num_blocks); - - cudaMemcpy(input_cuda, net->input_units, (in + 1) * sizeof(float), - cudaMemcpyHostToDevice); - cudaMemcpy(input_hidden_cuda, input_weights_one_dim, - (in + 1) * (hid + 1) * sizeof(float), cudaMemcpyHostToDevice); - - bpnn_layerforward_CUDA<<>>(input_cuda, output_hidden_cuda, - input_hidden_cuda, - hidden_partial_sum, in, hid); - - cudaThreadSynchronize(); - - cudaError_t error = cudaGetLastError(); - if (error != cudaSuccess) { - printf("bpnn kernel error: %s\n", cudaGetErrorString(error)); - exit(EXIT_FAILURE); - } - - cudaMemcpy(partial_sum, hidden_partial_sum, - num_blocks * WIDTH * sizeof(float), cudaMemcpyDeviceToHost); - - for (int j = 1; j <= hid; j++) { - sum = 0.0; - for (int k = 0; k < num_blocks; k++) { - sum += partial_sum[k * hid + j - 1]; - } - sum += net->input_weights[0][j]; - net->hidden_units[j] = float(1.0 / (1.0 + exp(-sum))); - } -#endif - - bpnn_layerforward(net->hidden_units, net->output_units, net->hidden_weights, - hid, out); - bpnn_output_error(net->output_delta, net->target, net->output_units, out, - &out_err); - bpnn_hidden_error(net->hidden_delta, hid, net->output_delta, out, - net->hidden_weights, net->hidden_units, &hid_err); - bpnn_adjust_weights(net->output_delta, out, net->hidden_units, hid, - net->hidden_weights, net->hidden_prev_weights); - -#ifdef CPU - - bpnn_adjust_weights(net->hidden_delta, hid, net->input_units, in, - net->input_weights, net->input_prev_weights); - -#endif - -#ifdef GPU - - cudaMalloc((void **)&hidden_delta_cuda, (hid + 1) * sizeof(float)); - cudaMalloc((void **)&input_prev_weights_cuda, - (in + 1) * (hid + 1) * sizeof(float)); - - cudaMemcpy(hidden_delta_cuda, net->hidden_delta, (hid + 1) * sizeof(float), - cudaMemcpyHostToDevice); - cudaMemcpy(input_prev_weights_cuda, input_weights_prev_one_dim, - (in + 1) * (hid + 1) * sizeof(float), cudaMemcpyHostToDevice); - cudaMemcpy(input_hidden_cuda, input_weights_one_dim, - (in + 1) * (hid + 1) * sizeof(float), cudaMemcpyHostToDevice); - - bpnn_adjust_weights_cuda<<>>(hidden_delta_cuda, hid, - input_cuda, in, input_hidden_cuda, - input_prev_weights_cuda); - - cudaMemcpy(net->input_units, input_cuda, (in + 1) * sizeof(float), - cudaMemcpyDeviceToHost); - cudaMemcpy(input_weights_one_dim, input_hidden_cuda, - (in + 1) * (hid + 1) * sizeof(float), cudaMemcpyDeviceToHost); - - for (int i = 0; i < (in + 1) * (hid + 1); i++) { - printf("%f ", input_weights_one_dim[i]); - } - printf("\n"); - - cudaFree(input_cuda); - cudaFree(output_hidden_cuda); - cudaFree(input_hidden_cuda); - cudaFree(hidden_partial_sum); - cudaFree(input_prev_weights_cuda); - cudaFree(hidden_delta_cuda); - - free(partial_sum); - free(input_weights_one_dim); - free(input_weights_prev_one_dim); - -#endif -} diff --git a/examples/backprop/backprop_cuda_kernel.cu b/examples/backprop/backprop_cuda_kernel.cu deleted file mode 100644 index 96b7d9b..0000000 --- a/examples/backprop/backprop_cuda_kernel.cu +++ /dev/null @@ -1,96 +0,0 @@ -#ifndef _BACKPROP_CUDA_KERNEL_H_ -#define _BACKPROP_CUDA_KERNEL_H_ - -#include "backprop.h" -#include "cuda.h" -#include "math.h" -#include - -__global__ void bpnn_layerforward_CUDA(float *input_cuda, - float *output_hidden_cuda, - float *input_hidden_cuda, - float *hidden_partial_sum, int in, - int hid) { - int by = blockIdx.y; - int tx = threadIdx.x; - int ty = threadIdx.y; - - int index = (hid + 1) * HEIGHT * by + (hid + 1) * ty + tx + 1 + (hid + 1); - - int index_in = HEIGHT * by + ty + 1; - - __shared__ float input_node[HEIGHT]; - __shared__ float weight_matrix[HEIGHT][WIDTH]; - - if (tx == 0) - input_node[ty] = input_cuda[index_in]; - - __syncthreads(); - - weight_matrix[ty][tx] = input_hidden_cuda[index]; - - __syncthreads(); - - weight_matrix[ty][tx] = weight_matrix[ty][tx] * input_node[ty]; - - __syncthreads(); - - for (int i = 1; i <= __log2f(HEIGHT); i++) { - - int power_two = __powf(2, i); - - if (ty % power_two == 0) - weight_matrix[ty][tx] = - weight_matrix[ty][tx] + weight_matrix[ty + power_two / 2][tx]; - - __syncthreads(); - } - - //__syncthreads(); - - input_hidden_cuda[index] = weight_matrix[ty][tx]; - - /* - for ( unsigned int i = 2 ; i <= HEIGHT ; i *= 2){ - - unsigned int power_two = i - 1; - if( (ty & power_two) == 0 ) { - weight_matrix[ty][tx] = weight_matrix[ty][tx] + - weight_matrix[ty + power_two/2][tx]; - } - } - */ - - __syncthreads(); - - if (tx == 0) { - hidden_partial_sum[by * hid + ty] = weight_matrix[tx][ty]; - } -} - -__global__ void bpnn_adjust_weights_cuda(float *delta, int hid, float *ly, - int in, float *w, float *oldw) { - - int by = blockIdx.y; - - int tx = threadIdx.x; - int ty = threadIdx.y; - - int index = (hid + 1) * HEIGHT * by + (hid + 1) * ty + tx + 1 + (hid + 1); - int index_y = HEIGHT * by + ty + 1; - int index_x = tx + 1; - // eta = 0.3; - // momentum = 0.3; - - w[index] += ((ETA * delta[index_x] * ly[index_y]) + (MOMENTUM * oldw[index])); - oldw[index] = - ((ETA * delta[index_x] * ly[index_y]) + (MOMENTUM * oldw[index])); - - __syncthreads(); - - if (ty == 0 && by == 0) { - w[index_x] += ((ETA * delta[index_x]) + (MOMENTUM * oldw[index_x])); - oldw[index_x] = ((ETA * delta[index_x]) + (MOMENTUM * oldw[index_x])); - } -} -#endif diff --git a/examples/backprop/facetrain.c b/examples/backprop/facetrain.c deleted file mode 100644 index 4f9aaab..0000000 --- a/examples/backprop/facetrain.c +++ /dev/null @@ -1,48 +0,0 @@ -#include "backprop.h" -#include -#include -#include - -extern char *strcpy(); -extern void exit(); - -int layer_size = 0; - -backprop_face() { - BPNN *net; - int i; - float out_err, hid_err; - net = bpnn_create(layer_size, 16, 1); // (16, 1 can not be changed) - - printf("Input layer size : %d\n", layer_size); - load(net); - // entering the training kernel, only one iteration - printf("Starting training kernel\n"); - bpnn_train_cuda(net, &out_err, &hid_err); - bpnn_free(net); - printf("Training done\n"); -} - -int setup(argc, argv) -int argc; -char *argv[]; -{ - - int seed; - - if (argc != 2) { - fprintf(stderr, "usage: backprop \n"); - exit(0); - } - layer_size = atoi(argv[1]); - if (layer_size % 16 != 0) { - fprintf(stderr, "The number of input points must be divided by 16\n"); - exit(0); - } - - seed = 7; - bpnn_initialize(seed); - backprop_face(); - - exit(0); -} diff --git a/examples/backprop/imagenet.c b/examples/backprop/imagenet.c deleted file mode 100644 index 807df38..0000000 --- a/examples/backprop/imagenet.c +++ /dev/null @@ -1,22 +0,0 @@ -#include "backprop.h" -#include -#include - -extern layer_size; - -load(net) BPNN *net; -{ - float *units; - int nr, nc, imgsize, i, j, k; - - nr = layer_size; - - imgsize = nr * nc; - units = net->input_units; - - k = 1; - for (i = 0; i < nr; i++) { - units[k] = (float)rand() / RAND_MAX; - k++; - } -} diff --git a/examples/backprop/run.sh b/examples/backprop/run.sh deleted file mode 100644 index 18083f5..0000000 --- a/examples/backprop/run.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash -set -e -clang -c -emit-llvm backprop.c -clang -c -emit-llvm facetrain.c -clang -c -emit-llvm imagenet.c - -llvm-as backprop_cuda-cuda-nvptx64-nvidia-cuda-sm_61.ll -llvm-as backprop_cuda-host-x86_64-unknown-linux-gnu.ll -../../build/compilation/kernelTranslator backprop_cuda-cuda-nvptx64-nvidia-cuda-sm_61.bc kernel.bc -../../build/compilation/hostTranslator backprop_cuda-host-x86_64-unknown-linux-gnu.bc host.bc - -llc --relocation-model=pic --filetype=obj kernel.bc -llc --relocation-model=pic --filetype=obj host.bc -llc --relocation-model=pic --filetype=obj backprop.bc -llc --relocation-model=pic --filetype=obj facetrain.bc -llc --relocation-model=pic --filetype=obj imagenet.bc -export LD_LIBRARY_PATH=../../build/runtime:../../build/runtime/threadPool:$LD_LIBRARY_PATH -g++ -Wall -L../../build/runtime -L../../build/runtime/threadPool -o demo \ - -fPIC -no-pie host.o kernel.o backprop.o facetrain.o imagenet.o \ - -lc -lx86Runtime -lthreadPool -lpthread - -./demo 1024 > res.log -if grep -q -e "0.173289 0.259645 0.350836" res.log; then - echo "Pass" -else - echo "Error result" - exit 1 -fi diff --git a/examples/bfs/bfs-cuda-nvptx64-nvidia-cuda-sm_61.ll b/examples/bfs/bfs-cuda-nvptx64-nvidia-cuda-sm_61.ll deleted file mode 100644 index 5592d33..0000000 --- a/examples/bfs/bfs-cuda-nvptx64-nvidia-cuda-sm_61.ll +++ /dev/null @@ -1,307 +0,0 @@ -; ModuleID = 'bfs-cuda-nvptx64-nvidia-cuda-sm_61.bc' -source_filename = "bfs.cu" -target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" -target triple = "nvptx64-nvidia-cuda" - -%struct.__cuda_builtin_blockIdx_t = type { i8 } -%struct.__cuda_builtin_threadIdx_t = type { i8 } -%struct.cudaFuncAttributes = type { i64, i64, i64, i32, i32, i32, i32, i32, i32, i32 } -%struct.Node = type { i32, i32 } - -$_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv = comdat any - -$_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv = comdat any - -@blockIdx = extern_weak dso_local addrspace(1) global %struct.__cuda_builtin_blockIdx_t, align 1 -@threadIdx = extern_weak dso_local addrspace(1) global %struct.__cuda_builtin_threadIdx_t, align 1 - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaMalloc(i8** %p, i64 %s) #0 { -entry: - %p.addr = alloca i8**, align 8 - %s.addr = alloca i64, align 8 - store i8** %p, i8*** %p.addr, align 8 - store i64 %s, i64* %s.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaFuncGetAttributes(%struct.cudaFuncAttributes* %p, i8* %c) #0 { -entry: - %p.addr = alloca %struct.cudaFuncAttributes*, align 8 - %c.addr = alloca i8*, align 8 - store %struct.cudaFuncAttributes* %p, %struct.cudaFuncAttributes** %p.addr, align 8 - store i8* %c, i8** %c.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaDeviceGetAttribute(i32* %value, i32 %attr, i32 %device) #0 { -entry: - %value.addr = alloca i32*, align 8 - %attr.addr = alloca i32, align 4 - %device.addr = alloca i32, align 4 - store i32* %value, i32** %value.addr, align 8 - store i32 %attr, i32* %attr.addr, align 4 - store i32 %device, i32* %device.addr, align 4 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaGetDevice(i32* %device) #0 { -entry: - %device.addr = alloca i32*, align 8 - store i32* %device, i32** %device.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaOccupancyMaxActiveBlocksPerMultiprocessor(i32* %numBlocks, i8* %func, i32 %blockSize, i64 %dynamicSmemSize) #0 { -entry: - %numBlocks.addr = alloca i32*, align 8 - %func.addr = alloca i8*, align 8 - %blockSize.addr = alloca i32, align 4 - %dynamicSmemSize.addr = alloca i64, align 8 - store i32* %numBlocks, i32** %numBlocks.addr, align 8 - store i8* %func, i8** %func.addr, align 8 - store i32 %blockSize, i32* %blockSize.addr, align 4 - store i64 %dynamicSmemSize, i64* %dynamicSmemSize.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(i32* %numBlocks, i8* %func, i32 %blockSize, i64 %dynamicSmemSize, i32 %flags) #0 { -entry: - %numBlocks.addr = alloca i32*, align 8 - %func.addr = alloca i8*, align 8 - %blockSize.addr = alloca i32, align 4 - %dynamicSmemSize.addr = alloca i64, align 8 - %flags.addr = alloca i32, align 4 - store i32* %numBlocks, i32** %numBlocks.addr, align 8 - store i8* %func, i8** %func.addr, align 8 - store i32 %blockSize, i32* %blockSize.addr, align 4 - store i64 %dynamicSmemSize, i64* %dynamicSmemSize.addr, align 8 - store i32 %flags, i32* %flags.addr, align 4 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define dso_local void @_Z6KernelP4NodePiPbS2_S2_S1_i(%struct.Node* %g_graph_nodes, i32* %g_graph_edges, i8* %g_graph_mask, i8* %g_updating_graph_mask, i8* %g_graph_visited, i32* %g_cost, i32 %no_of_nodes) #0 { -entry: - %g_graph_nodes.addr = alloca %struct.Node*, align 8 - %g_graph_edges.addr = alloca i32*, align 8 - %g_graph_mask.addr = alloca i8*, align 8 - %g_updating_graph_mask.addr = alloca i8*, align 8 - %g_graph_visited.addr = alloca i8*, align 8 - %g_cost.addr = alloca i32*, align 8 - %no_of_nodes.addr = alloca i32, align 4 - %tid = alloca i32, align 4 - %i = alloca i32, align 4 - %id = alloca i32, align 4 - store %struct.Node* %g_graph_nodes, %struct.Node** %g_graph_nodes.addr, align 8 - store i32* %g_graph_edges, i32** %g_graph_edges.addr, align 8 - store i8* %g_graph_mask, i8** %g_graph_mask.addr, align 8 - store i8* %g_updating_graph_mask, i8** %g_updating_graph_mask.addr, align 8 - store i8* %g_graph_visited, i8** %g_graph_visited.addr, align 8 - store i32* %g_cost, i32** %g_cost.addr, align 8 - store i32 %no_of_nodes, i32* %no_of_nodes.addr, align 4 - %call = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #3 - %mul = mul i32 %call, 512 - %call1 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #3 - %add = add i32 %mul, %call1 - store i32 %add, i32* %tid, align 4 - %0 = load i32, i32* %tid, align 4 - %1 = load i32, i32* %no_of_nodes.addr, align 4 - %cmp = icmp slt i32 %0, %1 - br i1 %cmp, label %land.lhs.true, label %if.end26 - -land.lhs.true: ; preds = %entry - %2 = load i8*, i8** %g_graph_mask.addr, align 8 - %3 = load i32, i32* %tid, align 4 - %idxprom = sext i32 %3 to i64 - %arrayidx = getelementptr inbounds i8, i8* %2, i64 %idxprom - %4 = load i8, i8* %arrayidx, align 1 - %tobool = trunc i8 %4 to i1 - br i1 %tobool, label %if.then, label %if.end26 - -if.then: ; preds = %land.lhs.true - %5 = load i8*, i8** %g_graph_mask.addr, align 8 - %6 = load i32, i32* %tid, align 4 - %idxprom2 = sext i32 %6 to i64 - %arrayidx3 = getelementptr inbounds i8, i8* %5, i64 %idxprom2 - store i8 0, i8* %arrayidx3, align 1 - %7 = load %struct.Node*, %struct.Node** %g_graph_nodes.addr, align 8 - %8 = load i32, i32* %tid, align 4 - %idxprom4 = sext i32 %8 to i64 - %arrayidx5 = getelementptr inbounds %struct.Node, %struct.Node* %7, i64 %idxprom4 - %starting = getelementptr inbounds %struct.Node, %struct.Node* %arrayidx5, i32 0, i32 0 - %9 = load i32, i32* %starting, align 4 - store i32 %9, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %if.then - %10 = load i32, i32* %i, align 4 - %11 = load %struct.Node*, %struct.Node** %g_graph_nodes.addr, align 8 - %12 = load i32, i32* %tid, align 4 - %idxprom6 = sext i32 %12 to i64 - %arrayidx7 = getelementptr inbounds %struct.Node, %struct.Node* %11, i64 %idxprom6 - %no_of_edges = getelementptr inbounds %struct.Node, %struct.Node* %arrayidx7, i32 0, i32 1 - %13 = load i32, i32* %no_of_edges, align 4 - %14 = load %struct.Node*, %struct.Node** %g_graph_nodes.addr, align 8 - %15 = load i32, i32* %tid, align 4 - %idxprom8 = sext i32 %15 to i64 - %arrayidx9 = getelementptr inbounds %struct.Node, %struct.Node* %14, i64 %idxprom8 - %starting10 = getelementptr inbounds %struct.Node, %struct.Node* %arrayidx9, i32 0, i32 0 - %16 = load i32, i32* %starting10, align 4 - %add11 = add nsw i32 %13, %16 - %cmp12 = icmp slt i32 %10, %add11 - br i1 %cmp12, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %17 = load i32*, i32** %g_graph_edges.addr, align 8 - %18 = load i32, i32* %i, align 4 - %idxprom13 = sext i32 %18 to i64 - %arrayidx14 = getelementptr inbounds i32, i32* %17, i64 %idxprom13 - %19 = load i32, i32* %arrayidx14, align 4 - store i32 %19, i32* %id, align 4 - %20 = load i8*, i8** %g_graph_visited.addr, align 8 - %21 = load i32, i32* %id, align 4 - %idxprom15 = sext i32 %21 to i64 - %arrayidx16 = getelementptr inbounds i8, i8* %20, i64 %idxprom15 - %22 = load i8, i8* %arrayidx16, align 1 - %tobool17 = trunc i8 %22 to i1 - br i1 %tobool17, label %if.end, label %if.then18 - -if.then18: ; preds = %for.body - %23 = load i32*, i32** %g_cost.addr, align 8 - %24 = load i32, i32* %tid, align 4 - %idxprom19 = sext i32 %24 to i64 - %arrayidx20 = getelementptr inbounds i32, i32* %23, i64 %idxprom19 - %25 = load i32, i32* %arrayidx20, align 4 - %add21 = add nsw i32 %25, 1 - %26 = load i32*, i32** %g_cost.addr, align 8 - %27 = load i32, i32* %id, align 4 - %idxprom22 = sext i32 %27 to i64 - %arrayidx23 = getelementptr inbounds i32, i32* %26, i64 %idxprom22 - store i32 %add21, i32* %arrayidx23, align 4 - %28 = load i8*, i8** %g_updating_graph_mask.addr, align 8 - %29 = load i32, i32* %id, align 4 - %idxprom24 = sext i32 %29 to i64 - %arrayidx25 = getelementptr inbounds i8, i8* %28, i64 %idxprom24 - store i8 1, i8* %arrayidx25, align 1 - br label %if.end - -if.end: ; preds = %if.then18, %for.body - br label %for.inc - -for.inc: ; preds = %if.end - %30 = load i32, i32* %i, align 4 - %inc = add nsw i32 %30, 1 - store i32 %inc, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - br label %if.end26 - -if.end26: ; preds = %for.end, %land.lhs.true, %entry - ret void -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() - ret i32 %0 -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() - ret i32 %0 -} - -; Function Attrs: convergent noinline nounwind optnone -define dso_local void @_Z7Kernel2PbS_S_S_i(i8* %g_graph_mask, i8* %g_updating_graph_mask, i8* %g_graph_visited, i8* %g_over, i32 %no_of_nodes) #0 { -entry: - %g_graph_mask.addr = alloca i8*, align 8 - %g_updating_graph_mask.addr = alloca i8*, align 8 - %g_graph_visited.addr = alloca i8*, align 8 - %g_over.addr = alloca i8*, align 8 - %no_of_nodes.addr = alloca i32, align 4 - %tid = alloca i32, align 4 - store i8* %g_graph_mask, i8** %g_graph_mask.addr, align 8 - store i8* %g_updating_graph_mask, i8** %g_updating_graph_mask.addr, align 8 - store i8* %g_graph_visited, i8** %g_graph_visited.addr, align 8 - store i8* %g_over, i8** %g_over.addr, align 8 - store i32 %no_of_nodes, i32* %no_of_nodes.addr, align 4 - %call = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #3 - %mul = mul i32 %call, 512 - %call1 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #3 - %add = add i32 %mul, %call1 - store i32 %add, i32* %tid, align 4 - %0 = load i32, i32* %tid, align 4 - %1 = load i32, i32* %no_of_nodes.addr, align 4 - %cmp = icmp slt i32 %0, %1 - br i1 %cmp, label %land.lhs.true, label %if.end - -land.lhs.true: ; preds = %entry - %2 = load i8*, i8** %g_updating_graph_mask.addr, align 8 - %3 = load i32, i32* %tid, align 4 - %idxprom = sext i32 %3 to i64 - %arrayidx = getelementptr inbounds i8, i8* %2, i64 %idxprom - %4 = load i8, i8* %arrayidx, align 1 - %tobool = trunc i8 %4 to i1 - br i1 %tobool, label %if.then, label %if.end - -if.then: ; preds = %land.lhs.true - %5 = load i8*, i8** %g_graph_mask.addr, align 8 - %6 = load i32, i32* %tid, align 4 - %idxprom2 = sext i32 %6 to i64 - %arrayidx3 = getelementptr inbounds i8, i8* %5, i64 %idxprom2 - store i8 1, i8* %arrayidx3, align 1 - %7 = load i8*, i8** %g_graph_visited.addr, align 8 - %8 = load i32, i32* %tid, align 4 - %idxprom4 = sext i32 %8 to i64 - %arrayidx5 = getelementptr inbounds i8, i8* %7, i64 %idxprom4 - store i8 1, i8* %arrayidx5, align 1 - %9 = load i8*, i8** %g_over.addr, align 8 - store i8 1, i8* %9, align 1 - %10 = load i8*, i8** %g_updating_graph_mask.addr, align 8 - %11 = load i32, i32* %tid, align 4 - %idxprom6 = sext i32 %11 to i64 - %arrayidx7 = getelementptr inbounds i8, i8* %10, i64 %idxprom6 - store i8 0, i8* %arrayidx7, align 1 - br label %if.end - -if.end: ; preds = %if.then, %land.lhs.true, %entry - ret void -} - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() #2 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() #2 - -attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_61" "target-features"="+ptx64,+sm_61" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { alwaysinline convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_61" "target-features"="+ptx64,+sm_61" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind readnone } -attributes #3 = { convergent nounwind } - -!llvm.module.flags = !{!0, !1, !2} -!nvvm.annotations = !{!3, !4, !5, !6, !5, !7, !7, !7, !7, !8, !8, !7} -!llvm.ident = !{!9} -!nvvmir.version = !{!10} - -!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]} -!1 = !{i32 1, !"wchar_size", i32 4} -!2 = !{i32 4, !"nvvm-reflect-ftz", i32 0} -!3 = !{void (%struct.Node*, i32*, i8*, i8*, i8*, i32*, i32)* @_Z6KernelP4NodePiPbS2_S2_S1_i, !"kernel", i32 1} -!4 = !{void (i8*, i8*, i8*, i8*, i32)* @_Z7Kernel2PbS_S_S_i, !"kernel", i32 1} -!5 = !{null, !"align", i32 8} -!6 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080} -!7 = !{null, !"align", i32 16} -!8 = !{null, !"align", i32 16, !"align", i32 65552, !"align", i32 131088} -!9 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"} -!10 = !{i32 1, i32 4} diff --git a/examples/bfs/bfs-host-x86_64-unknown-linux-gnu.ll b/examples/bfs/bfs-host-x86_64-unknown-linux-gnu.ll deleted file mode 100644 index bbb02fc..0000000 --- a/examples/bfs/bfs-host-x86_64-unknown-linux-gnu.ll +++ /dev/null @@ -1,825 +0,0 @@ -; ModuleID = 'bfs-host-x86_64-unknown-linux-gnu.bc' -source_filename = "bfs.cu" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } -%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } -%struct.Node = type { i32, i32 } -%struct.dim3 = type { i32, i32, i32 } -%struct.CUstream_st = type opaque - -$_ZN4dim3C2Ejjj = comdat any - -@no_of_nodes = dso_local global i32 0, align 4 -@edge_list_size = dso_local global i32 0, align 4 -@fp = dso_local global %struct._IO_FILE* null, align 8 -@stderr = external dso_local global %struct._IO_FILE*, align 8 -@.str = private unnamed_addr constant [24 x i8] c"Usage: %s \0A\00", align 1 -@.str.1 = private unnamed_addr constant [14 x i8] c"Reading File\0A\00", align 1 -@.str.2 = private unnamed_addr constant [2 x i8] c"r\00", align 1 -@.str.3 = private unnamed_addr constant [26 x i8] c"Error Reading graph file\0A\00", align 1 -@.str.4 = private unnamed_addr constant [3 x i8] c"%d\00", align 1 -@.str.5 = private unnamed_addr constant [6 x i8] c"%d %d\00", align 1 -@.str.6 = private unnamed_addr constant [11 x i8] c"Read File\0A\00", align 1 -@.str.7 = private unnamed_addr constant [33 x i8] c"Copied Everything to GPU memory\0A\00", align 1 -@.str.8 = private unnamed_addr constant [27 x i8] c"Start traversing the tree\0A\00", align 1 -@.str.9 = private unnamed_addr constant [26 x i8] c"Kernel Executed %d times\0A\00", align 1 -@.str.10 = private unnamed_addr constant [11 x i8] c"result.txt\00", align 1 -@.str.11 = private unnamed_addr constant [2 x i8] c"w\00", align 1 -@.str.12 = private unnamed_addr constant [13 x i8] c"%d) cost:%d\0A\00", align 1 -@.str.13 = private unnamed_addr constant [29 x i8] c"Result stored in result.txt\0A\00", align 1 -@0 = private unnamed_addr constant [30 x i8] c"_Z6KernelP4NodePiPbS2_S2_S1_i\00", align 1 -@1 = private unnamed_addr constant [20 x i8] c"_Z7Kernel2PbS_S_S_i\00", align 1 -@2 = private constant [15329 x i8] c"P\EDU\BA\01\00\10\00\D0;\00\00\00\00\00\00\02\00\01\01@\00\00\00H2\00\00\00\00\00\00\00\00\00\00\00\00\00\00\07\00\01\00=\00\00\00\00\00\00\00\00\00\00\00\11\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\7FELF\02\01\013\07\00\00\00\00\00\00\00\02\00\BE\00e\00\00\00\00\00\00\00\00\00\00\00\A01\00\00\00\00\00\00\A0.\00\00\00\00\00\00=\05=\00@\008\00\03\00@\00\0C\00\01\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00.text._Z7Kernel2PbS_S_S_i\00.nv.info._Z7Kernel2PbS_S_S_i\00.nv.shared._Z7Kernel2PbS_S_S_i\00.nv.global\00.nv.constant0._Z7Kernel2PbS_S_S_i\00.text._Z6KernelP4NodePiPbS2_S2_S1_i\00.nv.info._Z6KernelP4NodePiPbS2_S2_S1_i\00.nv.shared._Z6KernelP4NodePiPbS2_S2_S1_i\00.nv.constant0._Z6KernelP4NodePiPbS2_S2_S1_i\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00_Z7Kernel2PbS_S_S_i\00.text._Z7Kernel2PbS_S_S_i\00.nv.info._Z7Kernel2PbS_S_S_i\00.nv.shared._Z7Kernel2PbS_S_S_i\00.nv.global\00blockIdx\00threadIdx\00.nv.constant0._Z7Kernel2PbS_S_S_i\00_param\00_Z6KernelP4NodePiPbS2_S2_S1_i\00.text._Z6KernelP4NodePiPbS2_S2_S1_i\00.nv.info._Z6KernelP4NodePiPbS2_S2_S1_i\00.nv.shared._Z6KernelP4NodePiPbS2_S2_S1_i\00.nv.constant0._Z6KernelP4NodePiPbS2_S2_S1_i\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00F\00\00\00\03\00\09\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\9C\00\00\00\03\00\0B\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\A7\00\00\00\01\00\0B\00\00\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\B0\00\00\00\01\00\0B\00\01\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\BA\00\00\00\03\00\07\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\01\01\00\00\03\00\0A\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00u\01\00\00\03\00\08\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\002\00\00\00\12\10\09\00\00\00\00\00\00\00\00\00\80\0D\00\00\00\00\00\00\E3\00\00\00\12\10\0A\00\00\00\00\00\00\00\00\00\80\18\00\00\00\00\00\00\04/\08\00\09\00\00\00\13\00\00\00\04#\08\00\09\00\00\00\00\00\00\00\04\12\08\00\09\00\00\00@\00\00\00\04\11\08\00\09\00\00\00@\00\00\00\04/\08\00\08\00\00\00\0F\00\00\00\04#\08\00\08\00\00\00\00\00\00\00\04\12\08\00\08\00\00\00(\00\00\00\04\11\08\00\08\00\00\00(\00\00\00\010\00\00\01*\00\00\04\0A\08\00\05\00\00\00@\01$\00\03\19$\00\04\17\0C\00\00\00\00\00\04\00 \00\00\F0\11\00\04\17\0C\00\00\00\00\00\03\00\18\00\00\F0!\00\04\17\0C\00\00\00\00\00\02\00\10\00\00\F0!\00\04\17\0C\00\00\00\00\00\01\00\08\00\00\F0!\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0!\00\03\1B\FF\00\04\1D\04\00\C8\04\00\00\04\1C\04\00H\0D\00\00\04\1E\04\000\00\00\00\010\00\00\01*\00\00\04\0A\08\00\07\00\00\00@\014\00\03\194\00\04\17\0C\00\00\00\00\00\06\000\00\00\F0\11\00\04\17\0C\00\00\00\00\00\05\00(\00\00\F0!\00\04\17\0C\00\00\00\00\00\04\00 \00\00\F0!\00\04\17\0C\00\00\00\00\00\03\00\18\00\00\F0!\00\04\17\0C\00\00\00\00\00\02\00\10\00\00\F0!\00\04\17\0C\00\00\00\00\00\01\00\08\00\00\F0!\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0!\00\03\1B\FF\00\04\1D\04\00\A8\06\00\00\04\1C\04\008\18\00\00\04\1E\04\00p\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\EF\1F\E0\FD\03!\00\D3rd<3>;\0A\0Amov.u2\00\1B,e\00b;\0Acvta\8D\00\04%\00\13,\\\00\22ld\C8\00\02\18\00nrd2, [\CE\00\1E])\00\1F1)\00\01a0];\0Ast#\00\81[%SP+0],,\00\0A\16\00\128\16\00\222;\B6\00\01\D8\00a1, 999(\00\02g\00\00\EF\00\18[\9E\01\03M\00\AF1;\0Aret;\0A\0A}\D6\01\1A\FE\02FuncGetAttributes\E1\01\0D#\00\0E\EC\01\0F+\00\06\0F\F7\01\1B\1F1\F7\01Q\1F1\F7\01!\0E\D9\00\0F\02\02\0F\0E8\01\0F\0D\02\8DhDevice\B4\00\0E\0E\02\0E$\00\0F\0F\02\00/32,\00\0B\1F1,\00\18\1F2<\02\13\1F2<\02\1F\1D4<\02\1F2<\02\0C\1F2<\02\13\01_\00\04;\02\0F\D9\00\07\1D]4\00\1F14\00\06\0Fp\02\10\0E\9A\01\0Fq\02\12(32q\02\0B\15\00!12\16\00\09\86\02\1F3\86\02\15\1F3\86\02#2Get\CB\00\0E}\02\05\1B\00\04\DA\00\0F\1C\02\13?3[8W\04.\0F\1B\02\0D\1F3W\04\19\04\B3\01\0D\D0\00\0F\AA\01\06\0F\05\04W\F0\04OccupancyMaxActiveBV\08\FE\03sPerMultiprocessor\9F\01\0F;\00\16\0EB\06\0FC\00%\0EJ\04\0FC\00\1E\1F2\86\00/\1F3\88\02\13O4[32\89\02\1C\1D3\89\02\1F4\89\02\0C\1F4\89\02\19\133\89\02\0F\F1\00\1E\0F\BC\04\00\0FK\00$/2]w\07\00\0FL\00$\0F\1F\05\01\0F\98\00%\0F\A7\07\1D\097\05\186M\05\04,\00\2224-\00\183\CF\03\1F2\CF\03\15\1F2\CF\03L\9FWithFlags\D8\03(\05D\00\0E\E1\03\0FL\00'\0F\EA\030\0CL\00\1F2\98\008\1F3\98\008\1F4H\04\13O5[40\EC\08.\0FH\04\0D\1F5\EC\08\1C\0F\F9\00+\1F]\9C\040\0D\9A\01\0F\A5\040\0D:\02\0F\AE\041\0D\DB\02\0F\B7\041\0D|\03\0F\C0\04I\08-\00\1F3$\0A6\F0\16visible .entry _Z6KernelP4NodePiPbS2_\03\0061_i\A6\04\00\A0\00\0F+\00\0A\0E\8D\04\0F3\00\15\1F13\00\1F\1F23\00\1F\1F33\00\1F\1F43\00\1F\1753\00/323\00\12\1F6\8F\04\13O6[64\8F\04\15\96pred %p<5\90\04\9B16 %rs<7>\B3\04-18\B4\04/50\B5\04\0C\1F6\B5\04\18\00b\03\0F\06\01\12\0F\9E\03\00\1F6<\00\14\1F5<\00\00\1F5<\00\14\0F\14\05\01\1F4<\00\14\1F3P\05\01\0F<\00\14\0F\E3\04\02\0F<\00\14\0F\CA\04\01\0Fh\01\15#0]\BD\01#to\BB\14\04B\00\117w\04\04\DC\01\0A\1C\00\118\1C\00\1F7;\00\05\119\1F\00\1F5;\00\02!10\1D\00\1F9<\00\05!11 \00\1F4=\00\03\122O\05\1F1>\00\06\143-\05\0F>\00\01\124>\00\1F3>\00\06\145\98\05\0F>\00\01\126>\00\1F5>\00\06\147\EC\05\0F>\00\01\023\01/17 \06\03\1F8!\06\02*16\17\00\03\22\06?d14$\06\03*12\18\00\03%\06:d10\18\00\134w\00\1A8T\06\154\8E\10\09*\0B\F4\00%ctaid.x;\0Ashl.bk\06\02F\0B\08,\00\00_\01\12t*\00Qadd.s\15\00$5,/\00\1A4n\00\125\9D\00\115\BC\02\02A\00%6,\1B\00\07\16\00%7,\9F\00\92;\0Asetp.ge]\002p1,6\00\F2\0E%r7;\0A@%p1 bra LBB6_9;\0Abra.uni\10\0021;\0A\08\00\11:Z\00\03\96\01%9,Z\01\01r\00\02\B4\008d20\8A\00\01\CD\00\03\93\03$1,8\00\01'\00\02\A7\00\108L\05\00r\03\01\22\00\002\00$ndc\05#2,\1D\00\131\BD\00\22eq\1B\003p2, \00\8F1;\0A@!%p2\BD\00\07\132\BD\00\182\BD\00/22\BD\00\04\1F3\BD\00\05$4,8\00\01'\00\03\BC\01\02\A8\0033, &\02\128\C8\00\02T\028s3;s\00$5,\B8\02\09r\00\09\B9\01\01&\02\030\00$7, \00\1A3\8B\00$8,P\00\01'\00\07\EF\01\138H\01+8]0\02\02\FD\02\1B8\1B\01\133\1B\01&3:C\00%9,3\00\09\BE\00\1F9\BE\00\02/30\BE\00\04431, \00\0A\BE\00432,P\00\01'\00\07\BE\00\2210\BF\00X32+4]\18\00\141\18\00\18]\1E\03\02\D1\04\02\14\05,11\DF\02\223,\CD\00\00(\00\01\E0\02\163#\02\0C\08\01\134\08\01\184#\02\143\0B\02\1A8\F2\00\184!\01\08\F2\00$5, \00\1A2\F2\00$6,P\00\01'\00\08\DA\00\133\DA\00\1B6\B1\01\136\12\13\09B\02537,\9C\04\09\93\00%8,6\00\0Az\00$9,8\00\01'\00\07r\03\134y\00\1A9r\03#5,\1D\00\0Dr\03#4, \00\111N\01\164N\01\1B6N\01\135N\01\185N\01\144)\02\1A4A\02/41A\02\04442, \00\0AO\01443,Q\00\01'\00\08O\01\03\D6\00*43)\02#5,\1D\00\191{\00\1847\01\08{\00$5, \00\0B{\00\196{\00\175\B4\01\00\1D\00\02\B3\01(5;\F7\00%7,g\06\09|\00\0F\B3\01\05449,8\00\01'\00\09h\04\126\98\01\05h\04\2249h\04\0C\82\01\136\82\01*6:\18\00\137\18\00\177\F0\03(16\D0\02\075\01\01\82\00\161s\00\0BL\04/17M\04\04\1B8u\00\139\18\00/9:9\0D\09\127n\09P2PbS_\02\00\0D/\0D\0D!\00\0E%\0D\0F)\00\0B\1F1)\00\15\1F2)\00\15\1C3\A1\0C\0E)\00\0F&\11\1A\1E7&\11\0F\97\0C\0E\1D3\97\0C\1C5\97\0C\0E\96\0C/26\96\0C\0C\1F7\96\0C\1E\0E\FB\00\0F\14\0C\0D\0EV\01\0F\0A\0C\0D\0E\B1\01\0F\00\0C\0D\0E\0C\02\0F\F6\0B\0D\0Eg\02\0F\EC\0B\0D\1F5t\0B\08\196\CD\0B\0F'\0C\04\1F3'\0C'\1F2'\0C)\1F1'\0C\0D\0Fm\0B\01\1F2m\0B\03\1F0m\0B\03\1F8l\0B\03\1E6\90\11\0F<\0BW/36<\0B\06/36<\0B\01/32<\0B\1797_3\17\05\137<\0B\1B7<\0B\0F\\\08\03(14\89\00\07\16\06\03]\0D%13\C3\0C\0C;\0B/15;\0B.\0E\BC\00\132\BC\00\09;\0B/16\0A\0A\02/17\BC\00\05(8,\03\0E\1C7:\0B\09\D2\06.18:\0B\0Fj\0C\0F/36j\0C\0D\07\9C\0B\1F1\9C\0B\01\182\A7\07\07-\00\1F2-\00\01\0F\BD\01\03/24\8E\00\05$5,7\00\01'\00\09\01\01\1C4;\0C\125r\00\1B4\8B\01\133\8B\01\B03:\0Aret;\0A\0A}\0A\00\00\00", section ".nv_fatbin", align 8 -@__cuda_fatbin_wrapper = internal constant { i32, i32, i8*, i8* } { i32 1180844977, i32 1, i8* getelementptr inbounds ([15329 x i8], [15329 x i8]* @2, i64 0, i64 0), i8* null }, section ".nvFatBinSegment", align 8 -@__cuda_gpubin_handle = internal global i8** null, align 8 -@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* bitcast (void (i8*)* @__cuda_module_ctor to void ()*), i8* null }] - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z6KernelP4NodePiPbS2_S2_S1_i(%struct.Node* %g_graph_nodes, i32* %g_graph_edges, i8* %g_graph_mask, i8* %g_updating_graph_mask, i8* %g_graph_visited, i32* %g_cost, i32 %no_of_nodes) #0 { -entry: - %g_graph_nodes.addr = alloca %struct.Node*, align 8 - %g_graph_edges.addr = alloca i32*, align 8 - %g_graph_mask.addr = alloca i8*, align 8 - %g_updating_graph_mask.addr = alloca i8*, align 8 - %g_graph_visited.addr = alloca i8*, align 8 - %g_cost.addr = alloca i32*, align 8 - %no_of_nodes.addr = alloca i32, align 4 - %grid_dim = alloca %struct.dim3, align 8 - %block_dim = alloca %struct.dim3, align 8 - %shmem_size = alloca i64, align 8 - %stream = alloca i8*, align 8 - %grid_dim.coerce = alloca { i64, i32 }, align 8 - %block_dim.coerce = alloca { i64, i32 }, align 8 - store %struct.Node* %g_graph_nodes, %struct.Node** %g_graph_nodes.addr, align 8 - store i32* %g_graph_edges, i32** %g_graph_edges.addr, align 8 - store i8* %g_graph_mask, i8** %g_graph_mask.addr, align 8 - store i8* %g_updating_graph_mask, i8** %g_updating_graph_mask.addr, align 8 - store i8* %g_graph_visited, i8** %g_graph_visited.addr, align 8 - store i32* %g_cost, i32** %g_cost.addr, align 8 - store i32 %no_of_nodes, i32* %no_of_nodes.addr, align 4 - %kernel_args = alloca i8*, i64 7, align 16 - %0 = bitcast %struct.Node** %g_graph_nodes.addr to i8* - %1 = getelementptr i8*, i8** %kernel_args, i32 0 - store i8* %0, i8** %1 - %2 = bitcast i32** %g_graph_edges.addr to i8* - %3 = getelementptr i8*, i8** %kernel_args, i32 1 - store i8* %2, i8** %3 - %4 = bitcast i8** %g_graph_mask.addr to i8* - %5 = getelementptr i8*, i8** %kernel_args, i32 2 - store i8* %4, i8** %5 - %6 = bitcast i8** %g_updating_graph_mask.addr to i8* - %7 = getelementptr i8*, i8** %kernel_args, i32 3 - store i8* %6, i8** %7 - %8 = bitcast i8** %g_graph_visited.addr to i8* - %9 = getelementptr i8*, i8** %kernel_args, i32 4 - store i8* %8, i8** %9 - %10 = bitcast i32** %g_cost.addr to i8* - %11 = getelementptr i8*, i8** %kernel_args, i32 5 - store i8* %10, i8** %11 - %12 = bitcast i32* %no_of_nodes.addr to i8* - %13 = getelementptr i8*, i8** %kernel_args, i32 6 - store i8* %12, i8** %13 - %14 = call i32 @__cudaPopCallConfiguration(%struct.dim3* %grid_dim, %struct.dim3* %block_dim, i64* %shmem_size, i8** %stream) - %15 = load i64, i64* %shmem_size, align 8 - %16 = load i8*, i8** %stream, align 8 - %17 = bitcast { i64, i32 }* %grid_dim.coerce to i8* - %18 = bitcast %struct.dim3* %grid_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %17, i8* align 8 %18, i64 12, i1 false) - %19 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 0 - %20 = load i64, i64* %19, align 8 - %21 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 1 - %22 = load i32, i32* %21, align 8 - %23 = bitcast { i64, i32 }* %block_dim.coerce to i8* - %24 = bitcast %struct.dim3* %block_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %23, i8* align 8 %24, i64 12, i1 false) - %25 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 0 - %26 = load i64, i64* %25, align 8 - %27 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 1 - %28 = load i32, i32* %27, align 8 - %29 = bitcast i8* %16 to %struct.CUstream_st* - %call = call i32 @cudaLaunchKernel(i8* bitcast (void (%struct.Node*, i32*, i8*, i8*, i8*, i32*, i32)* @_Z6KernelP4NodePiPbS2_S2_S1_i to i8*), i64 %20, i32 %22, i64 %26, i32 %28, i8** %kernel_args, i64 %15, %struct.CUstream_st* %29) - br label %setup.end - -setup.end: ; preds = %entry - ret void -} - -declare dso_local i32 @__cudaPopCallConfiguration(%struct.dim3*, %struct.dim3*, i64*, i8**) - -declare dso_local i32 @cudaLaunchKernel(i8*, i64, i32, i64, i32, i8**, i64, %struct.CUstream_st*) - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1 - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z7Kernel2PbS_S_S_i(i8* %g_graph_mask, i8* %g_updating_graph_mask, i8* %g_graph_visited, i8* %g_over, i32 %no_of_nodes) #0 { -entry: - %g_graph_mask.addr = alloca i8*, align 8 - %g_updating_graph_mask.addr = alloca i8*, align 8 - %g_graph_visited.addr = alloca i8*, align 8 - %g_over.addr = alloca i8*, align 8 - %no_of_nodes.addr = alloca i32, align 4 - %grid_dim = alloca %struct.dim3, align 8 - %block_dim = alloca %struct.dim3, align 8 - %shmem_size = alloca i64, align 8 - %stream = alloca i8*, align 8 - %grid_dim.coerce = alloca { i64, i32 }, align 8 - %block_dim.coerce = alloca { i64, i32 }, align 8 - store i8* %g_graph_mask, i8** %g_graph_mask.addr, align 8 - store i8* %g_updating_graph_mask, i8** %g_updating_graph_mask.addr, align 8 - store i8* %g_graph_visited, i8** %g_graph_visited.addr, align 8 - store i8* %g_over, i8** %g_over.addr, align 8 - store i32 %no_of_nodes, i32* %no_of_nodes.addr, align 4 - %kernel_args = alloca i8*, i64 5, align 16 - %0 = bitcast i8** %g_graph_mask.addr to i8* - %1 = getelementptr i8*, i8** %kernel_args, i32 0 - store i8* %0, i8** %1 - %2 = bitcast i8** %g_updating_graph_mask.addr to i8* - %3 = getelementptr i8*, i8** %kernel_args, i32 1 - store i8* %2, i8** %3 - %4 = bitcast i8** %g_graph_visited.addr to i8* - %5 = getelementptr i8*, i8** %kernel_args, i32 2 - store i8* %4, i8** %5 - %6 = bitcast i8** %g_over.addr to i8* - %7 = getelementptr i8*, i8** %kernel_args, i32 3 - store i8* %6, i8** %7 - %8 = bitcast i32* %no_of_nodes.addr to i8* - %9 = getelementptr i8*, i8** %kernel_args, i32 4 - store i8* %8, i8** %9 - %10 = call i32 @__cudaPopCallConfiguration(%struct.dim3* %grid_dim, %struct.dim3* %block_dim, i64* %shmem_size, i8** %stream) - %11 = load i64, i64* %shmem_size, align 8 - %12 = load i8*, i8** %stream, align 8 - %13 = bitcast { i64, i32 }* %grid_dim.coerce to i8* - %14 = bitcast %struct.dim3* %grid_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %13, i8* align 8 %14, i64 12, i1 false) - %15 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 0 - %16 = load i64, i64* %15, align 8 - %17 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 1 - %18 = load i32, i32* %17, align 8 - %19 = bitcast { i64, i32 }* %block_dim.coerce to i8* - %20 = bitcast %struct.dim3* %block_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %19, i8* align 8 %20, i64 12, i1 false) - %21 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 0 - %22 = load i64, i64* %21, align 8 - %23 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 1 - %24 = load i32, i32* %23, align 8 - %25 = bitcast i8* %12 to %struct.CUstream_st* - %call = call i32 @cudaLaunchKernel(i8* bitcast (void (i8*, i8*, i8*, i8*, i32)* @_Z7Kernel2PbS_S_S_i to i8*), i64 %16, i32 %18, i64 %22, i32 %24, i8** %kernel_args, i64 %11, %struct.CUstream_st* %25) - br label %setup.end - -setup.end: ; preds = %entry - ret void -} - -; Function Attrs: noinline norecurse optnone uwtable -define dso_local i32 @main(i32 %argc, i8** %argv) #2 { -entry: - %argc.addr = alloca i32, align 4 - %argv.addr = alloca i8**, align 8 - store i32 %argc, i32* %argc.addr, align 4 - store i8** %argv, i8*** %argv.addr, align 8 - %call = call i32 @cudaSetDevice(i32 0) - store i32 0, i32* @no_of_nodes, align 4 - store i32 0, i32* @edge_list_size, align 4 - %0 = load i32, i32* %argc.addr, align 4 - %1 = load i8**, i8*** %argv.addr, align 8 - call void @_Z8BFSGraphiPPc(i32 %0, i8** %1) - ret i32 0 -} - -declare dso_local i32 @cudaSetDevice(i32) #3 - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z8BFSGraphiPPc(i32 %argc, i8** %argv) #0 { -entry: - %argc.addr = alloca i32, align 4 - %argv.addr = alloca i8**, align 8 - %input_f = alloca i8*, align 8 - %source = alloca i32, align 4 - %num_of_blocks = alloca i32, align 4 - %num_of_threads_per_block = alloca i32, align 4 - %h_graph_nodes = alloca %struct.Node*, align 8 - %h_graph_mask = alloca i8*, align 8 - %h_updating_graph_mask = alloca i8*, align 8 - %h_graph_visited = alloca i8*, align 8 - %start = alloca i32, align 4 - %edgeno = alloca i32, align 4 - %i = alloca i32, align 4 - %id = alloca i32, align 4 - %cost = alloca i32, align 4 - %h_graph_edges = alloca i32*, align 8 - %i41 = alloca i32, align 4 - %d_graph_nodes = alloca %struct.Node*, align 8 - %d_graph_edges = alloca i32*, align 8 - %d_graph_mask = alloca i8*, align 8 - %d_updating_graph_mask = alloca i8*, align 8 - %d_graph_visited = alloca i8*, align 8 - %h_cost = alloca i32*, align 8 - %i90 = alloca i32, align 4 - %d_cost = alloca i32*, align 8 - %d_over = alloca i8*, align 8 - %grid = alloca %struct.dim3, align 4 - %threads = alloca %struct.dim3, align 4 - %k = alloca i32, align 4 - %stop = alloca i8, align 1 - %agg.tmp = alloca %struct.dim3, align 4 - %agg.tmp111 = alloca %struct.dim3, align 4 - %agg.tmp.coerce = alloca { i64, i32 }, align 4 - %agg.tmp111.coerce = alloca { i64, i32 }, align 4 - %agg.tmp115 = alloca %struct.dim3, align 4 - %agg.tmp116 = alloca %struct.dim3, align 4 - %agg.tmp115.coerce = alloca { i64, i32 }, align 4 - %agg.tmp116.coerce = alloca { i64, i32 }, align 4 - %fpo = alloca %struct._IO_FILE*, align 8 - %i130 = alloca i32, align 4 - store i32 %argc, i32* %argc.addr, align 4 - store i8** %argv, i8*** %argv.addr, align 8 - %0 = load i32, i32* %argc.addr, align 4 - %cmp = icmp ne i32 %0, 2 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - %1 = load i32, i32* %argc.addr, align 4 - %2 = load i8**, i8*** %argv.addr, align 8 - call void @_Z5UsageiPPc(i32 %1, i8** %2) - call void @exit(i32 0) #8 - unreachable - -if.end: ; preds = %entry - %3 = load i8**, i8*** %argv.addr, align 8 - %arrayidx = getelementptr inbounds i8*, i8** %3, i64 1 - %4 = load i8*, i8** %arrayidx, align 8 - store i8* %4, i8** %input_f, align 8 - %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0)) - %5 = load i8*, i8** %input_f, align 8 - %call1 = call %struct._IO_FILE* @fopen(i8* %5, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.2, i64 0, i64 0)) - store %struct._IO_FILE* %call1, %struct._IO_FILE** @fp, align 8 - %6 = load %struct._IO_FILE*, %struct._IO_FILE** @fp, align 8 - %tobool = icmp ne %struct._IO_FILE* %6, null - br i1 %tobool, label %if.end4, label %if.then2 - -if.then2: ; preds = %if.end - %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([26 x i8], [26 x i8]* @.str.3, i64 0, i64 0)) - br label %return - -if.end4: ; preds = %if.end - store i32 0, i32* %source, align 4 - %7 = load %struct._IO_FILE*, %struct._IO_FILE** @fp, align 8 - %call5 = call i32 (%struct._IO_FILE*, i8*, ...) @fscanf(%struct._IO_FILE* %7, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.4, i64 0, i64 0), i32* @no_of_nodes) - store i32 1, i32* %num_of_blocks, align 4 - %8 = load i32, i32* @no_of_nodes, align 4 - store i32 %8, i32* %num_of_threads_per_block, align 4 - %9 = load i32, i32* @no_of_nodes, align 4 - %cmp6 = icmp sgt i32 %9, 512 - br i1 %cmp6, label %if.then7, label %if.end9 - -if.then7: ; preds = %if.end4 - %10 = load i32, i32* @no_of_nodes, align 4 - %conv = sitofp i32 %10 to double - %div = fdiv double %conv, 5.120000e+02 - %11 = call double @llvm.ceil.f64(double %div) - %conv8 = fptosi double %11 to i32 - store i32 %conv8, i32* %num_of_blocks, align 4 - store i32 512, i32* %num_of_threads_per_block, align 4 - br label %if.end9 - -if.end9: ; preds = %if.then7, %if.end4 - %12 = load i32, i32* @no_of_nodes, align 4 - %conv10 = sext i32 %12 to i64 - %mul = mul i64 8, %conv10 - %call11 = call noalias i8* @malloc(i64 %mul) #9 - %13 = bitcast i8* %call11 to %struct.Node* - store %struct.Node* %13, %struct.Node** %h_graph_nodes, align 8 - %14 = load i32, i32* @no_of_nodes, align 4 - %conv12 = sext i32 %14 to i64 - %mul13 = mul i64 1, %conv12 - %call14 = call noalias i8* @malloc(i64 %mul13) #9 - store i8* %call14, i8** %h_graph_mask, align 8 - %15 = load i32, i32* @no_of_nodes, align 4 - %conv15 = sext i32 %15 to i64 - %mul16 = mul i64 1, %conv15 - %call17 = call noalias i8* @malloc(i64 %mul16) #9 - store i8* %call17, i8** %h_updating_graph_mask, align 8 - %16 = load i32, i32* @no_of_nodes, align 4 - %conv18 = sext i32 %16 to i64 - %mul19 = mul i64 1, %conv18 - %call20 = call noalias i8* @malloc(i64 %mul19) #9 - store i8* %call20, i8** %h_graph_visited, align 8 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %if.end9 - %17 = load i32, i32* %i, align 4 - %18 = load i32, i32* @no_of_nodes, align 4 - %cmp21 = icmp ult i32 %17, %18 - br i1 %cmp21, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %19 = load %struct._IO_FILE*, %struct._IO_FILE** @fp, align 8 - %call22 = call i32 (%struct._IO_FILE*, i8*, ...) @fscanf(%struct._IO_FILE* %19, i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.5, i64 0, i64 0), i32* %start, i32* %edgeno) - %20 = load i32, i32* %start, align 4 - %21 = load %struct.Node*, %struct.Node** %h_graph_nodes, align 8 - %22 = load i32, i32* %i, align 4 - %idxprom = zext i32 %22 to i64 - %arrayidx23 = getelementptr inbounds %struct.Node, %struct.Node* %21, i64 %idxprom - %starting = getelementptr inbounds %struct.Node, %struct.Node* %arrayidx23, i32 0, i32 0 - store i32 %20, i32* %starting, align 4 - %23 = load i32, i32* %edgeno, align 4 - %24 = load %struct.Node*, %struct.Node** %h_graph_nodes, align 8 - %25 = load i32, i32* %i, align 4 - %idxprom24 = zext i32 %25 to i64 - %arrayidx25 = getelementptr inbounds %struct.Node, %struct.Node* %24, i64 %idxprom24 - %no_of_edges = getelementptr inbounds %struct.Node, %struct.Node* %arrayidx25, i32 0, i32 1 - store i32 %23, i32* %no_of_edges, align 4 - %26 = load i8*, i8** %h_graph_mask, align 8 - %27 = load i32, i32* %i, align 4 - %idxprom26 = zext i32 %27 to i64 - %arrayidx27 = getelementptr inbounds i8, i8* %26, i64 %idxprom26 - store i8 0, i8* %arrayidx27, align 1 - %28 = load i8*, i8** %h_updating_graph_mask, align 8 - %29 = load i32, i32* %i, align 4 - %idxprom28 = zext i32 %29 to i64 - %arrayidx29 = getelementptr inbounds i8, i8* %28, i64 %idxprom28 - store i8 0, i8* %arrayidx29, align 1 - %30 = load i8*, i8** %h_graph_visited, align 8 - %31 = load i32, i32* %i, align 4 - %idxprom30 = zext i32 %31 to i64 - %arrayidx31 = getelementptr inbounds i8, i8* %30, i64 %idxprom30 - store i8 0, i8* %arrayidx31, align 1 - br label %for.inc - -for.inc: ; preds = %for.body - %32 = load i32, i32* %i, align 4 - %inc = add i32 %32, 1 - store i32 %inc, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %33 = load %struct._IO_FILE*, %struct._IO_FILE** @fp, align 8 - %call32 = call i32 (%struct._IO_FILE*, i8*, ...) @fscanf(%struct._IO_FILE* %33, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.4, i64 0, i64 0), i32* %source) - store i32 0, i32* %source, align 4 - %34 = load i8*, i8** %h_graph_mask, align 8 - %35 = load i32, i32* %source, align 4 - %idxprom33 = sext i32 %35 to i64 - %arrayidx34 = getelementptr inbounds i8, i8* %34, i64 %idxprom33 - store i8 1, i8* %arrayidx34, align 1 - %36 = load i8*, i8** %h_graph_visited, align 8 - %37 = load i32, i32* %source, align 4 - %idxprom35 = sext i32 %37 to i64 - %arrayidx36 = getelementptr inbounds i8, i8* %36, i64 %idxprom35 - store i8 1, i8* %arrayidx36, align 1 - %38 = load %struct._IO_FILE*, %struct._IO_FILE** @fp, align 8 - %call37 = call i32 (%struct._IO_FILE*, i8*, ...) @fscanf(%struct._IO_FILE* %38, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.4, i64 0, i64 0), i32* @edge_list_size) - %39 = load i32, i32* @edge_list_size, align 4 - %conv38 = sext i32 %39 to i64 - %mul39 = mul i64 4, %conv38 - %call40 = call noalias i8* @malloc(i64 %mul39) #9 - %40 = bitcast i8* %call40 to i32* - store i32* %40, i32** %h_graph_edges, align 8 - store i32 0, i32* %i41, align 4 - br label %for.cond42 - -for.cond42: ; preds = %for.inc49, %for.end - %41 = load i32, i32* %i41, align 4 - %42 = load i32, i32* @edge_list_size, align 4 - %cmp43 = icmp slt i32 %41, %42 - br i1 %cmp43, label %for.body44, label %for.end51 - -for.body44: ; preds = %for.cond42 - %43 = load %struct._IO_FILE*, %struct._IO_FILE** @fp, align 8 - %call45 = call i32 (%struct._IO_FILE*, i8*, ...) @fscanf(%struct._IO_FILE* %43, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.4, i64 0, i64 0), i32* %id) - %44 = load %struct._IO_FILE*, %struct._IO_FILE** @fp, align 8 - %call46 = call i32 (%struct._IO_FILE*, i8*, ...) @fscanf(%struct._IO_FILE* %44, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.4, i64 0, i64 0), i32* %cost) - %45 = load i32, i32* %id, align 4 - %46 = load i32*, i32** %h_graph_edges, align 8 - %47 = load i32, i32* %i41, align 4 - %idxprom47 = sext i32 %47 to i64 - %arrayidx48 = getelementptr inbounds i32, i32* %46, i64 %idxprom47 - store i32 %45, i32* %arrayidx48, align 4 - br label %for.inc49 - -for.inc49: ; preds = %for.body44 - %48 = load i32, i32* %i41, align 4 - %inc50 = add nsw i32 %48, 1 - store i32 %inc50, i32* %i41, align 4 - br label %for.cond42 - -for.end51: ; preds = %for.cond42 - %49 = load %struct._IO_FILE*, %struct._IO_FILE** @fp, align 8 - %tobool52 = icmp ne %struct._IO_FILE* %49, null - br i1 %tobool52, label %if.then53, label %if.end55 - -if.then53: ; preds = %for.end51 - %50 = load %struct._IO_FILE*, %struct._IO_FILE** @fp, align 8 - %call54 = call i32 @fclose(%struct._IO_FILE* %50) - br label %if.end55 - -if.end55: ; preds = %if.then53, %for.end51 - %call56 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.6, i64 0, i64 0)) - %51 = bitcast %struct.Node** %d_graph_nodes to i8** - %52 = load i32, i32* @no_of_nodes, align 4 - %conv57 = sext i32 %52 to i64 - %mul58 = mul i64 8, %conv57 - %call59 = call i32 @cudaMalloc(i8** %51, i64 %mul58) - %53 = load %struct.Node*, %struct.Node** %d_graph_nodes, align 8 - %54 = bitcast %struct.Node* %53 to i8* - %55 = load %struct.Node*, %struct.Node** %h_graph_nodes, align 8 - %56 = bitcast %struct.Node* %55 to i8* - %57 = load i32, i32* @no_of_nodes, align 4 - %conv60 = sext i32 %57 to i64 - %mul61 = mul i64 8, %conv60 - %call62 = call i32 @cudaMemcpy(i8* %54, i8* %56, i64 %mul61, i32 1) - %58 = bitcast i32** %d_graph_edges to i8** - %59 = load i32, i32* @edge_list_size, align 4 - %conv63 = sext i32 %59 to i64 - %mul64 = mul i64 4, %conv63 - %call65 = call i32 @cudaMalloc(i8** %58, i64 %mul64) - %60 = load i32*, i32** %d_graph_edges, align 8 - %61 = bitcast i32* %60 to i8* - %62 = load i32*, i32** %h_graph_edges, align 8 - %63 = bitcast i32* %62 to i8* - %64 = load i32, i32* @edge_list_size, align 4 - %conv66 = sext i32 %64 to i64 - %mul67 = mul i64 4, %conv66 - %call68 = call i32 @cudaMemcpy(i8* %61, i8* %63, i64 %mul67, i32 1) - %65 = load i32, i32* @no_of_nodes, align 4 - %conv69 = sext i32 %65 to i64 - %mul70 = mul i64 1, %conv69 - %call71 = call i32 @cudaMalloc(i8** %d_graph_mask, i64 %mul70) - %66 = load i8*, i8** %d_graph_mask, align 8 - %67 = load i8*, i8** %h_graph_mask, align 8 - %68 = load i32, i32* @no_of_nodes, align 4 - %conv72 = sext i32 %68 to i64 - %mul73 = mul i64 1, %conv72 - %call74 = call i32 @cudaMemcpy(i8* %66, i8* %67, i64 %mul73, i32 1) - %69 = load i32, i32* @no_of_nodes, align 4 - %conv75 = sext i32 %69 to i64 - %mul76 = mul i64 1, %conv75 - %call77 = call i32 @cudaMalloc(i8** %d_updating_graph_mask, i64 %mul76) - %70 = load i8*, i8** %d_updating_graph_mask, align 8 - %71 = load i8*, i8** %h_updating_graph_mask, align 8 - %72 = load i32, i32* @no_of_nodes, align 4 - %conv78 = sext i32 %72 to i64 - %mul79 = mul i64 1, %conv78 - %call80 = call i32 @cudaMemcpy(i8* %70, i8* %71, i64 %mul79, i32 1) - %73 = load i32, i32* @no_of_nodes, align 4 - %conv81 = sext i32 %73 to i64 - %mul82 = mul i64 1, %conv81 - %call83 = call i32 @cudaMalloc(i8** %d_graph_visited, i64 %mul82) - %74 = load i8*, i8** %d_graph_visited, align 8 - %75 = load i8*, i8** %h_graph_visited, align 8 - %76 = load i32, i32* @no_of_nodes, align 4 - %conv84 = sext i32 %76 to i64 - %mul85 = mul i64 1, %conv84 - %call86 = call i32 @cudaMemcpy(i8* %74, i8* %75, i64 %mul85, i32 1) - %77 = load i32, i32* @no_of_nodes, align 4 - %conv87 = sext i32 %77 to i64 - %mul88 = mul i64 4, %conv87 - %call89 = call noalias i8* @malloc(i64 %mul88) #9 - %78 = bitcast i8* %call89 to i32* - store i32* %78, i32** %h_cost, align 8 - store i32 0, i32* %i90, align 4 - br label %for.cond91 - -for.cond91: ; preds = %for.inc96, %if.end55 - %79 = load i32, i32* %i90, align 4 - %80 = load i32, i32* @no_of_nodes, align 4 - %cmp92 = icmp slt i32 %79, %80 - br i1 %cmp92, label %for.body93, label %for.end98 - -for.body93: ; preds = %for.cond91 - %81 = load i32*, i32** %h_cost, align 8 - %82 = load i32, i32* %i90, align 4 - %idxprom94 = sext i32 %82 to i64 - %arrayidx95 = getelementptr inbounds i32, i32* %81, i64 %idxprom94 - store i32 -1, i32* %arrayidx95, align 4 - br label %for.inc96 - -for.inc96: ; preds = %for.body93 - %83 = load i32, i32* %i90, align 4 - %inc97 = add nsw i32 %83, 1 - store i32 %inc97, i32* %i90, align 4 - br label %for.cond91 - -for.end98: ; preds = %for.cond91 - %84 = load i32*, i32** %h_cost, align 8 - %85 = load i32, i32* %source, align 4 - %idxprom99 = sext i32 %85 to i64 - %arrayidx100 = getelementptr inbounds i32, i32* %84, i64 %idxprom99 - store i32 0, i32* %arrayidx100, align 4 - %86 = bitcast i32** %d_cost to i8** - %87 = load i32, i32* @no_of_nodes, align 4 - %conv101 = sext i32 %87 to i64 - %mul102 = mul i64 4, %conv101 - %call103 = call i32 @cudaMalloc(i8** %86, i64 %mul102) - %88 = load i32*, i32** %d_cost, align 8 - %89 = bitcast i32* %88 to i8* - %90 = load i32*, i32** %h_cost, align 8 - %91 = bitcast i32* %90 to i8* - %92 = load i32, i32* @no_of_nodes, align 4 - %conv104 = sext i32 %92 to i64 - %mul105 = mul i64 4, %conv104 - %call106 = call i32 @cudaMemcpy(i8* %89, i8* %91, i64 %mul105, i32 1) - %call107 = call i32 @cudaMalloc(i8** %d_over, i64 1) - %call108 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([33 x i8], [33 x i8]* @.str.7, i64 0, i64 0)) - %93 = load i32, i32* %num_of_blocks, align 4 - call void @_ZN4dim3C2Ejjj(%struct.dim3* %grid, i32 %93, i32 1, i32 1) - %94 = load i32, i32* %num_of_threads_per_block, align 4 - call void @_ZN4dim3C2Ejjj(%struct.dim3* %threads, i32 %94, i32 1, i32 1) - store i32 0, i32* %k, align 4 - %call109 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([27 x i8], [27 x i8]* @.str.8, i64 0, i64 0)) - br label %do.body - -do.body: ; preds = %do.cond, %for.end98 - store i8 0, i8* %stop, align 1 - %95 = load i8*, i8** %d_over, align 8 - %call110 = call i32 @cudaMemcpy(i8* %95, i8* %stop, i64 1, i32 1) - %96 = bitcast %struct.dim3* %agg.tmp to i8* - %97 = bitcast %struct.dim3* %grid to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %96, i8* align 4 %97, i64 12, i1 false) - %98 = bitcast %struct.dim3* %agg.tmp111 to i8* - %99 = bitcast %struct.dim3* %threads to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %98, i8* align 4 %99, i64 12, i1 false) - %100 = bitcast { i64, i32 }* %agg.tmp.coerce to i8* - %101 = bitcast %struct.dim3* %agg.tmp to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %100, i8* align 4 %101, i64 12, i1 false) - %102 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 0 - %103 = load i64, i64* %102, align 4 - %104 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 1 - %105 = load i32, i32* %104, align 4 - %106 = bitcast { i64, i32 }* %agg.tmp111.coerce to i8* - %107 = bitcast %struct.dim3* %agg.tmp111 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %106, i8* align 4 %107, i64 12, i1 false) - %108 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp111.coerce, i32 0, i32 0 - %109 = load i64, i64* %108, align 4 - %110 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp111.coerce, i32 0, i32 1 - %111 = load i32, i32* %110, align 4 - %call112 = call i32 @__cudaPushCallConfiguration(i64 %103, i32 %105, i64 %109, i32 %111, i64 0, i8* null) - %tobool113 = icmp ne i32 %call112, 0 - br i1 %tobool113, label %kcall.end, label %kcall.configok - -kcall.configok: ; preds = %do.body - %112 = load %struct.Node*, %struct.Node** %d_graph_nodes, align 8 - %113 = load i32*, i32** %d_graph_edges, align 8 - %114 = load i8*, i8** %d_graph_mask, align 8 - %115 = load i8*, i8** %d_updating_graph_mask, align 8 - %116 = load i8*, i8** %d_graph_visited, align 8 - %117 = load i32*, i32** %d_cost, align 8 - %118 = load i32, i32* @no_of_nodes, align 4 - call void @_Z6KernelP4NodePiPbS2_S2_S1_i(%struct.Node* %112, i32* %113, i8* %114, i8* %115, i8* %116, i32* %117, i32 %118) - br label %kcall.end - -kcall.end: ; preds = %kcall.configok, %do.body - %call114 = call i32 @cudaDeviceSynchronize() - %119 = bitcast %struct.dim3* %agg.tmp115 to i8* - %120 = bitcast %struct.dim3* %grid to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %119, i8* align 4 %120, i64 12, i1 false) - %121 = bitcast %struct.dim3* %agg.tmp116 to i8* - %122 = bitcast %struct.dim3* %threads to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %121, i8* align 4 %122, i64 12, i1 false) - %123 = bitcast { i64, i32 }* %agg.tmp115.coerce to i8* - %124 = bitcast %struct.dim3* %agg.tmp115 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %123, i8* align 4 %124, i64 12, i1 false) - %125 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp115.coerce, i32 0, i32 0 - %126 = load i64, i64* %125, align 4 - %127 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp115.coerce, i32 0, i32 1 - %128 = load i32, i32* %127, align 4 - %129 = bitcast { i64, i32 }* %agg.tmp116.coerce to i8* - %130 = bitcast %struct.dim3* %agg.tmp116 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %129, i8* align 4 %130, i64 12, i1 false) - %131 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp116.coerce, i32 0, i32 0 - %132 = load i64, i64* %131, align 4 - %133 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp116.coerce, i32 0, i32 1 - %134 = load i32, i32* %133, align 4 - %call117 = call i32 @__cudaPushCallConfiguration(i64 %126, i32 %128, i64 %132, i32 %134, i64 0, i8* null) - %tobool118 = icmp ne i32 %call117, 0 - br i1 %tobool118, label %kcall.end120, label %kcall.configok119 - -kcall.configok119: ; preds = %kcall.end - %135 = load i8*, i8** %d_graph_mask, align 8 - %136 = load i8*, i8** %d_updating_graph_mask, align 8 - %137 = load i8*, i8** %d_graph_visited, align 8 - %138 = load i8*, i8** %d_over, align 8 - %139 = load i32, i32* @no_of_nodes, align 4 - call void @_Z7Kernel2PbS_S_S_i(i8* %135, i8* %136, i8* %137, i8* %138, i32 %139) - br label %kcall.end120 - -kcall.end120: ; preds = %kcall.configok119, %kcall.end - %call121 = call i32 @cudaDeviceSynchronize() - %140 = load i8*, i8** %d_over, align 8 - %call122 = call i32 @cudaMemcpy(i8* %stop, i8* %140, i64 1, i32 2) - %141 = load i32, i32* %k, align 4 - %inc123 = add nsw i32 %141, 1 - store i32 %inc123, i32* %k, align 4 - br label %do.cond - -do.cond: ; preds = %kcall.end120 - %142 = load i8, i8* %stop, align 1 - %tobool124 = trunc i8 %142 to i1 - br i1 %tobool124, label %do.body, label %do.end - -do.end: ; preds = %do.cond - %143 = load i32, i32* %k, align 4 - %call125 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([26 x i8], [26 x i8]* @.str.9, i64 0, i64 0), i32 %143) - %144 = load i32*, i32** %h_cost, align 8 - %145 = bitcast i32* %144 to i8* - %146 = load i32*, i32** %d_cost, align 8 - %147 = bitcast i32* %146 to i8* - %148 = load i32, i32* @no_of_nodes, align 4 - %conv126 = sext i32 %148 to i64 - %mul127 = mul i64 4, %conv126 - %call128 = call i32 @cudaMemcpy(i8* %145, i8* %147, i64 %mul127, i32 2) - %call129 = call %struct._IO_FILE* @fopen(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.10, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.11, i64 0, i64 0)) - store %struct._IO_FILE* %call129, %struct._IO_FILE** %fpo, align 8 - store i32 0, i32* %i130, align 4 - br label %for.cond131 - -for.cond131: ; preds = %for.inc137, %do.end - %149 = load i32, i32* %i130, align 4 - %150 = load i32, i32* @no_of_nodes, align 4 - %cmp132 = icmp slt i32 %149, %150 - br i1 %cmp132, label %for.body133, label %for.end139 - -for.body133: ; preds = %for.cond131 - %151 = load %struct._IO_FILE*, %struct._IO_FILE** %fpo, align 8 - %152 = load i32, i32* %i130, align 4 - %153 = load i32*, i32** %h_cost, align 8 - %154 = load i32, i32* %i130, align 4 - %idxprom134 = sext i32 %154 to i64 - %arrayidx135 = getelementptr inbounds i32, i32* %153, i64 %idxprom134 - %155 = load i32, i32* %arrayidx135, align 4 - %call136 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %151, i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str.12, i64 0, i64 0), i32 %152, i32 %155) - br label %for.inc137 - -for.inc137: ; preds = %for.body133 - %156 = load i32, i32* %i130, align 4 - %inc138 = add nsw i32 %156, 1 - store i32 %inc138, i32* %i130, align 4 - br label %for.cond131 - -for.end139: ; preds = %for.cond131 - %157 = load %struct._IO_FILE*, %struct._IO_FILE** %fpo, align 8 - %call140 = call i32 @fclose(%struct._IO_FILE* %157) - %call141 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([29 x i8], [29 x i8]* @.str.13, i64 0, i64 0)) - %158 = load %struct.Node*, %struct.Node** %h_graph_nodes, align 8 - %159 = bitcast %struct.Node* %158 to i8* - call void @free(i8* %159) #9 - %160 = load i32*, i32** %h_graph_edges, align 8 - %161 = bitcast i32* %160 to i8* - call void @free(i8* %161) #9 - %162 = load i8*, i8** %h_graph_mask, align 8 - call void @free(i8* %162) #9 - %163 = load i8*, i8** %h_updating_graph_mask, align 8 - call void @free(i8* %163) #9 - %164 = load i8*, i8** %h_graph_visited, align 8 - call void @free(i8* %164) #9 - %165 = load i32*, i32** %h_cost, align 8 - %166 = bitcast i32* %165 to i8* - call void @free(i8* %166) #9 - %167 = load %struct.Node*, %struct.Node** %d_graph_nodes, align 8 - %168 = bitcast %struct.Node* %167 to i8* - %call142 = call i32 @cudaFree(i8* %168) - %169 = load i32*, i32** %d_graph_edges, align 8 - %170 = bitcast i32* %169 to i8* - %call143 = call i32 @cudaFree(i8* %170) - %171 = load i8*, i8** %d_graph_mask, align 8 - %call144 = call i32 @cudaFree(i8* %171) - %172 = load i8*, i8** %d_updating_graph_mask, align 8 - %call145 = call i32 @cudaFree(i8* %172) - %173 = load i8*, i8** %d_graph_visited, align 8 - %call146 = call i32 @cudaFree(i8* %173) - %174 = load i32*, i32** %d_cost, align 8 - %175 = bitcast i32* %174 to i8* - %call147 = call i32 @cudaFree(i8* %175) - br label %return - -return: ; preds = %for.end139, %if.then2 - ret void -} - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z5UsageiPPc(i32 %argc, i8** %argv) #0 { -entry: - %argc.addr = alloca i32, align 4 - %argv.addr = alloca i8**, align 8 - store i32 %argc, i32* %argc.addr, align 4 - store i8** %argv, i8*** %argv.addr, align 8 - %0 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %1 = load i8**, i8*** %argv.addr, align 8 - %arrayidx = getelementptr inbounds i8*, i8** %1, i64 0 - %2 = load i8*, i8** %arrayidx, align 8 - %call = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %0, i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str, i64 0, i64 0), i8* %2) - ret void -} - -declare dso_local i32 @fprintf(%struct._IO_FILE*, i8*, ...) #3 - -; Function Attrs: noreturn nounwind -declare dso_local void @exit(i32) #4 - -declare dso_local i32 @printf(i8*, ...) #3 - -declare dso_local %struct._IO_FILE* @fopen(i8*, i8*) #3 - -declare dso_local i32 @fscanf(%struct._IO_FILE*, i8*, ...) #3 - -; Function Attrs: nounwind readnone speculatable willreturn -declare double @llvm.ceil.f64(double) #5 - -; Function Attrs: nounwind -declare dso_local noalias i8* @malloc(i64) #6 - -declare dso_local i32 @fclose(%struct._IO_FILE*) #3 - -declare dso_local i32 @cudaMalloc(i8**, i64) #3 - -declare dso_local i32 @cudaMemcpy(i8*, i8*, i64, i32) #3 - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN4dim3C2Ejjj(%struct.dim3* %this, i32 %vx, i32 %vy, i32 %vz) unnamed_addr #7 comdat align 2 { -entry: - %this.addr = alloca %struct.dim3*, align 8 - %vx.addr = alloca i32, align 4 - %vy.addr = alloca i32, align 4 - %vz.addr = alloca i32, align 4 - store %struct.dim3* %this, %struct.dim3** %this.addr, align 8 - store i32 %vx, i32* %vx.addr, align 4 - store i32 %vy, i32* %vy.addr, align 4 - store i32 %vz, i32* %vz.addr, align 4 - %this1 = load %struct.dim3*, %struct.dim3** %this.addr, align 8 - %x = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 0 - %0 = load i32, i32* %vx.addr, align 4 - store i32 %0, i32* %x, align 4 - %y = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 1 - %1 = load i32, i32* %vy.addr, align 4 - store i32 %1, i32* %y, align 4 - %z = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 2 - %2 = load i32, i32* %vz.addr, align 4 - store i32 %2, i32* %z, align 4 - ret void -} - -declare dso_local i32 @__cudaPushCallConfiguration(i64, i32, i64, i32, i64, i8*) #3 - -declare dso_local i32 @cudaDeviceSynchronize() #3 - -; Function Attrs: nounwind -declare dso_local void @free(i8*) #6 - -declare dso_local i32 @cudaFree(i8*) #3 - -define internal void @__cuda_register_globals(i8** %0) { -entry: - %1 = call i32 @__cudaRegisterFunction(i8** %0, i8* bitcast (void (%struct.Node*, i32*, i8*, i8*, i8*, i32*, i32)* @_Z6KernelP4NodePiPbS2_S2_S1_i to i8*), i8* getelementptr inbounds ([30 x i8], [30 x i8]* @0, i64 0, i64 0), i8* getelementptr inbounds ([30 x i8], [30 x i8]* @0, i64 0, i64 0), i32 -1, i8* null, i8* null, i8* null, i8* null, i32* null) - %2 = call i32 @__cudaRegisterFunction(i8** %0, i8* bitcast (void (i8*, i8*, i8*, i8*, i32)* @_Z7Kernel2PbS_S_S_i to i8*), i8* getelementptr inbounds ([20 x i8], [20 x i8]* @1, i64 0, i64 0), i8* getelementptr inbounds ([20 x i8], [20 x i8]* @1, i64 0, i64 0), i32 -1, i8* null, i8* null, i8* null, i8* null, i32* null) - ret void -} - -declare dso_local i32 @__cudaRegisterFunction(i8**, i8*, i8*, i8*, i32, i8*, i8*, i8*, i8*, i32*) - -declare dso_local i32 @__cudaRegisterVar(i8**, i8*, i8*, i8*, i32, i32, i32, i32) - -declare dso_local i8** @__cudaRegisterFatBinary(i8*) - -define internal void @__cuda_module_ctor(i8* %0) { -entry: - %1 = call i8** @__cudaRegisterFatBinary(i8* bitcast ({ i32, i32, i8*, i8* }* @__cuda_fatbin_wrapper to i8*)) - store i8** %1, i8*** @__cuda_gpubin_handle, align 8 - call void @__cuda_register_globals(i8** %1) - call void @__cudaRegisterFatBinaryEnd(i8** %1) - %2 = call i32 @atexit(void (i8*)* @__cuda_module_dtor) - ret void -} - -declare dso_local void @__cudaRegisterFatBinaryEnd(i8**) - -declare dso_local void @__cudaUnregisterFatBinary(i8**) - -define internal void @__cuda_module_dtor(i8* %0) { -entry: - %1 = load i8**, i8*** @__cuda_gpubin_handle, align 8 - call void @__cudaUnregisterFatBinary(i8** %1) - ret void -} - -declare dso_local i32 @atexit(void (i8*)*) - -attributes #0 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { argmemonly nounwind willreturn } -attributes #2 = { noinline norecurse optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #4 = { noreturn nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #5 = { nounwind readnone speculatable willreturn } -attributes #6 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #7 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #8 = { noreturn nounwind } -attributes #9 = { nounwind } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]} -!1 = !{i32 1, !"wchar_size", i32 4} -!2 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"} diff --git a/examples/bfs/bfs.cu b/examples/bfs/bfs.cu deleted file mode 100644 index 252341d..0000000 --- a/examples/bfs/bfs.cu +++ /dev/null @@ -1,213 +0,0 @@ -#include -#include -#include -#include -#include - -#define MAX_THREADS_PER_BLOCK 512 - -int no_of_nodes; -int edge_list_size; -FILE *fp; - -// Structure to hold a node information -struct Node { - int starting; - int no_of_edges; -}; - -#include "kernel.cu" -#include "kernel2.cu" - -void BFSGraph(int argc, char **argv); - -//////////////////////////////////////////////////////////////////////////////// -// Main Program -//////////////////////////////////////////////////////////////////////////////// -int main(int argc, char **argv) { - cudaSetDevice(0); - no_of_nodes = 0; - edge_list_size = 0; - BFSGraph(argc, argv); -} - -void Usage(int argc, char **argv) { - - fprintf(stderr, "Usage: %s \n", argv[0]); -} -//////////////////////////////////////////////////////////////////////////////// -// Apply BFS on a Graph using CUDA -//////////////////////////////////////////////////////////////////////////////// -void BFSGraph(int argc, char **argv) { - - char *input_f; - if (argc != 2) { - Usage(argc, argv); - exit(0); - } - - input_f = argv[1]; - printf("Reading File\n"); - // Read in Graph from a file - fp = fopen(input_f, "r"); - if (!fp) { - printf("Error Reading graph file\n"); - return; - } - - int source = 0; - - fscanf(fp, "%d", &no_of_nodes); - - int num_of_blocks = 1; - int num_of_threads_per_block = no_of_nodes; - - // Make execution Parameters according to the number of nodes - // Distribute threads across multiple Blocks if necessary - if (no_of_nodes > MAX_THREADS_PER_BLOCK) { - num_of_blocks = (int)ceil(no_of_nodes / (double)MAX_THREADS_PER_BLOCK); - num_of_threads_per_block = MAX_THREADS_PER_BLOCK; - } - - // allocate host memory - Node *h_graph_nodes = (Node *)malloc(sizeof(Node) * no_of_nodes); - bool *h_graph_mask = (bool *)malloc(sizeof(bool) * no_of_nodes); - bool *h_updating_graph_mask = (bool *)malloc(sizeof(bool) * no_of_nodes); - bool *h_graph_visited = (bool *)malloc(sizeof(bool) * no_of_nodes); - - int start, edgeno; - // initalize the memory - for (unsigned int i = 0; i < no_of_nodes; i++) { - fscanf(fp, "%d %d", &start, &edgeno); - h_graph_nodes[i].starting = start; - h_graph_nodes[i].no_of_edges = edgeno; - h_graph_mask[i] = false; - h_updating_graph_mask[i] = false; - h_graph_visited[i] = false; - } - - // read the source node from the file - fscanf(fp, "%d", &source); - source = 0; - - // set the source node as true in the mask - h_graph_mask[source] = true; - h_graph_visited[source] = true; - - fscanf(fp, "%d", &edge_list_size); - - int id, cost; - int *h_graph_edges = (int *)malloc(sizeof(int) * edge_list_size); - for (int i = 0; i < edge_list_size; i++) { - fscanf(fp, "%d", &id); - fscanf(fp, "%d", &cost); - h_graph_edges[i] = id; - } - - if (fp) - fclose(fp); - - printf("Read File\n"); - - // Copy the Node list to device memory - Node *d_graph_nodes; - cudaMalloc((void **)&d_graph_nodes, sizeof(Node) * no_of_nodes); - cudaMemcpy(d_graph_nodes, h_graph_nodes, sizeof(Node) * no_of_nodes, - cudaMemcpyHostToDevice); - - // Copy the Edge List to device Memory - int *d_graph_edges; - cudaMalloc((void **)&d_graph_edges, sizeof(int) * edge_list_size); - cudaMemcpy(d_graph_edges, h_graph_edges, sizeof(int) * edge_list_size, - cudaMemcpyHostToDevice); - - // Copy the Mask to device memory - bool *d_graph_mask; - cudaMalloc((void **)&d_graph_mask, sizeof(bool) * no_of_nodes); - cudaMemcpy(d_graph_mask, h_graph_mask, sizeof(bool) * no_of_nodes, - cudaMemcpyHostToDevice); - - bool *d_updating_graph_mask; - cudaMalloc((void **)&d_updating_graph_mask, sizeof(bool) * no_of_nodes); - cudaMemcpy(d_updating_graph_mask, h_updating_graph_mask, - sizeof(bool) * no_of_nodes, cudaMemcpyHostToDevice); - - // Copy the Visited nodes array to device memory - bool *d_graph_visited; - cudaMalloc((void **)&d_graph_visited, sizeof(bool) * no_of_nodes); - cudaMemcpy(d_graph_visited, h_graph_visited, sizeof(bool) * no_of_nodes, - cudaMemcpyHostToDevice); - - // allocate mem for the result on host side - int *h_cost = (int *)malloc(sizeof(int) * no_of_nodes); - for (int i = 0; i < no_of_nodes; i++) - h_cost[i] = -1; - h_cost[source] = 0; - - // allocate device memory for result - int *d_cost; - cudaMalloc((void **)&d_cost, sizeof(int) * no_of_nodes); - cudaMemcpy(d_cost, h_cost, sizeof(int) * no_of_nodes, cudaMemcpyHostToDevice); - - // make a bool to check if the execution is over - bool *d_over; - cudaMalloc((void **)&d_over, sizeof(bool)); - - printf("Copied Everything to GPU memory\n"); - - // setup execution parameters - dim3 grid(num_of_blocks, 1, 1); - dim3 threads(num_of_threads_per_block, 1, 1); - - int k = 0; - printf("Start traversing the tree\n"); - bool stop; - // Call the Kernel untill all the elements of Frontier are not false - do { - // if no thread changes this value then the loop stops - stop = false; - cudaMemcpy(d_over, &stop, sizeof(bool), cudaMemcpyHostToDevice); - - Kernel<<>>(d_graph_nodes, d_graph_edges, d_graph_mask, - d_updating_graph_mask, d_graph_visited, d_cost, - no_of_nodes); - cudaDeviceSynchronize(); - // check if kernel execution generated and error - - Kernel2<<>>(d_graph_mask, d_updating_graph_mask, - d_graph_visited, d_over, no_of_nodes); - cudaDeviceSynchronize(); - // check if kernel execution generated and error - - cudaMemcpy(&stop, d_over, sizeof(bool), cudaMemcpyDeviceToHost); - - k++; - } while (stop); - - printf("Kernel Executed %d times\n", k); - - // copy result from device to host - cudaMemcpy(h_cost, d_cost, sizeof(int) * no_of_nodes, cudaMemcpyDeviceToHost); - - // Store the result into a file - FILE *fpo = fopen("result.txt", "w"); - for (int i = 0; i < no_of_nodes; i++) - fprintf(fpo, "%d) cost:%d\n", i, h_cost[i]); - fclose(fpo); - printf("Result stored in result.txt\n"); - - // cleanup memory - free(h_graph_nodes); - free(h_graph_edges); - free(h_graph_mask); - free(h_updating_graph_mask); - free(h_graph_visited); - free(h_cost); - - cudaFree(d_graph_nodes); - cudaFree(d_graph_edges); - cudaFree(d_graph_mask); - cudaFree(d_updating_graph_mask); - cudaFree(d_graph_visited); - cudaFree(d_cost); -} diff --git a/examples/bfs/kernel.cu b/examples/bfs/kernel.cu deleted file mode 100644 index 7cf0df4..0000000 --- a/examples/bfs/kernel.cu +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef _KERNEL_H_ -#define _KERNEL_H_ - -__global__ void -Kernel( Node* g_graph_nodes, int* g_graph_edges, bool* g_graph_mask, bool* g_updating_graph_mask, bool *g_graph_visited, int* g_cost, int no_of_nodes) -{ - int tid = blockIdx.x*MAX_THREADS_PER_BLOCK + threadIdx.x; - if( tid // (in path known to compiler) needed by true/false, bool -#include // (in path known to compiler) needed by uint32_t -#include // (in path known to compiler) needed by malloc - -//======================================================================================================================================================150 -// DEFINE -//======================================================================================================================================================150 - -#define fp float - -#define Version "1.5" - -#ifdef WINDOWS -#define bool char -#define false 0 -#define true 1 -#endif - -/* #define DEFAULT_ORDER 256 */ - -#ifdef RD_WG_SIZE_0_0 -#define DEFAULT_ORDER RD_WG_SIZE_0_0 -#elif defined(RD_WG_SIZE_0) -#define DEFAULT_ORDER RD_WG_SIZE_0 -#elif defined(RD_WG_SIZE) -#define DEFAULT_ORDER RD_WG_SIZE -#else -#define DEFAULT_ORDER 256 -#endif - -/* #ifdef RD_WG_SIZE_1_0 */ -/* #define DEFAULT_ORDER_2 RD_WG_SIZE_1_0 */ -/* #elif defined(RD_WG_SIZE_1) */ -/* #define DEFAULT_ORDER_2 RD_WG_SIZE_1 */ -/* #elif defined(RD_WG_SIZE) */ -/* #define DEFAULT_ORDER_2 RD_WG_SIZE */ -/* #else */ -/* #define DEFAULT_ORDER_2 256 */ -/* #endif */ - -/* #define DEFAULT_ORDER 508 */ - -#define malloc(size) \ - ({ \ - void *_tmp; \ - \ - if (!(_tmp = malloc(size))) { \ - fprintf(stderr, "Allocation failed at %s:%d!\n", __FILE__, __LINE__); \ - exit(-1); \ - } \ - \ - _tmp; \ - }) - -//======================================================================================================================================================150 -// STRUCTURES -//======================================================================================================================================================150 - -// struct list_item; -typedef struct list_item list_item_t; - -typedef struct list_t { - list_item_t *head, *tail; - uint32_t length; - int32_t (*compare)(const void *key, const void *with); - void (*datum_delete)(void *); -} list_t; - -typedef list_item_t *list_iterator_t; -typedef list_item_t *list_reverse_iterator_t; - -/* Type representing the record - * to which a given key refers. - * In a real B+ tree system, the - * record would hold data (in a database) - * or a file (in an operating system) - * or some other information. - * Users can rewrite this part of the code - * to change the type and content - * of the value field. - */ -typedef struct record { - int value; -} record; - -/* Type representing a node in the B+ tree. - * This type is general enough to serve for both - * the leaf and the internal node. - * The heart of the node is the array - * of keys and the array of corresponding - * pointers. The relation between keys - * and pointers differs between leaves and - * internal nodes. In a leaf, the index - * of each key equals the index of its corresponding - * pointer, with a maximum of order - 1 key-pointer - * pairs. The last pointer points to the - * leaf to the right (or NULL in the case - * of the rightmost leaf). - * In an internal node, the first pointer - * refers to lower nodes with keys less than - * the smallest key in the keys array. Then, - * with indices i starting at 0, the pointer - * at i + 1 points to the subtree with keys - * greater than or equal to the key in this - * node at index i. - * The num_keys field is used to keep - * track of the number of valid keys. - * In an internal node, the number of valid - * pointers is always num_keys + 1. - * In a leaf, the number of valid pointers - * to data is always num_keys. The - * last leaf pointer points to the next leaf. - */ -typedef struct node { - void **pointers; - int *keys; - struct node *parent; - bool is_leaf; - int num_keys; - struct node *next; // Used for queue. -} node; - -// -typedef struct knode { - int location; - int indices[DEFAULT_ORDER + 1]; - int keys[DEFAULT_ORDER + 1]; - bool is_leaf; - int num_keys; -} knode; - -struct list_item { - struct list_item *pred, *next; - void *datum; -}; - -//===============================================================================================================================================================================================================200 -// PROTOTYPES -//===============================================================================================================================================================================================================200 - -//======================================================================================================================================================150 -// Other -//======================================================================================================================================================150 - -void list_item_init(list_item_t *li, void *datum); - -void list_item_delete(list_item_t *li, void (*datum_delete)(void *datum)); - -void list_insert_item_tail(list_t *l, list_item_t *i); - -void list_insert_item_before(list_t *l, list_item_t *next, list_item_t *i); - -void list_insert_item_after(list_t *l, list_item_t *pred, list_item_t *i); - -void list_insert_item_sorted(list_t *l, list_item_t *i); - -//======================================================================================================================================================150 -// ??? -//======================================================================================================================================================150 - -void list_init(list_t *l, int32_t (*compare)(const void *key, const void *with), - void (*datum_delete)(void *datum)); - -void list_delete(list_t *l); - -void list_reset(list_t *l); - -void list_insert_head(list_t *l, void *v); - -void list_insert_tail(list_t *l, void *v); - -void list_insert_before(list_t *l, list_item_t *next, void *v); - -void list_insert_after(list_t *l, list_item_t *pred, void *v); - -void list_insert_sorted(list_t *l, void *v); - -void list_insert_item_head(list_t *l, list_item_t *i); - -void list_remove_item(list_t *l, list_item_t *i); - -void list_remove_head(list_t *l); - -void list_remove_tail(list_t *l); - -list_item_t *list_find_item(list_t *l, void *datum); - -list_item_t *list_get_head_item(list_t *l); - -list_item_t *list_get_tail_item(list_t *l); - -void *list_find(list_t *l, void *datum); - -void *list_get_head(list_t *l); - -void *list_get_tail(list_t *l); - -uint32_t list_get_length(list_t *l); - -bool list_is_empty(list_t *l); - -bool list_not_empty(list_t *l); - -void list_visit_items(list_t *l, void (*visitor)(void *v)); - -void *list_item_get_datum(list_item_t *li); - -void list_iterator_init(list_t *l, list_iterator_t *li); - -void list_iterator_delete(list_iterator_t *li); - -void list_iterator_next(list_iterator_t *li); - -void list_iterator_prev(list_iterator_t *li); - -void *list_iterator_get_datum(list_iterator_t *li); - -bool list_iterator_is_valid(list_iterator_t *li); - -void list_reverse_iterator_init(list_t *l, list_iterator_t *li); - -void list_reverse_iterator_delete(list_iterator_t *li); - -void list_reverse_iterator_next(list_iterator_t *li); - -void list_reverse_iterator_prev(list_iterator_t *li); - -void *list_reverse_iterator_get_datum(list_iterator_t *li); - -bool list_reverse_iterator_is_valid(list_reverse_iterator_t *li); - -//======================================================================================================================================================150 -// Output and utility -//======================================================================================================================================================150 - -void *kmalloc(int size); - -long transform_to_cuda(node *n, - bool verbose); // returns actual mem used in a long - -void usage_1(void); - -void usage_2(void); - -void enqueue(node *new_node); - -node *dequeue(void); - -int height(node *root); - -int path_to_root(node *root, node *child); - -void print_leaves(node *root); - -void print_tree(node *root); - -node *find_leaf(node *root, int key, bool verbose); - -record *find(node *root, int key, bool verbose); - -int cut(int length); - -//======================================================================================================================================================150 -// Insertion -//======================================================================================================================================================150 - -record *make_record(int value); - -node *make_node(void); - -node *make_leaf(void); - -int get_left_index(node *parent, node *left); - -node *insert_into_leaf(node *leaf, int key, record *pointer); - -node *insert_into_leaf_after_splitting(node *root, node *leaf, int key, - record *pointer); - -node *insert_into_node(node *root, node *parent, int left_index, int key, - node *right); - -node *insert_into_node_after_splitting(node *root, node *parent, int left_index, - int key, node *right); - -node *insert_into_parent(node *root, node *left, int key, node *right); - -node *insert_into_new_root(node *left, int key, node *right); - -node *start_new_tree(int key, record *pointer); - -node *insert(node *root, int key, int value); - -//======================================================================================================================================================150 -// Deletion -//======================================================================================================================================================150 - -int get_neighbor_index(node *n); - -node *adjust_root(node *root); - -node *coalesce_nodes(node *root, node *n, node *neighbor, int neighbor_index, - int k_prime); - -node *redistribute_nodes(node *root, node *n, node *neighbor, - int neighbor_index, int k_prime_index, int k_prime); - -node *delete_entry(node *root, node *n, int key, void *pointer); - -node *deleteVal(node *root, int key); - -//===============================================================================================================================================================================================================200 -// HEADER -//===============================================================================================================================================================================================================200 - -// int main( int argc, -// char *argv []); - -//===============================================================================================================================================================================================================200 -// END -//===============================================================================================================================================================================================================200 - -// #endif - -// # ifdef __cplusplus -// } -// # endif diff --git a/examples/btree/kernel/kernel_gpu_cuda.cu b/examples/btree/kernel/kernel_gpu_cuda.cu deleted file mode 100755 index 57170c8..0000000 --- a/examples/btree/kernel/kernel_gpu_cuda.cu +++ /dev/null @@ -1,54 +0,0 @@ -//========================================================================================================================================================================================================200 -// findK function -//========================================================================================================================================================================================================200 - -__global__ void -findK( long height, - knode *knodesD, - long knodes_elem, - record *recordsD, - - long *currKnodeD, - long *offsetD, - int *keysD, - record *ansD) -{ - - // private thread IDs - int thid = threadIdx.x; - int bid = blockIdx.x; - - // processtree levels - int i; - for(i = 0; i < height; i++){ - - // if value is between the two keys - if((knodesD[currKnodeD[bid]].keys[thid]) <= keysD[bid] && (knodesD[currKnodeD[bid]].keys[thid+1] > keysD[bid])){ - // this conditional statement is inserted to avoid crush due to but in original code - // "offset[bid]" calculated below that addresses knodes[] in the next iteration goes outside of its bounds cause segmentation fault - // more specifically, values saved into knodes->indices in the main function are out of bounds of knodes that they address - if(knodesD[offsetD[bid]].indices[thid] < knodes_elem){ - offsetD[bid] = knodesD[offsetD[bid]].indices[thid]; - } - } - __syncthreads(); - - // set for next tree level - if(thid==0){ - currKnodeD[bid] = offsetD[bid]; - } - __syncthreads(); - - } - - //At this point, we have a candidate leaf node which may contain - //the target record. Check each key to hopefully find the record - if(knodesD[currKnodeD[bid]].keys[thid] == keysD[bid]){ - ansD[bid].value = recordsD[knodesD[currKnodeD[bid]].indices[thid]].value; - } - -} - -//========================================================================================================================================================================================================200 -// End -//========================================================================================================================================================================================================200 diff --git a/examples/btree/kernel/kernel_gpu_cuda_2.cu b/examples/btree/kernel/kernel_gpu_cuda_2.cu deleted file mode 100755 index 1bb8d7b..0000000 --- a/examples/btree/kernel/kernel_gpu_cuda_2.cu +++ /dev/null @@ -1,70 +0,0 @@ -//========================================================================================================================================================================================================200 -// findRangeK function -//========================================================================================================================================================================================================200 - -__global__ void -findRangeK( long height, - - knode *knodesD, - long knodes_elem, - - long *currKnodeD, - long *offsetD, - long *lastKnodeD, - long *offset_2D, - int *startD, - int *endD, - int *RecstartD, - int *ReclenD) -{ - - // private thread IDs - int thid = threadIdx.x; - int bid = blockIdx.x; - - // ??? - int i; - for(i = 0; i < height; i++){ - - if((knodesD[currKnodeD[bid]].keys[thid] <= startD[bid]) && (knodesD[currKnodeD[bid]].keys[thid+1] > startD[bid])){ - // this conditional statement is inserted to avoid crush due to but in original code - // "offset[bid]" calculated below that later addresses part of knodes goes outside of its bounds cause segmentation fault - // more specifically, values saved into knodes->indices in the main function are out of bounds of knodes that they address - if(knodesD[currKnodeD[bid]].indices[thid] < knodes_elem){ - offsetD[bid] = knodesD[currKnodeD[bid]].indices[thid]; - } - } - if((knodesD[lastKnodeD[bid]].keys[thid] <= endD[bid]) && (knodesD[lastKnodeD[bid]].keys[thid+1] > endD[bid])){ - // this conditional statement is inserted to avoid crush due to but in original code - // "offset_2[bid]" calculated below that later addresses part of knodes goes outside of its bounds cause segmentation fault - // more specifically, values saved into knodes->indices in the main function are out of bounds of knodes that they address - if(knodesD[lastKnodeD[bid]].indices[thid] < knodes_elem){ - offset_2D[bid] = knodesD[lastKnodeD[bid]].indices[thid]; - } - } - __syncthreads(); - - // set for next tree level - if(thid==0){ - currKnodeD[bid] = offsetD[bid]; - lastKnodeD[bid] = offset_2D[bid]; - } - __syncthreads(); - } - - // Find the index of the starting record - if(knodesD[currKnodeD[bid]].keys[thid] == startD[bid]){ - RecstartD[bid] = knodesD[currKnodeD[bid]].indices[thid]; - } - __syncthreads(); - - // Find the index of the ending record - if(knodesD[lastKnodeD[bid]].keys[thid] == endD[bid]){ - ReclenD[bid] = knodesD[lastKnodeD[bid]].indices[thid] - RecstartD[bid]+1; - } - -} - -//========================================================================================================================================================================================================200 -// End -//========================================================================================================================================================================================================200 diff --git a/examples/btree/kernel/kernel_gpu_cuda_wrapper.cu b/examples/btree/kernel/kernel_gpu_cuda_wrapper.cu deleted file mode 100755 index 361f9bb..0000000 --- a/examples/btree/kernel/kernel_gpu_cuda_wrapper.cu +++ /dev/null @@ -1,292 +0,0 @@ -#ifdef __cplusplus -extern "C" { -#endif - -//========================================================================================================================================================================================================200 -// DEFINE/INCLUDE -//========================================================================================================================================================================================================200 - -//======================================================================================================================================================150 -// COMMON -//======================================================================================================================================================150 - -#include "../common.h" // (in main program directory) needed to recognized input variables - -//======================================================================================================================================================150 -// UTILITIES -//======================================================================================================================================================150 - -#include "../util/cuda/cuda.h" // (in path specified to compiler) needed by for device functions -#include "../util/timer/timer.h" // (in path specified to compiler) needed by timer - -//======================================================================================================================================================150 -// KERNEL -//======================================================================================================================================================150 - -#include "./kernel_gpu_cuda.cu" // (in current directory) GPU kernel, cannot include with header file because of complications with passing of constant memory variables - -//======================================================================================================================================================150 -// HEADER -//======================================================================================================================================================150 - -#include "./kernel_gpu_cuda_wrapper.h" // (in current directory) - -//========================================================================================================================================================================================================200 -// KERNEL_GPU_CUDA_WRAPPER FUNCTION -//========================================================================================================================================================================================================200 - -void -kernel_gpu_cuda_wrapper(record *records, - long records_mem, - knode *knodes, - long knodes_elem, - long knodes_mem, - - int order, - long maxheight, - int count, - - long *currKnode, - long *offset, - int *keys, - record *ans) -{ - - //======================================================================================================================================================150 - // CPU VARIABLES - //======================================================================================================================================================150 - - // timer - long long time0; - long long time1; - long long time2; - long long time3; - long long time4; - long long time5; - long long time6; - - time0 = get_time(); - - //======================================================================================================================================================150 - // GPU SETUP - //======================================================================================================================================================150 - - //====================================================================================================100 - // INITIAL DRIVER OVERHEAD - //====================================================================================================100 - - cudaThreadSynchronize(); - - //====================================================================================================100 - // EXECUTION PARAMETERS - //====================================================================================================100 - - int numBlocks; - numBlocks = count; // max # of blocks can be 65,535 - int threadsPerBlock; - threadsPerBlock = order < 1024 ? order : 1024; - - printf("# of blocks = %d, # of threads/block = %d (ensure that device can handle)\n", numBlocks, threadsPerBlock); - - time1 = get_time(); - - //======================================================================================================================================================150 - // GPU MEMORY (MALLOC) - //======================================================================================================================================================150 - - //====================================================================================================100 - // DEVICE IN - //====================================================================================================100 - - //==================================================50 - // recordsD - //==================================================50 - - record *recordsD; - cudaMalloc((void**)&recordsD, records_mem); - checkCUDAError("cudaMalloc recordsD"); - - //==================================================50 - // knodesD - //==================================================50 - - knode *knodesD; - cudaMalloc((void**)&knodesD, knodes_mem); - checkCUDAError("cudaMalloc recordsD"); - - //==================================================50 - // currKnodeD - //==================================================50 - - long *currKnodeD; - cudaMalloc((void**)&currKnodeD, count*sizeof(long)); - checkCUDAError("cudaMalloc currKnodeD"); - - //==================================================50 - // offsetD - //==================================================50 - - long *offsetD; - cudaMalloc((void**)&offsetD, count*sizeof(long)); - checkCUDAError("cudaMalloc offsetD"); - - //==================================================50 - // keysD - //==================================================50 - - int *keysD; - cudaMalloc((void**)&keysD, count*sizeof(int)); - checkCUDAError("cudaMalloc keysD"); - - //====================================================================================================100 - // DEVICE IN/OUT - //====================================================================================================100 - - //==================================================50 - // ansD - //==================================================50 - - record *ansD; - cudaMalloc((void**)&ansD, count*sizeof(record)); - checkCUDAError("cudaMalloc ansD"); - - time2 = get_time(); - - //======================================================================================================================================================150 - // GPU MEMORY COPY - //======================================================================================================================================================150 - - //====================================================================================================100 - // GPU MEMORY (MALLOC) COPY IN - //====================================================================================================100 - - //==================================================50 - // recordsD - //==================================================50 - - cudaMemcpy(recordsD, records, records_mem, cudaMemcpyHostToDevice); - checkCUDAError("cudaMalloc cudaMemcpy memD"); - - //==================================================50 - // knodesD - //==================================================50 - - cudaMemcpy(knodesD, knodes, knodes_mem, cudaMemcpyHostToDevice); - checkCUDAError("cudaMalloc cudaMemcpy memD"); - - //==================================================50 - // currKnodeD - //==================================================50 - - cudaMemcpy(currKnodeD, currKnode, count*sizeof(long), cudaMemcpyHostToDevice); - checkCUDAError("cudaMalloc cudaMemcpy currKnodeD"); - - //==================================================50 - // offsetD - //==================================================50 - - cudaMemcpy(offsetD, offset, count*sizeof(long), cudaMemcpyHostToDevice); - checkCUDAError("cudaMalloc cudaMemcpy offsetD"); - - //==================================================50 - // keysD - //==================================================50 - - cudaMemcpy(keysD, keys, count*sizeof(int), cudaMemcpyHostToDevice); - checkCUDAError("cudaMalloc cudaMemcpy keysD"); - - //====================================================================================================100 - // DEVICE IN/OUT - //====================================================================================================100 - - //==================================================50 - // ansD - //==================================================50 - - cudaMemcpy(ansD, ans, count*sizeof(record), cudaMemcpyHostToDevice); - checkCUDAError("cudaMalloc cudaMemcpy ansD"); - - time3 = get_time(); - - //======================================================================================================================================================150 - // findK kernel - //======================================================================================================================================================150 - - findK<<>>( maxheight, - - knodesD, - knodes_elem, - - recordsD, - - currKnodeD, - offsetD, - keysD, - ansD); - cudaThreadSynchronize(); - checkCUDAError("findK"); - - time4 = get_time(); - - //======================================================================================================================================================150 - // GPU MEMORY COPY (CONTD.) - //======================================================================================================================================================150 - - //====================================================================================================100 - // DEVICE IN/OUT - //====================================================================================================100 - - //==================================================50 - // ansD - //==================================================50 - - cudaMemcpy(ans, ansD, count*sizeof(record), cudaMemcpyDeviceToHost); - checkCUDAError("cudaMemcpy ansD"); - - time5 = get_time(); - - //======================================================================================================================================================150 - // GPU MEMORY DEALLOCATION - //======================================================================================================================================================150 - - cudaFree(recordsD); - cudaFree(knodesD); - - cudaFree(currKnodeD); - cudaFree(offsetD); - cudaFree(keysD); - cudaFree(ansD); - - time6 = get_time(); - - //======================================================================================================================================================150 - // DISPLAY TIMING - //======================================================================================================================================================150 - - printf("Time spent in different stages of GPU_CUDA KERNEL:\n"); - - printf("%15.12f s, %15.12f % : GPU: SET DEVICE / DRIVER INIT\n", (float) (time1-time0) / 1000000, (float) (time1-time0) / (float) (time6-time0) * 100); - printf("%15.12f s, %15.12f % : GPU MEM: ALO\n", (float) (time2-time1) / 1000000, (float) (time2-time1) / (float) (time6-time0) * 100); - printf("%15.12f s, %15.12f % : GPU MEM: COPY IN\n", (float) (time3-time2) / 1000000, (float) (time3-time2) / (float) (time6-time0) * 100); - - printf("%15.12f s, %15.12f % : GPU: KERNEL\n", (float) (time4-time3) / 1000000, (float) (time4-time3) / (float) (time6-time0) * 100); - - printf("%15.12f s, %15.12f % : GPU MEM: COPY OUT\n", (float) (time5-time4) / 1000000, (float) (time5-time4) / (float) (time6-time0) * 100); - printf("%15.12f s, %15.12f % : GPU MEM: FRE\n", (float) (time6-time5) / 1000000, (float) (time6-time5) / (float) (time6-time0) * 100); - - printf("Total time:\n"); - printf("%.12f s\n", (float) (time6-time0) / 1000000); - -//========================================================================================================================================================================================================200 -// End -//========================================================================================================================================================================================================200 - -} - -//========================================================================================================================================================================================================200 -// END -//========================================================================================================================================================================================================200 - -#ifdef __cplusplus -} -#endif diff --git a/examples/btree/kernel/kernel_gpu_cuda_wrapper.h b/examples/btree/kernel/kernel_gpu_cuda_wrapper.h deleted file mode 100644 index b27c428..0000000 --- a/examples/btree/kernel/kernel_gpu_cuda_wrapper.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifdef __cplusplus -extern "C" { -#endif - -//========================================================================================================================================================================================================200 -// KERNEL_GPU_CUDA_WRAPPER HEADER -//========================================================================================================================================================================================================200 - -void kernel_gpu_cuda_wrapper(record *records, long records_mem, knode *knodes, - long knodes_elem, long knodes_mem, - - int order, long maxheight, int count, - - long *currKnode, long *offset, int *keys, - record *ans); - -//========================================================================================================================================================================================================200 -// End -//========================================================================================================================================================================================================200 - -#ifdef __cplusplus -} -#endif diff --git a/examples/btree/kernel/kernel_gpu_cuda_wrapper_2.cu b/examples/btree/kernel/kernel_gpu_cuda_wrapper_2.cu deleted file mode 100755 index baa6f11..0000000 --- a/examples/btree/kernel/kernel_gpu_cuda_wrapper_2.cu +++ /dev/null @@ -1,347 +0,0 @@ -#ifdef __cplusplus -extern "C" { -#endif - -//========================================================================================================================================================================================================200 -// INCLUDE -//========================================================================================================================================================================================================200 - -//======================================================================================================================================================150 -// COMMON -//======================================================================================================================================================150 - -#include "../common.h" // (in the main program folder) needed to recognized input parameters - -//======================================================================================================================================================150 -// UTILITIES -//======================================================================================================================================================150 - -#include "../util/cuda/cuda.h" // (in library path specified to compiler) needed by for device functions -#include "../util/timer/timer.h" // (in library path specified to compiler) needed by timer - -//======================================================================================================================================================150 -// KERNEL -//======================================================================================================================================================150 - -#include "./kernel_gpu_cuda_2.cu" // (in the current directory) GPU kernel, cannot include with header file because of complications with passing of constant memory variables - -//======================================================================================================================================================150 -// HEADER -//======================================================================================================================================================150 - -#include "./kernel_gpu_cuda_wrapper_2.h" // (in the current directory) - -//========================================================================================================================================================================================================200 -// FUNCTION -//========================================================================================================================================================================================================200 - -void -kernel_gpu_cuda_wrapper_2( knode *knodes, - long knodes_elem, - long knodes_mem, - - int order, - long maxheight, - int count, - - long *currKnode, - long *offset, - long *lastKnode, - long *offset_2, - int *start, - int *end, - int *recstart, - int *reclength) -{ - - //======================================================================================================================================================150 - // CPU VARIABLES - //======================================================================================================================================================150 - - // timer - long long time0; - long long time1; - long long time2; - long long time3; - long long time4; - long long time5; - long long time6; - - time0 = get_time(); - - //======================================================================================================================================================150 - // GPU SETUP - //======================================================================================================================================================150 - - //====================================================================================================100 - // INITIAL DRIVER OVERHEAD - //====================================================================================================100 - - cudaThreadSynchronize(); - - //====================================================================================================100 - // EXECUTION PARAMETERS - //====================================================================================================100 - - int numBlocks; - numBlocks = count; - int threadsPerBlock; - threadsPerBlock = order < 1024 ? order : 1024; - - printf("# of blocks = %d, # of threads/block = %d (ensure that device can handle)\n", numBlocks, threadsPerBlock); - - time1 = get_time(); - - //======================================================================================================================================================150 - // GPU MEMORY MALLOC - //======================================================================================================================================================150 - - //====================================================================================================100 - // DEVICE IN - //====================================================================================================100 - - //==================================================50 - // knodesD - //==================================================50 - - knode *knodesD; - cudaMalloc((void**)&knodesD, knodes_mem); - checkCUDAError("cudaMalloc recordsD"); - - //==================================================50 - // currKnodeD - //==================================================50 - - long *currKnodeD; - cudaMalloc((void**)&currKnodeD, count*sizeof(long)); - checkCUDAError("cudaMalloc currKnodeD"); - - //==================================================50 - // offsetD - //==================================================50 - - long *offsetD; - cudaMalloc((void**)&offsetD, count*sizeof(long)); - checkCUDAError("cudaMalloc offsetD"); - - //==================================================50 - // lastKnodeD - //==================================================50 - - long *lastKnodeD; - cudaMalloc((void**)&lastKnodeD, count*sizeof(long)); - checkCUDAError("cudaMalloc lastKnodeD"); - - //==================================================50 - // offset_2D - //==================================================50 - - long *offset_2D; - cudaMalloc((void**)&offset_2D, count*sizeof(long)); - checkCUDAError("cudaMalloc offset_2D"); - - //==================================================50 - // startD - //==================================================50 - - int *startD; - cudaMalloc((void**)&startD, count*sizeof(int)); - checkCUDAError("cudaMalloc startD"); - - //==================================================50 - // endD - //==================================================50 - - int *endD; - cudaMalloc((void**)&endD, count*sizeof(int)); - checkCUDAError("cudaMalloc endD"); - - //====================================================================================================100 - // DEVICE IN/OUT - //====================================================================================================100 - - //==================================================50 - // ansDStart - //==================================================50 - - int *ansDStart; - cudaMalloc((void**)&ansDStart, count*sizeof(int)); - checkCUDAError("cudaMalloc ansDStart"); - - //==================================================50 - // ansDLength - //==================================================50 - - int *ansDLength; - cudaMalloc((void**)&ansDLength, count*sizeof(int)); - checkCUDAError("cudaMalloc ansDLength"); - - time2 = get_time(); - - //======================================================================================================================================================150 - // GPU MEMORY COPY - //======================================================================================================================================================150 - - //====================================================================================================100 - // DEVICE IN - //====================================================================================================100 - - //==================================================50 - // knodesD - //==================================================50 - - cudaMemcpy(knodesD, knodes, knodes_mem, cudaMemcpyHostToDevice); - checkCUDAError("cudaMalloc cudaMemcpy memD"); - - //==================================================50 - // currKnodeD - //==================================================50 - - cudaMemcpy(currKnodeD, currKnode, count*sizeof(long), cudaMemcpyHostToDevice); - checkCUDAError("cudaMalloc cudaMemcpy currKnodeD"); - - //==================================================50 - // offsetD - //==================================================50 - - cudaMemcpy(offsetD, offset, count*sizeof(long), cudaMemcpyHostToDevice); - checkCUDAError("cudaMalloc cudaMemcpy offsetD"); - - //==================================================50 - // lastKnodeD - //==================================================50 - - cudaMemcpy(lastKnodeD, lastKnode, count*sizeof(long), cudaMemcpyHostToDevice); - checkCUDAError("cudaMalloc cudaMemcpy lastKnodeD"); - - //==================================================50 - // offset_2D - //==================================================50 - - cudaMemcpy(offset_2D, offset_2, count*sizeof(long), cudaMemcpyHostToDevice); - checkCUDAError("cudaMalloc cudaMemcpy offset_2D"); - - //==================================================50 - // startD - //==================================================50 - - cudaMemcpy(startD, start, count*sizeof(int), cudaMemcpyHostToDevice); - checkCUDAError("cudaMemcpy startD"); - - //==================================================50 - // endD - //==================================================50 - - cudaMemcpy(endD, end, count*sizeof(int), cudaMemcpyHostToDevice); - checkCUDAError("cudaMemcpy endD"); - - //====================================================================================================100 - // DEVICE IN/OUT - //====================================================================================================100 - - //==================================================50 - // ansDStart - //==================================================50 - - cudaMemcpy(ansDStart, recstart, count*sizeof(int), cudaMemcpyHostToDevice); - checkCUDAError("cudaMemcpy ansDStart"); - - //==================================================50 - // ansDLength - //==================================================50 - - cudaMemcpy(ansDLength, reclength, count*sizeof(int), cudaMemcpyHostToDevice); - checkCUDAError("cudaMemcpy ansDLength"); - - time3 = get_time(); - - //======================================================================================================================================================150 - // KERNEL - //======================================================================================================================================================150 - - // [GPU] findRangeK kernel - findRangeK<<>>( maxheight, - knodesD, - knodes_elem, - - currKnodeD, - offsetD, - lastKnodeD, - offset_2D, - startD, - endD, - ansDStart, - ansDLength); - cudaThreadSynchronize(); - checkCUDAError("findRangeK"); - - time4 = get_time(); - - //======================================================================================================================================================150 - // GPU MEMORY COPY (CONTD.) - //======================================================================================================================================================150 - - //====================================================================================================100 - // DEVICE IN/OUT - //====================================================================================================100 - - //==================================================50 - // ansDStart - //==================================================50 - - cudaMemcpy(recstart, ansDStart, count*sizeof(int), cudaMemcpyDeviceToHost); - checkCUDAError("cudaMemcpy ansDStart"); - - //==================================================50 - // ansDLength - //==================================================50 - - cudaMemcpy(reclength, ansDLength, count*sizeof(int), cudaMemcpyDeviceToHost); - checkCUDAError("cudaMemcpy ansDLength"); - - time5 = get_time(); - - //======================================================================================================================================================150 - // GPU MEMORY DEALLOCATION - //======================================================================================================================================================150 - - cudaFree(knodesD); - - cudaFree(currKnodeD); - cudaFree(offsetD); - cudaFree(lastKnodeD); - cudaFree(offset_2D); - cudaFree(startD); - cudaFree(endD); - cudaFree(ansDStart); - cudaFree(ansDLength); - - time6 = get_time(); - - //======================================================================================================================================================150 - // DISPLAY TIMING - //======================================================================================================================================================150 - - printf("Time spent in different stages of GPU_CUDA KERNEL:\n"); - - printf("%15.12f s, %15.12f % : GPU: SET DEVICE / DRIVER INIT\n", (float) (time1-time0) / 1000000, (float) (time1-time0) / (float) (time6-time0) * 100); - printf("%15.12f s, %15.12f % : GPU MEM: ALO\n", (float) (time2-time1) / 1000000, (float) (time2-time1) / (float) (time6-time0) * 100); - printf("%15.12f s, %15.12f % : GPU MEM: COPY IN\n", (float) (time3-time2) / 1000000, (float) (time3-time2) / (float) (time6-time0) * 100); - - printf("%15.12f s, %15.12f % : GPU: KERNEL\n", (float) (time4-time3) / 1000000, (float) (time4-time3) / (float) (time6-time0) * 100); - - printf("%15.12f s, %15.12f % : GPU MEM: COPY OUT\n", (float) (time5-time4) / 1000000, (float) (time5-time4) / (float) (time6-time0) * 100); - printf("%15.12f s, %15.12f % : GPU MEM: FRE\n", (float) (time6-time5) / 1000000, (float) (time6-time5) / (float) (time6-time0) * 100); - - printf("Total time:\n"); - printf("%.12f s\n", (float) (time6-time0) / 1000000); - -} - -//========================================================================================================================================================================================================200 -// END -//========================================================================================================================================================================================================200 - -#ifdef __cplusplus -} -#endif diff --git a/examples/btree/kernel/kernel_gpu_cuda_wrapper_2.h b/examples/btree/kernel/kernel_gpu_cuda_wrapper_2.h deleted file mode 100644 index 43b07ae..0000000 --- a/examples/btree/kernel/kernel_gpu_cuda_wrapper_2.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifdef __cplusplus -extern "C" { -#endif - -//========================================================================================================================================================================================================200 -// KERNEL_GPU_CUDA_WRAPPER HEADER -//========================================================================================================================================================================================================200 - -void kernel_gpu_cuda_wrapper_2(knode *knodes, long knodes_elem, long knodes_mem, - - int order, long maxheight, int count, - - long *currKnode, long *offset, long *lastKnode, - long *offset_2, int *start, int *end, - int *recstart, int *reclength); - -//========================================================================================================================================================================================================200 -// End -//========================================================================================================================================================================================================200 - -#ifdef __cplusplus -} -#endif diff --git a/examples/btree/kernel_gpu_cuda_wrapper-cuda-nvptx64-nvidia-cuda-sm_61.ll b/examples/btree/kernel_gpu_cuda_wrapper-cuda-nvptx64-nvidia-cuda-sm_61.ll deleted file mode 100644 index 7979fd9..0000000 --- a/examples/btree/kernel_gpu_cuda_wrapper-cuda-nvptx64-nvidia-cuda-sm_61.ll +++ /dev/null @@ -1,332 +0,0 @@ -; ModuleID = 'kernel_gpu_cuda_wrapper-cuda-nvptx64-nvidia-cuda-sm_61.bc' -source_filename = "kernel/kernel_gpu_cuda_wrapper.cu" -target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" -target triple = "nvptx64-nvidia-cuda" - -%struct.__cuda_builtin_threadIdx_t = type { i8 } -%struct.__cuda_builtin_blockIdx_t = type { i8 } -%struct.cudaFuncAttributes = type { i64, i64, i64, i32, i32, i32, i32, i32, i32, i32 } -%struct.knode = type { i32, [257 x i32], [257 x i32], i8, i32 } -%struct.record = type { i32 } - -$_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv = comdat any - -$_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv = comdat any - -@threadIdx = extern_weak dso_local addrspace(1) global %struct.__cuda_builtin_threadIdx_t, align 1 -@blockIdx = extern_weak dso_local addrspace(1) global %struct.__cuda_builtin_blockIdx_t, align 1 - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaMalloc(i8** %p, i64 %s) #0 { -entry: - %p.addr = alloca i8**, align 8 - %s.addr = alloca i64, align 8 - store i8** %p, i8*** %p.addr, align 8 - store i64 %s, i64* %s.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaFuncGetAttributes(%struct.cudaFuncAttributes* %p, i8* %c) #0 { -entry: - %p.addr = alloca %struct.cudaFuncAttributes*, align 8 - %c.addr = alloca i8*, align 8 - store %struct.cudaFuncAttributes* %p, %struct.cudaFuncAttributes** %p.addr, align 8 - store i8* %c, i8** %c.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaDeviceGetAttribute(i32* %value, i32 %attr, i32 %device) #0 { -entry: - %value.addr = alloca i32*, align 8 - %attr.addr = alloca i32, align 4 - %device.addr = alloca i32, align 4 - store i32* %value, i32** %value.addr, align 8 - store i32 %attr, i32* %attr.addr, align 4 - store i32 %device, i32* %device.addr, align 4 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaGetDevice(i32* %device) #0 { -entry: - %device.addr = alloca i32*, align 8 - store i32* %device, i32** %device.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaOccupancyMaxActiveBlocksPerMultiprocessor(i32* %numBlocks, i8* %func, i32 %blockSize, i64 %dynamicSmemSize) #0 { -entry: - %numBlocks.addr = alloca i32*, align 8 - %func.addr = alloca i8*, align 8 - %blockSize.addr = alloca i32, align 4 - %dynamicSmemSize.addr = alloca i64, align 8 - store i32* %numBlocks, i32** %numBlocks.addr, align 8 - store i8* %func, i8** %func.addr, align 8 - store i32 %blockSize, i32* %blockSize.addr, align 4 - store i64 %dynamicSmemSize, i64* %dynamicSmemSize.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(i32* %numBlocks, i8* %func, i32 %blockSize, i64 %dynamicSmemSize, i32 %flags) #0 { -entry: - %numBlocks.addr = alloca i32*, align 8 - %func.addr = alloca i8*, align 8 - %blockSize.addr = alloca i32, align 4 - %dynamicSmemSize.addr = alloca i64, align 8 - %flags.addr = alloca i32, align 4 - store i32* %numBlocks, i32** %numBlocks.addr, align 8 - store i8* %func, i8** %func.addr, align 8 - store i32 %blockSize, i32* %blockSize.addr, align 4 - store i64 %dynamicSmemSize, i64* %dynamicSmemSize.addr, align 8 - store i32 %flags, i32* %flags.addr, align 4 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define dso_local void @findK(i64 %height, %struct.knode* %knodesD, i64 %knodes_elem, %struct.record* %recordsD, i64* %currKnodeD, i64* %offsetD, i32* %keysD, %struct.record* %ansD) #0 { -entry: - %height.addr = alloca i64, align 8 - %knodesD.addr = alloca %struct.knode*, align 8 - %knodes_elem.addr = alloca i64, align 8 - %recordsD.addr = alloca %struct.record*, align 8 - %currKnodeD.addr = alloca i64*, align 8 - %offsetD.addr = alloca i64*, align 8 - %keysD.addr = alloca i32*, align 8 - %ansD.addr = alloca %struct.record*, align 8 - %thid = alloca i32, align 4 - %bid = alloca i32, align 4 - %i = alloca i32, align 4 - store i64 %height, i64* %height.addr, align 8 - store %struct.knode* %knodesD, %struct.knode** %knodesD.addr, align 8 - store i64 %knodes_elem, i64* %knodes_elem.addr, align 8 - store %struct.record* %recordsD, %struct.record** %recordsD.addr, align 8 - store i64* %currKnodeD, i64** %currKnodeD.addr, align 8 - store i64* %offsetD, i64** %offsetD.addr, align 8 - store i32* %keysD, i32** %keysD.addr, align 8 - store %struct.record* %ansD, %struct.record** %ansD.addr, align 8 - %call = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - store i32 %call, i32* %thid, align 4 - %call1 = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #2 - store i32 %call1, i32* %bid, align 4 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = load i32, i32* %i, align 4 - %conv = sext i32 %0 to i64 - %1 = load i64, i64* %height.addr, align 8 - %cmp = icmp slt i64 %conv, %1 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %2 = load %struct.knode*, %struct.knode** %knodesD.addr, align 8 - %3 = load i64*, i64** %currKnodeD.addr, align 8 - %4 = load i32, i32* %bid, align 4 - %idxprom = sext i32 %4 to i64 - %arrayidx = getelementptr inbounds i64, i64* %3, i64 %idxprom - %5 = load i64, i64* %arrayidx, align 8 - %arrayidx2 = getelementptr inbounds %struct.knode, %struct.knode* %2, i64 %5 - %keys = getelementptr inbounds %struct.knode, %struct.knode* %arrayidx2, i32 0, i32 2 - %6 = load i32, i32* %thid, align 4 - %idxprom3 = sext i32 %6 to i64 - %arrayidx4 = getelementptr inbounds [257 x i32], [257 x i32]* %keys, i64 0, i64 %idxprom3 - %7 = load i32, i32* %arrayidx4, align 4 - %8 = load i32*, i32** %keysD.addr, align 8 - %9 = load i32, i32* %bid, align 4 - %idxprom5 = sext i32 %9 to i64 - %arrayidx6 = getelementptr inbounds i32, i32* %8, i64 %idxprom5 - %10 = load i32, i32* %arrayidx6, align 4 - %cmp7 = icmp sle i32 %7, %10 - br i1 %cmp7, label %land.lhs.true, label %if.end34 - -land.lhs.true: ; preds = %for.body - %11 = load %struct.knode*, %struct.knode** %knodesD.addr, align 8 - %12 = load i64*, i64** %currKnodeD.addr, align 8 - %13 = load i32, i32* %bid, align 4 - %idxprom8 = sext i32 %13 to i64 - %arrayidx9 = getelementptr inbounds i64, i64* %12, i64 %idxprom8 - %14 = load i64, i64* %arrayidx9, align 8 - %arrayidx10 = getelementptr inbounds %struct.knode, %struct.knode* %11, i64 %14 - %keys11 = getelementptr inbounds %struct.knode, %struct.knode* %arrayidx10, i32 0, i32 2 - %15 = load i32, i32* %thid, align 4 - %add = add nsw i32 %15, 1 - %idxprom12 = sext i32 %add to i64 - %arrayidx13 = getelementptr inbounds [257 x i32], [257 x i32]* %keys11, i64 0, i64 %idxprom12 - %16 = load i32, i32* %arrayidx13, align 4 - %17 = load i32*, i32** %keysD.addr, align 8 - %18 = load i32, i32* %bid, align 4 - %idxprom14 = sext i32 %18 to i64 - %arrayidx15 = getelementptr inbounds i32, i32* %17, i64 %idxprom14 - %19 = load i32, i32* %arrayidx15, align 4 - %cmp16 = icmp sgt i32 %16, %19 - br i1 %cmp16, label %if.then, label %if.end34 - -if.then: ; preds = %land.lhs.true - %20 = load %struct.knode*, %struct.knode** %knodesD.addr, align 8 - %21 = load i64*, i64** %offsetD.addr, align 8 - %22 = load i32, i32* %bid, align 4 - %idxprom17 = sext i32 %22 to i64 - %arrayidx18 = getelementptr inbounds i64, i64* %21, i64 %idxprom17 - %23 = load i64, i64* %arrayidx18, align 8 - %arrayidx19 = getelementptr inbounds %struct.knode, %struct.knode* %20, i64 %23 - %indices = getelementptr inbounds %struct.knode, %struct.knode* %arrayidx19, i32 0, i32 1 - %24 = load i32, i32* %thid, align 4 - %idxprom20 = sext i32 %24 to i64 - %arrayidx21 = getelementptr inbounds [257 x i32], [257 x i32]* %indices, i64 0, i64 %idxprom20 - %25 = load i32, i32* %arrayidx21, align 4 - %conv22 = sext i32 %25 to i64 - %26 = load i64, i64* %knodes_elem.addr, align 8 - %cmp23 = icmp slt i64 %conv22, %26 - br i1 %cmp23, label %if.then24, label %if.end - -if.then24: ; preds = %if.then - %27 = load %struct.knode*, %struct.knode** %knodesD.addr, align 8 - %28 = load i64*, i64** %offsetD.addr, align 8 - %29 = load i32, i32* %bid, align 4 - %idxprom25 = sext i32 %29 to i64 - %arrayidx26 = getelementptr inbounds i64, i64* %28, i64 %idxprom25 - %30 = load i64, i64* %arrayidx26, align 8 - %arrayidx27 = getelementptr inbounds %struct.knode, %struct.knode* %27, i64 %30 - %indices28 = getelementptr inbounds %struct.knode, %struct.knode* %arrayidx27, i32 0, i32 1 - %31 = load i32, i32* %thid, align 4 - %idxprom29 = sext i32 %31 to i64 - %arrayidx30 = getelementptr inbounds [257 x i32], [257 x i32]* %indices28, i64 0, i64 %idxprom29 - %32 = load i32, i32* %arrayidx30, align 4 - %conv31 = sext i32 %32 to i64 - %33 = load i64*, i64** %offsetD.addr, align 8 - %34 = load i32, i32* %bid, align 4 - %idxprom32 = sext i32 %34 to i64 - %arrayidx33 = getelementptr inbounds i64, i64* %33, i64 %idxprom32 - store i64 %conv31, i64* %arrayidx33, align 8 - br label %if.end - -if.end: ; preds = %if.then24, %if.then - br label %if.end34 - -if.end34: ; preds = %if.end, %land.lhs.true, %for.body - call void @llvm.nvvm.barrier0() - %35 = load i32, i32* %thid, align 4 - %cmp35 = icmp eq i32 %35, 0 - br i1 %cmp35, label %if.then36, label %if.end41 - -if.then36: ; preds = %if.end34 - %36 = load i64*, i64** %offsetD.addr, align 8 - %37 = load i32, i32* %bid, align 4 - %idxprom37 = sext i32 %37 to i64 - %arrayidx38 = getelementptr inbounds i64, i64* %36, i64 %idxprom37 - %38 = load i64, i64* %arrayidx38, align 8 - %39 = load i64*, i64** %currKnodeD.addr, align 8 - %40 = load i32, i32* %bid, align 4 - %idxprom39 = sext i32 %40 to i64 - %arrayidx40 = getelementptr inbounds i64, i64* %39, i64 %idxprom39 - store i64 %38, i64* %arrayidx40, align 8 - br label %if.end41 - -if.end41: ; preds = %if.then36, %if.end34 - call void @llvm.nvvm.barrier0() - br label %for.inc - -for.inc: ; preds = %if.end41 - %41 = load i32, i32* %i, align 4 - %inc = add nsw i32 %41, 1 - store i32 %inc, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %42 = load %struct.knode*, %struct.knode** %knodesD.addr, align 8 - %43 = load i64*, i64** %currKnodeD.addr, align 8 - %44 = load i32, i32* %bid, align 4 - %idxprom42 = sext i32 %44 to i64 - %arrayidx43 = getelementptr inbounds i64, i64* %43, i64 %idxprom42 - %45 = load i64, i64* %arrayidx43, align 8 - %arrayidx44 = getelementptr inbounds %struct.knode, %struct.knode* %42, i64 %45 - %keys45 = getelementptr inbounds %struct.knode, %struct.knode* %arrayidx44, i32 0, i32 2 - %46 = load i32, i32* %thid, align 4 - %idxprom46 = sext i32 %46 to i64 - %arrayidx47 = getelementptr inbounds [257 x i32], [257 x i32]* %keys45, i64 0, i64 %idxprom46 - %47 = load i32, i32* %arrayidx47, align 4 - %48 = load i32*, i32** %keysD.addr, align 8 - %49 = load i32, i32* %bid, align 4 - %idxprom48 = sext i32 %49 to i64 - %arrayidx49 = getelementptr inbounds i32, i32* %48, i64 %idxprom48 - %50 = load i32, i32* %arrayidx49, align 4 - %cmp50 = icmp eq i32 %47, %50 - br i1 %cmp50, label %if.then51, label %if.end63 - -if.then51: ; preds = %for.end - %51 = load %struct.record*, %struct.record** %recordsD.addr, align 8 - %52 = load %struct.knode*, %struct.knode** %knodesD.addr, align 8 - %53 = load i64*, i64** %currKnodeD.addr, align 8 - %54 = load i32, i32* %bid, align 4 - %idxprom52 = sext i32 %54 to i64 - %arrayidx53 = getelementptr inbounds i64, i64* %53, i64 %idxprom52 - %55 = load i64, i64* %arrayidx53, align 8 - %arrayidx54 = getelementptr inbounds %struct.knode, %struct.knode* %52, i64 %55 - %indices55 = getelementptr inbounds %struct.knode, %struct.knode* %arrayidx54, i32 0, i32 1 - %56 = load i32, i32* %thid, align 4 - %idxprom56 = sext i32 %56 to i64 - %arrayidx57 = getelementptr inbounds [257 x i32], [257 x i32]* %indices55, i64 0, i64 %idxprom56 - %57 = load i32, i32* %arrayidx57, align 4 - %idxprom58 = sext i32 %57 to i64 - %arrayidx59 = getelementptr inbounds %struct.record, %struct.record* %51, i64 %idxprom58 - %value = getelementptr inbounds %struct.record, %struct.record* %arrayidx59, i32 0, i32 0 - %58 = load i32, i32* %value, align 4 - %59 = load %struct.record*, %struct.record** %ansD.addr, align 8 - %60 = load i32, i32* %bid, align 4 - %idxprom60 = sext i32 %60 to i64 - %arrayidx61 = getelementptr inbounds %struct.record, %struct.record* %59, i64 %idxprom60 - %value62 = getelementptr inbounds %struct.record, %struct.record* %arrayidx61, i32 0, i32 0 - store i32 %58, i32* %value62, align 4 - br label %if.end63 - -if.end63: ; preds = %if.then51, %for.end - ret void -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() - ret i32 %0 -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() - ret i32 %0 -} - -; Function Attrs: convergent nounwind -declare void @llvm.nvvm.barrier0() #2 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() #3 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() #3 - -attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_61" "target-features"="+ptx64,+sm_61" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { alwaysinline convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_61" "target-features"="+ptx64,+sm_61" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { convergent nounwind } -attributes #3 = { nounwind readnone } - -!llvm.module.flags = !{!0, !1, !2} -!nvvm.annotations = !{!3, !4, !5, !4, !6, !6, !6, !6, !7, !7, !6} -!llvm.ident = !{!8} -!nvvmir.version = !{!9} - -!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]} -!1 = !{i32 1, !"wchar_size", i32 4} -!2 = !{i32 4, !"nvvm-reflect-ftz", i32 0} -!3 = !{void (i64, %struct.knode*, i64, %struct.record*, i64*, i64*, i32*, %struct.record*)* @findK, !"kernel", i32 1} -!4 = !{null, !"align", i32 8} -!5 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080} -!6 = !{null, !"align", i32 16} -!7 = !{null, !"align", i32 16, !"align", i32 65552, !"align", i32 131088} -!8 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"} -!9 = !{i32 1, i32 4} diff --git a/examples/btree/kernel_gpu_cuda_wrapper-host-x86_64-unknown-linux-gnu.ll b/examples/btree/kernel_gpu_cuda_wrapper-host-x86_64-unknown-linux-gnu.ll deleted file mode 100644 index 37c05f6..0000000 --- a/examples/btree/kernel_gpu_cuda_wrapper-host-x86_64-unknown-linux-gnu.ll +++ /dev/null @@ -1,553 +0,0 @@ -; ModuleID = 'kernel_gpu_cuda_wrapper-host-x86_64-unknown-linux-gnu.bc' -source_filename = "kernel/kernel_gpu_cuda_wrapper.cu" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -%struct.knode = type { i32, [257 x i32], [257 x i32], i8, i32 } -%struct.record = type { i32 } -%struct.dim3 = type { i32, i32, i32 } -%struct.CUstream_st = type opaque - -$_ZN4dim3C2Ejjj = comdat any - -@.str = private unnamed_addr constant [75 x i8] c"# of blocks = %d, # of threads/block = %d (ensure that device can handle)\0A\00", align 1 -@.str.1 = private unnamed_addr constant [21 x i8] c"cudaMalloc recordsD\00", align 1 -@.str.2 = private unnamed_addr constant [23 x i8] c"cudaMalloc currKnodeD\00", align 1 -@.str.3 = private unnamed_addr constant [20 x i8] c"cudaMalloc offsetD\00", align 1 -@.str.4 = private unnamed_addr constant [18 x i8] c"cudaMalloc keysD\00", align 1 -@.str.5 = private unnamed_addr constant [16 x i8] c"cudaMalloc ansD\00", align 1 -@.str.6 = private unnamed_addr constant [27 x i8] c"cudaMalloc cudaMemcpy memD\00", align 1 -@.str.7 = private unnamed_addr constant [33 x i8] c"cudaMalloc cudaMemcpy currKnodeD\00", align 1 -@.str.8 = private unnamed_addr constant [30 x i8] c"cudaMalloc cudaMemcpy offsetD\00", align 1 -@.str.9 = private unnamed_addr constant [28 x i8] c"cudaMalloc cudaMemcpy keysD\00", align 1 -@.str.10 = private unnamed_addr constant [27 x i8] c"cudaMalloc cudaMemcpy ansD\00", align 1 -@.str.11 = private unnamed_addr constant [6 x i8] c"findK\00", align 1 -@.str.12 = private unnamed_addr constant [16 x i8] c"cudaMemcpy ansD\00", align 1 -@.str.13 = private unnamed_addr constant [52 x i8] c"Time spent in different stages of GPU_CUDA KERNEL:\0A\00", align 1 -@.str.14 = private unnamed_addr constant [54 x i8] c"%15.12f s, %15.12f % : GPU: SET DEVICE / DRIVER INIT\0A\00", align 1 -@.str.15 = private unnamed_addr constant [37 x i8] c"%15.12f s, %15.12f % : GPU MEM: ALO\0A\00", align 1 -@.str.16 = private unnamed_addr constant [41 x i8] c"%15.12f s, %15.12f % : GPU MEM: COPY IN\0A\00", align 1 -@.str.17 = private unnamed_addr constant [36 x i8] c"%15.12f s, %15.12f % : GPU: KERNEL\0A\00", align 1 -@.str.18 = private unnamed_addr constant [42 x i8] c"%15.12f s, %15.12f % : GPU MEM: COPY OUT\0A\00", align 1 -@.str.19 = private unnamed_addr constant [37 x i8] c"%15.12f s, %15.12f % : GPU MEM: FRE\0A\00", align 1 -@.str.20 = private unnamed_addr constant [13 x i8] c"Total time:\0A\00", align 1 -@.str.21 = private unnamed_addr constant [9 x i8] c"%.12f s\0A\00", align 1 -@0 = private constant [15913 x i8] c"P\EDU\BA\01\00\10\00\18>\00\00\00\00\00\00\02\00\01\01@\00\00\00\A83\00\00\00\00\00\00\00\00\00\00\00\00\00\00\07\00\01\00=\00\00\00\00\00\00\00\00\00\00\00\11\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\7FELF\02\01\013\07\00\00\00\00\00\00\00\02\00\BE\00e\00\00\00\00\00\00\00\00\00\00\00\003\00\00\00\00\00\00\C00\00\00\00\00\00\00=\05=\00@\008\00\03\00@\00\09\00\01\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00.text.findK\00.nv.info.findK\00.nv.shared.findK\00.nv.global\00.nv.constant0.findK\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00findK\00.text.findK\00.nv.info.findK\00.nv.shared.findK\00.nv.global\00threadIdx\00blockIdx\00.nv.constant0.findK\00_param\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\008\00\00\00\03\00\07\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00d\00\00\00\03\00\08\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00o\00\00\00\01\00\08\00\00\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00y\00\00\00\01\00\08\00\01\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\82\00\00\00\03\00\06\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\002\00\00\00\12\10\07\00\00\00\00\00\00\00\00\00@,\00\00\00\00\00\00\04/\08\00\06\00\00\00\16\00\00\00\04#\08\00\06\00\00\00\00\00\00\00\04\12\08\00\06\00\00\00P\00\00\00\04\11\08\00\06\00\00\00P\00\00\00\010\00\00\01*\00\00\04\0A\08\00\05\00\00\00@\01@\00\03\19@\00\04\17\0C\00\00\00\00\00\07\008\00\00\F0!\00\04\17\0C\00\00\00\00\00\06\000\00\00\F0!\00\04\17\0C\00\00\00\00\00\05\00(\00\00\F0!\00\04\17\0C\00\00\00\00\00\04\00 \00\00\F0!\00\04\17\0C\00\00\00\00\00\03\00\18\00\00\F0!\00\04\17\0C\00\00\00\00\00\02\00\10\00\00\F0!\00\04\17\0C\00\00\00\00\00\01\00\08\00\00\F0!\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0!\00\03\1B\FF\00\04\1D\04\00\F0\07\00\00\04\1C\04\00\F8+\00\00\04\1E\04\00p\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\EF\1F\E0\FD\03\00\00\00\00\00\00p\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\008\02\00\00\00\00\00\00\B4\00\00\00\00\00\00\00\03\00\00\00\07\00\00\00\04\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00i\00\00\00\01\00\00\00\02\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\EC\02\00\00\00\00\00\00\80\01\00\00\00\00\00\00\00\00\00\00\07\00\00\00\04\00\00\00\00\00\00\00\00\00\00\00\00\00\00\002\00\00\00\01\00\00\00\06\00\10\00\00\00\00\00\00\00\00\00\00\00\00\00\80\04\00\00\00\00\00\00@,\00\00\00\00\00\00\03\00\00\00\06\00\00\16 \00\00\00\00\00\00\00\00\00\00\00\00\00\00\00^\00\00\00\08\00\00\00\03\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\C00\00\00\00\00\00\00\02\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\06\00\00\00\05\00\00\00\003\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\A8\00\00\00\00\00\00\00\A8\00\00\00\00\00\00\00\08\00\00\00\00\00\00\00\01\00\00\00\05\00\00\00\EC\02\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\C0-\00\00\00\00\00\00\C0-\00\00\00\00\00\00\08\00\00\00\00\00\00\00\01\00\00\00\06\00\00\00\C00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\02\00\00\00\00\00\00\00\08\00\00\00\00\00\00\00\01\00\01\01H\00\00\00\E8\09\00\00\00\00\00\00\E6\09\00\00@\00\00\00\04\00\06\00=\00\00\00\00\00\00\00\00\00\00\00\11 \00\00\00\00\00\00\00\00\00\00\00\00\00\00\FB#\00\00\00\00\00\00\00\00\00\00\00\00\00\00\F0 \0A\0A\0A\0A.version 6.4\0A.target sm_61\0A.address_size 64.\00\FF\12global .align 1 .b8 threadIdx[1];#\00\03Tblock\22\00\F0\0B\0A.weak .func (.param .b32 \12\00\F5\07_retval0) cudaMalloc(\0A&\00'64\18\00\11_\16\00?_0, \00\0B\A61\0A)\0A{\0A.loc\98\00\118\98\00!__\15\00\A0_depot0[16\C1\002regI\00;%SP\0F\00\15L\10\00\8932 %r<2>!\00\D3rd<3>;\0A\0Amov.u2\00\1B,e\00b;\0Acvta\8D\00\04%\00\13,\\\00\22ld\C8\00\02\18\00nrd2, [\CE\00\1E])\00\1F1)\00\01a0];\0Ast#\00\81[%SP+0],,\00\0A\16\00\128\16\00\222;\B6\00\01\D8\00a1, 999(\00\02g\00\00\EF\00\18[\9E\01\03M\00\AF1;\0Aret;\0A\0A}\D6\01\1A\FE\02FuncGetAttributes\E1\01\0D#\00\0E\EC\01\0F+\00\06\0F\F7\01\1B\1F1\F7\01Q\1F1\F7\01!\0E\D9\00\0F\02\02\0F\0E8\01\0F\0D\02\8DhDevice\B4\00\0E\0E\02\0E$\00\0F\0F\02\00/32,\00\0B\1F1,\00\18\1F2<\02\13\1F2<\02\1F\1D4<\02\1F2<\02\0C\1F2<\02\13\01_\00\04;\02\0F\D9\00\07\1D]4\00\1F14\00\06\0Fp\02\10\0E\9A\01\0Fq\02\12(32q\02\0B\15\00!12\16\00\09\86\02\1F3\86\02\15\1F3\86\02#2Get\CB\00\0E}\02\05\1B\00\04\DA\00\0F\1C\02\13?3[8W\04.\0F\1B\02\0D\1F3W\04\19\04\B3\01\0D\D0\00\0F\AA\01\06\0F\05\04W\F0\04OccupancyMaxActiveB3\08\FE\03sPerMultiprocessor\9F\01\0F;\00\16\0EB\06\0FC\00%\0EJ\04\0FC\00\1E\1F2\86\00/\1F3\88\02\13O4[32\89\02\1C\1D3\89\02\1F4\89\02\0C\1F4\89\02\19\133\89\02\0F\F1\00\1E\0F\BC\04\00\0FK\00$/2]w\07\00\0FL\00$\0F\1F\05\01\0F\98\00%\0F\A7\07\1D\097\05\186M\05\04,\00\2224-\00\183\CF\03\1F2\CF\03\15\1F2\CF\03L\9FWithFlags\D8\03(\05D\00\0E\E1\03\0FL\00'\0F\EA\030\0CL\00\1F2\98\008\1F3\98\008\1F4H\04\13O5[40\EC\08.\0FH\04\0D\1F5\EC\08\1C\0F\F9\00+\1F]\9C\040\0D\9A\01\0F\A5\040\0D:\02\0F\AE\041\0D\DB\02\0F\B7\041\0D|\03\0F\C0\04I\08-\00\1F3$\0A6\F6\05visible .entry findK\8E\04\00\88\00\01\13\00\0E]\04\0C\1B\00\1F1\1B\00\07\1F2\1B\00\07\1F3\1B\00\07\1F4\1B\00\07\1F5\1B\00\07\1F6\1B\00\07\1F7\EA\03\13?6[8\EA\03\16\ABpred %p<7>\FC\03-16\FD\03?115\FF\03\0C\1F6G\08\19I8, [\DE\00\0F\D1\02\00\1B7$\00\1F6$\00\00\1B6$\00\1F5$\00\00\1B5$\00\0F;\04\01\1B4$\00\1F3_\04\04\09\19\02\0F\DA\03\04\09X\02\0F\A9\03\04\09\97\02\13]:\01#to\81\13\04*\00\119>\03\138\1F\00\0A\1C\00!10\1D\00\1F9<\00\05!11 \00\1F7=\00\03\122\DB\03\1F1>\00\06\113!\00\1F6>\00\03\124>\00\1F3>\00\06\115!\00\1F5>\00\03\126>\00\1F5>\00\06\117!\00\1F4>\00\03\128>\00\1F7>\00\06\149\A0\04\0F>\00\00\1225\01/19\EA\04\19\1A0\17\00)16\D5\04\0C\EC\04*18\18\00\03\ED\04:d16\18\00\144u\00\1B4\18\00\03w\00*12\18\00\135w\00+10\DC\0Dj%tid.xb\05$64\8F\05\098\0AO%cta-\00\00\1F8\8F\05\02\1A0&\00$72\7F\05\F2\01bra.uni LBB6_1;\0A\08\00\10:\E8\02\11s=\00Ed21,5\00\01\00\03\04\7F\01$2,q\01\B0;\0Asetp.ge.s\1C\004p1,9\00\01(\00\A3;\0A@%p1 brag\00\1B1x\00\132x\00'2:`\00455,\BB\01\08w\00556,\8C\01\17;\A7\00557,\02\01T;\0Ashl\EE\04458, \00\833;\0Aadd.s\19\00$9,Q\00\01'\00\08f\00 60N\00\00$\00\94];\0Amul.lo7\00461,\22\00I2068S\00462,\BB\00\01*\00\08\A1\00563,\D0\01\09\A1\00464, \00\1A2N\00'5,U\00\2264\A1\00\01\D3\01\127\9F\00j65+103\98\01\146!\01\1B4\09\01%67\09\01\0Bh\00$8,9\00\01'\00\07h\00\138h\00\158\E4\01\12t\D0\002p3,\87\001%r8\E0\01\163\E0\01\1B7\DF\01\133\DF\01\183\DF\01/69\DF\01\02/70\DF\01\03/71\DF\01\04472, \00\0A\DF\01473,Q\00\01'\00\09f\00\124\D8\00-73\DF\01475,\22\00\0D\DF\01476,\BB\00\01*\00\07)\01\189\DD\01$ad\B8\00\02\D9\0529, \9D\05\00I\00\05\D2\00\01=\01*10\CE\00\02\8B\01\1D7\A4\01779,\82\00'78{\00#11\CD\00\1E9\0D\02/80\0D\02\04%817\01\0Ai\00482,9\00\01'\00\08i\00\122i\00$82\0E\02#le\0E\02#4,\89\00\00&\00\01\10\02\1F4\10\02\07\134\10\02\184\10\02/83\10\02\02584,c\05\08\10\02/85\10\02\04486, \00\0A\10\02487,Q\00\01'\00\09f\00\03\E8\02-87\10\02\138\B8\07\1F8\10\02\00490,\BB\00\01*\00\08\A1\00/91\EF\03\04\129a\01\1D9z\01793,U\00)92N\00\03\B1\02I93+4 \01595,\CA\06\0C\87\05$5,;\00\01)\00\01\97\01\165\97\01\1B6\97\01\135\97\01\185\97\01/96\97\01\02/97\97\01\03/98\97\01\04499, \00\0A\97\01D100,R\00\01(\00\08g\00#10\DC\02=100\9A\01E102,%\00\0D\9C\01D103,\C1\00\02-\00\08\A8\00?104\9F\01\04E105,\22\00\1B2S\00(6,[\00:105T\00\147\AD\0086+4\80\0D\02\C0\00\13,%\00\0Bw\01\136w\01*6:\18\00\137\18\00\D87:\0Abar.sync 0\8E\03\0AP\06\00\FA\01\14n\8F\03#6,!\00\110\F5\01\166\F5\01\1B9~\00\138f\00)8:w\01\1F8\DF\01\03?109\E0\01\04\131\C3\0A\01\22\00\0B\E2\01\03\BE\0A#10\EF\09)10\E4\01\04W\04J111]\19\00\183\0C\06\09R\00%4,\22\00\04R\00\08n\01\141\C0\09+12\F0\00\139\F0\00\1A9V\01\0A\07\09\140\08\09'0:p\01\184\08\09\07\E3\05#5,\1E\00\1F1e\09\02/15f\09\05\181t\01\1F2\00\05\03/24\10\07\03\1F2\00\05\05\122\A4\02\1D2\00\05427,Q\00\01'\00\09f\00\03\00\05\1E2\00\05\132\83\0B\0F\00\05\01430,\BB\00\01*\00\08\A1\00\1F3\00\05\05\123\B6\03\1D3\00\05733,U\00'32\EC\02\03\FE\04.33\E2\06/34\E2\06\04%35\09\01\0Bh\00$6,9\00\01'\00\07h\00\135h\00\1D6R\03\222,\87\00\22%rH\05\172\CF\0A\0DG\0B\04\D0\0A\191\D1\0A\1434\05\1A2\C3\05/38\FA\01\02/39\FA\01\03/40\FA\01\04441, \00\0A\FA\01442,Q\00\01'\00\08f\00\2243\F3\00-42\FA\01444,\22\00\0D\FA\01445,\BB\00\01*\00\09\A1\00\1F6\FA\01\04\134\0C\0E\1C6\92\01748,U\00)47N\00\139\A1\00+8+O\00450,!\00\0AO\00451,p\01\01'\00\07\E1\01\136\D7\0B\0Aj\04552,\98\0D\0Ac\00\153S\01\0Bc\00$4,9\00\01'\00\07\FA\03\00\1D\00\01\F9\03\0C`\07$13\17\02\B03:\0Aret;\0A\0A}\0A\00\00\00", section ".nv_fatbin", align 8 -@__cuda_fatbin_wrapper = internal constant { i32, i32, i8*, i8* } { i32 1180844977, i32 1, i8* getelementptr inbounds ([15913 x i8], [15913 x i8]* @0, i64 0, i64 0), i8* null }, section ".nvFatBinSegment", align 8 -@__cuda_gpubin_handle = internal global i8** null, align 8 -@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* bitcast (void (i8*)* @__cuda_module_ctor to void ()*), i8* null }] - -; Function Attrs: noinline optnone uwtable -define dso_local void @findK(i64 %height, %struct.knode* %knodesD, i64 %knodes_elem, %struct.record* %recordsD, i64* %currKnodeD, i64* %offsetD, i32* %keysD, %struct.record* %ansD) #0 { -entry: - %height.addr = alloca i64, align 8 - %knodesD.addr = alloca %struct.knode*, align 8 - %knodes_elem.addr = alloca i64, align 8 - %recordsD.addr = alloca %struct.record*, align 8 - %currKnodeD.addr = alloca i64*, align 8 - %offsetD.addr = alloca i64*, align 8 - %keysD.addr = alloca i32*, align 8 - %ansD.addr = alloca %struct.record*, align 8 - %grid_dim = alloca %struct.dim3, align 8 - %block_dim = alloca %struct.dim3, align 8 - %shmem_size = alloca i64, align 8 - %stream = alloca i8*, align 8 - %grid_dim.coerce = alloca { i64, i32 }, align 8 - %block_dim.coerce = alloca { i64, i32 }, align 8 - store i64 %height, i64* %height.addr, align 8 - store %struct.knode* %knodesD, %struct.knode** %knodesD.addr, align 8 - store i64 %knodes_elem, i64* %knodes_elem.addr, align 8 - store %struct.record* %recordsD, %struct.record** %recordsD.addr, align 8 - store i64* %currKnodeD, i64** %currKnodeD.addr, align 8 - store i64* %offsetD, i64** %offsetD.addr, align 8 - store i32* %keysD, i32** %keysD.addr, align 8 - store %struct.record* %ansD, %struct.record** %ansD.addr, align 8 - %kernel_args = alloca i8*, i64 8, align 16 - %0 = bitcast i64* %height.addr to i8* - %1 = getelementptr i8*, i8** %kernel_args, i32 0 - store i8* %0, i8** %1 - %2 = bitcast %struct.knode** %knodesD.addr to i8* - %3 = getelementptr i8*, i8** %kernel_args, i32 1 - store i8* %2, i8** %3 - %4 = bitcast i64* %knodes_elem.addr to i8* - %5 = getelementptr i8*, i8** %kernel_args, i32 2 - store i8* %4, i8** %5 - %6 = bitcast %struct.record** %recordsD.addr to i8* - %7 = getelementptr i8*, i8** %kernel_args, i32 3 - store i8* %6, i8** %7 - %8 = bitcast i64** %currKnodeD.addr to i8* - %9 = getelementptr i8*, i8** %kernel_args, i32 4 - store i8* %8, i8** %9 - %10 = bitcast i64** %offsetD.addr to i8* - %11 = getelementptr i8*, i8** %kernel_args, i32 5 - store i8* %10, i8** %11 - %12 = bitcast i32** %keysD.addr to i8* - %13 = getelementptr i8*, i8** %kernel_args, i32 6 - store i8* %12, i8** %13 - %14 = bitcast %struct.record** %ansD.addr to i8* - %15 = getelementptr i8*, i8** %kernel_args, i32 7 - store i8* %14, i8** %15 - %16 = call i32 @__cudaPopCallConfiguration(%struct.dim3* %grid_dim, %struct.dim3* %block_dim, i64* %shmem_size, i8** %stream) - %17 = load i64, i64* %shmem_size, align 8 - %18 = load i8*, i8** %stream, align 8 - %19 = bitcast { i64, i32 }* %grid_dim.coerce to i8* - %20 = bitcast %struct.dim3* %grid_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %19, i8* align 8 %20, i64 12, i1 false) - %21 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 0 - %22 = load i64, i64* %21, align 8 - %23 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 1 - %24 = load i32, i32* %23, align 8 - %25 = bitcast { i64, i32 }* %block_dim.coerce to i8* - %26 = bitcast %struct.dim3* %block_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %25, i8* align 8 %26, i64 12, i1 false) - %27 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 0 - %28 = load i64, i64* %27, align 8 - %29 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 1 - %30 = load i32, i32* %29, align 8 - %31 = bitcast i8* %18 to %struct.CUstream_st* - %call = call i32 @cudaLaunchKernel(i8* bitcast (void (i64, %struct.knode*, i64, %struct.record*, i64*, i64*, i32*, %struct.record*)* @findK to i8*), i64 %22, i32 %24, i64 %28, i32 %30, i8** %kernel_args, i64 %17, %struct.CUstream_st* %31) - br label %setup.end - -setup.end: ; preds = %entry - ret void -} - -declare dso_local i32 @__cudaPopCallConfiguration(%struct.dim3*, %struct.dim3*, i64*, i8**) - -declare dso_local i32 @cudaLaunchKernel(i8*, i64, i32, i64, i32, i8**, i64, %struct.CUstream_st*) - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1 - -; Function Attrs: noinline optnone uwtable -define dso_local void @kernel_gpu_cuda_wrapper(%struct.record* %records, i64 %records_mem, %struct.knode* %knodes, i64 %knodes_elem, i64 %knodes_mem, i32 %order, i64 %maxheight, i32 %count, i64* %currKnode, i64* %offset, i32* %keys, %struct.record* %ans) #0 { -entry: - %records.addr = alloca %struct.record*, align 8 - %records_mem.addr = alloca i64, align 8 - %knodes.addr = alloca %struct.knode*, align 8 - %knodes_elem.addr = alloca i64, align 8 - %knodes_mem.addr = alloca i64, align 8 - %order.addr = alloca i32, align 4 - %maxheight.addr = alloca i64, align 8 - %count.addr = alloca i32, align 4 - %currKnode.addr = alloca i64*, align 8 - %offset.addr = alloca i64*, align 8 - %keys.addr = alloca i32*, align 8 - %ans.addr = alloca %struct.record*, align 8 - %time0 = alloca i64, align 8 - %time1 = alloca i64, align 8 - %time2 = alloca i64, align 8 - %time3 = alloca i64, align 8 - %time4 = alloca i64, align 8 - %time5 = alloca i64, align 8 - %time6 = alloca i64, align 8 - %numBlocks = alloca i32, align 4 - %threadsPerBlock = alloca i32, align 4 - %recordsD = alloca %struct.record*, align 8 - %knodesD = alloca %struct.knode*, align 8 - %currKnodeD = alloca i64*, align 8 - %offsetD = alloca i64*, align 8 - %keysD = alloca i32*, align 8 - %ansD = alloca %struct.record*, align 8 - %agg.tmp = alloca %struct.dim3, align 4 - %agg.tmp32 = alloca %struct.dim3, align 4 - %agg.tmp.coerce = alloca { i64, i32 }, align 4 - %agg.tmp32.coerce = alloca { i64, i32 }, align 4 - store %struct.record* %records, %struct.record** %records.addr, align 8 - store i64 %records_mem, i64* %records_mem.addr, align 8 - store %struct.knode* %knodes, %struct.knode** %knodes.addr, align 8 - store i64 %knodes_elem, i64* %knodes_elem.addr, align 8 - store i64 %knodes_mem, i64* %knodes_mem.addr, align 8 - store i32 %order, i32* %order.addr, align 4 - store i64 %maxheight, i64* %maxheight.addr, align 8 - store i32 %count, i32* %count.addr, align 4 - store i64* %currKnode, i64** %currKnode.addr, align 8 - store i64* %offset, i64** %offset.addr, align 8 - store i32* %keys, i32** %keys.addr, align 8 - store %struct.record* %ans, %struct.record** %ans.addr, align 8 - %call = call i64 @get_time() - store i64 %call, i64* %time0, align 8 - %call1 = call i32 @cudaThreadSynchronize() - %0 = load i32, i32* %count.addr, align 4 - store i32 %0, i32* %numBlocks, align 4 - %1 = load i32, i32* %order.addr, align 4 - %cmp = icmp slt i32 %1, 1024 - br i1 %cmp, label %cond.true, label %cond.false - -cond.true: ; preds = %entry - %2 = load i32, i32* %order.addr, align 4 - br label %cond.end - -cond.false: ; preds = %entry - br label %cond.end - -cond.end: ; preds = %cond.false, %cond.true - %cond = phi i32 [ %2, %cond.true ], [ 1024, %cond.false ] - store i32 %cond, i32* %threadsPerBlock, align 4 - %3 = load i32, i32* %numBlocks, align 4 - %4 = load i32, i32* %threadsPerBlock, align 4 - %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([75 x i8], [75 x i8]* @.str, i64 0, i64 0), i32 %3, i32 %4) - %call3 = call i64 @get_time() - store i64 %call3, i64* %time1, align 8 - %5 = bitcast %struct.record** %recordsD to i8** - %6 = load i64, i64* %records_mem.addr, align 8 - %call4 = call i32 @cudaMalloc(i8** %5, i64 %6) - call void @checkCUDAError(i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.1, i64 0, i64 0)) - %7 = bitcast %struct.knode** %knodesD to i8** - %8 = load i64, i64* %knodes_mem.addr, align 8 - %call5 = call i32 @cudaMalloc(i8** %7, i64 %8) - call void @checkCUDAError(i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.1, i64 0, i64 0)) - %9 = bitcast i64** %currKnodeD to i8** - %10 = load i32, i32* %count.addr, align 4 - %conv = sext i32 %10 to i64 - %mul = mul i64 %conv, 8 - %call6 = call i32 @cudaMalloc(i8** %9, i64 %mul) - call void @checkCUDAError(i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0)) - %11 = bitcast i64** %offsetD to i8** - %12 = load i32, i32* %count.addr, align 4 - %conv7 = sext i32 %12 to i64 - %mul8 = mul i64 %conv7, 8 - %call9 = call i32 @cudaMalloc(i8** %11, i64 %mul8) - call void @checkCUDAError(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str.3, i64 0, i64 0)) - %13 = bitcast i32** %keysD to i8** - %14 = load i32, i32* %count.addr, align 4 - %conv10 = sext i32 %14 to i64 - %mul11 = mul i64 %conv10, 4 - %call12 = call i32 @cudaMalloc(i8** %13, i64 %mul11) - call void @checkCUDAError(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str.4, i64 0, i64 0)) - %15 = bitcast %struct.record** %ansD to i8** - %16 = load i32, i32* %count.addr, align 4 - %conv13 = sext i32 %16 to i64 - %mul14 = mul i64 %conv13, 4 - %call15 = call i32 @cudaMalloc(i8** %15, i64 %mul14) - call void @checkCUDAError(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.5, i64 0, i64 0)) - %call16 = call i64 @get_time() - store i64 %call16, i64* %time2, align 8 - %17 = load %struct.record*, %struct.record** %recordsD, align 8 - %18 = bitcast %struct.record* %17 to i8* - %19 = load %struct.record*, %struct.record** %records.addr, align 8 - %20 = bitcast %struct.record* %19 to i8* - %21 = load i64, i64* %records_mem.addr, align 8 - %call17 = call i32 @cudaMemcpy(i8* %18, i8* %20, i64 %21, i32 1) - call void @checkCUDAError(i8* getelementptr inbounds ([27 x i8], [27 x i8]* @.str.6, i64 0, i64 0)) - %22 = load %struct.knode*, %struct.knode** %knodesD, align 8 - %23 = bitcast %struct.knode* %22 to i8* - %24 = load %struct.knode*, %struct.knode** %knodes.addr, align 8 - %25 = bitcast %struct.knode* %24 to i8* - %26 = load i64, i64* %knodes_mem.addr, align 8 - %call18 = call i32 @cudaMemcpy(i8* %23, i8* %25, i64 %26, i32 1) - call void @checkCUDAError(i8* getelementptr inbounds ([27 x i8], [27 x i8]* @.str.6, i64 0, i64 0)) - %27 = load i64*, i64** %currKnodeD, align 8 - %28 = bitcast i64* %27 to i8* - %29 = load i64*, i64** %currKnode.addr, align 8 - %30 = bitcast i64* %29 to i8* - %31 = load i32, i32* %count.addr, align 4 - %conv19 = sext i32 %31 to i64 - %mul20 = mul i64 %conv19, 8 - %call21 = call i32 @cudaMemcpy(i8* %28, i8* %30, i64 %mul20, i32 1) - call void @checkCUDAError(i8* getelementptr inbounds ([33 x i8], [33 x i8]* @.str.7, i64 0, i64 0)) - %32 = load i64*, i64** %offsetD, align 8 - %33 = bitcast i64* %32 to i8* - %34 = load i64*, i64** %offset.addr, align 8 - %35 = bitcast i64* %34 to i8* - %36 = load i32, i32* %count.addr, align 4 - %conv22 = sext i32 %36 to i64 - %mul23 = mul i64 %conv22, 8 - %call24 = call i32 @cudaMemcpy(i8* %33, i8* %35, i64 %mul23, i32 1) - call void @checkCUDAError(i8* getelementptr inbounds ([30 x i8], [30 x i8]* @.str.8, i64 0, i64 0)) - %37 = load i32*, i32** %keysD, align 8 - %38 = bitcast i32* %37 to i8* - %39 = load i32*, i32** %keys.addr, align 8 - %40 = bitcast i32* %39 to i8* - %41 = load i32, i32* %count.addr, align 4 - %conv25 = sext i32 %41 to i64 - %mul26 = mul i64 %conv25, 4 - %call27 = call i32 @cudaMemcpy(i8* %38, i8* %40, i64 %mul26, i32 1) - call void @checkCUDAError(i8* getelementptr inbounds ([28 x i8], [28 x i8]* @.str.9, i64 0, i64 0)) - %42 = load %struct.record*, %struct.record** %ansD, align 8 - %43 = bitcast %struct.record* %42 to i8* - %44 = load %struct.record*, %struct.record** %ans.addr, align 8 - %45 = bitcast %struct.record* %44 to i8* - %46 = load i32, i32* %count.addr, align 4 - %conv28 = sext i32 %46 to i64 - %mul29 = mul i64 %conv28, 4 - %call30 = call i32 @cudaMemcpy(i8* %43, i8* %45, i64 %mul29, i32 1) - call void @checkCUDAError(i8* getelementptr inbounds ([27 x i8], [27 x i8]* @.str.10, i64 0, i64 0)) - %call31 = call i64 @get_time() - store i64 %call31, i64* %time3, align 8 - %47 = load i32, i32* %numBlocks, align 4 - call void @_ZN4dim3C2Ejjj(%struct.dim3* %agg.tmp, i32 %47, i32 1, i32 1) - %48 = load i32, i32* %threadsPerBlock, align 4 - call void @_ZN4dim3C2Ejjj(%struct.dim3* %agg.tmp32, i32 %48, i32 1, i32 1) - %49 = bitcast { i64, i32 }* %agg.tmp.coerce to i8* - %50 = bitcast %struct.dim3* %agg.tmp to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %49, i8* align 4 %50, i64 12, i1 false) - %51 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 0 - %52 = load i64, i64* %51, align 4 - %53 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 1 - %54 = load i32, i32* %53, align 4 - %55 = bitcast { i64, i32 }* %agg.tmp32.coerce to i8* - %56 = bitcast %struct.dim3* %agg.tmp32 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %55, i8* align 4 %56, i64 12, i1 false) - %57 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp32.coerce, i32 0, i32 0 - %58 = load i64, i64* %57, align 4 - %59 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp32.coerce, i32 0, i32 1 - %60 = load i32, i32* %59, align 4 - %call33 = call i32 @__cudaPushCallConfiguration(i64 %52, i32 %54, i64 %58, i32 %60, i64 0, i8* null) - %tobool = icmp ne i32 %call33, 0 - br i1 %tobool, label %kcall.end, label %kcall.configok - -kcall.configok: ; preds = %cond.end - %61 = load i64, i64* %maxheight.addr, align 8 - %62 = load %struct.knode*, %struct.knode** %knodesD, align 8 - %63 = load i64, i64* %knodes_elem.addr, align 8 - %64 = load %struct.record*, %struct.record** %recordsD, align 8 - %65 = load i64*, i64** %currKnodeD, align 8 - %66 = load i64*, i64** %offsetD, align 8 - %67 = load i32*, i32** %keysD, align 8 - %68 = load %struct.record*, %struct.record** %ansD, align 8 - call void @findK(i64 %61, %struct.knode* %62, i64 %63, %struct.record* %64, i64* %65, i64* %66, i32* %67, %struct.record* %68) - br label %kcall.end - -kcall.end: ; preds = %kcall.configok, %cond.end - %call34 = call i32 @cudaThreadSynchronize() - call void @checkCUDAError(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.11, i64 0, i64 0)) - %call35 = call i64 @get_time() - store i64 %call35, i64* %time4, align 8 - %69 = load %struct.record*, %struct.record** %ans.addr, align 8 - %70 = bitcast %struct.record* %69 to i8* - %71 = load %struct.record*, %struct.record** %ansD, align 8 - %72 = bitcast %struct.record* %71 to i8* - %73 = load i32, i32* %count.addr, align 4 - %conv36 = sext i32 %73 to i64 - %mul37 = mul i64 %conv36, 4 - %call38 = call i32 @cudaMemcpy(i8* %70, i8* %72, i64 %mul37, i32 2) - call void @checkCUDAError(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.12, i64 0, i64 0)) - %call39 = call i64 @get_time() - store i64 %call39, i64* %time5, align 8 - %74 = load %struct.record*, %struct.record** %recordsD, align 8 - %75 = bitcast %struct.record* %74 to i8* - %call40 = call i32 @cudaFree(i8* %75) - %76 = load %struct.knode*, %struct.knode** %knodesD, align 8 - %77 = bitcast %struct.knode* %76 to i8* - %call41 = call i32 @cudaFree(i8* %77) - %78 = load i64*, i64** %currKnodeD, align 8 - %79 = bitcast i64* %78 to i8* - %call42 = call i32 @cudaFree(i8* %79) - %80 = load i64*, i64** %offsetD, align 8 - %81 = bitcast i64* %80 to i8* - %call43 = call i32 @cudaFree(i8* %81) - %82 = load i32*, i32** %keysD, align 8 - %83 = bitcast i32* %82 to i8* - %call44 = call i32 @cudaFree(i8* %83) - %84 = load %struct.record*, %struct.record** %ansD, align 8 - %85 = bitcast %struct.record* %84 to i8* - %call45 = call i32 @cudaFree(i8* %85) - %call46 = call i64 @get_time() - store i64 %call46, i64* %time6, align 8 - %call47 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([52 x i8], [52 x i8]* @.str.13, i64 0, i64 0)) - %86 = load i64, i64* %time1, align 8 - %87 = load i64, i64* %time0, align 8 - %sub = sub nsw i64 %86, %87 - %conv48 = sitofp i64 %sub to float - %div = fdiv float %conv48, 1.000000e+06 - %conv49 = fpext float %div to double - %88 = load i64, i64* %time1, align 8 - %89 = load i64, i64* %time0, align 8 - %sub50 = sub nsw i64 %88, %89 - %conv51 = sitofp i64 %sub50 to float - %90 = load i64, i64* %time6, align 8 - %91 = load i64, i64* %time0, align 8 - %sub52 = sub nsw i64 %90, %91 - %conv53 = sitofp i64 %sub52 to float - %div54 = fdiv float %conv51, %conv53 - %mul55 = fmul contract float %div54, 1.000000e+02 - %conv56 = fpext float %mul55 to double - %call57 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([54 x i8], [54 x i8]* @.str.14, i64 0, i64 0), double %conv49, double %conv56) - %92 = load i64, i64* %time2, align 8 - %93 = load i64, i64* %time1, align 8 - %sub58 = sub nsw i64 %92, %93 - %conv59 = sitofp i64 %sub58 to float - %div60 = fdiv float %conv59, 1.000000e+06 - %conv61 = fpext float %div60 to double - %94 = load i64, i64* %time2, align 8 - %95 = load i64, i64* %time1, align 8 - %sub62 = sub nsw i64 %94, %95 - %conv63 = sitofp i64 %sub62 to float - %96 = load i64, i64* %time6, align 8 - %97 = load i64, i64* %time0, align 8 - %sub64 = sub nsw i64 %96, %97 - %conv65 = sitofp i64 %sub64 to float - %div66 = fdiv float %conv63, %conv65 - %mul67 = fmul contract float %div66, 1.000000e+02 - %conv68 = fpext float %mul67 to double - %call69 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([37 x i8], [37 x i8]* @.str.15, i64 0, i64 0), double %conv61, double %conv68) - %98 = load i64, i64* %time3, align 8 - %99 = load i64, i64* %time2, align 8 - %sub70 = sub nsw i64 %98, %99 - %conv71 = sitofp i64 %sub70 to float - %div72 = fdiv float %conv71, 1.000000e+06 - %conv73 = fpext float %div72 to double - %100 = load i64, i64* %time3, align 8 - %101 = load i64, i64* %time2, align 8 - %sub74 = sub nsw i64 %100, %101 - %conv75 = sitofp i64 %sub74 to float - %102 = load i64, i64* %time6, align 8 - %103 = load i64, i64* %time0, align 8 - %sub76 = sub nsw i64 %102, %103 - %conv77 = sitofp i64 %sub76 to float - %div78 = fdiv float %conv75, %conv77 - %mul79 = fmul contract float %div78, 1.000000e+02 - %conv80 = fpext float %mul79 to double - %call81 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([41 x i8], [41 x i8]* @.str.16, i64 0, i64 0), double %conv73, double %conv80) - %104 = load i64, i64* %time4, align 8 - %105 = load i64, i64* %time3, align 8 - %sub82 = sub nsw i64 %104, %105 - %conv83 = sitofp i64 %sub82 to float - %div84 = fdiv float %conv83, 1.000000e+06 - %conv85 = fpext float %div84 to double - %106 = load i64, i64* %time4, align 8 - %107 = load i64, i64* %time3, align 8 - %sub86 = sub nsw i64 %106, %107 - %conv87 = sitofp i64 %sub86 to float - %108 = load i64, i64* %time6, align 8 - %109 = load i64, i64* %time0, align 8 - %sub88 = sub nsw i64 %108, %109 - %conv89 = sitofp i64 %sub88 to float - %div90 = fdiv float %conv87, %conv89 - %mul91 = fmul contract float %div90, 1.000000e+02 - %conv92 = fpext float %mul91 to double - %call93 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str.17, i64 0, i64 0), double %conv85, double %conv92) - %110 = load i64, i64* %time5, align 8 - %111 = load i64, i64* %time4, align 8 - %sub94 = sub nsw i64 %110, %111 - %conv95 = sitofp i64 %sub94 to float - %div96 = fdiv float %conv95, 1.000000e+06 - %conv97 = fpext float %div96 to double - %112 = load i64, i64* %time5, align 8 - %113 = load i64, i64* %time4, align 8 - %sub98 = sub nsw i64 %112, %113 - %conv99 = sitofp i64 %sub98 to float - %114 = load i64, i64* %time6, align 8 - %115 = load i64, i64* %time0, align 8 - %sub100 = sub nsw i64 %114, %115 - %conv101 = sitofp i64 %sub100 to float - %div102 = fdiv float %conv99, %conv101 - %mul103 = fmul contract float %div102, 1.000000e+02 - %conv104 = fpext float %mul103 to double - %call105 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str.18, i64 0, i64 0), double %conv97, double %conv104) - %116 = load i64, i64* %time6, align 8 - %117 = load i64, i64* %time5, align 8 - %sub106 = sub nsw i64 %116, %117 - %conv107 = sitofp i64 %sub106 to float - %div108 = fdiv float %conv107, 1.000000e+06 - %conv109 = fpext float %div108 to double - %118 = load i64, i64* %time6, align 8 - %119 = load i64, i64* %time5, align 8 - %sub110 = sub nsw i64 %118, %119 - %conv111 = sitofp i64 %sub110 to float - %120 = load i64, i64* %time6, align 8 - %121 = load i64, i64* %time0, align 8 - %sub112 = sub nsw i64 %120, %121 - %conv113 = sitofp i64 %sub112 to float - %div114 = fdiv float %conv111, %conv113 - %mul115 = fmul contract float %div114, 1.000000e+02 - %conv116 = fpext float %mul115 to double - %call117 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([37 x i8], [37 x i8]* @.str.19, i64 0, i64 0), double %conv109, double %conv116) - %call118 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str.20, i64 0, i64 0)) - %122 = load i64, i64* %time6, align 8 - %123 = load i64, i64* %time0, align 8 - %sub119 = sub nsw i64 %122, %123 - %conv120 = sitofp i64 %sub119 to float - %div121 = fdiv float %conv120, 1.000000e+06 - %conv122 = fpext float %div121 to double - %call123 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.21, i64 0, i64 0), double %conv122) - ret void -} - -declare dso_local i64 @get_time() #2 - -declare dso_local i32 @cudaThreadSynchronize() #2 - -declare dso_local i32 @printf(i8*, ...) #2 - -declare dso_local i32 @cudaMalloc(i8**, i64) #2 - -declare dso_local void @checkCUDAError(i8*) #2 - -declare dso_local i32 @cudaMemcpy(i8*, i8*, i64, i32) #2 - -declare dso_local i32 @__cudaPushCallConfiguration(i64, i32, i64, i32, i64, i8*) #2 - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN4dim3C2Ejjj(%struct.dim3* %this, i32 %vx, i32 %vy, i32 %vz) unnamed_addr #3 comdat align 2 { -entry: - %this.addr = alloca %struct.dim3*, align 8 - %vx.addr = alloca i32, align 4 - %vy.addr = alloca i32, align 4 - %vz.addr = alloca i32, align 4 - store %struct.dim3* %this, %struct.dim3** %this.addr, align 8 - store i32 %vx, i32* %vx.addr, align 4 - store i32 %vy, i32* %vy.addr, align 4 - store i32 %vz, i32* %vz.addr, align 4 - %this1 = load %struct.dim3*, %struct.dim3** %this.addr, align 8 - %x = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 0 - %0 = load i32, i32* %vx.addr, align 4 - store i32 %0, i32* %x, align 4 - %y = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 1 - %1 = load i32, i32* %vy.addr, align 4 - store i32 %1, i32* %y, align 4 - %z = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 2 - %2 = load i32, i32* %vz.addr, align 4 - store i32 %2, i32* %z, align 4 - ret void -} - -declare dso_local i32 @cudaFree(i8*) #2 - -define internal void @__cuda_register_globals(i8** %0) { -entry: - %1 = call i32 @__cudaRegisterFunction(i8** %0, i8* bitcast (void (i64, %struct.knode*, i64, %struct.record*, i64*, i64*, i32*, %struct.record*)* @findK to i8*), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.11, i64 0, i64 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.11, i64 0, i64 0), i32 -1, i8* null, i8* null, i8* null, i8* null, i32* null) - ret void -} - -declare dso_local i32 @__cudaRegisterFunction(i8**, i8*, i8*, i8*, i32, i8*, i8*, i8*, i8*, i32*) - -declare dso_local i32 @__cudaRegisterVar(i8**, i8*, i8*, i8*, i32, i32, i32, i32) - -declare dso_local i8** @__cudaRegisterFatBinary(i8*) - -define internal void @__cuda_module_ctor(i8* %0) { -entry: - %1 = call i8** @__cudaRegisterFatBinary(i8* bitcast ({ i32, i32, i8*, i8* }* @__cuda_fatbin_wrapper to i8*)) - store i8** %1, i8*** @__cuda_gpubin_handle, align 8 - call void @__cuda_register_globals(i8** %1) - call void @__cudaRegisterFatBinaryEnd(i8** %1) - %2 = call i32 @atexit(void (i8*)* @__cuda_module_dtor) - ret void -} - -declare dso_local void @__cudaRegisterFatBinaryEnd(i8**) - -declare dso_local void @__cudaUnregisterFatBinary(i8**) - -define internal void @__cuda_module_dtor(i8* %0) { -entry: - %1 = load i8**, i8*** @__cuda_gpubin_handle, align 8 - call void @__cudaUnregisterFatBinary(i8** %1) - ret void -} - -declare dso_local i32 @atexit(void (i8*)*) - -attributes #0 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { argmemonly nounwind willreturn } -attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #3 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]} -!1 = !{i32 1, !"wchar_size", i32 4} -!2 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"} diff --git a/examples/btree/kernel_gpu_cuda_wrapper_2-cuda-nvptx64-nvidia-cuda-sm_61.ll b/examples/btree/kernel_gpu_cuda_wrapper_2-cuda-nvptx64-nvidia-cuda-sm_61.ll deleted file mode 100644 index 4247f06..0000000 --- a/examples/btree/kernel_gpu_cuda_wrapper_2-cuda-nvptx64-nvidia-cuda-sm_61.ll +++ /dev/null @@ -1,475 +0,0 @@ -; ModuleID = 'kernel_gpu_cuda_wrapper_2-cuda-nvptx64-nvidia-cuda-sm_61.bc' -source_filename = "kernel/kernel_gpu_cuda_wrapper_2.cu" -target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" -target triple = "nvptx64-nvidia-cuda" - -%struct.__cuda_builtin_threadIdx_t = type { i8 } -%struct.__cuda_builtin_blockIdx_t = type { i8 } -%struct.cudaFuncAttributes = type { i64, i64, i64, i32, i32, i32, i32, i32, i32, i32 } -%struct.knode = type { i32, [257 x i32], [257 x i32], i8, i32 } - -$_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv = comdat any - -$_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv = comdat any - -@threadIdx = extern_weak dso_local addrspace(1) global %struct.__cuda_builtin_threadIdx_t, align 1 -@blockIdx = extern_weak dso_local addrspace(1) global %struct.__cuda_builtin_blockIdx_t, align 1 - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaMalloc(i8** %p, i64 %s) #0 { -entry: - %p.addr = alloca i8**, align 8 - %s.addr = alloca i64, align 8 - store i8** %p, i8*** %p.addr, align 8 - store i64 %s, i64* %s.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaFuncGetAttributes(%struct.cudaFuncAttributes* %p, i8* %c) #0 { -entry: - %p.addr = alloca %struct.cudaFuncAttributes*, align 8 - %c.addr = alloca i8*, align 8 - store %struct.cudaFuncAttributes* %p, %struct.cudaFuncAttributes** %p.addr, align 8 - store i8* %c, i8** %c.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaDeviceGetAttribute(i32* %value, i32 %attr, i32 %device) #0 { -entry: - %value.addr = alloca i32*, align 8 - %attr.addr = alloca i32, align 4 - %device.addr = alloca i32, align 4 - store i32* %value, i32** %value.addr, align 8 - store i32 %attr, i32* %attr.addr, align 4 - store i32 %device, i32* %device.addr, align 4 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaGetDevice(i32* %device) #0 { -entry: - %device.addr = alloca i32*, align 8 - store i32* %device, i32** %device.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaOccupancyMaxActiveBlocksPerMultiprocessor(i32* %numBlocks, i8* %func, i32 %blockSize, i64 %dynamicSmemSize) #0 { -entry: - %numBlocks.addr = alloca i32*, align 8 - %func.addr = alloca i8*, align 8 - %blockSize.addr = alloca i32, align 4 - %dynamicSmemSize.addr = alloca i64, align 8 - store i32* %numBlocks, i32** %numBlocks.addr, align 8 - store i8* %func, i8** %func.addr, align 8 - store i32 %blockSize, i32* %blockSize.addr, align 4 - store i64 %dynamicSmemSize, i64* %dynamicSmemSize.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(i32* %numBlocks, i8* %func, i32 %blockSize, i64 %dynamicSmemSize, i32 %flags) #0 { -entry: - %numBlocks.addr = alloca i32*, align 8 - %func.addr = alloca i8*, align 8 - %blockSize.addr = alloca i32, align 4 - %dynamicSmemSize.addr = alloca i64, align 8 - %flags.addr = alloca i32, align 4 - store i32* %numBlocks, i32** %numBlocks.addr, align 8 - store i8* %func, i8** %func.addr, align 8 - store i32 %blockSize, i32* %blockSize.addr, align 4 - store i64 %dynamicSmemSize, i64* %dynamicSmemSize.addr, align 8 - store i32 %flags, i32* %flags.addr, align 4 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define dso_local void @findRangeK(i64 %height, %struct.knode* %knodesD, i64 %knodes_elem, i64* %currKnodeD, i64* %offsetD, i64* %lastKnodeD, i64* %offset_2D, i32* %startD, i32* %endD, i32* %RecstartD, i32* %ReclenD) #0 { -entry: - %height.addr = alloca i64, align 8 - %knodesD.addr = alloca %struct.knode*, align 8 - %knodes_elem.addr = alloca i64, align 8 - %currKnodeD.addr = alloca i64*, align 8 - %offsetD.addr = alloca i64*, align 8 - %lastKnodeD.addr = alloca i64*, align 8 - %offset_2D.addr = alloca i64*, align 8 - %startD.addr = alloca i32*, align 8 - %endD.addr = alloca i32*, align 8 - %RecstartD.addr = alloca i32*, align 8 - %ReclenD.addr = alloca i32*, align 8 - %thid = alloca i32, align 4 - %bid = alloca i32, align 4 - %i = alloca i32, align 4 - store i64 %height, i64* %height.addr, align 8 - store %struct.knode* %knodesD, %struct.knode** %knodesD.addr, align 8 - store i64 %knodes_elem, i64* %knodes_elem.addr, align 8 - store i64* %currKnodeD, i64** %currKnodeD.addr, align 8 - store i64* %offsetD, i64** %offsetD.addr, align 8 - store i64* %lastKnodeD, i64** %lastKnodeD.addr, align 8 - store i64* %offset_2D, i64** %offset_2D.addr, align 8 - store i32* %startD, i32** %startD.addr, align 8 - store i32* %endD, i32** %endD.addr, align 8 - store i32* %RecstartD, i32** %RecstartD.addr, align 8 - store i32* %ReclenD, i32** %ReclenD.addr, align 8 - %call = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - store i32 %call, i32* %thid, align 4 - %call1 = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #2 - store i32 %call1, i32* %bid, align 4 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = load i32, i32* %i, align 4 - %conv = sext i32 %0 to i64 - %1 = load i64, i64* %height.addr, align 8 - %cmp = icmp slt i64 %conv, %1 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %2 = load %struct.knode*, %struct.knode** %knodesD.addr, align 8 - %3 = load i64*, i64** %currKnodeD.addr, align 8 - %4 = load i32, i32* %bid, align 4 - %idxprom = sext i32 %4 to i64 - %arrayidx = getelementptr inbounds i64, i64* %3, i64 %idxprom - %5 = load i64, i64* %arrayidx, align 8 - %arrayidx2 = getelementptr inbounds %struct.knode, %struct.knode* %2, i64 %5 - %keys = getelementptr inbounds %struct.knode, %struct.knode* %arrayidx2, i32 0, i32 2 - %6 = load i32, i32* %thid, align 4 - %idxprom3 = sext i32 %6 to i64 - %arrayidx4 = getelementptr inbounds [257 x i32], [257 x i32]* %keys, i64 0, i64 %idxprom3 - %7 = load i32, i32* %arrayidx4, align 4 - %8 = load i32*, i32** %startD.addr, align 8 - %9 = load i32, i32* %bid, align 4 - %idxprom5 = sext i32 %9 to i64 - %arrayidx6 = getelementptr inbounds i32, i32* %8, i64 %idxprom5 - %10 = load i32, i32* %arrayidx6, align 4 - %cmp7 = icmp sle i32 %7, %10 - br i1 %cmp7, label %land.lhs.true, label %if.end34 - -land.lhs.true: ; preds = %for.body - %11 = load %struct.knode*, %struct.knode** %knodesD.addr, align 8 - %12 = load i64*, i64** %currKnodeD.addr, align 8 - %13 = load i32, i32* %bid, align 4 - %idxprom8 = sext i32 %13 to i64 - %arrayidx9 = getelementptr inbounds i64, i64* %12, i64 %idxprom8 - %14 = load i64, i64* %arrayidx9, align 8 - %arrayidx10 = getelementptr inbounds %struct.knode, %struct.knode* %11, i64 %14 - %keys11 = getelementptr inbounds %struct.knode, %struct.knode* %arrayidx10, i32 0, i32 2 - %15 = load i32, i32* %thid, align 4 - %add = add nsw i32 %15, 1 - %idxprom12 = sext i32 %add to i64 - %arrayidx13 = getelementptr inbounds [257 x i32], [257 x i32]* %keys11, i64 0, i64 %idxprom12 - %16 = load i32, i32* %arrayidx13, align 4 - %17 = load i32*, i32** %startD.addr, align 8 - %18 = load i32, i32* %bid, align 4 - %idxprom14 = sext i32 %18 to i64 - %arrayidx15 = getelementptr inbounds i32, i32* %17, i64 %idxprom14 - %19 = load i32, i32* %arrayidx15, align 4 - %cmp16 = icmp sgt i32 %16, %19 - br i1 %cmp16, label %if.then, label %if.end34 - -if.then: ; preds = %land.lhs.true - %20 = load %struct.knode*, %struct.knode** %knodesD.addr, align 8 - %21 = load i64*, i64** %currKnodeD.addr, align 8 - %22 = load i32, i32* %bid, align 4 - %idxprom17 = sext i32 %22 to i64 - %arrayidx18 = getelementptr inbounds i64, i64* %21, i64 %idxprom17 - %23 = load i64, i64* %arrayidx18, align 8 - %arrayidx19 = getelementptr inbounds %struct.knode, %struct.knode* %20, i64 %23 - %indices = getelementptr inbounds %struct.knode, %struct.knode* %arrayidx19, i32 0, i32 1 - %24 = load i32, i32* %thid, align 4 - %idxprom20 = sext i32 %24 to i64 - %arrayidx21 = getelementptr inbounds [257 x i32], [257 x i32]* %indices, i64 0, i64 %idxprom20 - %25 = load i32, i32* %arrayidx21, align 4 - %conv22 = sext i32 %25 to i64 - %26 = load i64, i64* %knodes_elem.addr, align 8 - %cmp23 = icmp slt i64 %conv22, %26 - br i1 %cmp23, label %if.then24, label %if.end - -if.then24: ; preds = %if.then - %27 = load %struct.knode*, %struct.knode** %knodesD.addr, align 8 - %28 = load i64*, i64** %currKnodeD.addr, align 8 - %29 = load i32, i32* %bid, align 4 - %idxprom25 = sext i32 %29 to i64 - %arrayidx26 = getelementptr inbounds i64, i64* %28, i64 %idxprom25 - %30 = load i64, i64* %arrayidx26, align 8 - %arrayidx27 = getelementptr inbounds %struct.knode, %struct.knode* %27, i64 %30 - %indices28 = getelementptr inbounds %struct.knode, %struct.knode* %arrayidx27, i32 0, i32 1 - %31 = load i32, i32* %thid, align 4 - %idxprom29 = sext i32 %31 to i64 - %arrayidx30 = getelementptr inbounds [257 x i32], [257 x i32]* %indices28, i64 0, i64 %idxprom29 - %32 = load i32, i32* %arrayidx30, align 4 - %conv31 = sext i32 %32 to i64 - %33 = load i64*, i64** %offsetD.addr, align 8 - %34 = load i32, i32* %bid, align 4 - %idxprom32 = sext i32 %34 to i64 - %arrayidx33 = getelementptr inbounds i64, i64* %33, i64 %idxprom32 - store i64 %conv31, i64* %arrayidx33, align 8 - br label %if.end - -if.end: ; preds = %if.then24, %if.then - br label %if.end34 - -if.end34: ; preds = %if.end, %land.lhs.true, %for.body - %35 = load %struct.knode*, %struct.knode** %knodesD.addr, align 8 - %36 = load i64*, i64** %lastKnodeD.addr, align 8 - %37 = load i32, i32* %bid, align 4 - %idxprom35 = sext i32 %37 to i64 - %arrayidx36 = getelementptr inbounds i64, i64* %36, i64 %idxprom35 - %38 = load i64, i64* %arrayidx36, align 8 - %arrayidx37 = getelementptr inbounds %struct.knode, %struct.knode* %35, i64 %38 - %keys38 = getelementptr inbounds %struct.knode, %struct.knode* %arrayidx37, i32 0, i32 2 - %39 = load i32, i32* %thid, align 4 - %idxprom39 = sext i32 %39 to i64 - %arrayidx40 = getelementptr inbounds [257 x i32], [257 x i32]* %keys38, i64 0, i64 %idxprom39 - %40 = load i32, i32* %arrayidx40, align 4 - %41 = load i32*, i32** %endD.addr, align 8 - %42 = load i32, i32* %bid, align 4 - %idxprom41 = sext i32 %42 to i64 - %arrayidx42 = getelementptr inbounds i32, i32* %41, i64 %idxprom41 - %43 = load i32, i32* %arrayidx42, align 4 - %cmp43 = icmp sle i32 %40, %43 - br i1 %cmp43, label %land.lhs.true44, label %if.end75 - -land.lhs.true44: ; preds = %if.end34 - %44 = load %struct.knode*, %struct.knode** %knodesD.addr, align 8 - %45 = load i64*, i64** %lastKnodeD.addr, align 8 - %46 = load i32, i32* %bid, align 4 - %idxprom45 = sext i32 %46 to i64 - %arrayidx46 = getelementptr inbounds i64, i64* %45, i64 %idxprom45 - %47 = load i64, i64* %arrayidx46, align 8 - %arrayidx47 = getelementptr inbounds %struct.knode, %struct.knode* %44, i64 %47 - %keys48 = getelementptr inbounds %struct.knode, %struct.knode* %arrayidx47, i32 0, i32 2 - %48 = load i32, i32* %thid, align 4 - %add49 = add nsw i32 %48, 1 - %idxprom50 = sext i32 %add49 to i64 - %arrayidx51 = getelementptr inbounds [257 x i32], [257 x i32]* %keys48, i64 0, i64 %idxprom50 - %49 = load i32, i32* %arrayidx51, align 4 - %50 = load i32*, i32** %endD.addr, align 8 - %51 = load i32, i32* %bid, align 4 - %idxprom52 = sext i32 %51 to i64 - %arrayidx53 = getelementptr inbounds i32, i32* %50, i64 %idxprom52 - %52 = load i32, i32* %arrayidx53, align 4 - %cmp54 = icmp sgt i32 %49, %52 - br i1 %cmp54, label %if.then55, label %if.end75 - -if.then55: ; preds = %land.lhs.true44 - %53 = load %struct.knode*, %struct.knode** %knodesD.addr, align 8 - %54 = load i64*, i64** %lastKnodeD.addr, align 8 - %55 = load i32, i32* %bid, align 4 - %idxprom56 = sext i32 %55 to i64 - %arrayidx57 = getelementptr inbounds i64, i64* %54, i64 %idxprom56 - %56 = load i64, i64* %arrayidx57, align 8 - %arrayidx58 = getelementptr inbounds %struct.knode, %struct.knode* %53, i64 %56 - %indices59 = getelementptr inbounds %struct.knode, %struct.knode* %arrayidx58, i32 0, i32 1 - %57 = load i32, i32* %thid, align 4 - %idxprom60 = sext i32 %57 to i64 - %arrayidx61 = getelementptr inbounds [257 x i32], [257 x i32]* %indices59, i64 0, i64 %idxprom60 - %58 = load i32, i32* %arrayidx61, align 4 - %conv62 = sext i32 %58 to i64 - %59 = load i64, i64* %knodes_elem.addr, align 8 - %cmp63 = icmp slt i64 %conv62, %59 - br i1 %cmp63, label %if.then64, label %if.end74 - -if.then64: ; preds = %if.then55 - %60 = load %struct.knode*, %struct.knode** %knodesD.addr, align 8 - %61 = load i64*, i64** %lastKnodeD.addr, align 8 - %62 = load i32, i32* %bid, align 4 - %idxprom65 = sext i32 %62 to i64 - %arrayidx66 = getelementptr inbounds i64, i64* %61, i64 %idxprom65 - %63 = load i64, i64* %arrayidx66, align 8 - %arrayidx67 = getelementptr inbounds %struct.knode, %struct.knode* %60, i64 %63 - %indices68 = getelementptr inbounds %struct.knode, %struct.knode* %arrayidx67, i32 0, i32 1 - %64 = load i32, i32* %thid, align 4 - %idxprom69 = sext i32 %64 to i64 - %arrayidx70 = getelementptr inbounds [257 x i32], [257 x i32]* %indices68, i64 0, i64 %idxprom69 - %65 = load i32, i32* %arrayidx70, align 4 - %conv71 = sext i32 %65 to i64 - %66 = load i64*, i64** %offset_2D.addr, align 8 - %67 = load i32, i32* %bid, align 4 - %idxprom72 = sext i32 %67 to i64 - %arrayidx73 = getelementptr inbounds i64, i64* %66, i64 %idxprom72 - store i64 %conv71, i64* %arrayidx73, align 8 - br label %if.end74 - -if.end74: ; preds = %if.then64, %if.then55 - br label %if.end75 - -if.end75: ; preds = %if.end74, %land.lhs.true44, %if.end34 - call void @llvm.nvvm.barrier0() - %68 = load i32, i32* %thid, align 4 - %cmp76 = icmp eq i32 %68, 0 - br i1 %cmp76, label %if.then77, label %if.end86 - -if.then77: ; preds = %if.end75 - %69 = load i64*, i64** %offsetD.addr, align 8 - %70 = load i32, i32* %bid, align 4 - %idxprom78 = sext i32 %70 to i64 - %arrayidx79 = getelementptr inbounds i64, i64* %69, i64 %idxprom78 - %71 = load i64, i64* %arrayidx79, align 8 - %72 = load i64*, i64** %currKnodeD.addr, align 8 - %73 = load i32, i32* %bid, align 4 - %idxprom80 = sext i32 %73 to i64 - %arrayidx81 = getelementptr inbounds i64, i64* %72, i64 %idxprom80 - store i64 %71, i64* %arrayidx81, align 8 - %74 = load i64*, i64** %offset_2D.addr, align 8 - %75 = load i32, i32* %bid, align 4 - %idxprom82 = sext i32 %75 to i64 - %arrayidx83 = getelementptr inbounds i64, i64* %74, i64 %idxprom82 - %76 = load i64, i64* %arrayidx83, align 8 - %77 = load i64*, i64** %lastKnodeD.addr, align 8 - %78 = load i32, i32* %bid, align 4 - %idxprom84 = sext i32 %78 to i64 - %arrayidx85 = getelementptr inbounds i64, i64* %77, i64 %idxprom84 - store i64 %76, i64* %arrayidx85, align 8 - br label %if.end86 - -if.end86: ; preds = %if.then77, %if.end75 - call void @llvm.nvvm.barrier0() - br label %for.inc - -for.inc: ; preds = %if.end86 - %79 = load i32, i32* %i, align 4 - %inc = add nsw i32 %79, 1 - store i32 %inc, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %80 = load %struct.knode*, %struct.knode** %knodesD.addr, align 8 - %81 = load i64*, i64** %currKnodeD.addr, align 8 - %82 = load i32, i32* %bid, align 4 - %idxprom87 = sext i32 %82 to i64 - %arrayidx88 = getelementptr inbounds i64, i64* %81, i64 %idxprom87 - %83 = load i64, i64* %arrayidx88, align 8 - %arrayidx89 = getelementptr inbounds %struct.knode, %struct.knode* %80, i64 %83 - %keys90 = getelementptr inbounds %struct.knode, %struct.knode* %arrayidx89, i32 0, i32 2 - %84 = load i32, i32* %thid, align 4 - %idxprom91 = sext i32 %84 to i64 - %arrayidx92 = getelementptr inbounds [257 x i32], [257 x i32]* %keys90, i64 0, i64 %idxprom91 - %85 = load i32, i32* %arrayidx92, align 4 - %86 = load i32*, i32** %startD.addr, align 8 - %87 = load i32, i32* %bid, align 4 - %idxprom93 = sext i32 %87 to i64 - %arrayidx94 = getelementptr inbounds i32, i32* %86, i64 %idxprom93 - %88 = load i32, i32* %arrayidx94, align 4 - %cmp95 = icmp eq i32 %85, %88 - br i1 %cmp95, label %if.then96, label %if.end105 - -if.then96: ; preds = %for.end - %89 = load %struct.knode*, %struct.knode** %knodesD.addr, align 8 - %90 = load i64*, i64** %currKnodeD.addr, align 8 - %91 = load i32, i32* %bid, align 4 - %idxprom97 = sext i32 %91 to i64 - %arrayidx98 = getelementptr inbounds i64, i64* %90, i64 %idxprom97 - %92 = load i64, i64* %arrayidx98, align 8 - %arrayidx99 = getelementptr inbounds %struct.knode, %struct.knode* %89, i64 %92 - %indices100 = getelementptr inbounds %struct.knode, %struct.knode* %arrayidx99, i32 0, i32 1 - %93 = load i32, i32* %thid, align 4 - %idxprom101 = sext i32 %93 to i64 - %arrayidx102 = getelementptr inbounds [257 x i32], [257 x i32]* %indices100, i64 0, i64 %idxprom101 - %94 = load i32, i32* %arrayidx102, align 4 - %95 = load i32*, i32** %RecstartD.addr, align 8 - %96 = load i32, i32* %bid, align 4 - %idxprom103 = sext i32 %96 to i64 - %arrayidx104 = getelementptr inbounds i32, i32* %95, i64 %idxprom103 - store i32 %94, i32* %arrayidx104, align 4 - br label %if.end105 - -if.end105: ; preds = %if.then96, %for.end - call void @llvm.nvvm.barrier0() - %97 = load %struct.knode*, %struct.knode** %knodesD.addr, align 8 - %98 = load i64*, i64** %lastKnodeD.addr, align 8 - %99 = load i32, i32* %bid, align 4 - %idxprom106 = sext i32 %99 to i64 - %arrayidx107 = getelementptr inbounds i64, i64* %98, i64 %idxprom106 - %100 = load i64, i64* %arrayidx107, align 8 - %arrayidx108 = getelementptr inbounds %struct.knode, %struct.knode* %97, i64 %100 - %keys109 = getelementptr inbounds %struct.knode, %struct.knode* %arrayidx108, i32 0, i32 2 - %101 = load i32, i32* %thid, align 4 - %idxprom110 = sext i32 %101 to i64 - %arrayidx111 = getelementptr inbounds [257 x i32], [257 x i32]* %keys109, i64 0, i64 %idxprom110 - %102 = load i32, i32* %arrayidx111, align 4 - %103 = load i32*, i32** %endD.addr, align 8 - %104 = load i32, i32* %bid, align 4 - %idxprom112 = sext i32 %104 to i64 - %arrayidx113 = getelementptr inbounds i32, i32* %103, i64 %idxprom112 - %105 = load i32, i32* %arrayidx113, align 4 - %cmp114 = icmp eq i32 %102, %105 - br i1 %cmp114, label %if.then115, label %if.end127 - -if.then115: ; preds = %if.end105 - %106 = load %struct.knode*, %struct.knode** %knodesD.addr, align 8 - %107 = load i64*, i64** %lastKnodeD.addr, align 8 - %108 = load i32, i32* %bid, align 4 - %idxprom116 = sext i32 %108 to i64 - %arrayidx117 = getelementptr inbounds i64, i64* %107, i64 %idxprom116 - %109 = load i64, i64* %arrayidx117, align 8 - %arrayidx118 = getelementptr inbounds %struct.knode, %struct.knode* %106, i64 %109 - %indices119 = getelementptr inbounds %struct.knode, %struct.knode* %arrayidx118, i32 0, i32 1 - %110 = load i32, i32* %thid, align 4 - %idxprom120 = sext i32 %110 to i64 - %arrayidx121 = getelementptr inbounds [257 x i32], [257 x i32]* %indices119, i64 0, i64 %idxprom120 - %111 = load i32, i32* %arrayidx121, align 4 - %112 = load i32*, i32** %RecstartD.addr, align 8 - %113 = load i32, i32* %bid, align 4 - %idxprom122 = sext i32 %113 to i64 - %arrayidx123 = getelementptr inbounds i32, i32* %112, i64 %idxprom122 - %114 = load i32, i32* %arrayidx123, align 4 - %sub = sub nsw i32 %111, %114 - %add124 = add nsw i32 %sub, 1 - %115 = load i32*, i32** %ReclenD.addr, align 8 - %116 = load i32, i32* %bid, align 4 - %idxprom125 = sext i32 %116 to i64 - %arrayidx126 = getelementptr inbounds i32, i32* %115, i64 %idxprom125 - store i32 %add124, i32* %arrayidx126, align 4 - br label %if.end127 - -if.end127: ; preds = %if.then115, %if.end105 - ret void -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() - ret i32 %0 -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() - ret i32 %0 -} - -; Function Attrs: convergent nounwind -declare void @llvm.nvvm.barrier0() #2 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() #3 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() #3 - -attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_61" "target-features"="+ptx64,+sm_61" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { alwaysinline convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_61" "target-features"="+ptx64,+sm_61" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { convergent nounwind } -attributes #3 = { nounwind readnone } - -!llvm.module.flags = !{!0, !1, !2} -!nvvm.annotations = !{!3, !4, !5, !4, !6, !6, !6, !6, !7, !7, !6} -!llvm.ident = !{!8} -!nvvmir.version = !{!9} - -!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]} -!1 = !{i32 1, !"wchar_size", i32 4} -!2 = !{i32 4, !"nvvm-reflect-ftz", i32 0} -!3 = !{void (i64, %struct.knode*, i64, i64*, i64*, i64*, i64*, i32*, i32*, i32*, i32*)* @findRangeK, !"kernel", i32 1} -!4 = !{null, !"align", i32 8} -!5 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080} -!6 = !{null, !"align", i32 16} -!7 = !{null, !"align", i32 16, !"align", i32 65552, !"align", i32 131088} -!8 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"} -!9 = !{i32 1, i32 4} diff --git a/examples/btree/kernel_gpu_cuda_wrapper_2-host-x86_64-unknown-linux-gnu.ll b/examples/btree/kernel_gpu_cuda_wrapper_2-host-x86_64-unknown-linux-gnu.ll deleted file mode 100644 index a10890a..0000000 --- a/examples/btree/kernel_gpu_cuda_wrapper_2-host-x86_64-unknown-linux-gnu.ll +++ /dev/null @@ -1,651 +0,0 @@ -; ModuleID = 'kernel_gpu_cuda_wrapper_2-host-x86_64-unknown-linux-gnu.bc' -source_filename = "kernel/kernel_gpu_cuda_wrapper_2.cu" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -%struct.knode = type { i32, [257 x i32], [257 x i32], i8, i32 } -%struct.dim3 = type { i32, i32, i32 } -%struct.CUstream_st = type opaque - -$_ZN4dim3C2Ejjj = comdat any - -@.str = private unnamed_addr constant [75 x i8] c"# of blocks = %d, # of threads/block = %d (ensure that device can handle)\0A\00", align 1 -@.str.1 = private unnamed_addr constant [21 x i8] c"cudaMalloc recordsD\00", align 1 -@.str.2 = private unnamed_addr constant [23 x i8] c"cudaMalloc currKnodeD\00", align 1 -@.str.3 = private unnamed_addr constant [20 x i8] c"cudaMalloc offsetD\00", align 1 -@.str.4 = private unnamed_addr constant [23 x i8] c"cudaMalloc lastKnodeD\00", align 1 -@.str.5 = private unnamed_addr constant [22 x i8] c"cudaMalloc offset_2D\00", align 1 -@.str.6 = private unnamed_addr constant [18 x i8] c"cudaMalloc startD\00", align 1 -@.str.7 = private unnamed_addr constant [16 x i8] c"cudaMalloc endD\00", align 1 -@.str.8 = private unnamed_addr constant [21 x i8] c"cudaMalloc ansDStart\00", align 1 -@.str.9 = private unnamed_addr constant [22 x i8] c"cudaMalloc ansDLength\00", align 1 -@.str.10 = private unnamed_addr constant [27 x i8] c"cudaMalloc cudaMemcpy memD\00", align 1 -@.str.11 = private unnamed_addr constant [33 x i8] c"cudaMalloc cudaMemcpy currKnodeD\00", align 1 -@.str.12 = private unnamed_addr constant [30 x i8] c"cudaMalloc cudaMemcpy offsetD\00", align 1 -@.str.13 = private unnamed_addr constant [33 x i8] c"cudaMalloc cudaMemcpy lastKnodeD\00", align 1 -@.str.14 = private unnamed_addr constant [32 x i8] c"cudaMalloc cudaMemcpy offset_2D\00", align 1 -@.str.15 = private unnamed_addr constant [18 x i8] c"cudaMemcpy startD\00", align 1 -@.str.16 = private unnamed_addr constant [16 x i8] c"cudaMemcpy endD\00", align 1 -@.str.17 = private unnamed_addr constant [21 x i8] c"cudaMemcpy ansDStart\00", align 1 -@.str.18 = private unnamed_addr constant [22 x i8] c"cudaMemcpy ansDLength\00", align 1 -@.str.19 = private unnamed_addr constant [11 x i8] c"findRangeK\00", align 1 -@.str.20 = private unnamed_addr constant [52 x i8] c"Time spent in different stages of GPU_CUDA KERNEL:\0A\00", align 1 -@.str.21 = private unnamed_addr constant [54 x i8] c"%15.12f s, %15.12f % : GPU: SET DEVICE / DRIVER INIT\0A\00", align 1 -@.str.22 = private unnamed_addr constant [37 x i8] c"%15.12f s, %15.12f % : GPU MEM: ALO\0A\00", align 1 -@.str.23 = private unnamed_addr constant [41 x i8] c"%15.12f s, %15.12f % : GPU MEM: COPY IN\0A\00", align 1 -@.str.24 = private unnamed_addr constant [36 x i8] c"%15.12f s, %15.12f % : GPU: KERNEL\0A\00", align 1 -@.str.25 = private unnamed_addr constant [42 x i8] c"%15.12f s, %15.12f % : GPU MEM: COPY OUT\0A\00", align 1 -@.str.26 = private unnamed_addr constant [37 x i8] c"%15.12f s, %15.12f % : GPU MEM: FRE\0A\00", align 1 -@.str.27 = private unnamed_addr constant [13 x i8] c"Total time:\0A\00", align 1 -@.str.28 = private unnamed_addr constant [9 x i8] c"%.12f s\0A\00", align 1 -@0 = private constant [26033 x i8] c"P\EDU\BA\01\00\10\00\A0e\00\00\00\00\00\00\02\00\01\01@\00\00\00\C8V\00\00\00\00\00\00\00\00\00\00\00\00\00\00\07\00\01\00=\00\00\00\00\00\00\00\00\00\00\00\11\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\7FELF\02\01\013\07\00\00\00\00\00\00\00\02\00\BE\00e\00\00\00\00\00\00\00\00\00\00\00 V\00\00\00\00\00\00\E0S\00\00\00\00\00\00=\05=\00@\008\00\03\00@\00\09\00\01\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00.text.findRangeK\00.nv.info.findRangeK\00.nv.shared.findRangeK\00.nv.global\00.nv.constant0.findRangeK\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00findRangeK\00.text.findRangeK\00.nv.info.findRangeK\00.nv.shared.findRangeK\00.nv.global\00threadIdx\00blockIdx\00.nv.constant0.findRangeK\00_param\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00=\00\00\00\03\00\07\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00x\00\00\00\03\00\08\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\83\00\00\00\01\00\08\00\00\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\8D\00\00\00\01\00\08\00\01\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\96\00\00\00\03\00\06\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\002\00\00\00\12\10\07\00\00\00\00\00\00\00\00\00\00O\00\00\00\00\00\00\04/\08\00\06\00\00\00\1C\00\00\00\04#\08\00\06\00\00\00\00\00\00\00\04\12\08\00\06\00\00\00h\00\00\00\04\11\08\00\06\00\00\00h\00\00\00\010\00\00\01*\00\00\04\0A\08\00\05\00\00\00@\01X\00\03\19X\00\04\17\0C\00\00\00\00\00\0A\00P\00\00\F0!\00\04\17\0C\00\00\00\00\00\09\00H\00\00\F0!\00\04\17\0C\00\00\00\00\00\08\00@\00\00\F0!\00\04\17\0C\00\00\00\00\00\07\008\00\00\F0!\00\04\17\0C\00\00\00\00\00\06\000\00\00\F0!\00\04\17\0C\00\00\00\00\00\05\00(\00\00\F0!\00\04\17\0C\00\00\00\00\00\04\00 \00\00\F0!\00\04\17\0C\00\00\00\00\00\03\00\18\00\00\F0!\00\04\17\0C\00\00\00\00\00\02\00\10\00\00\F0!\00\04\17\0C\00\00\00\00\00\01\00\08\00\00\F0!\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0!\00\03\1B\FF\00\04\1D\04\00\B8\0A\00\00\04\1C\04\00\D0N\00\00\04\1E\04\00p\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\EF\1F\E0\FD\03!\00\D3rd<3>;\0A\0Amov.u2\00\1B,e\00b;\0Acvta\8D\00\04%\00\13,\\\00\22ld\C8\00\02\18\00nrd2, [\CE\00\1E])\00\1F1)\00\01a0];\0Ast#\00\81[%SP+0],,\00\0A\16\00\128\16\00\222;\B6\00\01\D8\00a1, 999(\00\02g\00\00\EF\00\18[\9E\01\03M\00\AF1;\0Aret;\0A\0A}\D6\01\1A\FE\02FuncGetAttributes\E1\01\0D#\00\0E\EC\01\0F+\00\06\0F\F7\01\1B\1F1\F7\01Q\1F1\F7\01!\0E\D9\00\0F\02\02\0F\0E8\01\0F\0D\02\8DhDevice\B4\00\0E\0E\02\0E$\00\0F\0F\02\00/32,\00\0B\1F1,\00\18\1F2<\02\13\1F2<\02\1F\1D4<\02\1F2<\02\0C\1F2<\02\13\01_\00\04;\02\0F\D9\00\07\1D]4\00\1F14\00\06\0Fp\02\10\0E\9A\01\0Fq\02\12(32q\02\0B\15\00!12\16\00\09\86\02\1F3\86\02\15\1F3\86\02#2Get\CB\00\0E}\02\05\1B\00\04\DA\00\0F\1C\02\13?3[8W\04.\0F\1B\02\0D\1F3W\04\19\04\B3\01\0D\D0\00\0F\AA\01\06\0F\05\04W\F0\04OccupancyMaxActiveB3\08\FE\03sPerMultiprocessor\9F\01\0F;\00\16\0EB\06\0FC\00%\0EJ\04\0FC\00\1E\1F2\86\00/\1F3\88\02\13O4[32\89\02\1C\1D3\89\02\1F4\89\02\0C\1F4\89\02\19\133\89\02\0F\F1\00\1E\0F\BC\04\00\0FK\00$/2]w\07\00\0FL\00$\0F\1F\05\01\0F\98\00%\0F\A7\07\1D\097\05\186M\05\04,\00\2224-\00\183\CF\03\1F2\CF\03\15\1F2\CF\03L\9FWithFlags\D8\03(\05D\00\0E\E1\03\0FL\00'\0F\EA\030\0CL\00\1F2\98\008\1F3\98\008\1F4H\04\13O5[40\EC\08.\0FH\04\0D\1F5\EC\08\1C\0F\F9\00+\1F]\9C\040\0D\9A\01\0F\A5\040\0D:\02\0F\AE\041\0D\DB\02\0F\B7\041\0D|\03\0F\C0\04I\08-\00\1F3$\0A6\F6\0Avisible .entry findRangeK\93\04\00\8D\00\06\18\00\0Eg\04\0F \00\02\1F1 \00\0C\1F2 \00\0C\1F3 \00\0C\1F4 \00\0C\1F5 \00\0C\1F6 \00\0C\1F7 \00\0C\1F8 \00\0C\1F9 \01\0D\0FH\0B\14_6[104y\04\15\BCpred %p<11>]\0B\1E8^\0B\1F1\8F\04\0D\1F6`\0B\1A\00>\03\0F\E7\00\00\0Fh\03\01\1F0+\00\01\1F9*\00\00\1F9)\00\01\1F8)\00\00\1F8)\00\01\1F7)\00\00\1F7)\00\01\1F6)\00\00\1F6)\00\01\1F5)\00\00\1E5)\00\0F]\05\03\1F4)\00\01\1F3\86\05\04\0E1\03\0F\06\05\04\0Es\01\0F\DA\04\04\0E\C3\03\13]\E0\01#to\B7\14\05/\00\122\B8\04\04\B8\11\0B\1E\00\123\1E\00\1F2?\00\06\124!\00\1F0?\00\03\125\1E\00\1F4?\00\06\116!\00\1F9>\00\03\127>\00\1F6>\00\06\118!\00\1F8>\00\03\129>\00\1F8>\00\05!20!\00\1F7>\00\02\2221\B9\05\1F0>\00\06\028\01\1F6>\00\03\027\01\1F27\01\06\1227\01\1F5>\00\03\026\01\1F26\01\06\1226\01\1F4>\00\03\026\01\1F26\01\06$28\94\06\0F>\00\01\026\01/28\DE\06\19\1A9\17\00)16\C9\06\0C\E0\06*27\18\00\03\E1\06:d25\18\00\134u\00\1B2H\00\144w\00\0B\8E\00\135w\00\1B1\8F\00\136x\00\1B1x\00\137x\00\1B1x\00\148\ED\00\1B3\18\10k%tid.x\D7\11\05\D8\11\09t\0CN%cta-\00\1F9\CB\07\03\1B0&\00\146\BB\07\F2\01bra.uni LBB6_1;\0A\08\00\10:\F3\03\11s=\00Ed30,5\00\01\0B\04\04W\04$1,\B9\01\B0;\0Asetp.ge.s\1C\004p1,9\00\01(\00\A3;\0A@%p1 brag\00\1B6x\00\132x\00'2:`\00\149x\00\198w\00591,\EC\01\17;\A7\00592,\02\01T;\0Ashl\9A\06493, \00\833;\0Aadd.s\19\00$4,Q\00\01'\00\09f\00\00\87\05\01$\00\95];\0Amul.lo7\00$6,\22\00J2068S\00$7,\BB\00\01*\00\09\A1\00%8,\D0\01\0A\A1\00$9, \00\192N\00H100,V\00\129\A2\00\02,\02\123\A1\00\89100+1032$\01E101,\B1\02\09l\005102\0E\01\0Cm\00%3,<\00\02*\00\08o\00\144o\00\153\EE\01\12t\DA\003p4,\90\00\00'\00\01\EC\01\164\EC\01\1B7\EB\01\133\EB\01)3:\B0\00\1F4\EC\01\02?105\ED\01\03?106\EE\01\04E107,\22\00\1B3\E2\00%8,V\00\02+\00\0Am\00\149\E4\00\1D8\F5\01\101e\08\04%\00\0D\F7\01\101\AE\08\04\C8\00\02.\00\08<\01\185\F9\01$ad\C6\00\02\99\06\00\1E\00\02\19\07\00N\00\05\E1\00\024\07*16\DD\00\04,\07\1E1\BF\01\04&\07\04\8B\00(13\83\00\137\DB\00/14.\02\00/15.\02\05&16M\01\0Cp\00%7,=\00\02+\00\08p\00\148p\00\147/\02#le/\02#5,\91\00\00'\00\01/\02\1F5/\02\07\134/\02\194/\02/18/\02\03/19/\02\04/20/\02\05\03\85\07\01\22\00\0B/\02\03\7F\07\141\E6\07)21m\00\142\82\03.22/\02\03}\07\01%\00\0E/\02\03~\07#11\91\02*24\AE\00\1F6*\04\04\131y\07.12\90\01\03s\07\05\\\00*27T\00\04\DD\02J28+46\01\05\E1\05-16\CA\05%6,>\00\02+\00\01\B1\01\166\B1\01\0C\CB\05\135\B1\01\195\B1\01/31\B1\01\03/32\B1\01\04/33\B1\01\05534,\22\00\0B\B1\01535,V\00\02+\00\0Am\00\136\03\01.35\B1\01537,%\00\0E\B1\01538,\C8\00\02.\00\0A\AE\00\1F9\B1\01\05540,\22\00\0B\B1\01941,\\\00)40T\00#42\AE\00,41\B1\01543,\D7\08\0BT\00%4,\22\00\04\02\01\05x\08\01 \00\03x\08+42\B8\01\136\B8\01*6:\18\00\137\18\00\197\D0\01/45\D0\01\03546,Y\09\0A\CE\00\1F7\D0\01\05\144\FF\00\1D7\D0\01549,V\00\02+\00\09m\00$50\22\01\1E9\D0\01551,%\00\0E\D0\01552,\C8\00\02.\00\09\AE\00/53\D0\01\05554,\22\00\0B\D0\01955,\\\00(54\11\05\04\7F\03/55\81\05\00556,H\0A\0Bp\00\167\1E\01\0Cp\00%8,=\00\02+\00\07p\00$20p\00\1D8\B0\07#7,\91\00\00'\00\01\D0\03\177\9C\09\0C\19\02\138\01\02\198\01\02/59\01\02\03/60\01\02\04/61\01\02\05562,\22\00\0B\01\02563,V\00\02+\00\0Am\00\04\95\08.63\01\02565,%\00\0E\01\02566,\C8\00\02.\00\08=\01\1F1\B1\07\03\02\11\06\00\1E\00\0F\B1\07\00\116W\01+22\DD\00\03\A5\01\1E6\C0\01969,\8B\00(68\83\00\04]\06/690\02\00/700\02\05&71M\01\0Bp\00572,=\00\02+\00\08p\00\04K\01-72\B1\07#8,\91\00\00'\00\010\02\1F80\02\08\1390\02\1990\02/730\02\03/740\02\04/750\02\05576,\22\00\0B0\02577,V\00\02+\00\0Am\00\04\96\08.770\02579,%\00\0E0\02580,\C8\00\02.\00\09\AE\00/811\04\05\138v\01\1E8\91\01983,\\\00*82T\00\04\93\01,83\01\06/85\B2\07\07%9,>\00\02+\00\01\B2\01\179\B2\01\1C1\F6\0D\140\F7\0D\190\B4\01/86\B4\01\03/87\B4\01\04/88\B4\01\05\148\96\00\1D8\B4\01\03,\0D#18\BC\01)89m\00#91\06\01.90\B4\01\00\C6\0C\04%\00\0E\B4\01\03\F9\0D#181\02\1A9`\01/94\B4\01\05\00\DB\0D\04\22\00\0B\B4\01\03\FD\0D\05\\\00*95T\00\04h\0B,96\B4\01\05\FF\0D;48]T\00%9,\22\00\04\02\01\08\B5\07$99u\10\0C\1E\0B\151\B1\0F\1B1\B7\07$12\1A\00\D82:\0Abar.sync 0\F2\03\09\96\0C\01D\02\14n\F2\03\02\ED\0C\00\22\00\02 \06'10?\02\1C4\85\00\04\D3\0D\191\D4\0D8200\A6\08\06'\02/20\0B\06\05\132\9D\0E-20\0B\06\132\9E\0E\132\13\0F\142\9E\0E\04m\00\03-\03\112\A0\0E\08\19\00\09B\0E\09R\00%6,\22\00\04R\00\07w\01#20\EC\11:204R\00\187\C9\01\08\D8\00\1F8\D8\00\06%9,\22\00\0B\D8\00\03X\0E\132\B7\0E)20\FF\023211\D8\00\04\FA\16\05\19\00\182\85\03\09R\00%3,\22\00\04R\00\08\D8\00$13\DC\12\0D\09\04\04n\0D*145\02\0A&\00\04\E3\0B715:O\02\186&\12\074\07#7,\1E\00\1F1\83\12\02/27\84\12\05\186N\02/32\8C\04\02/33@\0C\03/34\8B\01\04\03#\0C\00 \00\0A\89\01\00\F8\0B\03Q\00\01'\00\09f\00\03\D6\03-36\83\04\03\BD\0B\00\22\00\0D\81\04\00\F4\0B\03\BB\00\01*\00\08\A1\00/40}\04\04\03\16\0C\00 \00\0A{\04\00\10\0C\06U\00\184\BD\07\03\FC\02.42+\08/43\DB\0F\04%44\09\01\0Bh\00$5,9\00\01'\00\07h\00\03\14\13-451\04\222,\87\00\22%ro\06\1720\04\1C8\E1\01\04S\0C\191T\0C\1F4n\06\03/47\E2\01\03\1F4m\03\05\035\0C\1D4k\03\00\0A\0C\03Q\00\01'\00\08f\00\135g\03\1E5e\06\03\CF\0B\1F5c\06\01\00\06\0C\03\BB\00\01*\00\08\A1\00\1F5_\06\05\03(\0C\1D5z\01\03\C9\0B\03U\00\185\92\0A\03\BA\0E\1C5V\06\00\1F\0C\04i\16\0Ae\00\158\06\01\0Be\00$9,9\00\01'\00\07\95\03\00\1D\00\01\94\03\0C\D4\0F$18\B2\01\1E8M\06\03\DF\1A\0F\AD\15\03/61+\08\03\1F6\AD\15\05\03\F2\0B\1D6\AD\15\00\C7\0B\03Q\00\01'\00\09f\00\03\99\02\1E6\AD\15\03]\0B\1F6\AD\15\01\02\B2\0B\13d]\0C\196\AD\15\1F6\AD\15\05\03\B6\0B\1D6Y\01\03W\0B\04U\00\08\AC\15\03?\04\1F7\AA\15\00/71\CA\0B\04%72\09\01\0Bh\00$3,9\00\01'\00\07h\00\03\DF\0A-73\A0\03\223,\87\00\22%rr\13\163\A0\03\1D2\E0\05\04\C2\0B\191\C3\0B\1F7\A3\15\03/75\E2\01\03\1F7\A1\15\05\03\A4\0B\1D7\9F\15\00y\0B\03Q\00\01'\00\09f\00\03?\0F\1E7\9A\15\03>\0B\1F7\98\15\01\00u\0B\03\BB\00\01*\00\08\A1\00/82\E2\01\04\03\97\0B\1D8z\01\03S\0B\03U\00\188`\15\03\9F\00+84\A0\03/85\A0\03\04%86\06\01\0Be\00$7,9\00\01'\00\08D\10\03\D4\0F\118T\15$ubG\16\221,\82\00\00\22\00\09`\16#2,\1F\00\09\B9\14\05\A6\0B\1B8:\08\03\A5\0B%88{\00\07\1B\04\128\1B\04\0D\EF\0D\142=\0C\C020:\0Aret;\0A\0A}\0A\00\00\00\00\00\00", section ".nv_fatbin", align 8 -@__cuda_fatbin_wrapper = internal constant { i32, i32, i8*, i8* } { i32 1180844977, i32 1, i8* getelementptr inbounds ([26033 x i8], [26033 x i8]* @0, i64 0, i64 0), i8* null }, section ".nvFatBinSegment", align 8 -@__cuda_gpubin_handle = internal global i8** null, align 8 -@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* bitcast (void (i8*)* @__cuda_module_ctor to void ()*), i8* null }] - -; Function Attrs: noinline optnone uwtable -define dso_local void @findRangeK(i64 %height, %struct.knode* %knodesD, i64 %knodes_elem, i64* %currKnodeD, i64* %offsetD, i64* %lastKnodeD, i64* %offset_2D, i32* %startD, i32* %endD, i32* %RecstartD, i32* %ReclenD) #0 { -entry: - %height.addr = alloca i64, align 8 - %knodesD.addr = alloca %struct.knode*, align 8 - %knodes_elem.addr = alloca i64, align 8 - %currKnodeD.addr = alloca i64*, align 8 - %offsetD.addr = alloca i64*, align 8 - %lastKnodeD.addr = alloca i64*, align 8 - %offset_2D.addr = alloca i64*, align 8 - %startD.addr = alloca i32*, align 8 - %endD.addr = alloca i32*, align 8 - %RecstartD.addr = alloca i32*, align 8 - %ReclenD.addr = alloca i32*, align 8 - %grid_dim = alloca %struct.dim3, align 8 - %block_dim = alloca %struct.dim3, align 8 - %shmem_size = alloca i64, align 8 - %stream = alloca i8*, align 8 - %grid_dim.coerce = alloca { i64, i32 }, align 8 - %block_dim.coerce = alloca { i64, i32 }, align 8 - store i64 %height, i64* %height.addr, align 8 - store %struct.knode* %knodesD, %struct.knode** %knodesD.addr, align 8 - store i64 %knodes_elem, i64* %knodes_elem.addr, align 8 - store i64* %currKnodeD, i64** %currKnodeD.addr, align 8 - store i64* %offsetD, i64** %offsetD.addr, align 8 - store i64* %lastKnodeD, i64** %lastKnodeD.addr, align 8 - store i64* %offset_2D, i64** %offset_2D.addr, align 8 - store i32* %startD, i32** %startD.addr, align 8 - store i32* %endD, i32** %endD.addr, align 8 - store i32* %RecstartD, i32** %RecstartD.addr, align 8 - store i32* %ReclenD, i32** %ReclenD.addr, align 8 - %kernel_args = alloca i8*, i64 11, align 16 - %0 = bitcast i64* %height.addr to i8* - %1 = getelementptr i8*, i8** %kernel_args, i32 0 - store i8* %0, i8** %1 - %2 = bitcast %struct.knode** %knodesD.addr to i8* - %3 = getelementptr i8*, i8** %kernel_args, i32 1 - store i8* %2, i8** %3 - %4 = bitcast i64* %knodes_elem.addr to i8* - %5 = getelementptr i8*, i8** %kernel_args, i32 2 - store i8* %4, i8** %5 - %6 = bitcast i64** %currKnodeD.addr to i8* - %7 = getelementptr i8*, i8** %kernel_args, i32 3 - store i8* %6, i8** %7 - %8 = bitcast i64** %offsetD.addr to i8* - %9 = getelementptr i8*, i8** %kernel_args, i32 4 - store i8* %8, i8** %9 - %10 = bitcast i64** %lastKnodeD.addr to i8* - %11 = getelementptr i8*, i8** %kernel_args, i32 5 - store i8* %10, i8** %11 - %12 = bitcast i64** %offset_2D.addr to i8* - %13 = getelementptr i8*, i8** %kernel_args, i32 6 - store i8* %12, i8** %13 - %14 = bitcast i32** %startD.addr to i8* - %15 = getelementptr i8*, i8** %kernel_args, i32 7 - store i8* %14, i8** %15 - %16 = bitcast i32** %endD.addr to i8* - %17 = getelementptr i8*, i8** %kernel_args, i32 8 - store i8* %16, i8** %17 - %18 = bitcast i32** %RecstartD.addr to i8* - %19 = getelementptr i8*, i8** %kernel_args, i32 9 - store i8* %18, i8** %19 - %20 = bitcast i32** %ReclenD.addr to i8* - %21 = getelementptr i8*, i8** %kernel_args, i32 10 - store i8* %20, i8** %21 - %22 = call i32 @__cudaPopCallConfiguration(%struct.dim3* %grid_dim, %struct.dim3* %block_dim, i64* %shmem_size, i8** %stream) - %23 = load i64, i64* %shmem_size, align 8 - %24 = load i8*, i8** %stream, align 8 - %25 = bitcast { i64, i32 }* %grid_dim.coerce to i8* - %26 = bitcast %struct.dim3* %grid_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %25, i8* align 8 %26, i64 12, i1 false) - %27 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 0 - %28 = load i64, i64* %27, align 8 - %29 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 1 - %30 = load i32, i32* %29, align 8 - %31 = bitcast { i64, i32 }* %block_dim.coerce to i8* - %32 = bitcast %struct.dim3* %block_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %31, i8* align 8 %32, i64 12, i1 false) - %33 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 0 - %34 = load i64, i64* %33, align 8 - %35 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 1 - %36 = load i32, i32* %35, align 8 - %37 = bitcast i8* %24 to %struct.CUstream_st* - %call = call i32 @cudaLaunchKernel(i8* bitcast (void (i64, %struct.knode*, i64, i64*, i64*, i64*, i64*, i32*, i32*, i32*, i32*)* @findRangeK to i8*), i64 %28, i32 %30, i64 %34, i32 %36, i8** %kernel_args, i64 %23, %struct.CUstream_st* %37) - br label %setup.end - -setup.end: ; preds = %entry - ret void -} - -declare dso_local i32 @__cudaPopCallConfiguration(%struct.dim3*, %struct.dim3*, i64*, i8**) - -declare dso_local i32 @cudaLaunchKernel(i8*, i64, i32, i64, i32, i8**, i64, %struct.CUstream_st*) - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1 - -; Function Attrs: noinline optnone uwtable -define dso_local void @kernel_gpu_cuda_wrapper_2(%struct.knode* %knodes, i64 %knodes_elem, i64 %knodes_mem, i32 %order, i64 %maxheight, i32 %count, i64* %currKnode, i64* %offset, i64* %lastKnode, i64* %offset_2, i32* %start, i32* %end, i32* %recstart, i32* %reclength) #0 { -entry: - %knodes.addr = alloca %struct.knode*, align 8 - %knodes_elem.addr = alloca i64, align 8 - %knodes_mem.addr = alloca i64, align 8 - %order.addr = alloca i32, align 4 - %maxheight.addr = alloca i64, align 8 - %count.addr = alloca i32, align 4 - %currKnode.addr = alloca i64*, align 8 - %offset.addr = alloca i64*, align 8 - %lastKnode.addr = alloca i64*, align 8 - %offset_2.addr = alloca i64*, align 8 - %start.addr = alloca i32*, align 8 - %end.addr = alloca i32*, align 8 - %recstart.addr = alloca i32*, align 8 - %reclength.addr = alloca i32*, align 8 - %time0 = alloca i64, align 8 - %time1 = alloca i64, align 8 - %time2 = alloca i64, align 8 - %time3 = alloca i64, align 8 - %time4 = alloca i64, align 8 - %time5 = alloca i64, align 8 - %time6 = alloca i64, align 8 - %numBlocks = alloca i32, align 4 - %threadsPerBlock = alloca i32, align 4 - %knodesD = alloca %struct.knode*, align 8 - %currKnodeD = alloca i64*, align 8 - %offsetD = alloca i64*, align 8 - %lastKnodeD = alloca i64*, align 8 - %offset_2D = alloca i64*, align 8 - %startD = alloca i32*, align 8 - %endD = alloca i32*, align 8 - %ansDStart = alloca i32*, align 8 - %ansDLength = alloca i32*, align 8 - %agg.tmp = alloca %struct.dim3, align 4 - %agg.tmp54 = alloca %struct.dim3, align 4 - %agg.tmp.coerce = alloca { i64, i32 }, align 4 - %agg.tmp54.coerce = alloca { i64, i32 }, align 4 - store %struct.knode* %knodes, %struct.knode** %knodes.addr, align 8 - store i64 %knodes_elem, i64* %knodes_elem.addr, align 8 - store i64 %knodes_mem, i64* %knodes_mem.addr, align 8 - store i32 %order, i32* %order.addr, align 4 - store i64 %maxheight, i64* %maxheight.addr, align 8 - store i32 %count, i32* %count.addr, align 4 - store i64* %currKnode, i64** %currKnode.addr, align 8 - store i64* %offset, i64** %offset.addr, align 8 - store i64* %lastKnode, i64** %lastKnode.addr, align 8 - store i64* %offset_2, i64** %offset_2.addr, align 8 - store i32* %start, i32** %start.addr, align 8 - store i32* %end, i32** %end.addr, align 8 - store i32* %recstart, i32** %recstart.addr, align 8 - store i32* %reclength, i32** %reclength.addr, align 8 - %call = call i64 @get_time() - store i64 %call, i64* %time0, align 8 - %call1 = call i32 @cudaThreadSynchronize() - %0 = load i32, i32* %count.addr, align 4 - store i32 %0, i32* %numBlocks, align 4 - %1 = load i32, i32* %order.addr, align 4 - %cmp = icmp slt i32 %1, 1024 - br i1 %cmp, label %cond.true, label %cond.false - -cond.true: ; preds = %entry - %2 = load i32, i32* %order.addr, align 4 - br label %cond.end - -cond.false: ; preds = %entry - br label %cond.end - -cond.end: ; preds = %cond.false, %cond.true - %cond = phi i32 [ %2, %cond.true ], [ 1024, %cond.false ] - store i32 %cond, i32* %threadsPerBlock, align 4 - %3 = load i32, i32* %numBlocks, align 4 - %4 = load i32, i32* %threadsPerBlock, align 4 - %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([75 x i8], [75 x i8]* @.str, i64 0, i64 0), i32 %3, i32 %4) - %call3 = call i64 @get_time() - store i64 %call3, i64* %time1, align 8 - %5 = bitcast %struct.knode** %knodesD to i8** - %6 = load i64, i64* %knodes_mem.addr, align 8 - %call4 = call i32 @cudaMalloc(i8** %5, i64 %6) - call void @checkCUDAError(i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.1, i64 0, i64 0)) - %7 = bitcast i64** %currKnodeD to i8** - %8 = load i32, i32* %count.addr, align 4 - %conv = sext i32 %8 to i64 - %mul = mul i64 %conv, 8 - %call5 = call i32 @cudaMalloc(i8** %7, i64 %mul) - call void @checkCUDAError(i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0)) - %9 = bitcast i64** %offsetD to i8** - %10 = load i32, i32* %count.addr, align 4 - %conv6 = sext i32 %10 to i64 - %mul7 = mul i64 %conv6, 8 - %call8 = call i32 @cudaMalloc(i8** %9, i64 %mul7) - call void @checkCUDAError(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str.3, i64 0, i64 0)) - %11 = bitcast i64** %lastKnodeD to i8** - %12 = load i32, i32* %count.addr, align 4 - %conv9 = sext i32 %12 to i64 - %mul10 = mul i64 %conv9, 8 - %call11 = call i32 @cudaMalloc(i8** %11, i64 %mul10) - call void @checkCUDAError(i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str.4, i64 0, i64 0)) - %13 = bitcast i64** %offset_2D to i8** - %14 = load i32, i32* %count.addr, align 4 - %conv12 = sext i32 %14 to i64 - %mul13 = mul i64 %conv12, 8 - %call14 = call i32 @cudaMalloc(i8** %13, i64 %mul13) - call void @checkCUDAError(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str.5, i64 0, i64 0)) - %15 = bitcast i32** %startD to i8** - %16 = load i32, i32* %count.addr, align 4 - %conv15 = sext i32 %16 to i64 - %mul16 = mul i64 %conv15, 4 - %call17 = call i32 @cudaMalloc(i8** %15, i64 %mul16) - call void @checkCUDAError(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str.6, i64 0, i64 0)) - %17 = bitcast i32** %endD to i8** - %18 = load i32, i32* %count.addr, align 4 - %conv18 = sext i32 %18 to i64 - %mul19 = mul i64 %conv18, 4 - %call20 = call i32 @cudaMalloc(i8** %17, i64 %mul19) - call void @checkCUDAError(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.7, i64 0, i64 0)) - %19 = bitcast i32** %ansDStart to i8** - %20 = load i32, i32* %count.addr, align 4 - %conv21 = sext i32 %20 to i64 - %mul22 = mul i64 %conv21, 4 - %call23 = call i32 @cudaMalloc(i8** %19, i64 %mul22) - call void @checkCUDAError(i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.8, i64 0, i64 0)) - %21 = bitcast i32** %ansDLength to i8** - %22 = load i32, i32* %count.addr, align 4 - %conv24 = sext i32 %22 to i64 - %mul25 = mul i64 %conv24, 4 - %call26 = call i32 @cudaMalloc(i8** %21, i64 %mul25) - call void @checkCUDAError(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str.9, i64 0, i64 0)) - %call27 = call i64 @get_time() - store i64 %call27, i64* %time2, align 8 - %23 = load %struct.knode*, %struct.knode** %knodesD, align 8 - %24 = bitcast %struct.knode* %23 to i8* - %25 = load %struct.knode*, %struct.knode** %knodes.addr, align 8 - %26 = bitcast %struct.knode* %25 to i8* - %27 = load i64, i64* %knodes_mem.addr, align 8 - %call28 = call i32 @cudaMemcpy(i8* %24, i8* %26, i64 %27, i32 1) - call void @checkCUDAError(i8* getelementptr inbounds ([27 x i8], [27 x i8]* @.str.10, i64 0, i64 0)) - %28 = load i64*, i64** %currKnodeD, align 8 - %29 = bitcast i64* %28 to i8* - %30 = load i64*, i64** %currKnode.addr, align 8 - %31 = bitcast i64* %30 to i8* - %32 = load i32, i32* %count.addr, align 4 - %conv29 = sext i32 %32 to i64 - %mul30 = mul i64 %conv29, 8 - %call31 = call i32 @cudaMemcpy(i8* %29, i8* %31, i64 %mul30, i32 1) - call void @checkCUDAError(i8* getelementptr inbounds ([33 x i8], [33 x i8]* @.str.11, i64 0, i64 0)) - %33 = load i64*, i64** %offsetD, align 8 - %34 = bitcast i64* %33 to i8* - %35 = load i64*, i64** %offset.addr, align 8 - %36 = bitcast i64* %35 to i8* - %37 = load i32, i32* %count.addr, align 4 - %conv32 = sext i32 %37 to i64 - %mul33 = mul i64 %conv32, 8 - %call34 = call i32 @cudaMemcpy(i8* %34, i8* %36, i64 %mul33, i32 1) - call void @checkCUDAError(i8* getelementptr inbounds ([30 x i8], [30 x i8]* @.str.12, i64 0, i64 0)) - %38 = load i64*, i64** %lastKnodeD, align 8 - %39 = bitcast i64* %38 to i8* - %40 = load i64*, i64** %lastKnode.addr, align 8 - %41 = bitcast i64* %40 to i8* - %42 = load i32, i32* %count.addr, align 4 - %conv35 = sext i32 %42 to i64 - %mul36 = mul i64 %conv35, 8 - %call37 = call i32 @cudaMemcpy(i8* %39, i8* %41, i64 %mul36, i32 1) - call void @checkCUDAError(i8* getelementptr inbounds ([33 x i8], [33 x i8]* @.str.13, i64 0, i64 0)) - %43 = load i64*, i64** %offset_2D, align 8 - %44 = bitcast i64* %43 to i8* - %45 = load i64*, i64** %offset_2.addr, align 8 - %46 = bitcast i64* %45 to i8* - %47 = load i32, i32* %count.addr, align 4 - %conv38 = sext i32 %47 to i64 - %mul39 = mul i64 %conv38, 8 - %call40 = call i32 @cudaMemcpy(i8* %44, i8* %46, i64 %mul39, i32 1) - call void @checkCUDAError(i8* getelementptr inbounds ([32 x i8], [32 x i8]* @.str.14, i64 0, i64 0)) - %48 = load i32*, i32** %startD, align 8 - %49 = bitcast i32* %48 to i8* - %50 = load i32*, i32** %start.addr, align 8 - %51 = bitcast i32* %50 to i8* - %52 = load i32, i32* %count.addr, align 4 - %conv41 = sext i32 %52 to i64 - %mul42 = mul i64 %conv41, 4 - %call43 = call i32 @cudaMemcpy(i8* %49, i8* %51, i64 %mul42, i32 1) - call void @checkCUDAError(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str.15, i64 0, i64 0)) - %53 = load i32*, i32** %endD, align 8 - %54 = bitcast i32* %53 to i8* - %55 = load i32*, i32** %end.addr, align 8 - %56 = bitcast i32* %55 to i8* - %57 = load i32, i32* %count.addr, align 4 - %conv44 = sext i32 %57 to i64 - %mul45 = mul i64 %conv44, 4 - %call46 = call i32 @cudaMemcpy(i8* %54, i8* %56, i64 %mul45, i32 1) - call void @checkCUDAError(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.16, i64 0, i64 0)) - %58 = load i32*, i32** %ansDStart, align 8 - %59 = bitcast i32* %58 to i8* - %60 = load i32*, i32** %recstart.addr, align 8 - %61 = bitcast i32* %60 to i8* - %62 = load i32, i32* %count.addr, align 4 - %conv47 = sext i32 %62 to i64 - %mul48 = mul i64 %conv47, 4 - %call49 = call i32 @cudaMemcpy(i8* %59, i8* %61, i64 %mul48, i32 1) - call void @checkCUDAError(i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.17, i64 0, i64 0)) - %63 = load i32*, i32** %ansDLength, align 8 - %64 = bitcast i32* %63 to i8* - %65 = load i32*, i32** %reclength.addr, align 8 - %66 = bitcast i32* %65 to i8* - %67 = load i32, i32* %count.addr, align 4 - %conv50 = sext i32 %67 to i64 - %mul51 = mul i64 %conv50, 4 - %call52 = call i32 @cudaMemcpy(i8* %64, i8* %66, i64 %mul51, i32 1) - call void @checkCUDAError(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str.18, i64 0, i64 0)) - %call53 = call i64 @get_time() - store i64 %call53, i64* %time3, align 8 - %68 = load i32, i32* %numBlocks, align 4 - call void @_ZN4dim3C2Ejjj(%struct.dim3* %agg.tmp, i32 %68, i32 1, i32 1) - %69 = load i32, i32* %threadsPerBlock, align 4 - call void @_ZN4dim3C2Ejjj(%struct.dim3* %agg.tmp54, i32 %69, i32 1, i32 1) - %70 = bitcast { i64, i32 }* %agg.tmp.coerce to i8* - %71 = bitcast %struct.dim3* %agg.tmp to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %70, i8* align 4 %71, i64 12, i1 false) - %72 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 0 - %73 = load i64, i64* %72, align 4 - %74 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 1 - %75 = load i32, i32* %74, align 4 - %76 = bitcast { i64, i32 }* %agg.tmp54.coerce to i8* - %77 = bitcast %struct.dim3* %agg.tmp54 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %76, i8* align 4 %77, i64 12, i1 false) - %78 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp54.coerce, i32 0, i32 0 - %79 = load i64, i64* %78, align 4 - %80 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp54.coerce, i32 0, i32 1 - %81 = load i32, i32* %80, align 4 - %call55 = call i32 @__cudaPushCallConfiguration(i64 %73, i32 %75, i64 %79, i32 %81, i64 0, i8* null) - %tobool = icmp ne i32 %call55, 0 - br i1 %tobool, label %kcall.end, label %kcall.configok - -kcall.configok: ; preds = %cond.end - %82 = load i64, i64* %maxheight.addr, align 8 - %83 = load %struct.knode*, %struct.knode** %knodesD, align 8 - %84 = load i64, i64* %knodes_elem.addr, align 8 - %85 = load i64*, i64** %currKnodeD, align 8 - %86 = load i64*, i64** %offsetD, align 8 - %87 = load i64*, i64** %lastKnodeD, align 8 - %88 = load i64*, i64** %offset_2D, align 8 - %89 = load i32*, i32** %startD, align 8 - %90 = load i32*, i32** %endD, align 8 - %91 = load i32*, i32** %ansDStart, align 8 - %92 = load i32*, i32** %ansDLength, align 8 - call void @findRangeK(i64 %82, %struct.knode* %83, i64 %84, i64* %85, i64* %86, i64* %87, i64* %88, i32* %89, i32* %90, i32* %91, i32* %92) - br label %kcall.end - -kcall.end: ; preds = %kcall.configok, %cond.end - %call56 = call i32 @cudaThreadSynchronize() - call void @checkCUDAError(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.19, i64 0, i64 0)) - %call57 = call i64 @get_time() - store i64 %call57, i64* %time4, align 8 - %93 = load i32*, i32** %recstart.addr, align 8 - %94 = bitcast i32* %93 to i8* - %95 = load i32*, i32** %ansDStart, align 8 - %96 = bitcast i32* %95 to i8* - %97 = load i32, i32* %count.addr, align 4 - %conv58 = sext i32 %97 to i64 - %mul59 = mul i64 %conv58, 4 - %call60 = call i32 @cudaMemcpy(i8* %94, i8* %96, i64 %mul59, i32 2) - call void @checkCUDAError(i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.17, i64 0, i64 0)) - %98 = load i32*, i32** %reclength.addr, align 8 - %99 = bitcast i32* %98 to i8* - %100 = load i32*, i32** %ansDLength, align 8 - %101 = bitcast i32* %100 to i8* - %102 = load i32, i32* %count.addr, align 4 - %conv61 = sext i32 %102 to i64 - %mul62 = mul i64 %conv61, 4 - %call63 = call i32 @cudaMemcpy(i8* %99, i8* %101, i64 %mul62, i32 2) - call void @checkCUDAError(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str.18, i64 0, i64 0)) - %call64 = call i64 @get_time() - store i64 %call64, i64* %time5, align 8 - %103 = load %struct.knode*, %struct.knode** %knodesD, align 8 - %104 = bitcast %struct.knode* %103 to i8* - %call65 = call i32 @cudaFree(i8* %104) - %105 = load i64*, i64** %currKnodeD, align 8 - %106 = bitcast i64* %105 to i8* - %call66 = call i32 @cudaFree(i8* %106) - %107 = load i64*, i64** %offsetD, align 8 - %108 = bitcast i64* %107 to i8* - %call67 = call i32 @cudaFree(i8* %108) - %109 = load i64*, i64** %lastKnodeD, align 8 - %110 = bitcast i64* %109 to i8* - %call68 = call i32 @cudaFree(i8* %110) - %111 = load i64*, i64** %offset_2D, align 8 - %112 = bitcast i64* %111 to i8* - %call69 = call i32 @cudaFree(i8* %112) - %113 = load i32*, i32** %startD, align 8 - %114 = bitcast i32* %113 to i8* - %call70 = call i32 @cudaFree(i8* %114) - %115 = load i32*, i32** %endD, align 8 - %116 = bitcast i32* %115 to i8* - %call71 = call i32 @cudaFree(i8* %116) - %117 = load i32*, i32** %ansDStart, align 8 - %118 = bitcast i32* %117 to i8* - %call72 = call i32 @cudaFree(i8* %118) - %119 = load i32*, i32** %ansDLength, align 8 - %120 = bitcast i32* %119 to i8* - %call73 = call i32 @cudaFree(i8* %120) - %call74 = call i64 @get_time() - store i64 %call74, i64* %time6, align 8 - %call75 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([52 x i8], [52 x i8]* @.str.20, i64 0, i64 0)) - %121 = load i64, i64* %time1, align 8 - %122 = load i64, i64* %time0, align 8 - %sub = sub nsw i64 %121, %122 - %conv76 = sitofp i64 %sub to float - %div = fdiv float %conv76, 1.000000e+06 - %conv77 = fpext float %div to double - %123 = load i64, i64* %time1, align 8 - %124 = load i64, i64* %time0, align 8 - %sub78 = sub nsw i64 %123, %124 - %conv79 = sitofp i64 %sub78 to float - %125 = load i64, i64* %time6, align 8 - %126 = load i64, i64* %time0, align 8 - %sub80 = sub nsw i64 %125, %126 - %conv81 = sitofp i64 %sub80 to float - %div82 = fdiv float %conv79, %conv81 - %mul83 = fmul contract float %div82, 1.000000e+02 - %conv84 = fpext float %mul83 to double - %call85 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([54 x i8], [54 x i8]* @.str.21, i64 0, i64 0), double %conv77, double %conv84) - %127 = load i64, i64* %time2, align 8 - %128 = load i64, i64* %time1, align 8 - %sub86 = sub nsw i64 %127, %128 - %conv87 = sitofp i64 %sub86 to float - %div88 = fdiv float %conv87, 1.000000e+06 - %conv89 = fpext float %div88 to double - %129 = load i64, i64* %time2, align 8 - %130 = load i64, i64* %time1, align 8 - %sub90 = sub nsw i64 %129, %130 - %conv91 = sitofp i64 %sub90 to float - %131 = load i64, i64* %time6, align 8 - %132 = load i64, i64* %time0, align 8 - %sub92 = sub nsw i64 %131, %132 - %conv93 = sitofp i64 %sub92 to float - %div94 = fdiv float %conv91, %conv93 - %mul95 = fmul contract float %div94, 1.000000e+02 - %conv96 = fpext float %mul95 to double - %call97 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([37 x i8], [37 x i8]* @.str.22, i64 0, i64 0), double %conv89, double %conv96) - %133 = load i64, i64* %time3, align 8 - %134 = load i64, i64* %time2, align 8 - %sub98 = sub nsw i64 %133, %134 - %conv99 = sitofp i64 %sub98 to float - %div100 = fdiv float %conv99, 1.000000e+06 - %conv101 = fpext float %div100 to double - %135 = load i64, i64* %time3, align 8 - %136 = load i64, i64* %time2, align 8 - %sub102 = sub nsw i64 %135, %136 - %conv103 = sitofp i64 %sub102 to float - %137 = load i64, i64* %time6, align 8 - %138 = load i64, i64* %time0, align 8 - %sub104 = sub nsw i64 %137, %138 - %conv105 = sitofp i64 %sub104 to float - %div106 = fdiv float %conv103, %conv105 - %mul107 = fmul contract float %div106, 1.000000e+02 - %conv108 = fpext float %mul107 to double - %call109 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([41 x i8], [41 x i8]* @.str.23, i64 0, i64 0), double %conv101, double %conv108) - %139 = load i64, i64* %time4, align 8 - %140 = load i64, i64* %time3, align 8 - %sub110 = sub nsw i64 %139, %140 - %conv111 = sitofp i64 %sub110 to float - %div112 = fdiv float %conv111, 1.000000e+06 - %conv113 = fpext float %div112 to double - %141 = load i64, i64* %time4, align 8 - %142 = load i64, i64* %time3, align 8 - %sub114 = sub nsw i64 %141, %142 - %conv115 = sitofp i64 %sub114 to float - %143 = load i64, i64* %time6, align 8 - %144 = load i64, i64* %time0, align 8 - %sub116 = sub nsw i64 %143, %144 - %conv117 = sitofp i64 %sub116 to float - %div118 = fdiv float %conv115, %conv117 - %mul119 = fmul contract float %div118, 1.000000e+02 - %conv120 = fpext float %mul119 to double - %call121 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str.24, i64 0, i64 0), double %conv113, double %conv120) - %145 = load i64, i64* %time5, align 8 - %146 = load i64, i64* %time4, align 8 - %sub122 = sub nsw i64 %145, %146 - %conv123 = sitofp i64 %sub122 to float - %div124 = fdiv float %conv123, 1.000000e+06 - %conv125 = fpext float %div124 to double - %147 = load i64, i64* %time5, align 8 - %148 = load i64, i64* %time4, align 8 - %sub126 = sub nsw i64 %147, %148 - %conv127 = sitofp i64 %sub126 to float - %149 = load i64, i64* %time6, align 8 - %150 = load i64, i64* %time0, align 8 - %sub128 = sub nsw i64 %149, %150 - %conv129 = sitofp i64 %sub128 to float - %div130 = fdiv float %conv127, %conv129 - %mul131 = fmul contract float %div130, 1.000000e+02 - %conv132 = fpext float %mul131 to double - %call133 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str.25, i64 0, i64 0), double %conv125, double %conv132) - %151 = load i64, i64* %time6, align 8 - %152 = load i64, i64* %time5, align 8 - %sub134 = sub nsw i64 %151, %152 - %conv135 = sitofp i64 %sub134 to float - %div136 = fdiv float %conv135, 1.000000e+06 - %conv137 = fpext float %div136 to double - %153 = load i64, i64* %time6, align 8 - %154 = load i64, i64* %time5, align 8 - %sub138 = sub nsw i64 %153, %154 - %conv139 = sitofp i64 %sub138 to float - %155 = load i64, i64* %time6, align 8 - %156 = load i64, i64* %time0, align 8 - %sub140 = sub nsw i64 %155, %156 - %conv141 = sitofp i64 %sub140 to float - %div142 = fdiv float %conv139, %conv141 - %mul143 = fmul contract float %div142, 1.000000e+02 - %conv144 = fpext float %mul143 to double - %call145 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([37 x i8], [37 x i8]* @.str.26, i64 0, i64 0), double %conv137, double %conv144) - %call146 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str.27, i64 0, i64 0)) - %157 = load i64, i64* %time6, align 8 - %158 = load i64, i64* %time0, align 8 - %sub147 = sub nsw i64 %157, %158 - %conv148 = sitofp i64 %sub147 to float - %div149 = fdiv float %conv148, 1.000000e+06 - %conv150 = fpext float %div149 to double - %call151 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.28, i64 0, i64 0), double %conv150) - ret void -} - -declare dso_local i64 @get_time() #2 - -declare dso_local i32 @cudaThreadSynchronize() #2 - -declare dso_local i32 @printf(i8*, ...) #2 - -declare dso_local i32 @cudaMalloc(i8**, i64) #2 - -declare dso_local void @checkCUDAError(i8*) #2 - -declare dso_local i32 @cudaMemcpy(i8*, i8*, i64, i32) #2 - -declare dso_local i32 @__cudaPushCallConfiguration(i64, i32, i64, i32, i64, i8*) #2 - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN4dim3C2Ejjj(%struct.dim3* %this, i32 %vx, i32 %vy, i32 %vz) unnamed_addr #3 comdat align 2 { -entry: - %this.addr = alloca %struct.dim3*, align 8 - %vx.addr = alloca i32, align 4 - %vy.addr = alloca i32, align 4 - %vz.addr = alloca i32, align 4 - store %struct.dim3* %this, %struct.dim3** %this.addr, align 8 - store i32 %vx, i32* %vx.addr, align 4 - store i32 %vy, i32* %vy.addr, align 4 - store i32 %vz, i32* %vz.addr, align 4 - %this1 = load %struct.dim3*, %struct.dim3** %this.addr, align 8 - %x = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 0 - %0 = load i32, i32* %vx.addr, align 4 - store i32 %0, i32* %x, align 4 - %y = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 1 - %1 = load i32, i32* %vy.addr, align 4 - store i32 %1, i32* %y, align 4 - %z = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 2 - %2 = load i32, i32* %vz.addr, align 4 - store i32 %2, i32* %z, align 4 - ret void -} - -declare dso_local i32 @cudaFree(i8*) #2 - -define internal void @__cuda_register_globals(i8** %0) { -entry: - %1 = call i32 @__cudaRegisterFunction(i8** %0, i8* bitcast (void (i64, %struct.knode*, i64, i64*, i64*, i64*, i64*, i32*, i32*, i32*, i32*)* @findRangeK to i8*), i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.19, i64 0, i64 0), i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.19, i64 0, i64 0), i32 -1, i8* null, i8* null, i8* null, i8* null, i32* null) - ret void -} - -declare dso_local i32 @__cudaRegisterFunction(i8**, i8*, i8*, i8*, i32, i8*, i8*, i8*, i8*, i32*) - -declare dso_local i32 @__cudaRegisterVar(i8**, i8*, i8*, i8*, i32, i32, i32, i32) - -declare dso_local i8** @__cudaRegisterFatBinary(i8*) - -define internal void @__cuda_module_ctor(i8* %0) { -entry: - %1 = call i8** @__cudaRegisterFatBinary(i8* bitcast ({ i32, i32, i8*, i8* }* @__cuda_fatbin_wrapper to i8*)) - store i8** %1, i8*** @__cuda_gpubin_handle, align 8 - call void @__cuda_register_globals(i8** %1) - call void @__cudaRegisterFatBinaryEnd(i8** %1) - %2 = call i32 @atexit(void (i8*)* @__cuda_module_dtor) - ret void -} - -declare dso_local void @__cudaRegisterFatBinaryEnd(i8**) - -declare dso_local void @__cudaUnregisterFatBinary(i8**) - -define internal void @__cuda_module_dtor(i8* %0) { -entry: - %1 = load i8**, i8*** @__cuda_gpubin_handle, align 8 - call void @__cudaUnregisterFatBinary(i8** %1) - ret void -} - -declare dso_local i32 @atexit(void (i8*)*) - -attributes #0 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { argmemonly nounwind willreturn } -attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #3 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]} -!1 = !{i32 1, !"wchar_size", i32 4} -!2 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"} diff --git a/examples/btree/main.c b/examples/btree/main.c deleted file mode 100644 index 8ddf6d4..0000000 --- a/examples/btree/main.c +++ /dev/null @@ -1,2192 +0,0 @@ -// # ifdef __cplusplus -// extern "C" { -// # endif - -//========================================================================================================================================================================================================200 -//======================================================================================================================================================150 -//====================================================================================================100 -//==================================================50 - -//========================================================================================================================================================================================================200 -// INFORMATION -//========================================================================================================================================================================================================200 - -//======================================================================================================================================================150 -// UPDATE -//======================================================================================================================================================150 - -// 2009; Amittai Aviram; entire code written in C; -// 2010; Jordan Fix and Andrew Wilkes; code converted to CUDA; -// 2011.10; Lukasz G. Szafaryn; code converted to portable form, to C, OpenMP, -// CUDA, PGI versions; 2011.12; Lukasz G. Szafaryn; Split different versions for -// Rodinia. 2011.12; Lukasz G. Szafaryn; code converted to OpenCL; 2012.10; Ke -// Wang; Change it to non-interactive mode. Use command option read command from -// file. And also add output for easy verification among different platforms and -// devices.Merged into Rodinia main distribution 2.2. -//======================================================================================================================================================150 -// DESCRIPTION -//======================================================================================================================================================150 - -// Description - -//======================================================================================================================================================150 -// USE -//======================================================================================================================================================150 - -// EXAMPLE: -// ./b+tree file ./input/mil.txt command ./command.txt -// ...then enter any of the following commands after the prompt > : -// f -- Find the value under key -// p -- Print the path from the root to key k and its associated value -// t -- Print the B+ tree -// l -- Print the keys of the leaves (bottom row of the tree) -// v -- Toggle output of pointer addresses ("verbose") in tree and leaves. -// k -- Run bundled queries on the CPU and GPU (B+Tree) (Selects random -// values for each search) j -- Run a range search of bundled -// queries on the CPU and GPU (B+Tree) with the range of each search of size -// x -- Run a single search for value z on the GPU and CPU -// y -- Run a single range search for range a-b on the GPU and CPU -// q -- Quit. (Or use Ctl-D.) - -//======================================================================================================================================================150 -// END -//======================================================================================================================================================150 - -//========================================================================================================================================================================================================200 -// DEFINE/INCLUDE -//========================================================================================================================================================================================================200 - -//======================================================================================================================================================150 -// LIBRARIES -//======================================================================================================================================================150 - -#include // (in directory known to compiler) needed by INT_MIN, INT_MAX -#include // (in directory known to compiler) needed by printf, stderr -// #include // -// (in directory known to compiler) needed by ??? -#include // (in directory known to compiler) needed by log, pow -#include // (in directory known to compiler) needed by memset - -//======================================================================================================================================================150 -// COMMON -//======================================================================================================================================================150 - -#include "./common.h" // (in directory provided here) - -//======================================================================================================================================================150 -// DEFINE -//======================================================================================================================================================150 - -//======================================================================================================================================================150 -// UTILITIES -//======================================================================================================================================================150 - -#include "./util/num/num.h" // (in directory provided here) -#include "./util/timer/timer.h" // (in directory provided here) - -//======================================================================================================================================================150 -// KERNEL HEADERS -//======================================================================================================================================================150 - -#include "./kernel/kernel_gpu_cuda_wrapper.h" // (in directory provided here) -#include "./kernel/kernel_gpu_cuda_wrapper_2.h" // (in directory provided here) - -//======================================================================================================================================================150 -// HEADER -//======================================================================================================================================================150 - -#include "./main.h" // (in directory provided here) - -//======================================================================================================================================================150 -// END -//======================================================================================================================================================150 - -//========================================================================================================================================================================================================200 -// VARIABLES -//========================================================================================================================================================================================================200 - -// general variables -knode *knodes; -record *krecords; -char *mem; -long freeptr; -long malloc_size; -long size; -long maxheight; - -/* The order determines the maximum and minimum - * number of entries (keys and pointers) in any - * node. Every node has at most order - 1 keys and - * at least (roughly speaking) half that number. - * Every leaf has as many pointers to data as keys, - * and every internal node has one more pointer - * to a subtree than the number of keys. - * This global variable is initialized to the - * default value. - */ -int order = DEFAULT_ORDER; - -/* The queue is used to print the tree in - * level order, starting from the root - * printing each entire rank on a separate - * line, finishing with the leaves. - */ -node *queue = NULL; - -/* The user can toggle on and off the "verbose" - * property, which causes the pointer addresses - * to be printed out in hexadecimal notation - * next to their corresponding keys. - */ -bool verbose_output = false; - -//========================================================================================================================================================================================================200 -// FUNCTIONS -//========================================================================================================================================================================================================200 - -//======================================================================================================================================================150 -// Components -//======================================================================================================================================================150 - -void list_init(list_t *l, int32_t (*compare)(const void *key, const void *with), - void (*datum_delete)(void *)) { - l->head = l->tail = NULL; - l->length = 0; - l->compare = compare; - l->datum_delete = datum_delete; -} - -void list_delete(list_t *l) { - - list_item_t *li, *del; - - for (li = l->head; li;) { - - del = li; - li = li->next; - list_item_delete(del, l->datum_delete); - } - - l->head = l->tail = NULL; - l->length = 0; -} - -void list_reset(list_t *l) { list_delete(l); } - -void list_insert_item_head(list_t *l, list_item_t *i) { - if (l->head) { - i->next = l->head; - l->head->pred = i; - l->head = i; - l->head->pred = NULL; - } else { - l->head = l->tail = i; - i->next = i->pred = NULL; - } - l->length++; -} - -void list_insert_item_tail(list_t *l, list_item_t *i) { - if (l->head) { - l->tail->next = i; - i->pred = l->tail; - i->next = NULL; - l->tail = i; - } else { - l->head = l->tail = i; - i->next = i->pred = NULL; - } - l->length++; -} - -void list_insert_item_before(list_t *l, list_item_t *next, list_item_t *i) { - /* Assume next is actually in the list! */ - /* If it's not, we may lose the list. */ - if (l->head == next) { - i->next = next; - i->pred = NULL; - l->head = i; - next->pred = i; - } else { - i->next = next; - i->pred = next->pred; - next->pred->next = i; - next->pred = i; - } - l->length++; -} - -void list_insert_item_after(list_t *l, list_item_t *pred, list_item_t *i) { - /* Assume pred is actually in the list! */ - /* If it's not, we may lose the list. */ - if (l->tail == pred) { - i->pred = pred; - i->next = NULL; - l->tail = i; - pred->next = i; - } else { - i->pred = pred; - i->next = pred->next; - pred->next->pred = i; - pred->next = i; - } - l->length++; -} - -void list_insert_item_sorted(list_t *l, list_item_t *i) { - list_item_t *itr; - - if (l->head) { - for (itr = l->head; itr && l->compare(list_item_get_datum(i), - list_item_get_datum(itr)) < 0; - itr = itr->next) - ; - if (itr) { - i->next = itr; - i->pred = itr->pred; - itr->pred = i; - i->pred->next = i; - } else { - l->tail->next = i; - i->pred = l->tail; - i->next = NULL; - l->tail = i; - } - } else { - l->head = l->tail = i; - i->pred = i->next = NULL; - } - l->length++; -} - -void list_insert_head(list_t *l, void *v) { - list_item_t *i; - i = (list_item_t *)malloc(sizeof(*i)); - list_item_init(i, v); - if (l->head) { - i->next = l->head; - l->head->pred = i; - l->head = i; - l->head->pred = NULL; - } else { - l->head = l->tail = i; - i->next = i->pred = NULL; - } - l->length++; -} - -void list_insert_tail(list_t *l, void *v) { - list_item_t *i; - - i = (list_item_t *)malloc(sizeof(*i)); - list_item_init(i, v); - if (l->head) { - l->tail->next = i; - i->pred = l->tail; - i->next = NULL; - l->tail = i; - } else { - l->head = l->tail = i; - i->next = i->pred = NULL; - } - l->length++; -} - -void list_insert_before(list_t *l, list_item_t *next, void *v) { - list_item_t *i; - - i = (list_item_t *)malloc(sizeof(*i)); - list_item_init(i, v); - - /* Assume next is actually in the list! */ - /* If it's not, we may lose the list. */ - if (l->head == next) { - i->next = next; - i->pred = NULL; - l->head = i; - next->pred = i; - } else { - i->next = next; - i->pred = next->pred; - next->pred->next = i; - next->pred = i; - } - l->length++; -} - -void list_insert_after(list_t *l, list_item_t *pred, void *v) { - list_item_t *i; - - i = (list_item_t *)malloc(sizeof(*i)); - list_item_init(i, v); - - /* Assume pred is actually in the list! */ - /* If it's not, we may lose the list. */ - if (l->tail == pred) { - i->pred = pred; - i->next = NULL; - l->tail = i; - pred->next = i; - } else { - i->pred = pred; - i->next = pred->next; - pred->next->pred = i; - pred->next = i; - } - l->length++; -} - -void list_insert_sorted(list_t *l, void *v) { - list_item_t *itr; - list_item_t *i; - - i = (list_item_t *)malloc(sizeof(*i)); - list_item_init(i, v); - - if (l->head) { - for (itr = l->head; itr && l->compare(list_item_get_datum(i), - list_item_get_datum(itr)) < 0; - itr = itr->next) - ; - if (itr) { - i->next = itr; - i->pred = itr->pred; - itr->pred = i; - i->pred->next = i; - } else { - l->tail->next = i; - i->pred = l->tail; - i->next = NULL; - l->tail = i; - } - } else { - l->head = l->tail = i; - i->pred = i->next = NULL; - } - l->length++; -} - -void list_remove_item(list_t *l, list_item_t *i) { - if (i == l->head) { - l->head = l->head->next; - if (l->head) - l->head->pred = NULL; - else - l->tail = NULL; - } else if (i == l->tail) { - l->tail = l->tail->pred; - l->tail->next = NULL; - } else { - i->pred->next = i->next; - i->next->pred = i->pred; - } - l->length--; - list_item_delete(i, l->datum_delete); -} - -void list_remove_head(list_t *l) { list_remove_item(l, l->head); } - -void list_remove_tail(list_t *l) { list_remove_item(l, l->tail); } - -list_item_t *list_find_item(list_t *l, void *datum) { - list_item_t *li; - - for (li = l->head; li && l->compare(datum, list_item_get_datum(li)); - li = li->next) - ; - - return li; -} - -list_item_t *list_get_head_item(list_t *l) { return l->head; } - -list_item_t *list_get_tail_item(list_t *l) { return l->tail; } - -void *list_find(list_t *l, void *datum) { - list_item_t *li; - - for (li = l->head; li && l->compare(datum, list_item_get_datum(li)); - li = li->next) - ; - - return li ? li->datum : NULL; -} - -void *list_get_head(list_t *l) { return l->head ? l->head->datum : NULL; } - -void *list_get_tail(list_t *l) { return l->tail ? l->tail->datum : NULL; } - -uint32_t list_get_length(list_t *l) { return l->length; } - -bool list_is_empty(list_t *l) { return (l->length == 0); } - -bool list_not_empty(list_t *l) { return (l->length != 0); } - -void list_visit_items(list_t *l, void (*visitor)(void *v)) { - list_item_t *li; - - for (li = l->head; li; li = li->next) - visitor(list_item_get_datum(li)); -} - -void list_item_init(list_item_t *li, void *datum) { - li->pred = li->next = NULL; - li->datum = datum; -} - -void list_item_delete(list_item_t *li, void (*datum_delete)(void *datum)) { - if (datum_delete) { - datum_delete(li->datum); - } - - free(li); -} - -void *list_item_get_datum(list_item_t *li) { return li->datum; } - -void list_iterator_init(list_t *l, list_iterator_t *li) { - *li = l ? l->head : NULL; -} - -void list_iterator_delete(list_iterator_t *li) { *li = NULL; } - -void list_iterator_next(list_iterator_t *li) { - if (*li) - *li = (*li)->next; -} - -void list_iterator_prev(list_iterator_t *li) { - if (*li) - *li = (*li)->pred; -} - -void *list_iterator_get_datum(list_iterator_t *li) { - return *li ? (*li)->datum : NULL; -} - -bool list_iterator_is_valid(list_iterator_t *li) { return (*li != NULL); } - -void list_reverse_iterator_init(list_t *l, list_reverse_iterator_t *li) { - *li = l ? l->tail : NULL; -} - -void list_reverse_iterator_delete(list_reverse_iterator_t *li) { *li = NULL; } - -void list_reverse_iterator_next(list_reverse_iterator_t *li) { - if (*li) - *li = (*li)->pred; -} - -void list_reverse_iterator_prev(list_reverse_iterator_t *li) { - if (*li) - *li = (*li)->next; -} - -void *list_reverse_iterator_get_datum(list_reverse_iterator_t *li) { - return *li ? (*li)->datum : NULL; -} - -bool list_reverse_iterator_is_valid(list_reverse_iterator_t *li) { - return (li != NULL); -} - -//======================================================================================================================================================150 -// OUTPUT AND UTILITIES -//======================================================================================================================================================150 - -/* */ -void *kmalloc(int size) { - - // printf("size: %d, current offset: %p\n",size,freeptr); - void *r = (void *)freeptr; - freeptr += size; - if (freeptr > malloc_size + (long)mem) { - printf("Memory Overflow\n"); - exit(1); - } - return r; -} - -// transforms the current B+ Tree into a single, contiguous block of memory to -// be used on the GPU -long transform_to_cuda(node *root, bool verbose) { - - struct timeval one, two; - double time; - gettimeofday(&one, NULL); - long max_nodes = (long)(pow(order, log(size) / log(order / 2.0) - 1) + 1); - malloc_size = size * sizeof(record) + max_nodes * sizeof(knode); - mem = (char *)malloc(malloc_size); - if (mem == NULL) { - printf("Initial malloc error\n"); - exit(1); - } - freeptr = (long)mem; - - krecords = (record *)kmalloc(size * sizeof(record)); - // printf("%d records\n", size); - knodes = (knode *)kmalloc(max_nodes * sizeof(knode)); - // printf("%d knodes\n", max_nodes); - - queue = NULL; - enqueue(root); - node *n; - knode *k; - int i; - long nodeindex = 0; - long recordindex = 0; - long queueindex = 0; - knodes[0].location = nodeindex++; - - while (queue != NULL) { - n = dequeue(); - k = &knodes[queueindex]; - k->location = queueindex++; - k->is_leaf = n->is_leaf; - k->num_keys = n->num_keys + 2; - // start at 1 because 0 is set to INT_MIN - k->keys[0] = INT_MIN; - k->keys[k->num_keys - 1] = INT_MAX; - for (i = k->num_keys; i < order; i++) - k->keys[i] = INT_MAX; - if (!k->is_leaf) { - k->indices[0] = nodeindex++; - // if(k->indices[0]>3953){ - // printf("ERROR: %d\n", k->indices[0]); - // } - for (i = 1; i < k->num_keys - 1; i++) { - k->keys[i] = n->keys[i - 1]; - enqueue((node *)n->pointers[i - 1]); - k->indices[i] = nodeindex++; - // if(k->indices[i]>3953){ - // printf("ERROR 1: %d\n", k->indices[i]); - // } - // knodes[nodeindex].location = nodeindex++; - } - // for final point of n - enqueue((node *)n->pointers[i - 1]); - } else { - k->indices[0] = 0; - for (i = 1; i < k->num_keys - 1; i++) { - k->keys[i] = n->keys[i - 1]; - krecords[recordindex].value = ((record *)n->pointers[i - 1])->value; - k->indices[i] = recordindex++; - // if(k->indices[i]>3953){ - // printf("ERROR 2: %d\n", k->indices[i]); - // } - } - } - - k->indices[k->num_keys - 1] = queueindex; - // if(k->indices[k->num_keys-1]>3953){ - // printf("ERROR 3: %d\n", k->indices[k->num_keys-1]); - // } - - if (verbose) { - printf("Successfully created knode with index %d\n", k->location); - printf("Is Leaf: %d, Num Keys: %d\n", k->is_leaf, k->num_keys); - printf("Pointers: "); - for (i = 0; i < k->num_keys; i++) - printf("%d | ", k->indices[i]); - printf("\nKeys: "); - for (i = 0; i < k->num_keys; i++) - printf("%d | ", k->keys[i]); - printf("\n\n"); - } - } - long mem_used = size * sizeof(record) + (nodeindex) * sizeof(knode); - if (verbose) { - for (i = 0; i < size; i++) - printf("%d ", krecords[i].value); - printf("\nNumber of records = %d, sizeof(record)=%d, total=%d\n", size, - sizeof(record), size * sizeof(record)); - printf("Number of knodes = %d, sizeof(knode)=%d, total=%d\n", nodeindex, - sizeof(knode), (nodeindex) * sizeof(knode)); - printf("\nDone Transformation. Mem used: %d\n", mem_used); - } - gettimeofday(&two, NULL); - double oneD = one.tv_sec + (double)one.tv_usec * .000001; - double twoD = two.tv_sec + (double)two.tv_usec * .000001; - time = twoD - oneD; - printf("Tree transformation took %f\n", time); - - return mem_used; -} - -/* */ -list_t *findRange(node *root, int start, int end) { - - int i; - node *c = find_leaf(root, start, false); - - if (c == NULL) - return NULL; - - list_t *retList = (list_t *)malloc(sizeof(list_t)); - list_init(retList, NULL, NULL); - - int counter = 0; - bool cont = true; - while (cont && c != 0) { - cont = false; - for (i = 0; i < c->num_keys; i++) { - if (c->keys[i] >= start && c->keys[i] <= end) { - // list_insert_tail(retList,(record *)c->pointers[i]); - counter++; - cont = true; - } else { - cont = false; - break; - } - } - c = (node *)c->pointers[order - 1]; - } - return retList; -} - -/* First message to the user. */ -void usage_1(void) { - - printf("B+ Tree of Order %d.\n", order); - printf("\tAmittai Aviram -- amittai.aviram@yale.edu Version %s\n", Version); - printf("\tfollowing Silberschatz, Korth, Sidarshan, Database Concepts, 5th " - "ed.\n\n"); - printf("To build a B+ tree of a different order, start again and enter the " - "order\n"); - printf("as an integer argument: bpt . "); - printf("3 <= order <=20\n"); - printf("To start with input from a file of newline-delimited integers, start " - "again and enter\n"); - printf("the order followed by the filename: bpt .\n"); -} - -/* Second message to the user. */ -void usage_2(void) { - - printf("Enter any of the following commands after the prompt > :\n"); - printf("\ti -- Insert (an integer) as both key and value).\n"); - printf("\tf -- Find the value under key .\n"); - printf("\tp -- Print the path from the root to key k and its associated " - "value.\n"); - printf("\td -- Delete key and its associated value.\n"); - printf("\tx -- Destroy the whole tree. Start again with an empty tree of " - "the same order.\n"); - printf("\tt -- Print the B+ tree.\n"); - printf("\tl -- Print the keys of the leaves (bottom row of the tree).\n"); - printf("\tv -- Toggle output of pointer addresses (\"verbose\") in tree and " - "leaves.\n"); - printf("\tq -- Quit. (Or use Ctl-D.)\n"); - printf("\t? -- Print this help message.\n"); -} - -/* Helper function for printing the tree out. See print_tree. */ -void enqueue(node *new_node) { - node *c; - if (queue == NULL) { - queue = new_node; - queue->next = NULL; - } else { - c = queue; - while (c->next != NULL) { - c = c->next; - } - c->next = new_node; - new_node->next = NULL; - } -} - -/* Helper function for printing the tree out. See print_tree. */ -node *dequeue(void) { - node *n = queue; - queue = queue->next; - n->next = NULL; - return n; -} - -/* Prints the bottom row of keys of the tree (with their respective pointers, if - * the verbose_output flag is set. */ -void print_leaves(node *root) { - int i; - node *c = root; - if (root == NULL) { - printf("Empty tree.\n"); - return; - } - while (!c->is_leaf) - c = (node *)c->pointers[0]; - while (true) { - for (i = 0; i < c->num_keys; i++) { - if (verbose_output) - // printf("%x ", (unsigned int)c->pointers[i]); - printf("%d ", c->keys[i]); - } - if (verbose_output) - // printf("%x ", (unsigned int)c->pointers[order - 1]); - if (c->pointers[order - 1] != NULL) { - printf(" | "); - c = (node *)c->pointers[order - 1]; - } else - break; - } - printf("\n"); -} - -/* Utility function to give the height of the tree, which length in number of - * edges of the path from the root to any leaf. */ -int height(node *root) { - int h = 0; - node *c = root; - while (!c->is_leaf) { - c = (node *)c->pointers[0]; - h++; - } - return h; -} - -/* Utility function to give the length in edges of the path from any node to the - * root. */ -int path_to_root(node *root, node *child) { - int length = 0; - node *c = child; - while (c != root) { - c = c->parent; - length++; - } - return length; -} - -/* Prints the B+ tree in the command line in level (rank) order, with the keys - * in each node and the '|' symbol to separate nodes. With the verbose_output - * flag set. the values of the pointers corresponding to the keys also appear - * next to their respective keys, in hexadecimal notation. */ -void print_tree(node *root) { - - node *n = NULL; - int i = 0; - int rank = 0; - int new_rank = 0; - - if (root == NULL) { - printf("Empty tree.\n"); - return; - } - queue = NULL; - enqueue(root); - while (queue != NULL) { - n = dequeue(); - if (n->parent != NULL && n == n->parent->pointers[0]) { - new_rank = path_to_root(root, n); - if (new_rank != rank) { - rank = new_rank; - printf("\n"); - } - } - if (verbose_output) - printf("(%x)", n); - for (i = 0; i < n->num_keys; i++) { - if (verbose_output) - printf("%x ", n->pointers[i]); - printf("%d ", n->keys[i]); - } - if (!n->is_leaf) - for (i = 0; i <= n->num_keys; i++) - enqueue((node *)n->pointers[i]); - if (verbose_output) { - if (n->is_leaf) - printf("%x ", n->pointers[order - 1]); - else - printf("%x ", n->pointers[n->num_keys]); - } - printf("| "); - } - printf("\n"); -} - -/* Traces the path from the root to a leaf, searching by key. Displays - * information about the path if the verbose flag is set. Returns the leaf - * containing the given key. */ -node *find_leaf(node *root, int key, bool verbose) { - - int i = 0; - node *c = root; - if (c == NULL) { - if (verbose) - printf("Empty tree.\n"); - return c; - } - while (!c->is_leaf) { - if (verbose) { - printf("["); - for (i = 0; i < c->num_keys - 1; i++) - printf("%d ", c->keys[i]); - printf("%d] ", c->keys[i]); - } - i = 0; - while (i < c->num_keys) { - if (key >= c->keys[i]) - i++; - else - break; - } - if (verbose) - printf("%d ->\n", i); - c = (node *)c->pointers[i]; - } - if (verbose) { - printf("Leaf ["); - for (i = 0; i < c->num_keys - 1; i++) - printf("%d ", c->keys[i]); - printf("%d] ->\n", c->keys[i]); - } - return c; -} - -/* Finds and returns the record to which a key refers. */ -record *find(node *root, int key, bool verbose) { - - int i = 0; - node *c = find_leaf(root, key, verbose); - if (c == NULL) - return NULL; - for (i = 0; i < c->num_keys; i++) - if (c->keys[i] == key) - break; - if (i == c->num_keys) - return NULL; - else - return (record *)c->pointers[i]; -} - -/* Finds the appropriate place to split a node that is too big into two. */ -int cut(int length) { - if (length % 2 == 0) - return length / 2; - else - return length / 2 + 1; -} - -//======================================================================================================================================================150 -// INSERTION -//======================================================================================================================================================150 - -/* Creates a new record to hold the value to which a key refers. */ -record *make_record(int value) { - record *new_record = (record *)malloc(sizeof(record)); - if (new_record == NULL) { - perror("Record creation."); - exit(EXIT_FAILURE); - } else { - new_record->value = value; - } - return new_record; -} - -/* Creates a new general node, which can be adapted to serve as either a leaf or - * an internal node. */ -node *make_node(void) { - node *new_node; - new_node = (node *)malloc(sizeof(node)); - if (new_node == NULL) { - perror("Node creation."); - exit(EXIT_FAILURE); - } - new_node->keys = (int *)malloc((order - 1) * sizeof(int)); - if (new_node->keys == NULL) { - perror("New node keys array."); - exit(EXIT_FAILURE); - } - new_node->pointers = (void **)malloc(order * sizeof(void *)); - if (new_node->pointers == NULL) { - perror("New node pointers array."); - exit(EXIT_FAILURE); - } - new_node->is_leaf = false; - new_node->num_keys = 0; - new_node->parent = NULL; - new_node->next = NULL; - return new_node; -} - -/* Creates a new leaf by creating a node and then adapting it appropriately. */ -node *make_leaf(void) { - node *leaf = make_node(); - leaf->is_leaf = true; - return leaf; -} - -/* Helper function used in insert_into_parent to find the index of the parent's - * pointer to the node to the left of the key to be inserted. */ -int get_left_index(node *parent, node *left) { - - int left_index = 0; - while (left_index <= parent->num_keys && parent->pointers[left_index] != left) - left_index++; - return left_index; -} - -/* Inserts a new pointer to a record and its corresponding key into a leaf. - * Returns the altered leaf. */ -node *insert_into_leaf(node *leaf, int key, record *pointer) { - - int i, insertion_point; - - insertion_point = 0; - while (insertion_point < leaf->num_keys && leaf->keys[insertion_point] < key) - insertion_point++; - - for (i = leaf->num_keys; i > insertion_point; i--) { - leaf->keys[i] = leaf->keys[i - 1]; - leaf->pointers[i] = leaf->pointers[i - 1]; - } - leaf->keys[insertion_point] = key; - leaf->pointers[insertion_point] = pointer; - leaf->num_keys++; - return leaf; -} - -/* Inserts a new key and pointer to a new record into a leaf so as to exceed the - * tree's order, causing the leaf to be split in half. */ -node *insert_into_leaf_after_splitting(node *root, node *leaf, int key, - record *pointer) { - - node *new_leaf; - int *temp_keys; - void **temp_pointers; - int insertion_index, split, new_key, i, j; - - new_leaf = make_leaf(); - - temp_keys = (int *)malloc(order * sizeof(int)); - if (temp_keys == NULL) { - perror("Temporary keys array."); - exit(EXIT_FAILURE); - } - - temp_pointers = (void **)malloc(order * sizeof(void *)); - if (temp_pointers == NULL) { - perror("Temporary pointers array."); - exit(EXIT_FAILURE); - } - - insertion_index = 0; - while (leaf->keys[insertion_index] < key && insertion_index < order - 1) - insertion_index++; - - for (i = 0, j = 0; i < leaf->num_keys; i++, j++) { - if (j == insertion_index) - j++; - temp_keys[j] = leaf->keys[i]; - temp_pointers[j] = leaf->pointers[i]; - } - - temp_keys[insertion_index] = key; - temp_pointers[insertion_index] = pointer; - - leaf->num_keys = 0; - - split = cut(order - 1); - - for (i = 0; i < split; i++) { - leaf->pointers[i] = temp_pointers[i]; - leaf->keys[i] = temp_keys[i]; - leaf->num_keys++; - } - - for (i = split, j = 0; i < order; i++, j++) { - new_leaf->pointers[j] = temp_pointers[i]; - new_leaf->keys[j] = temp_keys[i]; - new_leaf->num_keys++; - } - - free(temp_pointers); - free(temp_keys); - - new_leaf->pointers[order - 1] = leaf->pointers[order - 1]; - leaf->pointers[order - 1] = new_leaf; - - for (i = leaf->num_keys; i < order - 1; i++) - leaf->pointers[i] = NULL; - for (i = new_leaf->num_keys; i < order - 1; i++) - new_leaf->pointers[i] = NULL; - - new_leaf->parent = leaf->parent; - new_key = new_leaf->keys[0]; - - return insert_into_parent(root, leaf, new_key, new_leaf); -} - -/* Inserts a new key and pointer to a node into a node into which these can fit - * without violating the B+ tree properties. */ -node *insert_into_node(node *root, node *n, int left_index, int key, - node *right) { - - int i; - - for (i = n->num_keys; i > left_index; i--) { - n->pointers[i + 1] = n->pointers[i]; - n->keys[i] = n->keys[i - 1]; - } - n->pointers[left_index + 1] = right; - n->keys[left_index] = key; - n->num_keys++; - return root; -} - -/* Inserts a new key and pointer to a node into a node, causing the node's size - * to exceed the order, and causing the node to split into two. */ -node *insert_into_node_after_splitting(node *root, node *old_node, - int left_index, int key, node *right) { - - int i, j, split, k_prime; - node *new_node, *child; - int *temp_keys; - node **temp_pointers; - - /* First create a temporary set of keys and pointers - * to hold everything in order, including - * the new key and pointer, inserted in their - * correct places. - * Then create a new node and copy half of the - * keys and pointers to the old node and - * the other half to the new. - */ - - temp_pointers = (node **)malloc((order + 1) * sizeof(node *)); - if (temp_pointers == NULL) { - perror("Temporary pointers array for splitting nodes."); - exit(EXIT_FAILURE); - } - temp_keys = (int *)malloc(order * sizeof(int)); - if (temp_keys == NULL) { - perror("Temporary keys array for splitting nodes."); - exit(EXIT_FAILURE); - } - - for (i = 0, j = 0; i < old_node->num_keys + 1; i++, j++) { - if (j == left_index + 1) - j++; - temp_pointers[j] = (node *)old_node->pointers[i]; - } - - for (i = 0, j = 0; i < old_node->num_keys; i++, j++) { - if (j == left_index) - j++; - temp_keys[j] = old_node->keys[i]; - } - - temp_pointers[left_index + 1] = right; - temp_keys[left_index] = key; - - /* Create the new node and copy - * half the keys and pointers to the - * old and half to the new. - */ - split = cut(order); - new_node = make_node(); - old_node->num_keys = 0; - for (i = 0; i < split - 1; i++) { - old_node->pointers[i] = temp_pointers[i]; - old_node->keys[i] = temp_keys[i]; - old_node->num_keys++; - } - old_node->pointers[i] = temp_pointers[i]; - k_prime = temp_keys[split - 1]; - for (++i, j = 0; i < order; i++, j++) { - new_node->pointers[j] = temp_pointers[i]; - new_node->keys[j] = temp_keys[i]; - new_node->num_keys++; - } - new_node->pointers[j] = temp_pointers[i]; - free(temp_pointers); - free(temp_keys); - new_node->parent = old_node->parent; - for (i = 0; i <= new_node->num_keys; i++) { - child = (node *)new_node->pointers[i]; - child->parent = new_node; - } - - /* Insert a new key into the parent of the two - * nodes resulting from the split, with - * the old node to the left and the new to the right. - */ - - return insert_into_parent(root, old_node, k_prime, new_node); -} - -/* Inserts a new node (leaf or internal node) into the B+ tree. Returns the root - * of the tree after insertion. */ -node *insert_into_parent(node *root, node *left, int key, node *right) { - - int left_index; - node *parent; - - parent = left->parent; - - /* Case: new root. */ - - if (parent == NULL) - return insert_into_new_root(left, key, right); - - /* Case: leaf or node. (Remainder of - * function body.) - */ - - /* Find the parent's pointer to the left - * node. - */ - - left_index = get_left_index(parent, left); - - /* Simple case: the new key fits into the node. - */ - - if (parent->num_keys < order - 1) - return insert_into_node(root, parent, left_index, key, right); - - /* Harder case: split a node in order - * to preserve the B+ tree properties. - */ - - return insert_into_node_after_splitting(root, parent, left_index, key, right); -} - -/* Creates a new root for two subtrees and inserts the appropriate key into the - * new root. */ -node *insert_into_new_root(node *left, int key, node *right) { - - node *root = make_node(); - root->keys[0] = key; - root->pointers[0] = left; - root->pointers[1] = right; - root->num_keys++; - root->parent = NULL; - left->parent = root; - right->parent = root; - return root; -} - -/* First insertion: start a new tree. */ -node *start_new_tree(int key, record *pointer) { - - node *root = make_leaf(); - root->keys[0] = key; - root->pointers[0] = pointer; - root->pointers[order - 1] = NULL; - root->parent = NULL; - root->num_keys++; - return root; -} - -/* Master insertion function. Inserts a key and an associated value into the B+ - * tree, causing the tree to be adjusted however necessary to maintain the B+ - * tree properties. */ -node *insert(node *root, int key, int value) { - - record *pointer; - node *leaf; - - /* The current implementation ignores duplicates. */ - if (find(root, key, false) != NULL) - return root; - - /* Create a new record for the value. */ - pointer = make_record(value); - - /* Case: the tree does not exist yet. Start a new tree. */ - if (root == NULL) - return start_new_tree(key, pointer); - - /* Case: the tree already exists. (Rest of function body.) */ - leaf = find_leaf(root, key, false); - - /* Case: leaf has room for key and pointer. */ - if (leaf->num_keys < order - 1) { - leaf = insert_into_leaf(leaf, key, pointer); - return root; - } - - /* Case: leaf must be split. */ - return insert_into_leaf_after_splitting(root, leaf, key, pointer); -} - -//======================================================================================================================================================150 -// DELETION -//======================================================================================================================================================150 - -/* Utility function for deletion. Retrieves the index of a node's nearest - * neighbor (sibling) to the left if one exists. If not (the node is the - * leftmost child), returns -1 to signify this special case. */ -int get_neighbor_index(node *n) { - - int i; - - /* Return the index of the key to the left - * of the pointer in the parent pointing - * to n. - * If n is the leftmost child, this means - * return -1. - */ - for (i = 0; i <= n->parent->num_keys; i++) - if (n->parent->pointers[i] == n) - return i - 1; - - // Error state. - printf("Search for nonexistent pointer to node in parent.\n"); - // printf("Node: %#x\n", (unsigned int)n); - exit(EXIT_FAILURE); -} - -/* */ -node *remove_entry_from_node(node *n, int key, node *pointer) { - - int i, num_pointers; - - // Remove the key and shift other keys accordingly. - i = 0; - while (n->keys[i] != key) - i++; - for (++i; i < n->num_keys; i++) - n->keys[i - 1] = n->keys[i]; - - // Remove the pointer and shift other pointers accordingly. - // First determine number of pointers. - num_pointers = n->is_leaf ? n->num_keys : n->num_keys + 1; - i = 0; - while (n->pointers[i] != pointer) - i++; - for (++i; i < num_pointers; i++) - n->pointers[i - 1] = n->pointers[i]; - - // One key fewer. - n->num_keys--; - - // Set the other pointers to NULL for tidiness. - // A leaf uses the last pointer to point to the next leaf. - if (n->is_leaf) - for (i = n->num_keys; i < order - 1; i++) - n->pointers[i] = NULL; - else - for (i = n->num_keys + 1; i < order; i++) - n->pointers[i] = NULL; - - return n; -} - -/* */ -node *adjust_root(node *root) { - - node *new_root; - - /* Case: nonempty root. - * Key and pointer have already been deleted, - * so nothing to be done. - */ - - if (root->num_keys > 0) - return root; - - /* Case: empty root. - */ - - // If it has a child, promote - // the first (only) child - // as the new root. - - if (!root->is_leaf) { - new_root = (node *)root->pointers[0]; - new_root->parent = NULL; - } - - // If it is a leaf (has no children), - // then the whole tree is empty. - - else - new_root = NULL; - - free(root->keys); - free(root->pointers); - free(root); - - return new_root; -} - -/* Coalesces a node that has become too small after deletion with a neighboring - * node that can accept the additional entries without exceeding the maximum. */ -node *coalesce_nodes(node *root, node *n, node *neighbor, int neighbor_index, - int k_prime) { - - int i, j, neighbor_insertion_index, n_start, n_end, new_k_prime; - node *tmp; - bool split; - - /* Swap neighbor with node if node is on the - * extreme left and neighbor is to its right. - */ - - if (neighbor_index == -1) { - tmp = n; - n = neighbor; - neighbor = tmp; - } - - /* Starting point in the neighbor for copying - * keys and pointers from n. - * Recall that n and neighbor have swapped places - * in the special case of n being a leftmost child. - */ - - neighbor_insertion_index = neighbor->num_keys; - - /* - * Nonleaf nodes may sometimes need to remain split, - * if the insertion of k_prime would cause the resulting - * single coalesced node to exceed the limit order - 1. - * The variable split is always false for leaf nodes - * and only sometimes set to true for nonleaf nodes. - */ - - split = false; - - /* Case: nonleaf node. - * Append k_prime and the following pointer. - * If there is room in the neighbor, append - * all pointers and keys from the neighbor. - * Otherwise, append only cut(order) - 2 keys and - * cut(order) - 1 pointers. - */ - - if (!n->is_leaf) { - - /* Append k_prime. - */ - - neighbor->keys[neighbor_insertion_index] = k_prime; - neighbor->num_keys++; - - /* Case (default): there is room for all of n's keys and pointers - * in the neighbor after appending k_prime. - */ - - n_end = n->num_keys; - - /* Case (special): k cannot fit with all the other keys and pointers - * into one coalesced node. - */ - n_start = 0; // Only used in this special case. - if (n->num_keys + neighbor->num_keys >= order) { - split = true; - n_end = cut(order) - 2; - } - - for (i = neighbor_insertion_index + 1, j = 0; j < n_end; i++, j++) { - neighbor->keys[i] = n->keys[j]; - neighbor->pointers[i] = n->pointers[j]; - neighbor->num_keys++; - n->num_keys--; - n_start++; - } - - /* The number of pointers is always - * one more than the number of keys. - */ - - neighbor->pointers[i] = n->pointers[j]; - - /* If the nodes are still split, remove the first key from - * n. - */ - if (split) { - new_k_prime = n->keys[n_start]; - for (i = 0, j = n_start + 1; i < n->num_keys; i++, j++) { - n->keys[i] = n->keys[j]; - n->pointers[i] = n->pointers[j]; - } - n->pointers[i] = n->pointers[j]; - n->num_keys--; - } - - /* All children must now point up to the same parent. - */ - - for (i = 0; i < neighbor->num_keys + 1; i++) { - tmp = (node *)neighbor->pointers[i]; - tmp->parent = neighbor; - } - } - - /* In a leaf, append the keys and pointers of - * n to the neighbor. - * Set the neighbor's last pointer to point to - * what had been n's right neighbor. - */ - - else { - for (i = neighbor_insertion_index, j = 0; j < n->num_keys; i++, j++) { - neighbor->keys[i] = n->keys[j]; - neighbor->pointers[i] = n->pointers[j]; - neighbor->num_keys++; - } - neighbor->pointers[order - 1] = n->pointers[order - 1]; - } - - if (!split) { - root = delete_entry(root, n->parent, k_prime, n); - free(n->keys); - free(n->pointers); - free(n); - } else - for (i = 0; i < n->parent->num_keys; i++) - if (n->parent->pointers[i + 1] == n) { - n->parent->keys[i] = new_k_prime; - break; - } - - return root; -} - -/* Redistributes entries between two nodes when one has become too small after - * deletion but its neighbor is too big to append the small node's entries - * without exceeding the maximum */ -node *redistribute_nodes(node *root, node *n, node *neighbor, - int neighbor_index, int k_prime_index, int k_prime) { - - int i; - node *tmp; - - /* Case: n has a neighbor to the left. - * Pull the neighbor's last key-pointer pair over - * from the neighbor's right end to n's left end. - */ - - if (neighbor_index != -1) { - if (!n->is_leaf) - n->pointers[n->num_keys + 1] = n->pointers[n->num_keys]; - for (i = n->num_keys; i > 0; i--) { - n->keys[i] = n->keys[i - 1]; - n->pointers[i] = n->pointers[i - 1]; - } - if (!n->is_leaf) { - n->pointers[0] = neighbor->pointers[neighbor->num_keys]; - tmp = (node *)n->pointers[0]; - tmp->parent = n; - neighbor->pointers[neighbor->num_keys] = NULL; - n->keys[0] = k_prime; - n->parent->keys[k_prime_index] = neighbor->keys[neighbor->num_keys - 1]; - } else { - n->pointers[0] = neighbor->pointers[neighbor->num_keys - 1]; - neighbor->pointers[neighbor->num_keys - 1] = NULL; - n->keys[0] = neighbor->keys[neighbor->num_keys - 1]; - n->parent->keys[k_prime_index] = n->keys[0]; - } - } - - /* Case: n is the leftmost child. - * Take a key-pointer pair from the neighbor to the right. - * Move the neighbor's leftmost key-pointer pair - * to n's rightmost position. - */ - - else { - if (n->is_leaf) { - n->keys[n->num_keys] = neighbor->keys[0]; - n->pointers[n->num_keys] = neighbor->pointers[0]; - n->parent->keys[k_prime_index] = neighbor->keys[1]; - } else { - n->keys[n->num_keys] = k_prime; - n->pointers[n->num_keys + 1] = neighbor->pointers[0]; - tmp = (node *)n->pointers[n->num_keys + 1]; - tmp->parent = n; - n->parent->keys[k_prime_index] = neighbor->keys[0]; - } - for (i = 0; i < neighbor->num_keys; i++) { - neighbor->keys[i] = neighbor->keys[i + 1]; - neighbor->pointers[i] = neighbor->pointers[i + 1]; - } - if (!n->is_leaf) - neighbor->pointers[i] = neighbor->pointers[i + 1]; - } - - /* n now has one more key and one more pointer; - * the neighbor has one fewer of each. - */ - - n->num_keys++; - neighbor->num_keys--; - - return root; -} - -/* Deletes an entry from the B+ tree. Removes the record and its key and pointer - * from the leaf, and then makes all appropriate changes to preserve the B+ tree - * properties. */ -node *delete_entry(node *root, node *n, int key, void *pointer) { - - int min_keys; - node *neighbor; - int neighbor_index; - int k_prime_index, k_prime; - int capacity; - - // Remove key and pointer from node. - - n = remove_entry_from_node(n, key, (node *)pointer); - - /* Case: deletion from the root. - */ - - if (n == root) - return adjust_root(root); - - /* Case: deletion from a node below the root. - * (Rest of function body.) - */ - - /* Determine minimum allowable size of node, - * to be preserved after deletion. - */ - - min_keys = n->is_leaf ? cut(order - 1) : cut(order) - 1; - - /* Case: node stays at or above minimum. - * (The simple case.) - */ - - if (n->num_keys >= min_keys) - return root; - - /* Case: node falls below minimum. - * Either coalescence or redistribution - * is needed. - */ - - /* Find the appropriate neighbor node with which - * to coalesce. - * Also find the key (k_prime) in the parent - * between the pointer to node n and the pointer - * to the neighbor. - */ - - neighbor_index = get_neighbor_index(n); - k_prime_index = neighbor_index == -1 ? 0 : neighbor_index; - k_prime = n->parent->keys[k_prime_index]; - neighbor = neighbor_index == -1 ? (node *)n->parent->pointers[1] - : (node *)n->parent->pointers[neighbor_index]; - - capacity = n->is_leaf ? order : order - 1; - - /* Coalescence. */ - - if (neighbor->num_keys + n->num_keys < capacity) - return coalesce_nodes(root, n, neighbor, neighbor_index, k_prime); - - /* Redistribution. */ - - else - return redistribute_nodes(root, n, neighbor, neighbor_index, k_prime_index, - k_prime); -} - -/* Master deletion function. */ -node *deleteVal(node *root, int key) { - - node *key_leaf; - record *key_record; - - key_record = find(root, key, false); - key_leaf = find_leaf(root, key, false); - if (key_record != NULL && key_leaf != NULL) { - free(key_record); - root = delete_entry(root, key_leaf, key, key_record); - } - return root; -} - -/* */ -void destroy_tree_nodes(node *root) { - int i; - if (root->is_leaf) - for (i = 0; i < root->num_keys; i++) - free(root->pointers[i]); - else - for (i = 0; i < root->num_keys + 1; i++) - destroy_tree_nodes((node *)root->pointers[i]); - free(root->pointers); - free(root->keys); - free(root); -} - -/* */ -node *destroy_tree(node *root) { - destroy_tree_nodes(root); - return NULL; -} - -//======================================================================================================================================================150 -// END -//======================================================================================================================================================150 - -//========================================================================================================================================================================================================200 -// MAIN FUNCTION -//========================================================================================================================================================================================================200 - -int main(int argc, char **argv) { - - printf("WG size of kernel 1 & 2 = %d \n", DEFAULT_ORDER); - - // ------------------------------------------------------------60 - // figure out and display whether 32-bit or 64-bit architecture - // ------------------------------------------------------------60 - - // if(sizeof(int *)==8){ - // printf("64 bit machine\n"); - // } - // else if(sizeof(int *)==4){ - // printf("32 bit machine\n"); - // } - - // ------------------------------------------------------------60 - // set GPU - // ------------------------------------------------------------60 - - int device = 0; - cudaSetDevice(device); - printf("Selecting device %d\n", device); - - // ------------------------------------------------------------60 - // read inputs - // ------------------------------------------------------------60 - - // assing default values - int cur_arg; - int arch_arg; - arch_arg = 0; - int cores_arg; - cores_arg = 1; - char *input_file = NULL; - char *command_file = NULL; - char *output = "output.txt"; - FILE *pFile; - - // go through arguments - for (cur_arg = 1; cur_arg < argc; cur_arg++) { - // check if -file - if (strcmp(argv[cur_arg], "file") == 0) { - // check if value provided - if (argc >= cur_arg + 1) { - input_file = argv[cur_arg + 1]; - cur_arg = cur_arg + 1; - // value is not a number - } - // value not provided - else { - printf("ERROR: Missing value to -file parameter\n"); - return -1; - } - } else if (strcmp(argv[cur_arg], "command") == 0) { - // check if value provided - if (argc >= cur_arg + 1) { - command_file = argv[cur_arg + 1]; - cur_arg = cur_arg + 1; - // value is not a number - } - // value not provided - else { - printf("ERROR: Missing value to command parameter\n"); - return -1; - } - } - } - // Print configuration - if ((input_file == NULL) || (command_file == NULL)) - printf("Usage: ./b+tree file input_file command command_list\n"); - - // For debug - printf("Input File: %s \n", input_file); - printf("Command File: %s \n", command_file); - - FILE *commandFile; - long lSize; - char *commandBuffer; - size_t result; - - commandFile = fopen(command_file, "rb"); - if (commandFile == NULL) { - fputs("Command File error", stderr); - exit(1); - } - - // obtain file size: - fseek(commandFile, 0, SEEK_END); - lSize = ftell(commandFile); - rewind(commandFile); - - // allocate memory to contain the whole file: - commandBuffer = (char *)malloc(sizeof(char) * lSize); - if (commandBuffer == NULL) { - fputs("Command Buffer memory error", stderr); - exit(2); - } - - // copy the file into the buffer: - result = fread(commandBuffer, 1, lSize, commandFile); - if (result != lSize) { - fputs("Command file reading error", stderr); - exit(3); - } - - /* the whole file is now loaded in the memory buffer. */ - - // terminate - fclose(commandFile); - - // For Debug - char *sPointer = commandBuffer; - printf("Command Buffer: \n"); - printf("%s", commandBuffer); - // - - pFile = fopen(output, "w+"); - if (pFile == NULL) - fputs("Fail to open %s !\n", output); - fprintf(pFile, "******starting******\n"); - fclose(pFile); - - // ------------------------------------------------------------60 - // general variables - // ------------------------------------------------------------60 - - FILE *file_pointer; - node *root; - root = NULL; - record *r; - int input; - char instruction; - order = DEFAULT_ORDER; - verbose_output = false; - - // usage_1(); - // usage_2(); - - // ------------------------------------------------------------60 - // get input from file, if file provided - // ------------------------------------------------------------60 - - if (input_file != NULL) { - - printf("Getting input from file %s...\n", input_file); - - // open input file - file_pointer = fopen(input_file, "r"); - if (file_pointer == NULL) { - perror("Failure to open input file."); - exit(EXIT_FAILURE); - } - - // get # of numbers in the file - fscanf(file_pointer, "%d\n", &input); - size = input; - - // save all numbers - while (!feof(file_pointer)) { - fscanf(file_pointer, "%d\n", &input); - root = insert(root, input, input); - } - - // close file - fclose(file_pointer); - // print_tree(root); - // printf("Height of tree = %d\n", height(root)); - - } else { - printf("ERROR: Argument -file missing\n"); - return 0; - } - - // ------------------------------------------------------------60 - // get tree statistics - // ------------------------------------------------------------60 - - printf("Transforming data to a GPU suitable structure...\n"); - long mem_used = transform_to_cuda(root, 0); - maxheight = height(root); - long rootLoc = (long)knodes - (long)mem; - - // ------------------------------------------------------------60 - // process commands - // ------------------------------------------------------------60 - char *commandPointer = commandBuffer; - - printf("Waiting for command\n"); - printf("> "); - while (sscanf(commandPointer, "%c", &instruction) != EOF) { - commandPointer++; - switch (instruction) { - // ----------------------------------------40 - // Insert - // ----------------------------------------40 - - case 'i': { - scanf("%d", &input); - while (getchar() != (int)'\n') - ; - root = insert(root, input, input); - print_tree(root); - break; - } - - // ----------------------------------------40 - // n/a - // ----------------------------------------40 - - case 'f': { - } - - // ----------------------------------------40 - // find - // ----------------------------------------40 - - case 'p': { - scanf("%d", &input); - while (getchar() != (int)'\n') - ; - r = find(root, input, instruction == 'p'); - if (r == NULL) - printf("Record not found under key %d.\n", input); - else - printf("Record found: %d\n", r->value); - break; - } - - // ----------------------------------------40 - // delete value - // ----------------------------------------40 - - case 'd': { - scanf("%d", &input); - while (getchar() != (int)'\n') - ; - root = (node *)deleteVal(root, input); - print_tree(root); - break; - } - - // ----------------------------------------40 - // destroy tree - // ----------------------------------------40 - - case 'x': { - while (getchar() != (int)'\n') - ; - root = destroy_tree(root); - print_tree(root); - break; - } - - // ----------------------------------------40 - // print leaves - // ----------------------------------------40 - - case 'l': { - while (getchar() != (int)'\n') - ; - print_leaves(root); - break; - } - - // ----------------------------------------40 - // print tree - // ----------------------------------------40 - - case 't': { - while (getchar() != (int)'\n') - ; - print_tree(root); - break; - } - - // ----------------------------------------40 - // toggle verbose output - // ----------------------------------------40 - - case 'v': { - while (getchar() != (int)'\n') - ; - verbose_output = !verbose_output; - break; - } - - // ----------------------------------------40 - // quit - // ----------------------------------------40 - - case 'q': { - while (getchar() != (int)'\n') - ; - return EXIT_SUCCESS; - } - - // ----------------------------------------40 - // [GPU] find K (initK, findK) - // ----------------------------------------40 - - case 'k': { - - // get # of queries from user - int count; - sscanf(commandPointer, "%d", &count); - while (*commandPointer != 32 && commandPointer != '\n') - commandPointer++; - - printf("\n ******command: k count=%d \n", count); - if (count > 65535) { - printf("ERROR: Number of requested querries should be 65,535 at most. " - "(limited by # of CUDA blocks)\n"); - exit(0); - } - - // INPUT: records CPU allocation (setting pointer in mem variable) - record *records = (record *)mem; - long records_elem = (long)rootLoc / sizeof(record); - long records_mem = (long)rootLoc; - printf("records_elem=%d, records_unit_mem=%d, records_mem=%d\n", - (int)records_elem, (int)sizeof(record), (int)records_mem); - - // INPUT: knodes CPU allocation (setting pointer in mem variable) - knode *knodes = (knode *)((long)mem + (long)rootLoc); - long knodes_elem = ((long)(mem_used) - (long)rootLoc) / sizeof(knode); - long knodes_mem = (long)(mem_used) - (long)rootLoc; - printf("knodes_elem=%d, knodes_unit_mem=%d, knodes_mem=%d\n", - (int)knodes_elem, (int)sizeof(knode), (int)knodes_mem); - - // INPUT: currKnode CPU allocation - long *currKnode; - currKnode = (long *)malloc(count * sizeof(long)); - // INPUT: offset CPU initialization - memset(currKnode, 0, count * sizeof(long)); - - // INPUT: offset CPU allocation - long *offset; - offset = (long *)malloc(count * sizeof(long)); - // INPUT: offset CPU initialization - memset(offset, 0, count * sizeof(long)); - - // INPUT: keys CPU allocation - int *keys; - keys = (int *)malloc(count * sizeof(int)); - // INPUT: keys CPU initialization - int i; - for (i = 0; i < count; i++) { - keys[i] = (rand() / (float)RAND_MAX) * size; - } - - // OUTPUT: ans CPU allocation - record *ans = (record *)malloc(sizeof(record) * count); - // OUTPUT: ans CPU initialization - for (i = 0; i < count; i++) { - ans[i].value = -1; - } - - // CUDA kernel - kernel_gpu_cuda_wrapper(records, records_mem, knodes, knodes_elem, - knodes_mem, - - order, maxheight, count, - - currKnode, offset, keys, ans); - - /* printf("ans: \n"); */ - /* for(i = 0; i < count; i++){ */ - /* printf("%d ",ans[i].value); */ - /* } */ - - /* printf(" \n"); */ - - pFile = fopen(output, "aw+"); - if (pFile == NULL) { - fputs("Fail to open %s !\n", output); - } - - fprintf(pFile, "\n ******command: k count=%d \n", count); - for (i = 0; i < count; i++) { - fprintf(pFile, "%d %d\n", i, ans[i].value); - } - fprintf(pFile, " \n"); - fclose(pFile); - - // free memory - free(currKnode); - free(offset); - free(keys); - free(ans); - - // break out of case - break; - } - - // ----------------------------------------40 - // find range - // ----------------------------------------40 - - case 'r': { - int start, end; - scanf("%d", &start); - scanf("%d", &end); - if (start > end) { - input = start; - start = end; - end = input; - } - printf("For range %d to %d, ", start, end); - list_t *ansList; - ansList = findRange(root, start, end); - printf("%d records found\n", list_get_length(ansList)); - // list_iterator_t iter; - free(ansList); - break; - } - - // ----------------------------------------40 - // [GPU] find Range K (initK, findRangeK) - // ----------------------------------------40 - - case 'j': { - - // get # of queries from user - int count; - sscanf(commandPointer, "%d", &count); - while (*commandPointer != 32 && commandPointer != '\n') - commandPointer++; - - int rSize; - sscanf(commandPointer, "%d", &rSize); - while (*commandPointer != 32 && commandPointer != '\n') - commandPointer++; - - printf("\n******command: j count=%d, rSize=%d \n", count, rSize); - if (rSize > size || rSize < 0) { - printf("Search range size is larger than data set size %d.\n", - (int)size); - exit(0); - } - - // INPUT: knodes CPU allocation (setting pointer in mem variable) - knode *knodes = (knode *)((long)mem + (long)rootLoc); - long knodes_elem = ((long)(mem_used) - (long)rootLoc) / sizeof(knode); - long knodes_mem = (long)(mem_used) - (long)rootLoc; - printf("knodes_elem=%d, knodes_unit_mem=%d, knodes_mem=%d\n", - (int)knodes_elem, (int)sizeof(knode), (int)knodes_mem); - - // INPUT: currKnode CPU allocation - long *currKnode; - currKnode = (long *)malloc(count * sizeof(long)); - // INPUT: offset CPU initialization - memset(currKnode, 0, count * sizeof(long)); - - // INPUT: offset CPU allocation - long *offset; - offset = (long *)malloc(count * sizeof(long)); - // INPUT: offset CPU initialization - memset(offset, 0, count * sizeof(long)); - - // INPUT: lastKnode CPU allocation - long *lastKnode; - lastKnode = (long *)malloc(count * sizeof(long)); - // INPUT: offset CPU initialization - memset(lastKnode, 0, count * sizeof(long)); - - // INPUT: offset_2 CPU allocation - long *offset_2; - offset_2 = (long *)malloc(count * sizeof(long)); - // INPUT: offset CPU initialization - memset(offset_2, 0, count * sizeof(long)); - - // INPUT: start, end CPU allocation - int *start; - start = (int *)malloc(count * sizeof(int)); - int *end; - end = (int *)malloc(count * sizeof(int)); - // INPUT: start, end CPU initialization - int i; - for (i = 0; i < count; i++) { - start[i] = (rand() / (float)RAND_MAX) * size; - end[i] = start[i] + rSize; - if (end[i] >= size) { - start[i] = start[i] - (end[i] - size); - end[i] = size - 1; - } - } - - // INPUT: recstart, reclenght CPU allocation - int *recstart; - recstart = (int *)malloc(count * sizeof(int)); - int *reclength; - reclength = (int *)malloc(count * sizeof(int)); - // OUTPUT: ans CPU initialization - for (i = 0; i < count; i++) { - recstart[i] = 0; - reclength[i] = 0; - } - - // CUDA kernel - kernel_gpu_cuda_wrapper_2(knodes, knodes_elem, knodes_mem, - - order, maxheight, count, - - currKnode, offset, lastKnode, offset_2, start, - end, recstart, reclength); - - pFile = fopen(output, "aw+"); - if (pFile == NULL) { - fputs("Fail to open %s !\n", output); - } - - fprintf(pFile, "\n******command: j count=%d, rSize=%d \n", count, rSize); - for (i = 0; i < count; i++) { - fprintf(pFile, "%d %d %d\n", i, recstart[i], reclength[i]); - } - fprintf(pFile, " \n"); - fclose(pFile); - - // free memory - free(currKnode); - free(offset); - free(lastKnode); - free(offset_2); - free(start); - free(end); - free(recstart); - free(reclength); - - // break out of case - break; - } - - // ----------------------------------------40 - // default - // ----------------------------------------40 - - default: { - - // usage_2(); - break; - } - } - printf("> "); - } - printf("\n"); - - // ------------------------------------------------------------60 - // free remaining memory and exit - // ------------------------------------------------------------60 - - free(mem); - return EXIT_SUCCESS; -} - -//========================================================================================================================================================================================================200 -// END -//========================================================================================================================================================================================================200 - -// # ifdef __cplusplus -// } -// # endif diff --git a/examples/btree/run.sh b/examples/btree/run.sh deleted file mode 100755 index bcc5f79..0000000 --- a/examples/btree/run.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/bash -set -e -clang -c -emit-llvm util/timer/timer.c -clang -c -emit-llvm util/num/num.c -#clang -c -emit-llvm util/cuda/cuda.cu --cuda-gpu-arch=sm_61 -#clang -c -emit-llvm kernel/kernel_gpu_cuda_wrapper.cu --cuda-gpu-arch=sm_61 -#clang++ kernel/kernel_gpu_cuda_wrapper.cu --cuda-path=/usr/local/cuda-10.1 --cuda-gpu-arch=sm_61 -L/usr/local/cuda-10.1/lib64 -lcudart_static -ldl -lrt -pthread -save-temps -v -#clang++ kernel/kernel_gpu_cuda_wrapper_2.cu --cuda-path=/usr/local/cuda-10.1 --cuda-gpu-arch=sm_61 -L/usr/local/cuda-10.1/lib64 -lcudart_static -ldl -lrt -pthread -save-temps -v -clang -c -emit-llvm main.c - -llvm-as kernel_gpu_cuda_wrapper-cuda-nvptx64-nvidia-cuda-sm_61.ll -llvm-as kernel_gpu_cuda_wrapper_2-cuda-nvptx64-nvidia-cuda-sm_61.ll -llvm-as kernel_gpu_cuda_wrapper-host-x86_64-unknown-linux-gnu.ll -llvm-as kernel_gpu_cuda_wrapper_2-host-x86_64-unknown-linux-gnu.ll -../../build/compilation/kernelTranslator kernel_gpu_cuda_wrapper-cuda-nvptx64-nvidia-cuda-sm_61.bc kernel1.bc -../../build/compilation/kernelTranslator kernel_gpu_cuda_wrapper_2-cuda-nvptx64-nvidia-cuda-sm_61.bc kernel2.bc -../../build/compilation/hostTranslator kernel_gpu_cuda_wrapper-host-x86_64-unknown-linux-gnu.bc host1.bc -../../build/compilation/hostTranslator kernel_gpu_cuda_wrapper_2-host-x86_64-unknown-linux-gnu.bc host2.bc - -llc --relocation-model=pic --filetype=obj main.bc -llc --relocation-model=pic --filetype=obj cuda.bc -llc --relocation-model=pic --filetype=obj num.bc -llc --relocation-model=pic --filetype=obj timer.bc -llc --relocation-model=pic --filetype=obj kernel1.bc -llc --relocation-model=pic --filetype=obj kernel2.bc -llc --relocation-model=pic --filetype=obj host1.bc -llc --relocation-model=pic --filetype=obj host2.bc -export LD_LIBRARY_PATH=../../build/runtime:../../build/runtime/threadPool:$LD_LIBRARY_PATH -g++ -Wall -L../../build/runtime -L../../build/runtime/threadPool -o b+tree.out \ - -fPIC -no-pie main.o host1.o host2.o kernel1.o kernel2.o cuda.o num.o timer.o \ - -lc -lx86Runtime -lthreadPool -lpthread - -./b+tree.out file ../../rodinia-data/b+tree/mil.txt \ - command ../../rodinia-data/b+tree/command.txt -if grep -q "0 840187 6001" output.txt; then - echo "Pass" -else - echo "Error result" - exit 1 -fi diff --git a/examples/btree/util/cuda/cuda.cu b/examples/btree/util/cuda/cuda.cu deleted file mode 100755 index dafe6a0..0000000 --- a/examples/btree/util/cuda/cuda.cu +++ /dev/null @@ -1,75 +0,0 @@ -#ifdef __cplusplus -extern "C" { -#endif - -//===============================================================================================================================================================================================================200 -// SET_DEVICE CODE -//===============================================================================================================================================================================================================200 - -//======================================================================================================================================================150 -// INCLUDE/DEFINE -//======================================================================================================================================================150 - -#include "cuda.h" // (in library path specified to compiler) - -//======================================================================================================================================================150 -// FUNCTIONS -//======================================================================================================================================================150 - -//====================================================================================================100 -// SET DEVICE -//====================================================================================================100 - -void setdevice(void){ - - // variables - int num_devices; - int device; - - // work - cudaGetDeviceCount(&num_devices); - if (num_devices > 1) { - - // variables - int max_multiprocessors; - int max_device; - cudaDeviceProp properties; - - // initialize variables - max_multiprocessors = 0; - max_device = 0; - - for (device = 0; device < num_devices; device++) { - cudaGetDeviceProperties(&properties, device); - if (max_multiprocessors < properties.multiProcessorCount) { - max_multiprocessors = properties.multiProcessorCount; - max_device = device; - } - } - cudaSetDevice(max_device); - } - -} - -//====================================================================================================100 -// GET LAST ERROR -//====================================================================================================100 - -void checkCUDAError(const char *msg) -{ - cudaError_t err = cudaGetLastError(); - if( cudaSuccess != err) { - // fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString( err) ); - printf("Cuda error: %s: %s.\n", msg, cudaGetErrorString( err) ); - fflush(NULL); - exit(EXIT_FAILURE); - } -} - -//===============================================================================================================================================================================================================200 -// END -//===============================================================================================================================================================================================================200 - -#ifdef __cplusplus -} -#endif diff --git a/examples/btree/util/cuda/cuda.h b/examples/btree/util/cuda/cuda.h deleted file mode 100644 index b5ce6dc..0000000 --- a/examples/btree/util/cuda/cuda.h +++ /dev/null @@ -1,37 +0,0 @@ -#ifdef __cplusplus -extern "C" { -#endif - -//===============================================================================================================================================================================================================200 -// SET_DEVICE HEADER -//===============================================================================================================================================================================================================200 - -//======================================================================================================================================================150 -// INCLUDE/DEFINE -//======================================================================================================================================================150 - -#include // (in library path known to compiler) needed by printf - -//======================================================================================================================================================150 -// FUNCTION PROTOTYPES -//======================================================================================================================================================150 - -//====================================================================================================100 -// SET DEVICE -//====================================================================================================100 - -void setdevice(void); - -//====================================================================================================100 -// GET LAST ERROR -//====================================================================================================100 - -void checkCUDAError(const char *msg); - -//===============================================================================================================================================================================================================200 -// END SET_DEVICE HEADER -//===============================================================================================================================================================================================================200 - -#ifdef __cplusplus -} -#endif diff --git a/examples/btree/util/num/num.c b/examples/btree/util/num/num.c deleted file mode 100644 index 3b3a452..0000000 --- a/examples/btree/util/num/num.c +++ /dev/null @@ -1,55 +0,0 @@ -#ifdef __cplusplus -extern "C" { -#endif - -//===============================================================================================================================================================================================================200 -// DESCRIPTION -//===============================================================================================================================================================================================================200 - -// Returns: 0 if string does not represent integer -// 1 if string represents integer - -//===============================================================================================================================================================================================================200 -// NUM CODE -//===============================================================================================================================================================================================================200 - -//======================================================================================================================================================150 -// ISINTEGER FUNCTION -//======================================================================================================================================================150 - -int isInteger(char *str) { - - //====================================================================================================100 - // make sure it's not empty - //====================================================================================================100 - - if (*str == '\0') { - return 0; - } - - //====================================================================================================100 - // if any digit is not a number, return false - //====================================================================================================100 - - for (; *str != '\0'; str++) { - if (*str < 48 || - *str > - 57) { // digit characters (need to include . if checking for float) - return 0; - } - } - - //====================================================================================================100 - // it got past all my checks so I think it's a number - //====================================================================================================100 - - return 1; -} - -//===============================================================================================================================================================================================================200 -// END NUM CODE -//===============================================================================================================================================================================================================200 - -#ifdef __cplusplus -} -#endif diff --git a/examples/btree/util/num/num.h b/examples/btree/util/num/num.h deleted file mode 100755 index 27a5e42..0000000 --- a/examples/btree/util/num/num.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifdef __cplusplus -extern "C" { -#endif - -//===============================================================================================================================================================================================================200 -// FILE HEADER -//===============================================================================================================================================================================================================200 - -//======================================================================================================================================================150 -// ISINTEGER FUNCTION PROTOTYPE -//======================================================================================================================================================150 - -int isInteger(char *str); - -//===============================================================================================================================================================================================================200 -// END FILE HEADER -//===============================================================================================================================================================================================================200 - -#ifdef __cplusplus -} -#endif diff --git a/examples/btree/util/timer/timer.c b/examples/btree/util/timer/timer.c deleted file mode 100644 index b6aace4..0000000 --- a/examples/btree/util/timer/timer.c +++ /dev/null @@ -1,36 +0,0 @@ -#ifdef __cplusplus -extern "C" { -#endif - -//===============================================================================================================================================================================================================200 -// TIMER CODE -//===============================================================================================================================================================================================================200 - -//======================================================================================================================================================150 -// INCLUDE/DEFINE -//======================================================================================================================================================150 - -#include - -//======================================================================================================================================================150 -// FUNCTIONS -//======================================================================================================================================================150 - -//====================================================================================================100 -// DISPLAY TIME -//====================================================================================================100 - -// Returns the current system time in microseconds -long long get_time() { - struct timeval tv; - gettimeofday(&tv, NULL); - return (tv.tv_sec * 1000000) + tv.tv_usec; -} - -//===============================================================================================================================================================================================================200 -// END TIMER CODE -//===============================================================================================================================================================================================================200 - -#ifdef __cplusplus -} -#endif diff --git a/examples/btree/util/timer/timer.h b/examples/btree/util/timer/timer.h deleted file mode 100644 index 1744df4..0000000 --- a/examples/btree/util/timer/timer.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifdef __cplusplus -extern "C" { -#endif - -//===============================================================================================================================================================================================================200 -// TIMER HEADER -//===============================================================================================================================================================================================================200 - -//======================================================================================================================================================150 -// FUNCTION PROTOTYPES -//======================================================================================================================================================150 - -long long get_time(); - -//===============================================================================================================================================================================================================200 -// END TIMER HEADER -//===============================================================================================================================================================================================================200 - -#ifdef __cplusplus -} -#endif diff --git a/examples/cfd/euler3d.cu b/examples/cfd/euler3d.cu deleted file mode 100755 index ddaa774..0000000 --- a/examples/cfd/euler3d.cu +++ /dev/null @@ -1,662 +0,0 @@ -#include -#include -#include -#include - -/* - * Options - * - */ -#define GAMMA 1.4f -#define iterations 2 -// #ifndef block_length -// #define block_length 192 -// #endif - -#define NDIM 3 -#define NNB 4 - -#define RK 3 // 3rd order RK -#define ff_mach 1.2f -#define deg_angle_of_attack 0.0f - -/* - * not options - */ - -#ifdef RD_WG_SIZE_0_0 -#define BLOCK_SIZE_0 RD_WG_SIZE_0_0 -#elif defined(RD_WG_SIZE_0) -#define BLOCK_SIZE_0 RD_WG_SIZE_0 -#elif defined(RD_WG_SIZE) -#define BLOCK_SIZE_0 RD_WG_SIZE -#else -#define BLOCK_SIZE_0 192 -#endif - -#ifdef RD_WG_SIZE_1_0 -#define BLOCK_SIZE_1 RD_WG_SIZE_1_0 -#elif defined(RD_WG_SIZE_1) -#define BLOCK_SIZE_1 RD_WG_SIZE_1 -#elif defined(RD_WG_SIZE) -#define BLOCK_SIZE_1 RD_WG_SIZE -#else -#define BLOCK_SIZE_1 192 -#endif - -#ifdef RD_WG_SIZE_2_0 -#define BLOCK_SIZE_2 RD_WG_SIZE_2_0 -#elif defined(RD_WG_SIZE_1) -#define BLOCK_SIZE_2 RD_WG_SIZE_2 -#elif defined(RD_WG_SIZE) -#define BLOCK_SIZE_2 RD_WG_SIZE -#else -#define BLOCK_SIZE_2 192 -#endif - -#ifdef RD_WG_SIZE_3_0 -#define BLOCK_SIZE_3 RD_WG_SIZE_3_0 -#elif defined(RD_WG_SIZE_3) -#define BLOCK_SIZE_3 RD_WG_SIZE_3 -#elif defined(RD_WG_SIZE) -#define BLOCK_SIZE_3 RD_WG_SIZE -#else -#define BLOCK_SIZE_3 192 -#endif - -#ifdef RD_WG_SIZE_4_0 -#define BLOCK_SIZE_4 RD_WG_SIZE_4_0 -#elif defined(RD_WG_SIZE_4) -#define BLOCK_SIZE_4 RD_WG_SIZE_4 -#elif defined(RD_WG_SIZE) -#define BLOCK_SIZE_4 RD_WG_SIZE -#else -#define BLOCK_SIZE_4 192 -#endif - -// #if block_length > 128 -// #warning "the kernels may fail too launch on some systems if the block length -// is too large" #endif - -#define VAR_DENSITY 0 -#define VAR_MOMENTUM 1 -#define VAR_DENSITY_ENERGY (VAR_MOMENTUM + NDIM) -#define NVAR (VAR_DENSITY_ENERGY + 1) - -/* - * Generic functions - */ -template T *alloc(int N) { - T *t; - checkCudaErrors(cudaMalloc((void **)&t, sizeof(T) * N)); - return t; -} - -template void dealloc(T *array) { - checkCudaErrors(cudaFree((void *)array)); -} - -template void copy(T *dst, T *src, int N) { - checkCudaErrors(cudaMemcpy((void *)dst, (void *)src, N * sizeof(T), - cudaMemcpyDeviceToDevice)); -} - -template void upload(T *dst, T *src, int N) { - checkCudaErrors(cudaMemcpy((void *)dst, (void *)src, N * sizeof(T), - cudaMemcpyHostToDevice)); -} - -template void download(T *dst, T *src, int N) { - checkCudaErrors(cudaMemcpy((void *)dst, (void *)src, N * sizeof(T), - cudaMemcpyDeviceToHost)); -} - -void dump(float *variables, int nel, int nelr) { - float *h_variables = new float[nelr * NVAR]; - download(h_variables, variables, nelr * NVAR); - - { - std::ofstream file("density"); - file << nel << " " << nelr << std::endl; - for (int i = 0; i < nel; i++) - file << h_variables[i + VAR_DENSITY * nelr] << std::endl; - } - - { - std::ofstream file("momentum"); - file << nel << " " << nelr << std::endl; - for (int i = 0; i < nel; i++) { - for (int j = 0; j != NDIM; j++) - file << h_variables[i + (VAR_MOMENTUM + j) * nelr] << " "; - file << std::endl; - } - } - - { - std::ofstream file("density_energy"); - file << nel << " " << nelr << std::endl; - for (int i = 0; i < nel; i++) - file << h_variables[i + VAR_DENSITY_ENERGY * nelr] << std::endl; - } - delete[] h_variables; -} - -/* - * Element-based Cell-centered FVM solver functions - */ -__constant__ float ff_variable[NVAR]; -__constant__ float3 ff_flux_contribution_momentum_x[1]; -__constant__ float3 ff_flux_contribution_momentum_y[1]; -__constant__ float3 ff_flux_contribution_momentum_z[1]; -__constant__ float3 ff_flux_contribution_density_energy[1]; - -__global__ void cuda_initialize_variables(int nelr, float *variables) { - const int i = (blockDim.x * blockIdx.x + threadIdx.x); - for (int j = 0; j < NVAR; j++) - variables[i + j * nelr] = ff_variable[j]; -} -void initialize_variables(int nelr, float *variables) { - dim3 Dg(nelr / BLOCK_SIZE_1), Db(BLOCK_SIZE_1); - cuda_initialize_variables<<>>(nelr, variables); - getLastCudaError("initialize_variables failed"); -} - -__device__ __host__ inline void compute_flux_contribution( - float &density, float3 &momentum, float &density_energy, float &pressure, - float3 &velocity, float3 &fc_momentum_x, float3 &fc_momentum_y, - float3 &fc_momentum_z, float3 &fc_density_energy) { - fc_momentum_x.x = velocity.x * momentum.x + pressure; - fc_momentum_x.y = velocity.x * momentum.y; - fc_momentum_x.z = velocity.x * momentum.z; - - fc_momentum_y.x = fc_momentum_x.y; - fc_momentum_y.y = velocity.y * momentum.y + pressure; - fc_momentum_y.z = velocity.y * momentum.z; - - fc_momentum_z.x = fc_momentum_x.z; - fc_momentum_z.y = fc_momentum_y.z; - fc_momentum_z.z = velocity.z * momentum.z + pressure; - - float de_p = density_energy + pressure; - fc_density_energy.x = velocity.x * de_p; - fc_density_energy.y = velocity.y * de_p; - fc_density_energy.z = velocity.z * de_p; -} - -__device__ inline void compute_velocity(float &density, float3 &momentum, - float3 &velocity) { - velocity.x = momentum.x / density; - velocity.y = momentum.y / density; - velocity.z = momentum.z / density; -} - -__device__ inline float compute_speed_sqd(float3 &velocity) { - return velocity.x * velocity.x + velocity.y * velocity.y + - velocity.z * velocity.z; -} - -__device__ inline float compute_pressure(float &density, float &density_energy, - float &speed_sqd) { - return (float(GAMMA) - float(1.0f)) * - (density_energy - float(0.5f) * density * speed_sqd); -} - -__device__ inline float compute_speed_of_sound(float &density, - float &pressure) { - return sqrtf(float(GAMMA) * pressure / density); -} - -__global__ void cuda_compute_step_factor(int nelr, float *variables, - float *areas, float *step_factors) { - const int i = (blockDim.x * blockIdx.x + threadIdx.x); - - float density = variables[i + VAR_DENSITY * nelr]; - float3 momentum; - momentum.x = variables[i + (VAR_MOMENTUM + 0) * nelr]; - momentum.y = variables[i + (VAR_MOMENTUM + 1) * nelr]; - momentum.z = variables[i + (VAR_MOMENTUM + 2) * nelr]; - - float density_energy = variables[i + VAR_DENSITY_ENERGY * nelr]; - - float3 velocity; - compute_velocity(density, momentum, velocity); - float speed_sqd = compute_speed_sqd(velocity); - float pressure = compute_pressure(density, density_energy, speed_sqd); - float speed_of_sound = compute_speed_of_sound(density, pressure); - - // dt = float(0.5f) * sqrtf(areas[i]) / (||v|| + c).... but when we do time - // stepping, this later would need to be divided by the area, so we just do it - // all at once - step_factors[i] = - float(0.5f) / (sqrtf(areas[i]) * (sqrtf(speed_sqd) + speed_of_sound)); -} -void compute_step_factor(int nelr, float *variables, float *areas, - float *step_factors) { - dim3 Dg(nelr / BLOCK_SIZE_2), Db(BLOCK_SIZE_2); - cuda_compute_step_factor<<>>(nelr, variables, areas, step_factors); - getLastCudaError("compute_step_factor failed"); -} - -/* - * - * - */ -__global__ void cuda_compute_flux(int nelr, int *elements_surrounding_elements, - float *normals, float *variables, - float *fluxes) { - const float smoothing_coefficient = float(0.2f); - const int i = (blockDim.x * blockIdx.x + threadIdx.x); - - int j, nb; - float3 normal; - float normal_len; - float factor; - - float density_i = variables[i + VAR_DENSITY * nelr]; - float3 momentum_i; - momentum_i.x = variables[i + (VAR_MOMENTUM + 0) * nelr]; - momentum_i.y = variables[i + (VAR_MOMENTUM + 1) * nelr]; - momentum_i.z = variables[i + (VAR_MOMENTUM + 2) * nelr]; - - float density_energy_i = variables[i + VAR_DENSITY_ENERGY * nelr]; - - float3 velocity_i; - compute_velocity(density_i, momentum_i, velocity_i); - float speed_sqd_i = compute_speed_sqd(velocity_i); - float speed_i = sqrtf(speed_sqd_i); - float pressure_i = compute_pressure(density_i, density_energy_i, speed_sqd_i); - float speed_of_sound_i = compute_speed_of_sound(density_i, pressure_i); - float3 flux_contribution_i_momentum_x, flux_contribution_i_momentum_y, - flux_contribution_i_momentum_z; - float3 flux_contribution_i_density_energy; - compute_flux_contribution( - density_i, momentum_i, density_energy_i, pressure_i, velocity_i, - flux_contribution_i_momentum_x, flux_contribution_i_momentum_y, - flux_contribution_i_momentum_z, flux_contribution_i_density_energy); - - float flux_i_density = float(0.0f); - float3 flux_i_momentum; - flux_i_momentum.x = float(0.0f); - flux_i_momentum.y = float(0.0f); - flux_i_momentum.z = float(0.0f); - float flux_i_density_energy = float(0.0f); - - float3 velocity_nb; - float density_nb, density_energy_nb; - float3 momentum_nb; - float3 flux_contribution_nb_momentum_x, flux_contribution_nb_momentum_y, - flux_contribution_nb_momentum_z; - float3 flux_contribution_nb_density_energy; - float speed_sqd_nb, speed_of_sound_nb, pressure_nb; - -#pragma unroll - for (j = 0; j < NNB; j++) { - nb = elements_surrounding_elements[i + j * nelr]; - normal.x = normals[i + (j + 0 * NNB) * nelr]; - normal.y = normals[i + (j + 1 * NNB) * nelr]; - normal.z = normals[i + (j + 2 * NNB) * nelr]; - normal_len = - sqrtf(normal.x * normal.x + normal.y * normal.y + normal.z * normal.z); - - if (nb >= 0) // a legitimate neighbor - { - density_nb = variables[nb + VAR_DENSITY * nelr]; - momentum_nb.x = variables[nb + (VAR_MOMENTUM + 0) * nelr]; - momentum_nb.y = variables[nb + (VAR_MOMENTUM + 1) * nelr]; - momentum_nb.z = variables[nb + (VAR_MOMENTUM + 2) * nelr]; - density_energy_nb = variables[nb + VAR_DENSITY_ENERGY * nelr]; - compute_velocity(density_nb, momentum_nb, velocity_nb); - speed_sqd_nb = compute_speed_sqd(velocity_nb); - pressure_nb = - compute_pressure(density_nb, density_energy_nb, speed_sqd_nb); - speed_of_sound_nb = compute_speed_of_sound(density_nb, pressure_nb); - compute_flux_contribution( - density_nb, momentum_nb, density_energy_nb, pressure_nb, velocity_nb, - flux_contribution_nb_momentum_x, flux_contribution_nb_momentum_y, - flux_contribution_nb_momentum_z, flux_contribution_nb_density_energy); - - // artificial viscosity - factor = -normal_len * smoothing_coefficient * float(0.5f) * - (speed_i + sqrtf(speed_sqd_nb) + speed_of_sound_i + - speed_of_sound_nb); - flux_i_density += factor * (density_i - density_nb); - flux_i_density_energy += factor * (density_energy_i - density_energy_nb); - flux_i_momentum.x += factor * (momentum_i.x - momentum_nb.x); - flux_i_momentum.y += factor * (momentum_i.y - momentum_nb.y); - flux_i_momentum.z += factor * (momentum_i.z - momentum_nb.z); - - // accumulate cell-centered fluxes - factor = float(0.5f) * normal.x; - flux_i_density += factor * (momentum_nb.x + momentum_i.x); - flux_i_density_energy += factor * (flux_contribution_nb_density_energy.x + - flux_contribution_i_density_energy.x); - flux_i_momentum.x += factor * (flux_contribution_nb_momentum_x.x + - flux_contribution_i_momentum_x.x); - flux_i_momentum.y += factor * (flux_contribution_nb_momentum_y.x + - flux_contribution_i_momentum_y.x); - flux_i_momentum.z += factor * (flux_contribution_nb_momentum_z.x + - flux_contribution_i_momentum_z.x); - - factor = float(0.5f) * normal.y; - flux_i_density += factor * (momentum_nb.y + momentum_i.y); - flux_i_density_energy += factor * (flux_contribution_nb_density_energy.y + - flux_contribution_i_density_energy.y); - flux_i_momentum.x += factor * (flux_contribution_nb_momentum_x.y + - flux_contribution_i_momentum_x.y); - flux_i_momentum.y += factor * (flux_contribution_nb_momentum_y.y + - flux_contribution_i_momentum_y.y); - flux_i_momentum.z += factor * (flux_contribution_nb_momentum_z.y + - flux_contribution_i_momentum_z.y); - - factor = float(0.5f) * normal.z; - flux_i_density += factor * (momentum_nb.z + momentum_i.z); - flux_i_density_energy += factor * (flux_contribution_nb_density_energy.z + - flux_contribution_i_density_energy.z); - flux_i_momentum.x += factor * (flux_contribution_nb_momentum_x.z + - flux_contribution_i_momentum_x.z); - flux_i_momentum.y += factor * (flux_contribution_nb_momentum_y.z + - flux_contribution_i_momentum_y.z); - flux_i_momentum.z += factor * (flux_contribution_nb_momentum_z.z + - flux_contribution_i_momentum_z.z); - } else if (nb == -1) // a wing boundary - { - flux_i_momentum.x += normal.x * pressure_i; - flux_i_momentum.y += normal.y * pressure_i; - flux_i_momentum.z += normal.z * pressure_i; - } else if (nb == -2) // a far field boundary - { - factor = float(0.5f) * normal.x; - flux_i_density += factor * (ff_variable[VAR_MOMENTUM + 0] + momentum_i.x); - flux_i_density_energy += - factor * (ff_flux_contribution_density_energy[0].x + - flux_contribution_i_density_energy.x); - flux_i_momentum.x += factor * (ff_flux_contribution_momentum_x[0].x + - flux_contribution_i_momentum_x.x); - flux_i_momentum.y += factor * (ff_flux_contribution_momentum_y[0].x + - flux_contribution_i_momentum_y.x); - flux_i_momentum.z += factor * (ff_flux_contribution_momentum_z[0].x + - flux_contribution_i_momentum_z.x); - - factor = float(0.5f) * normal.y; - flux_i_density += factor * (ff_variable[VAR_MOMENTUM + 1] + momentum_i.y); - flux_i_density_energy += - factor * (ff_flux_contribution_density_energy[0].y + - flux_contribution_i_density_energy.y); - flux_i_momentum.x += factor * (ff_flux_contribution_momentum_x[0].y + - flux_contribution_i_momentum_x.y); - flux_i_momentum.y += factor * (ff_flux_contribution_momentum_y[0].y + - flux_contribution_i_momentum_y.y); - flux_i_momentum.z += factor * (ff_flux_contribution_momentum_z[0].y + - flux_contribution_i_momentum_z.y); - - factor = float(0.5f) * normal.z; - flux_i_density += factor * (ff_variable[VAR_MOMENTUM + 2] + momentum_i.z); - flux_i_density_energy += - factor * (ff_flux_contribution_density_energy[0].z + - flux_contribution_i_density_energy.z); - flux_i_momentum.x += factor * (ff_flux_contribution_momentum_x[0].z + - flux_contribution_i_momentum_x.z); - flux_i_momentum.y += factor * (ff_flux_contribution_momentum_y[0].z + - flux_contribution_i_momentum_y.z); - flux_i_momentum.z += factor * (ff_flux_contribution_momentum_z[0].z + - flux_contribution_i_momentum_z.z); - } - } - - fluxes[i + VAR_DENSITY * nelr] = flux_i_density; - fluxes[i + (VAR_MOMENTUM + 0) * nelr] = flux_i_momentum.x; - fluxes[i + (VAR_MOMENTUM + 1) * nelr] = flux_i_momentum.y; - fluxes[i + (VAR_MOMENTUM + 2) * nelr] = flux_i_momentum.z; - fluxes[i + VAR_DENSITY_ENERGY * nelr] = flux_i_density_energy; -} -void compute_flux(int nelr, int *elements_surrounding_elements, float *normals, - float *variables, float *fluxes) { - dim3 Dg(nelr / BLOCK_SIZE_3), Db(BLOCK_SIZE_3); - cuda_compute_flux<<>>(nelr, elements_surrounding_elements, normals, - variables, fluxes); - getLastCudaError("compute_flux failed"); -} - -__global__ void cuda_time_step(int j, int nelr, float *old_variables, - float *variables, float *step_factors, - float *fluxes) { - const int i = (blockDim.x * blockIdx.x + threadIdx.x); - - float factor = step_factors[i] / float(RK + 1 - j); - - variables[i + VAR_DENSITY * nelr] = old_variables[i + VAR_DENSITY * nelr] + - factor * fluxes[i + VAR_DENSITY * nelr]; - variables[i + VAR_DENSITY_ENERGY * nelr] = - old_variables[i + VAR_DENSITY_ENERGY * nelr] + - factor * fluxes[i + VAR_DENSITY_ENERGY * nelr]; - variables[i + (VAR_MOMENTUM + 0) * nelr] = - old_variables[i + (VAR_MOMENTUM + 0) * nelr] + - factor * fluxes[i + (VAR_MOMENTUM + 0) * nelr]; - variables[i + (VAR_MOMENTUM + 1) * nelr] = - old_variables[i + (VAR_MOMENTUM + 1) * nelr] + - factor * fluxes[i + (VAR_MOMENTUM + 1) * nelr]; - variables[i + (VAR_MOMENTUM + 2) * nelr] = - old_variables[i + (VAR_MOMENTUM + 2) * nelr] + - factor * fluxes[i + (VAR_MOMENTUM + 2) * nelr]; -} -void time_step(int j, int nelr, float *old_variables, float *variables, - float *step_factors, float *fluxes) { - dim3 Dg(nelr / BLOCK_SIZE_4), Db(BLOCK_SIZE_4); - cuda_time_step<<>>(j, nelr, old_variables, variables, step_factors, - fluxes); - getLastCudaError("update failed"); -} - -/* - * Main function - */ -int main(int argc, char **argv) { - printf("WG size of kernel:initialize = %d, WG size of " - "kernel:compute_step_factor = %d, WG size of kernel:compute_flux = " - "%d, WG size of kernel:time_step = %d\n", - BLOCK_SIZE_1, BLOCK_SIZE_2, BLOCK_SIZE_3, BLOCK_SIZE_4); - - if (argc < 2) { - std::cout << "specify data file name" << std::endl; - return 0; - } - const char *data_file_name = argv[1]; - - cudaDeviceProp prop; - int dev; - - checkCudaErrors(cudaSetDevice(0)); - - // set far field conditions and load them into constant memory on the gpu - { - float h_ff_variable[NVAR]; - const float angle_of_attack = - float(3.1415926535897931 / 180.0f) * float(deg_angle_of_attack); - - h_ff_variable[VAR_DENSITY] = float(1.4); - - float ff_pressure = float(1.0f); - float ff_speed_of_sound = - sqrt(GAMMA * ff_pressure / h_ff_variable[VAR_DENSITY]); - float ff_speed = float(ff_mach) * ff_speed_of_sound; - - float3 ff_velocity; - ff_velocity.x = ff_speed * float(cos((float)angle_of_attack)); - ff_velocity.y = ff_speed * float(sin((float)angle_of_attack)); - ff_velocity.z = 0.0f; - - h_ff_variable[VAR_MOMENTUM + 0] = - h_ff_variable[VAR_DENSITY] * ff_velocity.x; - h_ff_variable[VAR_MOMENTUM + 1] = - h_ff_variable[VAR_DENSITY] * ff_velocity.y; - h_ff_variable[VAR_MOMENTUM + 2] = - h_ff_variable[VAR_DENSITY] * ff_velocity.z; - - h_ff_variable[VAR_DENSITY_ENERGY] = - h_ff_variable[VAR_DENSITY] * (float(0.5f) * (ff_speed * ff_speed)) + - (ff_pressure / float(GAMMA - 1.0f)); - - float3 h_ff_momentum; - h_ff_momentum.x = *(h_ff_variable + VAR_MOMENTUM + 0); - h_ff_momentum.y = *(h_ff_variable + VAR_MOMENTUM + 1); - h_ff_momentum.z = *(h_ff_variable + VAR_MOMENTUM + 2); - float3 h_ff_flux_contribution_momentum_x; - float3 h_ff_flux_contribution_momentum_y; - float3 h_ff_flux_contribution_momentum_z; - float3 h_ff_flux_contribution_density_energy; - compute_flux_contribution(h_ff_variable[VAR_DENSITY], h_ff_momentum, - h_ff_variable[VAR_DENSITY_ENERGY], ff_pressure, - ff_velocity, h_ff_flux_contribution_momentum_x, - h_ff_flux_contribution_momentum_y, - h_ff_flux_contribution_momentum_z, - h_ff_flux_contribution_density_energy); - - // copy far field conditions to the gpu - checkCudaErrors( - cudaMemcpyToSymbol(ff_variable, h_ff_variable, NVAR * sizeof(float))); - checkCudaErrors(cudaMemcpyToSymbol(ff_flux_contribution_momentum_x, - &h_ff_flux_contribution_momentum_x, - sizeof(float3))); - checkCudaErrors(cudaMemcpyToSymbol(ff_flux_contribution_momentum_y, - &h_ff_flux_contribution_momentum_y, - sizeof(float3))); - checkCudaErrors(cudaMemcpyToSymbol(ff_flux_contribution_momentum_z, - &h_ff_flux_contribution_momentum_z, - sizeof(float3))); - - checkCudaErrors(cudaMemcpyToSymbol(ff_flux_contribution_density_energy, - &h_ff_flux_contribution_density_energy, - sizeof(float3))); - } - int nel; - int nelr; - - // read in domain geometry - float *areas; - int *elements_surrounding_elements; - float *normals; - { - std::ifstream file(data_file_name); - - file >> nel; - nelr = - BLOCK_SIZE_0 * ((nel / BLOCK_SIZE_0) + std::min(1, nel % BLOCK_SIZE_0)); - - float *h_areas = new float[nelr]; - int *h_elements_surrounding_elements = new int[nelr * NNB]; - float *h_normals = new float[nelr * NDIM * NNB]; - - // read in data - for (int i = 0; i < nel; i++) { - file >> h_areas[i]; - for (int j = 0; j < NNB; j++) { - file >> h_elements_surrounding_elements[i + j * nelr]; - if (h_elements_surrounding_elements[i + j * nelr] < 0) - h_elements_surrounding_elements[i + j * nelr] = -1; - h_elements_surrounding_elements[i + j * nelr]--; // it's coming in with - // Fortran numbering - - for (int k = 0; k < NDIM; k++) { - file >> h_normals[i + (j + k * NNB) * nelr]; - h_normals[i + (j + k * NNB) * nelr] = - -h_normals[i + (j + k * NNB) * nelr]; - } - } - } - - // fill in remaining data - int last = nel - 1; - for (int i = nel; i < nelr; i++) { - h_areas[i] = h_areas[last]; - for (int j = 0; j < NNB; j++) { - // duplicate the last element - h_elements_surrounding_elements[i + j * nelr] = - h_elements_surrounding_elements[last + j * nelr]; - for (int k = 0; k < NDIM; k++) - h_normals[last + (j + k * NNB) * nelr] = - h_normals[last + (j + k * NNB) * nelr]; - } - } - - areas = alloc(nelr); - upload(areas, h_areas, nelr); - - elements_surrounding_elements = alloc(nelr * NNB); - upload(elements_surrounding_elements, h_elements_surrounding_elements, - nelr * NNB); - - normals = alloc(nelr * NDIM * NNB); - upload(normals, h_normals, nelr * NDIM * NNB); - - delete[] h_areas; - delete[] h_elements_surrounding_elements; - delete[] h_normals; - } - - // Create arrays and set initial conditions - float *variables = alloc(nelr * NVAR); - initialize_variables(nelr, variables); - - float *old_variables = alloc(nelr * NVAR); - float *fluxes = alloc(nelr * NVAR); - float *step_factors = alloc(nelr); - - // make sure all memory is floatly allocated before we start timing - initialize_variables(nelr, old_variables); - initialize_variables(nelr, fluxes); - cudaMemset((void *)step_factors, 0, sizeof(float) * nelr); - // make sure CUDA isn't still doing something before we start timing - cudaThreadSynchronize(); - - // these need to be computed the first time in order to compute time step - std::cout << "Starting..." << std::endl; - - StopWatchInterface *timer = 0; - // unsigned int timer = 0; - - // CUT_SAFE_CALL( cutCreateTimer( &timer)); - // CUT_SAFE_CALL( cutStartTimer( timer)); - sdkCreateTimer(&timer); - sdkStartTimer(&timer); - // Begin iterations - for (int i = 0; i < iterations; i++) { - copy(old_variables, variables, nelr * NVAR); - - // for the first iteration we compute the time step - compute_step_factor(nelr, variables, areas, step_factors); - getLastCudaError("compute_step_factor failed"); - - for (int j = 0; j < RK; j++) { - compute_flux(nelr, elements_surrounding_elements, normals, variables, - fluxes); - getLastCudaError("compute_flux failed"); - time_step(j, nelr, old_variables, variables, step_factors, fluxes); - getLastCudaError("time_step failed"); - } - } - - cudaThreadSynchronize(); - // CUT_SAFE_CALL( cutStopTimer(timer) ); - sdkStopTimer(&timer); - - std::cout << (sdkGetAverageTimerValue(&timer) / 1000.0) / iterations - << " seconds per iteration" << std::endl; - - std::cout << "Saving solution..." << std::endl; - dump(variables, nel, nelr); - std::cout << "Saved solution..." << std::endl; - - std::cout << "Cleaning up..." << std::endl; - dealloc(areas); - dealloc(elements_surrounding_elements); - dealloc(normals); - - dealloc(variables); - dealloc(old_variables); - dealloc(fluxes); - dealloc(step_factors); - - std::cout << "Done..." << std::endl; - - return 0; -} diff --git a/examples/cfd/run.sh b/examples/cfd/run.sh deleted file mode 100644 index bc5d506..0000000 --- a/examples/cfd/run.sh +++ /dev/null @@ -1,15 +0,0 @@ -# # #!/bin/bash -clang++ euler3d.cu -I/usr/local/cuda-10.1/samples/common/inc --cuda-path=/usr/local/cuda-10.1 --cuda-gpu-arch=sm_50 -L/usr/local/cuda-10.1/lib64 -lcudart_static -ldl -lrt -pthread -save-temps -v - -/home/robinhan/repo/open_source_template/build/compilation/kernelTranslator euler3d-cuda-nvptx64-nvidia-cuda-sm_50.bc kernel.bc -/home/robinhan/repo/open_source_template/build/compilation/hostTranslator euler3d-host-x86_64-unknown-linux-gnu.bc host.bc - -llc --relocation-model=pic --filetype=obj kernel.bc -llc --relocation-model=pic --filetype=obj host.bc - -g++ -Wall -L/home/robinhan/repo/open_source_template/build/runtime -L/home/robinhan/repo/open_source_template/build/runtime/threadPool -o a.out -fPIC -no-pie host.o kernel.o -lc -lx86Runtime -lthreadPool -lpthread - -./a.out ../rodinia-data/cfd/fvcorr.domn.097K -# ./demo 1024 -# # # ./demo -f ../../data/matrix3.txt -# # # run -f ../../data/gaussian/matrix3.txt diff --git a/examples/dwt2d/common.h b/examples/dwt2d/common.h deleted file mode 100644 index ac276a1..0000000 --- a/examples/dwt2d/common.h +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2009, Jiri Matela - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _COMMON_H -#define _COMMON_H - -// 24-bit multiplication is faster on G80, -// but we must be sure to multiply integers -// only within [-8M, 8M - 1] range -#define IMUL(a, b) __mul24(a, b) - -////cuda timing macros -//#define CTIMERINIT cudaEvent_t cstart, cstop; \ -// cudaEventCreate(&cstart); \ -// cudaEventCreate(&cstop); \ -// float elapsedTime -//#define CTIMERSTART(cstart) cudaEventRecord(cstart,0) -//#define CTIMERSTOP(cstop) cudaEventRecord(cstop,0); \ -// cudaEventSynchronize(cstop); \ -// cudaEventElapsedTime(&elapsedTime, cstart, cstop) - -// divide and round up macro -#define DIVANDRND(a, b) ((((a) % (b)) != 0) ? ((a) / (b) + 1) : ((a) / (b))) - -#define cudaCheckError(msg) \ - { \ - cudaError_t err = cudaGetLastError(); \ - if (cudaSuccess != err) { \ - fprintf(stderr, "%s: %i: %s: %s.\n", __FILE__, __LINE__, msg, \ - cudaGetErrorString(err)); \ - exit(-1); \ - } \ - } - -#define cudaCheckAsyncError(msg) \ - { \ - cudaThreadSynchronize(); \ - cudaCheckError(msg); \ - } - -#endif diff --git a/examples/dwt2d/components.cu b/examples/dwt2d/components.cu deleted file mode 100755 index e768b4e..0000000 --- a/examples/dwt2d/components.cu +++ /dev/null @@ -1,193 +0,0 @@ -/* - * Copyright (c) 2009, Jiri Matela - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include -#include - -#include "components.h" -#include "common.h" - -#define THREADS 256 - -/* Store 3 RGB float components */ -__device__ void storeComponents(float *d_r, float *d_g, float *d_b, float r, float g, float b, int pos) -{ - d_r[pos] = (r/255.0f) - 0.5f; - d_g[pos] = (g/255.0f) - 0.5f; - d_b[pos] = (b/255.0f) - 0.5f; -} - -/* Store 3 RGB intege components */ -__device__ void storeComponents(int *d_r, int *d_g, int *d_b, int r, int g, int b, int pos) -{ - d_r[pos] = r - 128; - d_g[pos] = g - 128; - d_b[pos] = b - 128; -} - -/* Store float component */ -__device__ void storeComponent(float *d_c, float c, int pos) -{ - d_c[pos] = (c/255.0f) - 0.5f; -} - -/* Store integer component */ -__device__ void storeComponent(int *d_c, int c, int pos) -{ - d_c[pos] = c - 128; -} - -/* Copy img src data into three separated component buffers */ -template -__global__ void c_CopySrcToComponents(T *d_r, T *d_g, T *d_b, - unsigned char * d_src, - int pixels) -{ - int x = threadIdx.x; - int gX = blockDim.x*blockIdx.x; - - __shared__ unsigned char sData[THREADS*3]; - - /* Copy data to shared mem by 4bytes - other checks are not necessary, since - d_src buffer is aligned to sharedDataSize */ - if ( (x*4) < THREADS*3 ) { - float *s = (float *)d_src; - float *d = (float *)sData; - d[x] = s[((gX*3)>>2) + x]; - } - __syncthreads(); - - T r, g, b; - - int offset = x*3; - r = (T)(sData[offset]); - g = (T)(sData[offset+1]); - b = (T)(sData[offset+2]); - - int globalOutputPosition = gX + x; - if (globalOutputPosition < pixels) { - storeComponents(d_r, d_g, d_b, r, g, b, globalOutputPosition); - } -} - -/* Copy img src data into three separated component buffers */ -template -__global__ void c_CopySrcToComponent(T *d_c, unsigned char * d_src, int pixels) -{ - int x = threadIdx.x; - int gX = blockDim.x*blockIdx.x; - - __shared__ unsigned char sData[THREADS]; - - /* Copy data to shared mem by 4bytes - other checks are not necessary, since - d_src buffer is aligned to sharedDataSize */ - if ( (x*4) < THREADS) { - float *s = (float *)d_src; - float *d = (float *)sData; - d[x] = s[(gX>>2) + x]; - } - __syncthreads(); - - T c; - - c = (T)(sData[x]); - - int globalOutputPosition = gX + x; - if (globalOutputPosition < pixels) { - storeComponent(d_c, c, globalOutputPosition); - } -} - - -/* Separate compoents of 8bit RGB source image */ -template -void rgbToComponents(T *d_r, T *d_g, T *d_b, unsigned char * src, int width, int height) -{ - unsigned char * d_src; - int pixels = width*height; - int alignedSize = DIVANDRND(width*height, THREADS) * THREADS * 3; //aligned to thread block size -- THREADS - - /* Alloc d_src buffer */ - cudaMalloc((void **)&d_src, alignedSize); - cudaCheckAsyncError("Cuda malloc") - cudaMemset(d_src, 0, alignedSize); - - /* Copy data to device */ - cudaMemcpy(d_src, src, pixels*3, cudaMemcpyHostToDevice); - cudaCheckError("Copy data to device") - - /* Kernel */ - dim3 threads(THREADS); - dim3 grid(alignedSize/(THREADS*3)); - assert(alignedSize%(THREADS*3) == 0); - c_CopySrcToComponents<<>>(d_r, d_g, d_b, d_src, pixels); - cudaCheckAsyncError("CopySrcToComponents kernel") - - /* Free Memory */ - cudaFree(d_src); - cudaCheckAsyncError("Free memory") -} -template void rgbToComponents(float *d_r, float *d_g, float *d_b, unsigned char * src, int width, int height); -template void rgbToComponents(int *d_r, int *d_g, int *d_b, unsigned char * src, int width, int height); - - -/* Copy a 8bit source image data into a color compoment of type T */ -template -void bwToComponent(T *d_c, unsigned char * src, int width, int height) -{ - unsigned char * d_src; - int pixels = width*height; - int alignedSize = DIVANDRND(pixels, THREADS) * THREADS; //aligned to thread block size -- THREADS - - /* Alloc d_src buffer */ - cudaMalloc((void **)&d_src, alignedSize); - cudaCheckAsyncError("Cuda malloc") - cudaMemset(d_src, 0, alignedSize); - - /* Copy data to device */ - cudaMemcpy(d_src, src, pixels, cudaMemcpyHostToDevice); - cudaCheckError("Copy data to device") - - /* Kernel */ - dim3 threads(THREADS); - dim3 grid(alignedSize/(THREADS)); - assert(alignedSize%(THREADS) == 0); - c_CopySrcToComponent<<>>(d_c, d_src, pixels); - cudaCheckAsyncError("CopySrcToComponent kernel") - - /* Free Memory */ - cudaFree(d_src); - cudaCheckAsyncError("Free memory") -} - -template void bwToComponent(float *d_c, unsigned char *src, int width, int height); -template void bwToComponent(int *d_c, unsigned char *src, int width, int height); diff --git a/examples/dwt2d/components.h b/examples/dwt2d/components.h deleted file mode 100644 index 3766cdd..0000000 --- a/examples/dwt2d/components.h +++ /dev/null @@ -1,39 +0,0 @@ -/* - * Copyright (c) 2009, Jiri Matela - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _COMPONENTS_H -#define _COMPONENTS_H - -/* Separate compoents of source 8bit RGB image */ -template -void rgbToComponents(T *d_r, T *d_g, T *d_b, unsigned char *src, int width, - int height); - -/* Copy a 8bit source image data into a color compoment of type T */ -template -void bwToComponent(T *d_c, unsigned char *src, int width, int height); - -#endif diff --git a/examples/dwt2d/dwt.cu b/examples/dwt2d/dwt.cu deleted file mode 100755 index c102bce..0000000 --- a/examples/dwt2d/dwt.cu +++ /dev/null @@ -1,385 +0,0 @@ -/* - * Copyright (c) 2009, Jiri Matela - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include "dwt_cuda/dwt.h" -#include "dwt_cuda/common.h" -#include "dwt.h" -#include "common.h" -#include -#include - -inline void fdwt(float *in, float *out, int width, int height, int levels) -{ - printf(" Running fdwt97 Float \n"); - dwt_cuda::fdwt97(in, out, width, height, levels); -} -/* -inline void fdwt(float *in, float *out, int width, int height, int levels, float *diffOut) -{ - dwt_cuda::fdwt97(in, out, width, height, levels, diffOut); -} -*/ - - - -inline void fdwt(int *in, int *out, int width, int height, int levels) -{ - printf(" Running fdwt53 Int \n"); - - dwt_cuda::fdwt53(in, out, width, height, levels); -} -/* -inline void fdwt(int *in, int *out, int width, int height, int levels, int *diffOut) -{ - dwt_cuda::fdwt53(in, out, width, height, levels, diffOut); -} -*/ - - - -inline void rdwt(float *in, float *out, int width, int height, int levels) -{ - printf(" Running rdwt97 Float \n"); - - dwt_cuda::rdwt97(in, out, width, height, levels); -} - -inline void rdwt(int *in, int *out, int width, int height, int levels) -{ - printf(" Running rdwt53 Int \n"); - - dwt_cuda::rdwt53(in, out, width, height, levels); -} - -template -int nStage2dDWT(T * in, T * out, T * backup, int pixWidth, int pixHeight, int stages, bool forward) -{ - printf("\n*** %d stages of 2D forward DWT:\n", stages); - - /* create backup of input, because each test iteration overwrites it */ - const int size = pixHeight * pixWidth * sizeof(T); - cudaMemcpy(backup, in, size, cudaMemcpyDeviceToDevice); - cudaCheckError("Memcopy device to device"); - - /* Measure time of individual levels. */ - if(forward) - fdwt(in, out, pixWidth, pixHeight, stages); - else - rdwt(in, out, pixWidth, pixHeight, stages); - - // Measure overall time of DWT. -/* #ifdef GPU_DWT_TESTING_1 - - dwt_cuda::CudaDWTTester tester; - for(int i = tester.getNumIterations(); i--; ) { - // Recover input and measure one overall DWT run. - cudaMemcpy(in, backup, size, cudaMemcpyDeviceToDevice); - cudaCheckError("Memcopy device to device"); - tester.beginTestIteration(); - if(forward) - fdwt(in, out, pixWidth, pixHeight, stages); - else - rdwt(in, out, pixWidth, pixHeight, stages); - tester.endTestIteration(); - } - tester.showPerformance(" Overall DWT", pixWidth, pixHeight); - #endif // GPU_DWT_TESTING - - cudaCheckAsyncError("DWT Kernel calls"); -*/ return 0; -} -template int nStage2dDWT(float*, float*, float*, int, int, int, bool); -template int nStage2dDWT(int*, int*, int*, int, int, int, bool); - - - -/* -template -int nStage2dDWT(T * in, T * out, T * backup, int pixWidth, int pixHeight, int stages, bool forward, T * diffOut) -{ - printf("*** %d stages of 2D forward DWT:\n", stages); - - // create backup of input, because each test iteration overwrites it - const int size = pixHeight * pixWidth * sizeof(T); - cudaMemcpy(backup, in, size, cudaMemcpyDeviceToDevice); - cudaCheckError("Memcopy device to device"); - - // Measure time of individual levels. - if(forward) - fdwt(in, out, pixWidth, pixHeight, stages, diffOut); - else - rdwt(in, out, pixWidth, pixHeight, stages); - - // Measure overall time of DWT. - #ifdef GPU_DWT_TESTING_1 - - dwt_cuda::CudaDWTTester tester; - for(int i = tester.getNumIterations(); i--; ) { - // Recover input and measure one overall DWT run. - cudaMemcpy(in, backup, size, cudaMemcpyDeviceToDevice); - cudaCheckError("Memcopy device to device"); - tester.beginTestIteration(); - if(forward) - fdwt(in, out, pixWidth, pixHeight, stages, diffOut); - else - rdwt(in, out, pixWidth, pixHeight, stages); - tester.endTestIteration(); - } - tester.showPerformance(" Overall DWT", pixWidth, pixHeight); - #endif // GPU_DWT_TESTING - - cudaCheckAsyncError("DWT Kernel calls"); - return 0; -} -template int nStage2dDWT(float*, float*, float*, int, int, int, bool, float*); -template int nStage2dDWT(int*, int*, int*, int, int, int, bool, int*); - -*/ - -void samplesToChar(unsigned char * dst, float * src, int samplesNum, const char * filename) -{ - int i; - std::ofstream outputFile; - char outfile[strlen(filename)+strlen(".txt")]; - strcpy(outfile, filename); - strcpy(outfile+strlen(filename), ".txt"); - outputFile.open(outfile); - - - for(i = 0; i < samplesNum; i++) { - float r = (src[i]+0.5f) * 255; - if (r > 255) r = 255; - if (r < 0) r = 0; - dst[i] = (unsigned char)r; - outputFile << "index: " << i << " val: "<< r <<" \n"; - - - } - outputFile.close(); -} - -void samplesToChar(unsigned char * dst, int * src, int samplesNum, const char * filename) -{ - int i; - std::ofstream outputFile; - char outfile[strlen(filename)+strlen(".txt")]; - strcpy(outfile, filename); - strcpy(outfile+strlen(filename), ".txt"); - outputFile.open(outfile); - for(i = 0; i < samplesNum; i++) { - int r = src[i]+128; - if (r > 255) r = 255; - if (r < 0) r = 0; - dst[i] = (unsigned char)r; - // added this line to output check - outputFile << "index: " << i << " val: "<< r <<" \n"; - } - outputFile.close(); -} - -///* Write output linear orderd*/ -template -int writeLinear(T *component_cuda, int pixWidth, int pixHeight, - const char * filename, const char * suffix) -{ - unsigned char * result; - T *gpu_output; - int i; - int size; - int samplesNum = pixWidth*pixHeight; - - size = samplesNum*sizeof(T); - cudaMallocHost((void **)&gpu_output, size); - cudaCheckError("Malloc host"); - memset(gpu_output, 0, size); - result = (unsigned char *)malloc(samplesNum); - cudaMemcpy(gpu_output, component_cuda, size, cudaMemcpyDeviceToHost); - cudaCheckError("Memcopy device to host"); - - /* T to char */ - samplesToChar(result, gpu_output, samplesNum, filename); - - /* Write component */ - char outfile[strlen(filename)+strlen(suffix)]; - strcpy(outfile, filename); - strcpy(outfile+strlen(filename), suffix); - i = open(outfile, O_CREAT|O_WRONLY, 0644); - if (i == -1) { - error(0,errno,"cannot access %s", outfile); - return -1; - } - printf("\nWriting to %s (%d x %d)\n", outfile, pixWidth, pixHeight); - ssize_t x ; - x = write(i, result, samplesNum); - close(i); - - /* Clean up */ - cudaFreeHost(gpu_output); - cudaCheckError("Cuda free host memory"); - free(result); - if(x == 0) return 1; - return 0; -} -template int writeLinear(float *component_cuda, int pixWidth, int pixHeight, const char * filename, const char * suffix); -template int writeLinear(int *component_cuda, int pixWidth, int pixHeight, const char * filename, const char * suffix); - -/* Write output visual ordered */ -template -int writeNStage2DDWT(T *component_cuda, int pixWidth, int pixHeight, - int stages, const char * filename, const char * suffix) -{ - struct band { - int dimX; - int dimY; - }; - struct dimensions { - struct band LL; - struct band HL; - struct band LH; - struct band HH; - }; - - unsigned char * result; - T *src, *dst; - int i,s; - int size; - int offset; - int yOffset; - int samplesNum = pixWidth*pixHeight; - struct dimensions * bandDims; - - bandDims = (struct dimensions *)malloc(stages * sizeof(struct dimensions)); - - bandDims[0].LL.dimX = DIVANDRND(pixWidth,2); - bandDims[0].LL.dimY = DIVANDRND(pixHeight,2); - bandDims[0].HL.dimX = pixWidth - bandDims[0].LL.dimX; - bandDims[0].HL.dimY = bandDims[0].LL.dimY; - bandDims[0].LH.dimX = bandDims[0].LL.dimX; - bandDims[0].LH.dimY = pixHeight - bandDims[0].LL.dimY; - bandDims[0].HH.dimX = bandDims[0].HL.dimX; - bandDims[0].HH.dimY = bandDims[0].LH.dimY; - - for (i = 1; i < stages; i++) { - bandDims[i].LL.dimX = DIVANDRND(bandDims[i-1].LL.dimX,2); - bandDims[i].LL.dimY = DIVANDRND(bandDims[i-1].LL.dimY,2); - bandDims[i].HL.dimX = bandDims[i-1].LL.dimX - bandDims[i].LL.dimX; - bandDims[i].HL.dimY = bandDims[i].LL.dimY; - bandDims[i].LH.dimX = bandDims[i].LL.dimX; - bandDims[i].LH.dimY = bandDims[i-1].LL.dimY - bandDims[i].LL.dimY; - bandDims[i].HH.dimX = bandDims[i].HL.dimX; - bandDims[i].HH.dimY = bandDims[i].LH.dimY; - } - -#if 0 - printf("Original image pixWidth x pixHeight: %d x %d\n", pixWidth, pixHeight); - for (i = 0; i < stages; i++) { - printf("Stage %d: LL: pixWidth x pixHeight: %d x %d\n", i, bandDims[i].LL.dimX, bandDims[i].LL.dimY); - printf("Stage %d: HL: pixWidth x pixHeight: %d x %d\n", i, bandDims[i].HL.dimX, bandDims[i].HL.dimY); - printf("Stage %d: LH: pixWidth x pixHeight: %d x %d\n", i, bandDims[i].LH.dimX, bandDims[i].LH.dimY); - printf("Stage %d: HH: pixWidth x pixHeight: %d x %d\n", i, bandDims[i].HH.dimX, bandDims[i].HH.dimY); - } -#endif - - size = samplesNum*sizeof(T); - cudaMallocHost((void **)&src, size); - cudaCheckError("Malloc host"); - dst = (T*)malloc(size); - memset(src, 0, size); - memset(dst, 0, size); - result = (unsigned char *)malloc(samplesNum); - cudaMemcpy(src, component_cuda, size, cudaMemcpyDeviceToHost); - cudaCheckError("Memcopy device to host"); - - // LL Band - size = bandDims[stages-1].LL.dimX * sizeof(T); - for (i = 0; i < bandDims[stages-1].LL.dimY; i++) { - memcpy(dst+i*pixWidth, src+i*bandDims[stages-1].LL.dimX, size); - } - - for (s = stages - 1; s >= 0; s--) { - // HL Band - size = bandDims[s].HL.dimX * sizeof(T); - offset = bandDims[s].LL.dimX * bandDims[s].LL.dimY; - for (i = 0; i < bandDims[s].HL.dimY; i++) { - memcpy(dst+i*pixWidth+bandDims[s].LL.dimX, - src+offset+i*bandDims[s].HL.dimX, - size); - } - - // LH band - size = bandDims[s].LH.dimX * sizeof(T); - offset += bandDims[s].HL.dimX * bandDims[s].HL.dimY; - yOffset = bandDims[s].LL.dimY; - for (i = 0; i < bandDims[s].HL.dimY; i++) { - memcpy(dst+(yOffset+i)*pixWidth, - src+offset+i*bandDims[s].LH.dimX, - size); - } - - //HH band - size = bandDims[s].HH.dimX * sizeof(T); - offset += bandDims[s].LH.dimX * bandDims[s].LH.dimY; - yOffset = bandDims[s].HL.dimY; - for (i = 0; i < bandDims[s].HH.dimY; i++) { - memcpy(dst+(yOffset+i)*pixWidth+bandDims[s].LH.dimX, - src+offset+i*bandDims[s].HH.dimX, - size); - } - } - - /* Write component */ - samplesToChar(result, dst, samplesNum, filename); - - char outfile[strlen(filename)+strlen(suffix)]; - strcpy(outfile, filename); - strcpy(outfile+strlen(filename), suffix); - i = open(outfile, O_CREAT|O_WRONLY, 0644); - if (i == -1) { - error(0,errno,"cannot access %s", outfile); - return -1; - } - printf("\nWriting to %s (%d x %d)\n", outfile, pixWidth, pixHeight); - ssize_t x; - x = write(i, result, samplesNum); - close(i); - - cudaFreeHost(src); - cudaCheckError("Cuda free host memory"); - free(dst); - free(result); - free(bandDims); - if (x == 0) return 1; - return 0; -} -template int writeNStage2DDWT(float *component_cuda, int pixWidth, int pixHeight, int stages, const char * filename, const char * suffix); -template int writeNStage2DDWT(int *component_cuda, int pixWidth, int pixHeight, int stages, const char * filename, const char * suffix); diff --git a/examples/dwt2d/dwt.h b/examples/dwt2d/dwt.h deleted file mode 100644 index d84a18e..0000000 --- a/examples/dwt2d/dwt.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2009, Jiri Matela - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _DWT_H -#define _DWT_H - -template -int nStage2dDWT(T *in, T *out, T *backup, int pixWidth, int pixHeight, - int stages, bool forward); - -template -int writeNStage2DDWT(T *component_cuda, int width, int height, int stages, - const char *filename, const char *suffix); -template -int writeLinear(T *component_cuda, int width, int height, const char *filename, - const char *suffix); - -#endif diff --git a/examples/dwt2d/dwt_cuda/common.cu b/examples/dwt2d/dwt_cuda/common.cu deleted file mode 100755 index 5936f57..0000000 --- a/examples/dwt2d/dwt_cuda/common.cu +++ /dev/null @@ -1,35 +0,0 @@ -/// -/// @file common.cu -/// @author Martin Jirman (207962@mail.muni.cz) -/// @date 2011-01-20 14:37 -/// -/// Copyright (c) 2011 Martin Jirman -/// All rights reserved. -/// -/// Redistribution and use in source and binary forms, with or without -/// modification, are permitted provided that the following conditions are met: -/// -/// * Redistributions of source code must retain the above copyright -/// notice, this list of conditions and the following disclaimer. -/// * Redistributions in binary form must reproduce the above copyright -/// notice, this list of conditions and the following disclaimer in the -/// documentation and/or other materials provided with the distribution. -/// -/// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -/// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -/// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -/// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -/// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -/// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -/// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -/// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -/// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -/// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -/// POSSIBILITY OF SUCH DAMAGE. -/// - -#include "common.h" - -namespace dwt_cuda { - bool CudaDWTTester::testRunning = false; -} diff --git a/examples/dwt2d/dwt_cuda/common.h b/examples/dwt2d/dwt_cuda/common.h deleted file mode 100644 index 6fc531e..0000000 --- a/examples/dwt2d/dwt_cuda/common.h +++ /dev/null @@ -1,232 +0,0 @@ -/// -/// @file common.h -/// @author Martin Jirman (207962@mail.muni.cz) -/// @brief Common stuff for all CUDA dwt functions. -/// @date 2011-01-20 14:19 -/// -/// Copyright (c) 2011 Martin Jirman -/// All rights reserved. -/// -/// Redistribution and use in source and binary forms, with or without -/// modification, are permitted provided that the following conditions are met: -/// -/// * Redistributions of source code must retain the above copyright -/// notice, this list of conditions and the following disclaimer. -/// * Redistributions in binary form must reproduce the above copyright -/// notice, this list of conditions and the following disclaimer in the -/// documentation and/or other materials provided with the distribution. -/// -/// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -/// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -/// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -/// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -/// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -/// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -/// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -/// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -/// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -/// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -/// POSSIBILITY OF SUCH DAMAGE. -/// - -#ifndef DWT_COMMON_H -#define DWT_COMMON_H - -#include -#include -#include - -// compile time minimum macro -#define CTMIN(a, b) (((a) < (b)) ? (a) : (b)) - -// performance testing macros -#if defined(GPU_DWT_TESTING) -#define PERF_BEGIN \ - { \ - dwt_cuda::CudaDWTTester PERF_TESTER; \ - for (int PERF_N = PERF_TESTER.getNumIterations(); PERF_N--;) { \ - PERF_TESTER.beginTestIteration(); - -#define PERF_END(PERF_NAME, PERF_W, PERF_H) \ - PERF_TESTER.endTestIteration(); \ - } \ - PERF_TESTER.showPerformance(PERF_NAME, PERF_W, PERF_H); \ - } -#else // GPU_DWT_TESTING -#define PERF_BEGIN -#define PERF_END(PERF_NAME, PERF_W, PERF_H) -#endif // GPU_DWT_TESTING - -namespace dwt_cuda { - -/// Divide and round up. -template -__device__ __host__ inline T divRndUp(const T &n, const T &d) { - return (n / d) + ((n % d) ? 1 : 0); -} - -// 9/7 forward DWT lifting schema coefficients -const float f97Predict1 = -1.586134342; ///< forward 9/7 predict 1 -const float f97Update1 = -0.05298011854; ///< forward 9/7 update 1 -const float f97Predict2 = 0.8829110762; ///< forward 9/7 predict 2 -const float f97Update2 = 0.4435068522; ///< forward 9/7 update 2 - -// 9/7 reverse DWT lifting schema coefficients -const float r97update2 = -f97Update2; ///< undo 9/7 update 2 -const float r97predict2 = -f97Predict2; ///< undo 9/7 predict 2 -const float r97update1 = -f97Update1; ///< undo 9/7 update 1 -const float r97Predict1 = -f97Predict1; ///< undo 9/7 predict 1 - -// FDWT 9/7 scaling coefficients -const float scale97Mul = 1.23017410491400f; -const float scale97Div = 1.0 / scale97Mul; - -// 5/3 forward DWT lifting schema coefficients -const float forward53Predict = -0.5f; /// forward 5/3 predict -const float forward53Update = 0.25f; /// forward 5/3 update - -// 5/3 forward DWT lifting schema coefficients -const float reverse53Update = -forward53Update; /// undo 5/3 update -const float reverse53Predict = -forward53Predict; /// undo 5/3 predict - -/// Functor which adds scaled sum of neighbors to given central pixel. -struct AddScaledSum { - const float scale; // scale of neighbors - __device__ AddScaledSum(const float scale) : scale(scale) {} - __device__ void operator()(const float p, float &c, const float n) const { - - // if(threadIdx.x == 0) { - - // printf("scale %f, p %f c %f n %f , result: %f\n", scale, p, c, n, - // scale * (p + n) ); - - // } - - c += scale * (p + n); - } -}; - -/// Returns index ranging from 0 to num threads, such that first half -/// of threads get even indices and others get odd indices. Each thread -/// gets different index. -/// Example: (for 8 threads) threadIdx.x: 0 1 2 3 4 5 6 7 -/// parityIdx: 0 2 4 6 1 3 5 7 -/// @tparam THREADS total count of participating threads -/// @return parity-separated index of thread -template __device__ inline int parityIdx() { - return (threadIdx.x * 2) - (THREADS - 1) * (threadIdx.x / (THREADS / 2)); -} - -/// size of shared memory -#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200) -const int SHM_SIZE = 48 * 1024; -#else -const int SHM_SIZE = 16 * 1024; -#endif - -/// Perrformance and return code tester. -class CudaDWTTester { -private: - static bool testRunning; ///< true if any test is currently running - cudaEvent_t beginEvent; ///< begin CUDA event - cudaEvent_t endEvent; ///< end CUDA event - std::vector times; ///< collected times - const bool disabled; ///< true if this object is disabled -public: - /// Checks CUDA related error. - /// @param status return code to be checked - /// @param message message to be shown if there was an error - /// @return true if there was no error, false otherwise - static bool check(const cudaError_t &status, const char *message) { -#if defined(GPU_DWT_TESTING) - if ((!testRunning) && status != cudaSuccess) { - const char *errorString = cudaGetErrorString(status); - fprintf(stderr, "CUDA ERROR: '%s': %s\n", message, errorString); - fflush(stderr); - return false; - } -#endif // GPU_DWT_TESTING - return true; - } - - /// Checks last kernel call for errors. - /// @param message description of the kernel call - /// @return true if there was no error, false otherwise - static bool checkLastKernelCall(const char *message) { -#if defined(GPU_DWT_TESTING) - return testRunning ? true : check(cudaThreadSynchronize(), message); -#else // GPU_DWT_TESTING - return true; -#endif // GPU_DWT_TESTING - } - - /// Initializes DWT tester for time measurement - CudaDWTTester() : disabled(testRunning) {} - - /// Gets rpefered number of iterations - int getNumIterations() { return disabled ? 1 : 31; } - - /// Starts one test iteration. - void beginTestIteration() { - if (!disabled) { - cudaEventCreate(&beginEvent); - cudaEventCreate(&endEvent); - cudaEventRecord(beginEvent, 0); - testRunning = true; - } - } - - /// Ends on etest iteration. - void endTestIteration() { - if (!disabled) { - float time; - testRunning = false; - cudaEventRecord(endEvent, 0); - cudaEventSynchronize(endEvent); - cudaEventElapsedTime(&time, beginEvent, endEvent); - cudaEventDestroy(beginEvent); - cudaEventDestroy(endEvent); - times.push_back(time); - } - } - - /// Shows brief info about all iterations. - /// @param name name of processing method - /// @param sizeX width of processed image - /// @param sizeY height of processed image - void showPerformance(const char *name, const int sizeX, const int sizeY) { - if (!disabled) { - // compute mean and median - std::sort(times.begin(), times.end()); - double sum = 0; - for (int i = times.size(); i--;) { - sum += times[i]; - } - const double median = - (times[times.size() / 2] + times[(times.size() - 1) / 2]) * 0.5f; - printf(" %s: %7.3f ms (mean) %7.3f ms (median) %7.3f ms (max) " - "(%d x %d)\n", - name, (sum / times.size()), median, times[times.size() - 1], sizeX, - sizeY); - } - } -}; - -/// Simple cudaMemcpy wrapped in performance tester. -/// @param dest destination bufer -/// @param src source buffer -/// @param sx width of copied image -/// @param sy height of copied image -template -inline void memCopy(T *const dest, const T *const src, const size_t sx, - const size_t sy) { - cudaError_t status; - PERF_BEGIN - status = cudaMemcpy(dest, src, sx * sy * sizeof(T), cudaMemcpyDeviceToDevice); - PERF_END(" memcpy", sx, sy) - CudaDWTTester::check(status, "memcpy device > device"); -} - -} // end of namespace dwt_cuda - -#endif // DWT_COMMON_CUDA_H diff --git a/examples/dwt2d/dwt_cuda/dwt.h b/examples/dwt2d/dwt_cuda/dwt.h deleted file mode 100644 index d6e2161..0000000 --- a/examples/dwt2d/dwt_cuda/dwt.h +++ /dev/null @@ -1,103 +0,0 @@ -/// -/// @file dwt.h -/// @author Martin Jirman (207962@mail.muni.cz) -/// @brief Entry points for CUDA implementaion of 9/7 and 5/3 DWT. -/// @date 2011-01-20 11:41 -/// -/// -/// -/// Copyright (c) 2011 Martin Jirman -/// All rights reserved. -/// -/// Redistribution and use in source and binary forms, with or without -/// modification, are permitted provided that the following conditions are met: -/// -/// * Redistributions of source code must retain the above copyright -/// notice, this list of conditions and the following disclaimer. -/// * Redistributions in binary form must reproduce the above copyright -/// notice, this list of conditions and the following disclaimer in the -/// documentation and/or other materials provided with the distribution. -/// -/// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -/// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -/// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -/// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -/// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -/// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -/// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -/// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -/// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -/// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -/// POSSIBILITY OF SUCH DAMAGE. -/// -/// -/// -/// Following conditions are common for all four DWT functions: -/// - Both input and output images are stored in GPU memory with no padding -/// of lines or interleaving of pixels. -/// - DWT coefficients are stored as follows: Each band is saved as one -/// consecutive chunk (no padding/stride/interleaving). Deepest level bands -/// (smallest ones) are stored first (at the beginning of the input/output -/// buffers), less deep bands follow. There is no padding between stored -/// bands in the buffer. Order of bands of the same level in the buffer is -/// following: Low-low band (or deeper level subbands) is stored first. -/// Vertical-low/horizontal-high band follows. Vertical-high/horizonal-low -/// band is saved next and finally, the high-high band is saved. Out of all -/// low-low bands, only th edeepest one is saved (right at the beginning of -/// the buffer), others are replaced with deeper level subbands. -/// - Input images of all functions won't be preserved (will be overwritten). -/// - Input and output buffers can't overlap. -/// - Size of output buffer must be greater or equal to size of input buffer. -/// -/// There are no common compile time settings (buffer size, etc...) for -/// all DWTs, because each DTW type needs different amount of GPU resources. -/// Instead, each DWT type has its own compile time settings, which can be -/// found in *.cu file, where it is implemented. -/// - -#ifndef DWT_CUDA_H -#define DWT_CUDA_H - -namespace dwt_cuda { - -/// Forward 5/3 2D DWT. See common rules (above) for more details. -/// @param in Expected to be normalized into range [-128, 127]. -/// Will not be preserved (will be overwritten). -/// @param out output buffer on GPU -/// @param sizeX width of input image (in pixels) -/// @param sizeY height of input image (in pixels) -/// @param levels number of recursive DWT levels -void fdwt53(int *in, int *out, int sizeX, int sizeY, int levels); - -/// Reverse 5/3 2D DWT. See common rules (above) for more details. -/// @param in Input DWT coefficients. Format described in common rules. -/// Will not be preserved (will be overwritten). -/// @param out output buffer on GPU - will contain original image -/// in normalized range [-128, 127]. -/// @param sizeX width of input image (in pixels) -/// @param sizeY height of input image (in pixels) -/// @param levels number of recursive DWT levels -void rdwt53(int *in, int *out, int sizeX, int sizeY, int levels); - -/// Forward 9/7 2D DWT. See common rules (above) for more details. -/// @param in Input DWT coefficients. Should be normalized (in range -/// [-0.5, 0.5]). Will not be preserved (will be overwritten). -/// @param out output buffer on GPU - format specified in common rules -/// @param sizeX width of input image (in pixels) -/// @param sizeY height of input image (in pixels) -/// @param levels number of recursive DWT levels -void fdwt97(float *in, float *out, int sizeX, int sizeY, int levels); - -/// Reverse 9/7 2D DWT. See common rules (above) for more details. -/// @param in Input DWT coefficients. Format described in common rules. -/// Will not be preserved (will be overwritten). -/// @param out output buffer on GPU - will contain original image -/// in normalized range [-0.5, 0.5]. -/// @param sizeX width of input image (in pixels) -/// @param sizeY height of input image (in pixels) -/// @param levels number of recursive DWT levels -void rdwt97(float *in, float *out, int sizeX, int sizeY, int levels); - -} // namespace dwt_cuda - -#endif // DWT_CUDA_H diff --git a/examples/dwt2d/dwt_cuda/fdwt53.cu b/examples/dwt2d/dwt_cuda/fdwt53.cu deleted file mode 100755 index c50bdd2..0000000 --- a/examples/dwt2d/dwt_cuda/fdwt53.cu +++ /dev/null @@ -1,400 +0,0 @@ -/// @file fdwt53.cu -/// @brief CUDA implementation of forward 5/3 2D DWT. -/// @author Martin Jirman (207962@mail.muni.cz) -/// @date 2011-02-04 13:23 -/// -/// -/// Copyright (c) 2011 Martin Jirman -/// All rights reserved. -/// -/// Redistribution and use in source and binary forms, with or without -/// modification, are permitted provided that the following conditions are met: -/// -/// * Redistributions of source code must retain the above copyright -/// notice, this list of conditions and the following disclaimer. -/// * Redistributions in binary form must reproduce the above copyright -/// notice, this list of conditions and the following disclaimer in the -/// documentation and/or other materials provided with the distribution. -/// -/// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -/// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -/// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -/// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -/// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -/// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -/// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -/// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -/// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -/// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -/// POSSIBILITY OF SUCH DAMAGE. -/// - - -#include "common.h" -#include "transform_buffer.h" -#include "io.h" - -namespace dwt_cuda { - - - /// Wraps buffer and methods needed for computing one level of 5/3 FDWT - /// using sliding window approach. - /// @tparam WIN_SIZE_X width of sliding window - /// @tparam WIN_SIZE_Y height of sliding window - template - class FDWT53 { - private: - - /// Info needed for processing of one input column. - /// @tparam CHECKED_LOADER true if column's loader should check boundaries - /// false if there are no near boudnaries to check - template - struct FDWT53Column { - /// loader for the column - VerticalDWTPixelLoader loader; - - /// offset of the column in shared buffer - int offset; - - // backup of first 3 loaded pixels (not transformed) - int pixel0, pixel1, pixel2; - - /// Sets all fields to anything to prevent 'uninitialized' warnings. - __device__ void clear() { - offset = pixel0 = pixel1 = pixel2 = 0; - loader.clear(); - } - }; - - - /// Type of shared memory buffer for 5/3 FDWT transforms. - typedef TransformBuffer FDWT53Buffer; - - /// Actual shared buffer used for forward 5/3 DWT. - FDWT53Buffer buffer; - - /// Difference between indices of two vertical neighbors in buffer. - enum { STRIDE = FDWT53Buffer::VERTICAL_STRIDE }; - - - /// Forward 5/3 DWT predict operation. - struct Forward53Predict { - __device__ void operator() (const int p, int & c, const int n) const { - // c = n; - c -= (p + n) / 2; // F.8, page 126, ITU-T Rec. T.800 final draft the real one - } - }; - - - /// Forward 5/3 DWT update operation. - struct Forward53Update { - __device__ void operator() (const int p, int & c, const int n) const { - c += (p + n + 2) / 4; // F.9, page 126, ITU-T Rec. T.800 final draft - } - }; - - - /// Initializes one column: computes offset of the column in shared memory - /// buffer, initializes loader and finally uses it to load first 3 pixels. - /// @tparam CHECKED true if loader of the column checks boundaries - /// @param column (uninitialized) column info to be initialized - /// @param input input image - /// @param sizeX width of the input image - /// @param sizeY height of the input image - /// @param colIndex x-axis coordinate of the column (relative to the left - /// side of this threadblock's block of input pixels) - /// @param firstY y-axis coordinate of first image row to be transformed - - template - __device__ void initColumn(FDWT53Column & column, - const int * const input, - const int sizeX, const int sizeY, - const int colIndex, const int firstY) { - // get offset of the column with index 'cId' - column.offset = buffer.getColumnOffset(colIndex); - - // coordinates of the first pixel to be loaded - const int firstX = blockIdx.x * WIN_SIZE_X + colIndex; - - if(blockIdx.y == 0) { - // topmost block - apply mirroring rules when loading first 3 rows - column.loader.init(sizeX, sizeY, firstX, firstY); - - // load pixels in mirrored way - column.pixel2 = column.loader.loadFrom(input); // loaded pixel #0 - column.pixel1 = column.loader.loadFrom(input); // loaded pixel #1 - column.pixel0 = column.loader.loadFrom(input); // loaded pixel #2 - - // reinitialize loader to start with pixel #1 again - column.loader.init(sizeX, sizeY, firstX, firstY + 1); - } else { - // non-topmost row - regular loading: - column.loader.init(sizeX, sizeY, firstX, firstY - 2); - - // load 3 rows into the column - column.pixel0 = column.loader.loadFrom(input); - column.pixel1 = column.loader.loadFrom(input); - column.pixel2 = column.loader.loadFrom(input); - // Now, the next pixel, which will be loaded by loader, is pixel #1. - } - - } - - - /// Loads and vertically transforms given column. Assumes that first 3 - /// pixels are already loaded in column fields pixel0 ... pixel2. - /// @tparam CHECKED true if loader of the column checks boundaries - /// @param column column to be loaded and vertically transformed - /// @param input pointer to input image data - template - __device__ void loadAndVerticallyTransform(FDWT53Column & column, - const int * const input) { - // take 3 loaded pixels and put them into shared memory transform buffer - buffer[column.offset + 0 * STRIDE] = column.pixel0; - buffer[column.offset + 1 * STRIDE] = column.pixel1; - buffer[column.offset + 2 * STRIDE] = column.pixel2; - - // load remaining pixels to be able to vertically transform the window - - for(int i = 3; i < (3 + WIN_SIZE_Y); i++) - { - buffer[column.offset + i * STRIDE] = column.loader.loadFrom(input); - } - - // remember last 3 pixels for use in next iteration - column.pixel0 = buffer[column.offset + (WIN_SIZE_Y + 0) * STRIDE]; - column.pixel1 = buffer[column.offset + (WIN_SIZE_Y + 1) * STRIDE]; - column.pixel2 = buffer[column.offset + (WIN_SIZE_Y + 2) * STRIDE]; - - // vertically transform the column in transform buffer - buffer.forEachVerticalOdd(column.offset, Forward53Predict()); - buffer.forEachVerticalEven(column.offset, Forward53Update()); - - } - - - /// Actual implementation of 5/3 FDWT. - /// @tparam CHECK_LOADS true if input loader must check boundaries - /// @tparam CHECK_WRITES true if output writer must check boundaries - /// @param in input image - /// @param out output buffer - /// @param sizeX width of the input image - /// @param sizeY height of the input image - /// @param winSteps number of sliding window steps - template - __device__ void transform(const int * const in, int * const out, - const int sizeX, const int sizeY, - const int winSteps) { - // info about one main and one boundary columns processed by this thread - FDWT53Column column; - FDWT53Column boundaryColumn; // only few threads use this - - // Initialize all column info: initialize loaders, compute offset of - // column in shared buffer and initialize loader of column. - const int firstY = blockIdx.y * WIN_SIZE_Y * winSteps; - initColumn(column, in, sizeX, sizeY, threadIdx.x, firstY); //has been checked Mar 9th - - - // first 3 threads initialize boundary columns, others do not use them - boundaryColumn.clear(); - if(threadIdx.x < 3) { - // index of boundary column (relative x-axis coordinate of the column) - const int colId = threadIdx.x + ((threadIdx.x == 0) ? WIN_SIZE_X : -3); - - // initialize the column - initColumn(boundaryColumn, in, sizeX, sizeY, colId, firstY); - - } - - - // index of column which will be written into output by this thread - const int outColumnIndex = parityIdx(); - - // offset of column which will be written by this thread into output - const int outColumnOffset = buffer.getColumnOffset(outColumnIndex); - - // initialize output writer for this thread - const int outputFirstX = blockIdx.x * WIN_SIZE_X + outColumnIndex; - VerticalDWTBandWriter writer; - writer.init(sizeX, sizeY, outputFirstX, firstY); - __syncthreads(); - - - // Sliding window iterations: - // Each iteration assumes that first 3 pixels of each column are loaded. - for(int w = 0; w < winSteps; w++) { - - // For each column (including boundary columns): load and vertically - // transform another WIN_SIZE_Y lines. - loadAndVerticallyTransform(column, in); - if(threadIdx.x < 3) { - loadAndVerticallyTransform(boundaryColumn, in); - } - - // wait for all columns to be vertically transformed and transform all - // output rows horizontally - __syncthreads(); - - - buffer.forEachHorizontalOdd(2, WIN_SIZE_Y, Forward53Predict()); - __syncthreads(); - - buffer.forEachHorizontalEven(2, WIN_SIZE_Y, Forward53Update()); - - // wait for all output rows to be transformed horizontally and write - // them into output buffer - __syncthreads(); - - - for(int r = 2; r < (2 + WIN_SIZE_Y); r += 2) { - // Write low coefficients from output column into low band ... - writer.writeLowInto(out, buffer[outColumnOffset + r * STRIDE]); - // ... and high coeficients into the high band. - writer.writeHighInto(out, buffer[outColumnOffset + (r+1) * STRIDE]); - } - - // before proceeding to next iteration, wait for all output columns - // to be written into the output - __syncthreads(); - - } - - } - - - public: - /// Determines, whether this block's pixels touch boundary and selects - /// right version of algorithm according to it - for many threadblocks, it - /// selects version which does not deal with boundary mirroring and thus is - /// slightly faster. - /// @param in input image - /// @param out output buffer - /// @param sx width of the input image - /// @param sy height of the input image - /// @param steps number of sliding window steps - __device__ static void run(const int * const in, int * const out, - const int sx, const int sy, const int steps) { - // if(blockIdx.x==0 && blockIdx.y ==11 && threadIdx.x >=0&&threadIdx.x <64){ - // object with transform buffer in shared memory - __shared__ FDWT53 fdwt53; - - // Compute limits of this threadblock's block of pixels and use them to - // determine, whether this threadblock will have to deal with boundary. - // (1 in next expressions is for radius of impulse response of 9/7 FDWT.) - const int maxX = (blockIdx.x + 1) * WIN_SIZE_X + 1; - const int maxY = (blockIdx.y + 1) * WIN_SIZE_Y * steps + 1; - const bool atRightBoudary = maxX >= sx; - const bool atBottomBoudary = maxY >= sy; - - // Select specialized version of code according to distance of this - // threadblock's pixels from image boundary. - - // if(threadIdx.x == 0) { - // printf("fdwt53 run"); - // } - if(atBottomBoudary) - { - // near bottom boundary => check both writing and reading - fdwt53.transform(in, out, sx, sy, steps); - } else if(atRightBoudary) - { - // near right boundary only => check writing only - fdwt53.transform(in, out, sx, sy, steps); - } else - { - // no nearby boundary => check nothing - fdwt53.transform(in, out, sx, sy, steps); - } - } - // } - - }; // end of class FDWT53 - - - - /// Main GPU 5/3 FDWT entry point. - /// @tparam WIN_SX width of sliding window to be used - /// @tparam WIN_SY height of sliding window to be used - /// @param input input image - /// @param output output buffer - /// @param sizeX width of the input image - /// @param sizeY height of the input image - /// @param winSteps number of sliding window steps - template - __launch_bounds__(WIN_SX, CTMIN(SHM_SIZE/sizeof(FDWT53), 8)) - __global__ void fdwt53Kernel(const int * const input, int * const output, - const int sizeX, const int sizeY, - const int winSteps) { - FDWT53::run(input, output, sizeX, sizeY, winSteps); - } - - - - /// Only computes optimal number of sliding window steps, - /// number of threadblocks and then lanches the 5/3 FDWT kernel. - /// @tparam WIN_SX width of sliding window - /// @tparam WIN_SY height of sliding window - /// @param in input image - /// @param out output buffer - /// @param sx width of the input image - /// @param sy height of the input image - template - void launchFDWT53Kernel (int * in, int * out, int sx, int sy) { - // compute optimal number of steps of each sliding window - - const int steps = divRndUp(sy, 15 * WIN_SY); - - int gx = divRndUp(sx, WIN_SX); - int gy = divRndUp(sy, WIN_SY * steps); - - printf("\n sliding steps = %d , gx = %d , gy = %d \n", steps, gx, gy); - - // prepare grid size - dim3 gSize(divRndUp(sx, WIN_SX), divRndUp(sy, WIN_SY * steps)); - // printf("\n globalx=%d, globaly=%d, blocksize=%d\n", gSize.x, gSize.y, WIN_SX); - - // run kernel, possibly measure time and finally check the call - // PERF_BEGIN - fdwt53Kernel<<>>(in, out, sx, sy, steps); - // PERF_END(" FDWT53", sx, sy) - // CudaDWTTester::checkLastKernelCall("FDWT 5/3 kernel"); - printf("fdwt53Kernel in launchFDWT53Kernel has finished"); - - } - - - - /// Forward 5/3 2D DWT. See common rules (above) for more details. - /// @param in Expected to be normalized into range [-128, 127]. - /// Will not be preserved (will be overwritten). - /// @param out output buffer on GPU - /// @param sizeX width of input image (in pixels) - /// @param sizeY height of input image (in pixels) - /// @param levels number of recursive DWT levels - void fdwt53(int * in, int * out, int sizeX, int sizeY, int levels) { - // select right width of kernel for the size of the image - - if(sizeX >= 960) { - launchFDWT53Kernel<192, 8>(in, out, sizeX, sizeY); - } else if (sizeX >= 480) { - launchFDWT53Kernel<128, 8>(in, out, sizeX, sizeY); - } else { - launchFDWT53Kernel<64, 8>(in, out, sizeX, sizeY); - } - - // if this was not the last level, continue recursively with other levels - if(levels > 1) { - // copy output's LL band back into input buffer - const int llSizeX = divRndUp(sizeX, 2); - const int llSizeY = divRndUp(sizeY, 2); - // printf("\n llSizeX = %d , llSizeY = %d \n", llSizeX, llSizeY); - memCopy(in, out, llSizeX, llSizeY); //the function memCopy in cuda_dwt/common.h line 238 - - // run remaining levels of FDWT - fdwt53(in, out, llSizeX, llSizeY, levels - 1); - } - } - - - -} // end of namespace dwt_cuda diff --git a/examples/dwt2d/dwt_cuda/fdwt97.cu b/examples/dwt2d/dwt_cuda/fdwt97.cu deleted file mode 100755 index 402f8fe..0000000 --- a/examples/dwt2d/dwt_cuda/fdwt97.cu +++ /dev/null @@ -1,383 +0,0 @@ -/// -/// @file fdwt97.cu -/// @brief CUDA implementation of forward 9/7 2D DWT. -/// @author Martin Jirman (207962@mail.muni.cz) -/// @date 2011-01-20 13:18 -/// -/// -/// Copyright (c) 2011 Martin Jirman -/// All rights reserved. -/// -/// Redistribution and use in source and binary forms, with or without -/// modification, are permitted provided that the following conditions are met: -/// -/// * Redistributions of source code must retain the above copyright -/// notice, this list of conditions and the following disclaimer. -/// * Redistributions in binary form must reproduce the above copyright -/// notice, this list of conditions and the following disclaimer in the -/// documentation and/or other materials provided with the distribution. -/// -/// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -/// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -/// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -/// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -/// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -/// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -/// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -/// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -/// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -/// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -/// POSSIBILITY OF SUCH DAMAGE. -/// - - -#include "common.h" -#include "transform_buffer.h" -#include "io.h" - - -namespace dwt_cuda { - - - - /// Wraps a buffer and methods for computing 9/7 FDWT with sliding window - /// of specified size. Template arguments specify this size. - /// @tparam WIN_SIZE_X width of sliding window - /// @tparam WIN_SIZE_Y height of sliding window - template - class FDWT97 { - private: - /// Type of shared memory buffer used for 9/7 DWT. - typedef TransformBuffer FDWT97Buffer; - - /// Actual shared buffer used for forward 9/7 DWT. - FDWT97Buffer buffer; - - /// Difference of indices of two vertically neighboring items in buffer. - enum { STRIDE = FDWT97Buffer::VERTICAL_STRIDE }; - - - /// One thread's info about loading input image - /// @tparam CHECKED true if loader should check for image boundaries - template - struct FDWT97ColumnLoadingInfo { - /// Loader of pixels from some input image. - VerticalDWTPixelLoader loader; - - /// Offset of column loaded by loader. (Offset in shared buffer.) - int offset; - }; - - - /// Horizontal 9/7 FDWT on specified lines of transform buffer. - /// @param lines number of lines to be transformed - /// @param firstLine index of the first line to be transformed - __device__ void horizontalFDWT97(const int lines, const int firstLine) { - __syncthreads(); - - buffer.forEachHorizontalOdd(firstLine, lines, AddScaledSum(f97Predict1)); - __syncthreads(); - buffer.forEachHorizontalEven(firstLine, lines, AddScaledSum(f97Update1)); - __syncthreads(); - buffer.forEachHorizontalOdd(firstLine, lines, AddScaledSum(f97Predict2)); - __syncthreads(); - buffer.forEachHorizontalEven(firstLine, lines, AddScaledSum(f97Update2)); - __syncthreads(); - - buffer.scaleHorizontal(scale97Div, scale97Mul, firstLine, lines); - - __syncthreads(); - - } - - - /// Initializes one column of shared transform buffer with 7 input pixels. - /// Those 7 pixels will not be transformed. Also initializes given loader. - /// @tparam CHECKED true if loader should check for image boundaries - /// @param column (uninitialized) object for loading input pixels - /// @param columnIndex index (not offset!) of the column to be loaded - /// (relative to threadblock's first column) - /// @param input pointer to input image in GPU memory - /// @param sizeX width of the input image - /// @param sizeY height of the input image - /// @param firstY index of first row to be loaded from image - template - __device__ void initColumn(FDWT97ColumnLoadingInfo & column, - const int columnIndex, const float * const input, - const int sizeX, const int sizeY, - const int firstY) { - // get offset of the column with index 'columnIndex' - column.offset = buffer.getColumnOffset(columnIndex); - - // printf(" offset: %d , threadIdx: %d, blockIdx.y %d\n ", column.offset, threadIdx.x, blockIdx.y); - - // x-coordinate of the first pixel to be loaded by given loader - const int firstX = blockIdx.x * WIN_SIZE_X + columnIndex; - - if(blockIdx.y == 0) { - // topmost block - apply mirroring rules when loading first 7 rows - column.loader.init(sizeX, sizeY, firstX, firstY); - - // load pixels in mirrored way - buffer[column.offset + 4 * STRIDE] = column.loader.loadFrom(input); - buffer[column.offset + 3 * STRIDE] = - buffer[column.offset + 5 * STRIDE] = column.loader.loadFrom(input); - buffer[column.offset + 2 * STRIDE] = - buffer[column.offset + 6 * STRIDE] = column.loader.loadFrom(input); - buffer[column.offset + 1 * STRIDE] = column.loader.loadFrom(input); - buffer[column.offset + 0 * STRIDE] = column.loader.loadFrom(input); - - // reinitialize loader to start with pixel #3 again - column.loader.init(sizeX, sizeY, firstX, firstY + 3); - - } else { - // non-topmost row - regular loading: - column.loader.init(sizeX, sizeY, firstX, firstY - 4); - - // load 7 rows into the transform buffer - for(int i = 0; i < 7; i++) { - buffer[column.offset + i * STRIDE] = column.loader.loadFrom(input); - - } - } - // Now, the next pixel, which will be loaded by loader, is pixel #3. - } - - - /// Loads another WIN_SIZE_Y pixels into given column using given loader. - /// @tparam CHECKED true if loader should check for image boundaries - /// @param input input image to load from - /// @param column loader and offset of loaded column in shared buffer - template - inline __device__ void loadWindowIntoColumn(const float * const input, - FDWT97ColumnLoadingInfo & column) { - for(int i = 7; i < (7 + WIN_SIZE_Y); i++) { - buffer[column.offset + i * STRIDE] = column.loader.loadFrom(input); - } - } - - - /// Main GPU 9/7 FDWT entry point. - /// @tparam CHECK_LOADS true if boundaries should be checked when loading - /// @tparam CHECK_WRITES true if boundaries should be checked when writing - /// @param in input image - /// @param out output buffer - /// @param sizeX width of the input image - /// @param sizeY height of the input image - /// @param winSteps number of steps of sliding window - template - __device__ void transform(const float * const in, float * const out, - const int sizeX, const int sizeY, - const int winSteps) { - // info about columns loaded by this thread: one main column and possibly - // one boundary column. (Only some threads load some boundary column.) - FDWT97ColumnLoadingInfo loadedColumn; - FDWT97ColumnLoadingInfo boundaryColumn; - - // Initialize first 7 lines of transform buffer. - const int firstY = blockIdx.y * WIN_SIZE_Y * winSteps; - initColumn(loadedColumn, threadIdx.x, in, sizeX, sizeY, firstY); - - // Some threads initialize boundary columns. - boundaryColumn.offset = 0; - boundaryColumn.loader.clear(); - if(threadIdx.x < 7) { - // each thread among first 7 ones gets index of one of boundary columns - const int colId = threadIdx.x + ((threadIdx.x < 3) ? WIN_SIZE_X : -7); - - // Thread initializes offset of the boundary column (in shared buffer), - // first 7 pixels of the column and a loader for this column. - initColumn(boundaryColumn, colId, in, sizeX, sizeY, firstY); - } - - // horizontally transform first 7 rows in all columns - horizontalFDWT97(7, 0); - - // Index of column handled by this thread. (First half of threads handle - // even columns and others handle odd columns.) - const int outColumnIndex = parityIdx(); - - // writer of output linear bands - initialize it - const int firstX = blockIdx.x * WIN_SIZE_X + outColumnIndex; - VerticalDWTBandWriter writer; - writer.init(sizeX, sizeY, firstX, firstY); - - // transform buffer offset of column transformed and saved by this thread - const int outColumnOffset = buffer.getColumnOffset(outColumnIndex); - - // (Each iteration of this loop assumes that first 7 rows of transform - // buffer are already loaded with horizontally transformed coefficients.) - for(int w = 0; w < winSteps; w++) { - // Load another WIN_SIZE_Y lines of thread's column into the buffer. - loadWindowIntoColumn(in, loadedColumn); - - // some threads also load boundary columns - if(threadIdx.x < 7) { - loadWindowIntoColumn(in, boundaryColumn); - } - - // horizontally transform all newly loaded lines - horizontalFDWT97(WIN_SIZE_Y, 7); - - // Using 7 registers, remember current values of last 7 rows of - // transform buffer. These rows are transformed horizontally only - // and will be used in next iteration. - float last7Lines[7]; - for(int i = 0; i < 7; i++) { - last7Lines[i] = buffer[outColumnOffset + (WIN_SIZE_Y + i) * STRIDE]; - } - - // vertically transform all central columns (do not scale yet) - buffer.forEachVerticalOdd(outColumnOffset, AddScaledSum(f97Predict1)); - buffer.forEachVerticalEven(outColumnOffset, AddScaledSum(f97Update1)); - buffer.forEachVerticalOdd(outColumnOffset, AddScaledSum(f97Predict2)); - buffer.forEachVerticalEven(outColumnOffset, AddScaledSum(f97Update2)); - - // Save all results of current window. Results are in transform buffer - // at rows from #4 to #(4 + WIN_SIZE_Y). Other rows are invalid now. - // (They only served as a boundary for vertical FDWT.) - - for(int i = 4; i < (4 + WIN_SIZE_Y); i += 2) { - const int index = outColumnOffset + i * STRIDE; - // Write low coefficients from column into low band ... - writer.writeLowInto(out, buffer[index] * scale97Div); - // ... and high coeficients into the high band. - writer.writeHighInto(out, buffer[index + STRIDE] * scale97Mul); - } - - // Use last 7 remembered lines as first 7 lines for next iteration. - // As expected, these lines are already horizontally transformed. - for(int i = 0; i < 7; i++) { - buffer[outColumnOffset + i * STRIDE] = last7Lines[i]; - - } - - // Wait for all writing threads before proceeding to loading new - // pixels in next iteration. (Not to overwrite those which - // are not written yet.) - __syncthreads(); - } - - } - - - public: - /// Runs one of specialized variants of 9/7 FDWT according to distance of - /// processed pixels to image boudnary. Some variants do not check for - /// boudnary and thus are slightly faster. - /// @param in input image - /// @param out output buffer - /// @param sx width of the input image - /// @param sy height of the input image - /// @param steps number of steps of sliding window - __device__ static void run(const float * const input, float * const output, - const int sx, const int sy, const int steps) { - // object with transform buffer in shared memory - __shared__ FDWT97 fdwt97; - - // Compute limits of this threadblock's block of pixels and use them to - // determine, whether this threadblock will have to deal with boundary. - // (3 in next expressions is for radius of impulse response of 9/7 FDWT.) - const int maxX = (blockIdx.x + 1) * WIN_SIZE_X + 3; - const int maxY = (blockIdx.y + 1) * WIN_SIZE_Y * steps + 3; - const bool atRightBoudary = maxX >= sx; - const bool atBottomBoudary = maxY >= sy; - - // Select specialized version of code according to distance of this - // threadblock's pixels from image boundary. - if(atBottomBoudary) { - // near bottom boundary => check both writing and reading - // printf("\n atBottomBoudary \n "); - fdwt97.transform(input, output, sx, sy, steps); - } else if(atRightBoudary) { - - // near right boundary only => check writing only - fdwt97.transform(input, output, sx, sy, steps); - } else { - - // no nearby boundary => check nothing - fdwt97.transform(input, output, sx, sy, steps); - } - } - - }; // end of class FDWT97 - - - - /// Main GPU 9/7 FDWT entry point. - /// @param input input image - /// @parma output output buffer - /// @param sx width of the input image - /// @param sy height of the input image - /// @param steps number of steps of sliding window - template - __launch_bounds__(WIN_SX, CTMIN(SHM_SIZE/sizeof(FDWT97), 8)) - __global__ void fdwt97Kernel(const float * const input, float * const output, - const int sx, const int sy, const int steps) { - // Excuse me, dear reader of this code - this call have to be here. If you - // try to simply put contents of following method right here, CUDA compiler - // (version 3.2) will spit tons of nonsense messy errors ... - // Hope they will not break it even more in future releases. - FDWT97::run(input, output, sx, sy, steps); - } - - - - /// Only computes optimal number of sliding window steps, - /// number of threadblocks and then lanches the 9/7 FDWT kernel. - /// @tparam WIN_SX width of sliding window - /// @tparam WIN_SY height of sliding window - /// @param in input image - /// @param out output buffer - /// @param sx width of the input image - /// @param sy height of the input image - template - void launchFDWT97Kernel (float * in, float * out, int sx, int sy) { - // compute optimal number of steps of each sliding window - const int steps = divRndUp(sy, 15 * WIN_SY); - - // prepare grid size - dim3 gSize(divRndUp(sx, WIN_SX), divRndUp(sy, WIN_SY * steps)); - printf("\n globalx=%d, globaly=%d, blocksize=%d\n", gSize.x, gSize.y, WIN_SX); - - // run kernel, possibly measure time and finally check the call - PERF_BEGIN - fdwt97Kernel<<>>(in, out, sx, sy, steps); - PERF_END(" FDWT97", sx, sy) - CudaDWTTester::checkLastKernelCall("FDWT 9/7 kernel"); - } - - - - /// Forward 9/7 2D DWT. See common rules (dwt.h) for more details. - /// @param in Input DWT coefficients. Should be normalized (in range - /// [-0.5, 0.5]). Will not be preserved (will be overwritten). - /// @param out output buffer on GPU - format specified in common rules - /// @param sizeX width of input image (in pixels) - /// @param sizeY height of input image (in pixels) - /// @param levels number of recursive DWT levels - void fdwt97(float * in, float * out, int sizeX, int sizeY, int levels) { - // select right width of kernel for the size of the image - if(sizeX >= 960) { - launchFDWT97Kernel<192, 8>(in, out, sizeX, sizeY); - } else if (sizeX >= 480) { - launchFDWT97Kernel<128, 6>(in, out, sizeX, sizeY); - } else { - launchFDWT97Kernel<64, 6>(in, out, sizeX, sizeY); - } - - // if this was not the last level, continue recursively with other levels - if(levels > 1) { - // copy output's LL band back into input buffer - const int llSizeX = divRndUp(sizeX, 2); - const int llSizeY = divRndUp(sizeY, 2); - memCopy(in, out, llSizeX, llSizeY); - - // run remaining levels of FDWT - fdwt97(in, out, llSizeX, llSizeY, levels - 1); - } - } - - - -} // end of namespace dwt_cuda diff --git a/examples/dwt2d/dwt_cuda/io.h b/examples/dwt2d/dwt_cuda/io.h deleted file mode 100644 index ae57ffc..0000000 --- a/examples/dwt2d/dwt_cuda/io.h +++ /dev/null @@ -1,440 +0,0 @@ -/// -/// @file: io.h -/// @brief Manages loading and saving lineary stored bands and input images. -/// @author Martin Jirman (207962@mail.muni.cz) -/// @date 2011-01-20 22:38 -/// -/// -/// Copyright (c) 2011 Martin Jirman -/// All rights reserved. -/// -/// Redistribution and use in source and binary forms, with or without -/// modification, are permitted provided that the following conditions are met: -/// -/// * Redistributions of source code must retain the above copyright -/// notice, this list of conditions and the following disclaimer. -/// * Redistributions in binary form must reproduce the above copyright -/// notice, this list of conditions and the following disclaimer in the -/// documentation and/or other materials provided with the distribution. -/// -/// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -/// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -/// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -/// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -/// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -/// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -/// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -/// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -/// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -/// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -/// POSSIBILITY OF SUCH DAMAGE. -/// - -#ifndef IO_H -#define IO_H - -#include "common.h" - -namespace dwt_cuda { - -/// Base for all IO classes - manages mirroring. -class DWTIO { -protected: - /// Handles mirroring of image at edges in a DWT correct way. - /// @param d a position in the image (will be replaced by mirrored d) - /// @param sizeD size of the image along the dimension of 'd' - __device__ static void mirror(int &d, const int &sizeD) { - // TODO: enable multiple mirroring: - // if(sizeD > 1) { - // if(d < 0) { - // const int underflow = -1 - d; - // const int phase = (underflow / (sizeD - 1)) & 1; - // const int remainder = underflow % (sizeD - 1); - // if(phase == 0) { - // d = remainder + 1; - // } else { - // d = sizeD - 2 - remainder; - // } - // } else if(d >= sizeD) { - // const int overflow = d - sizeD; - // const int phase = (overflow / (sizeD - 1)) & 1; - // const int remainder = overflow % (sizeD - 1); - // if(phase == 0) { - // d = sizeD - 2 - remainder; - // } else { - // d = remainder + 1; - // } - // } - // } else { - // d = 0; - // } - // for test the mirror's use Feb 17 - if (d >= sizeD) { - d = 2 * sizeD - 2 - d; - } else if (d < 0) { - d = -d; - } - } -}; - -/// Base class for pixel loader and writer - manages computing start index, -/// stride and end of image for loading column of pixels. -/// @tparam T type of image pixels -/// @tparam CHECKED true = be prepared to image boundary, false = don't care -template class VerticalDWTPixelIO : protected DWTIO { -protected: - int end; ///< index of bottom neightbor of last pixel of column - int stride; ///< increment of pointer to get to next pixel - - /// Initializes pixel IO - sets end index and a position of first pixel. - /// @param sizeX width of the image - /// @param sizeY height of the image - /// @param firstX x-coordinate of first pixel to use - /// @param firstY y-coordinate of first pixel to use - /// @return index of pixel at position [x, y] in the image - __device__ int initialize(const int sizeX, const int sizeY, int firstX, - int firstY) { - // initialize all pointers and stride - end = CHECKED ? (sizeY * sizeX + firstX) : 0; - stride = sizeX; - return firstX + sizeX * firstY; - } -}; - -/// Writes reverse transformed pixels directly into output image. -/// @tparam T type of output pixels -/// @tparam CHECKED true = be prepared to image boundary, false = don't care -template -class VerticalDWTPixelWriter : VerticalDWTPixelIO { -private: - int next; // index of the next pixel to be loaded - -public: - /// Initializes writer - sets output buffer and a position of first pixel. - /// @param sizeX width of the image - /// @param sizeY height of the image - /// @param firstX x-coordinate of first pixel to write into - /// @param firstY y-coordinate of first pixel to write into - __device__ void init(const int sizeX, const int sizeY, int firstX, - int firstY) { - if (firstX < sizeX) { - next = this->initialize(sizeX, sizeY, firstX, firstY); - } else { - this->end = 0; - this->stride = 0; - next = 0; - } - } - - /// Writes given value at next position and advances internal pointer while - /// correctly handling mirroring. - /// @param output output image to write pixel into - /// @param value value of the pixel to be written - __device__ void writeInto(T *const output, const T &value) { - if ((!CHECKED) || (next != this->end)) { - output[next] = value; - next += this->stride; - } - } -}; - -/// Loads pixels from input image. -/// @tparam T type of image input pixels -/// @tparam CHECKED true = be prepared to image boundary, false = don't care -template -class VerticalDWTPixelLoader : protected VerticalDWTPixelIO { -private: - int last; ///< index of last loaded pixel -public: - //******************* FOR TEST ********************** - __device__ int getlast() { return last; } - __device__ int getend() { return this->end; } - __device__ int getstride() { return this->stride; } - __device__ void setend(int a) { this->end = a; } - //******************* FOR TEST ********************** - - /// Initializes loader - sets input size and a position of first pixel. - /// @param sizeX width of the image - /// @param sizeY height of the image - /// @param firstX x-coordinate of first pixel to load - /// @param firstY y-coordinate of first pixel to load - __device__ void init(const int sizeX, const int sizeY, int firstX, - int firstY) { - // correctly mirror x coordinate - this->mirror(firstX, sizeX); - - // 'last' always points to already loaded pixel (subtract sizeX = stride) - last = this->initialize(sizeX, sizeY, firstX, firstY) - sizeX; - // last = (FirstX + sizeX * FirstY) - sizeX - } - - /// Sets all fields to zeros, for compiler not to complain about - /// uninitialized stuff. - __device__ void clear() { - this->end = 0; - this->stride = 0; - this->last = 0; - } - - /// Gets another pixel and advancees internal pointer to following one. - /// @param input input image to load next pixel from - /// @return next pixel from given image - __device__ T loadFrom(const T *const input) { - last += this->stride; - if (CHECKED && (last == this->end)) { - last -= 2 * this->stride; - this->stride = -this->stride; // reverse loader's direction - } - // avoid reading from negative indices if loader is checked - // return (CHECKED && (last < 0)) ? 0 : input[last]; // TODO: use this - // checked variant later - if (last < 0) { - return 0; - } - - return input[last]; - // return this->end; - // return last; - // return this->stride; - } -}; - -/// Base for band write and loader. Manages computing strides and pointers -/// to first and last pixels in a linearly-stored-bands correct way. -/// @tparam T type of band coefficients -/// @tparam CHECKED true = be prepared to image boundary, false = don't care -template class VerticalDWTBandIO : protected DWTIO { -protected: - /// index of bottom neighbor of last pixel of loaded column - int end; - - /// increment of index to get from highpass band to the lowpass one - int strideHighToLow; - - /// increment of index to get from the lowpass band to the highpass one - int strideLowToHigh; - - /// Initializes IO - sets size of image and a position of first pixel. - /// @param imageSizeX width of the image - /// @param imageSizeY height of the image - /// @param firstX x-coordinate of first pixel to use - /// (Parity determines vertically low or high band.) - /// @param firstY y-coordinate of first pixel to use - /// (Parity determines horizontally low or high band.) - /// @return index of first item specified by firstX and firstY - __device__ int initialize(const int imageSizeX, const int imageSizeY, - int firstX, int firstY) { - // index of first pixel (topmost one) of the column with index firstX - int columnOffset = firstX / 2; - - // difference between indices of two vertically neighboring pixels - // in the same band - int verticalStride; - - // resolve index of first pixel according to horizontal parity - if (firstX & 1) { - // first pixel in one of right bands - verticalStride = imageSizeX / 2; - columnOffset += divRndUp(imageSizeX, 2) * divRndUp(imageSizeY, 2); - strideLowToHigh = (imageSizeX * imageSizeY) / 2; - } else { - // first pixel in one of left bands - verticalStride = imageSizeX / 2 + (imageSizeX & 1); - strideLowToHigh = divRndUp(imageSizeY, 2) * imageSizeX; - } - - // set the other stride - strideHighToLow = verticalStride - strideLowToHigh; - - // compute index of coefficient which indicates end of image - if (CHECKED) { - end = columnOffset // right column - + (imageSizeY / 2) * verticalStride // right row - + (imageSizeY & 1) * strideLowToHigh; // possibly in high band - } else { - end = 0; - } - - //***********for test************** - // end = CHECKED; - //***********for test************** - - // finally, return index of the first item - return columnOffset // right column - + (firstY / 2) * verticalStride // right row - + (firstY & 1) * strideLowToHigh; // possibly in high band - } -}; - -/// Directly loads coefficients from four consecutively stored transformed -/// bands. -/// @tparam T type of input band coefficients -/// @tparam CHECKED true = be prepared to image boundary, false = don't care -template -class VerticalDWTBandLoader : public VerticalDWTBandIO { -private: - int last; ///< index of last loaded pixel - - /// Checks internal index and possibly reverses direction of loader. - /// (Handles mirroring at the bottom of the image.) - /// @param input input image to load next coefficient from - /// @param stride stride to use now (one of two loader's strides) - /// @return loaded coefficient - __device__ T updateAndLoad(const T *const input, const int &stride) { - last += stride; - if (CHECKED && (last == this->end)) { - // undo last two updates of index (to get to previous mirrored item) - last -= (this->strideLowToHigh + this->strideHighToLow); - - // swap and reverse strides (to move up in the loaded column now) - const int temp = this->strideLowToHigh; - this->strideLowToHigh = -this->strideHighToLow; - this->strideHighToLow = -temp; - } - if (last < 0) { - return 0; - } - // avoid reading from negative indices if loader is checked - // return (CHECKED && (last < 0)) ? 0 : input[last]; // TODO: use this - // checked variant later - return input[last]; - } - -public: - /// Initializes loader - sets input size and a position of first pixel. - /// @param imageSizeX width of the image - /// @param imageSizeY height of the image - /// @param firstX x-coordinate of first pixel to load - /// (Parity determines vertically low or high band.) - /// @param firstY y-coordinate of first pixel to load - /// (Parity determines horizontally low or high band.) - __device__ void init(const int imageSizeX, const int imageSizeY, int firstX, - const int firstY) { - this->mirror(firstX, imageSizeX); - last = this->initialize(imageSizeX, imageSizeY, firstX, firstY); - - // adjust to point to previous item - last -= (firstY & 1) ? this->strideLowToHigh : this->strideHighToLow; - } - - /// Sets all fields to zeros, for compiler not to complain about - /// uninitialized stuff. - __device__ void clear() { - this->end = 0; - this->strideHighToLow = 0; - this->strideLowToHigh = 0; - this->last = 0; - } - - /// Gets another coefficient from lowpass band and advances internal index. - /// Call this method first if position of first pixel passed to init - /// was in high band. - /// @param input input image to load next coefficient from - /// @return next coefficient from the lowpass band of the given image - __device__ T loadLowFrom(const T *const input) { - return updateAndLoad(input, this->strideHighToLow); - } - - /// Gets another coefficient from the highpass band and advances index. - /// Call this method first if position of first pixel passed to init - /// was in high band. - /// @param input input image to load next coefficient from - /// @return next coefficient from the highbass band of the given image - __device__ T loadHighFrom(const T *const input) { - return updateAndLoad(input, this->strideLowToHigh); - } -}; - -/// Directly saves coefficients into four transformed bands. -/// @tparam T type of output band coefficients -/// @tparam CHECKED true = be prepared to image boundary, false = don't care -template -class VerticalDWTBandWriter : public VerticalDWTBandIO { -private: - int next; ///< index of last loaded pixel - - /// Checks internal index and possibly stops the writer. - /// (Handles mirroring at edges of the image.) - /// @param output output buffer - /// @param item item to put into the output - /// @param stride increment of the pointer to get to next output index - __device__ int saveAndUpdate(T *const output, const T &item, - const int &stride) { - // if(blockIdx.x == 0 && blockIdx.y == 11 && threadIdx.x == 0){ - ////test, Mar 20 - if ((!CHECKED) || (next != this->end)) { - // if(next == 4) { - // printf(" next: %d stride: %d val: %f \n", next, stride, item ); - // } - output[next] = item; - next += stride; - } - // } - // if((!CHECKED) || (next != this->end)) { //the real one - // output[next] = item; - // next += stride; //stride has been test - // } - return next; - } - -public: - /// Initializes writer - sets output size and a position of first pixel. - /// @param output output image - /// @param imageSizeX width of the image - /// @param imageSizeY height of the image - /// @param firstX x-coordinate of first pixel to write - /// (Parity determines vertically low or high band.) - /// @param firstY y-coordinate of first pixel to write - /// (Parity determines horizontally low or high band.) - __device__ void init(const int imageSizeX, const int imageSizeY, - const int firstX, const int firstY) { - if (firstX < imageSizeX) { - next = this->initialize(imageSizeX, imageSizeY, firstX, firstY); - } else { - clear(); - } - } - - /// Sets all fields to zeros, for compiler not to complain about - /// uninitialized stuff. - __device__ void clear() { - this->end = 0; - this->strideHighToLow = 0; - this->strideLowToHigh = 0; - this->next = 0; - } - - /// Writes another coefficient into the band which was specified using - /// init's firstX and firstY parameters and advances internal pointer. - /// Call this method first if position of first pixel passed to init - /// was in lowpass band. - /// @param output output image - /// @param low lowpass coefficient to save into the lowpass band - __device__ int writeLowInto(T *const output, const T &primary) { - return saveAndUpdate(output, primary, this->strideLowToHigh); - } - - /// Writes another coefficient from the other band and advances pointer. - /// Call this method first if position of first pixel passed to init - /// was in highpass band. - /// @param output output image - /// @param high highpass coefficient to save into the highpass band - __device__ int writeHighInto(T *const output, const T &other) { - return saveAndUpdate(output, other, this->strideHighToLow); - } - - //*******Add three functions to get private values******* - __device__ int getnext() { return next; } - - __device__ int getend() { return this->end; } - - __device__ int getstrideHighToLow() { return this->strideHighToLow; } - - __device__ int getstrideLowToHigh() { return this->strideLowToHigh; } - - //*******Add three functions to get private values******* -}; - -} // namespace dwt_cuda - -#endif // IO_H diff --git a/examples/dwt2d/dwt_cuda/rdwt53.cu b/examples/dwt2d/dwt_cuda/rdwt53.cu deleted file mode 100755 index 7cdaaf0..0000000 --- a/examples/dwt2d/dwt_cuda/rdwt53.cu +++ /dev/null @@ -1,360 +0,0 @@ -/// -/// @file rdwt53.cu -/// @brief CUDA implementation of reverse 5/3 2D DWT. -/// @author Martin Jirman (207962@mail.muni.cz) -/// @date 2011-02-04 14:19 -/// -/// -/// Copyright (c) 2011 Martin Jirman -/// All rights reserved. -/// -/// Redistribution and use in source and binary forms, with or without -/// modification, are permitted provided that the following conditions are met: -/// -/// * Redistributions of source code must retain the above copyright -/// notice, this list of conditions and the following disclaimer. -/// * Redistributions in binary form must reproduce the above copyright -/// notice, this list of conditions and the following disclaimer in the -/// documentation and/or other materials provided with the distribution. -/// -/// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -/// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -/// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -/// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -/// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -/// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -/// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -/// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -/// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -/// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -/// POSSIBILITY OF SUCH DAMAGE. -/// - - -#include "common.h" -#include "transform_buffer.h" -#include "io.h" - - -namespace dwt_cuda { - - - - /// Wraps shared momory buffer and algorithms needed for computing 5/3 RDWT - /// using sliding window and lifting schema. - /// @tparam WIN_SIZE_X width of sliding window - /// @tparam WIN_SIZE_Y height of sliding window - template - class RDWT53 { - private: - - /// Shared memory buffer used for 5/3 DWT transforms. - typedef TransformBuffer RDWT53Buffer; - - /// Shared buffer used for reverse 5/3 DWT. - RDWT53Buffer buffer; - - /// Difference between indices of two vertically neighboring items in buffer. - enum { STRIDE = RDWT53Buffer::VERTICAL_STRIDE }; - - - /// Info needed for loading of one input column from input image. - /// @tparam CHECKED true if loader should check boundaries - template - struct RDWT53Column { - /// loader of pixels from column in input image - VerticalDWTBandLoader loader; - - /// Offset of corresponding column in shared buffer. - int offset; - - /// Sets all fields to some values to avoid 'uninitialized' warnings. - __device__ void clear() { - offset = 0; - loader.clear(); - } - }; - - - /// 5/3 DWT reverse update operation. - struct Reverse53Update { - __device__ void operator() (const int p, int & c, const int n) const { - c -= (p + n + 2) / 4; // F.3, page 118, ITU-T Rec. T.800 final draft - } - }; - - - /// 5/3 DWT reverse predict operation. - struct Reverse53Predict { - __device__ void operator() (const int p, int & c, const int n) const { - c += (p + n) / 2; // F.4, page 118, ITU-T Rec. T.800 final draft - } - }; - - - /// Horizontal 5/3 RDWT on specified lines of transform buffer. - /// @param lines number of lines to be transformed - /// @param firstLine index of the first line to be transformed - __device__ void horizontalTransform(const int lines, const int firstLine) { - __syncthreads(); - buffer.forEachHorizontalEven(firstLine, lines, Reverse53Update()); - __syncthreads(); - buffer.forEachHorizontalOdd(firstLine, lines, Reverse53Predict()); - __syncthreads(); - } - - - /// Using given loader, it loads another WIN_SIZE_Y coefficients - /// into specified column. - /// @tparam CHECKED true if loader should check image boundaries - /// @param input input coefficients to load from - /// @param col info about loaded column - template - inline __device__ void loadWindowIntoColumn(const int * const input, - RDWT53Column & col) { - for(int i = 3; i < (3 + WIN_SIZE_Y); i += 2) { - buffer[col.offset + i * STRIDE] = col.loader.loadLowFrom(input); - buffer[col.offset + (i + 1) * STRIDE] = col.loader.loadHighFrom(input); - } - } - - - /// Initializes one column of shared transform buffer with 7 input pixels. - /// Those 7 pixels will not be transformed. Also initializes given loader. - /// @tparam CHECKED true if loader should check image boundaries - /// @param columnX x coordinate of column in shared transform buffer - /// @param input input image - /// @param sizeX width of the input image - /// @param sizeY height of the input image - /// @param loader (uninitialized) info about loaded column - template - __device__ void initColumn(const int columnX, const int * const input, - const int sizeX, const int sizeY, - RDWT53Column & column, - const int firstY) { - // coordinates of the first coefficient to be loaded - const int firstX = blockIdx.x * WIN_SIZE_X + columnX; - - // offset of the column with index 'colIndex' in the transform buffer - column.offset = buffer.getColumnOffset(columnX); - - if(blockIdx.y == 0) { - // topmost block - apply mirroring rules when loading first 3 rows - column.loader.init(sizeX, sizeY, firstX, firstY); - - // load pixels in mirrored way - buffer[column.offset + 1 * STRIDE] = column.loader.loadLowFrom(input); - buffer[column.offset + 0 * STRIDE] = - buffer[column.offset + 2 * STRIDE] = column.loader.loadHighFrom(input); - } else { - // non-topmost row - regular loading: - column.loader.init(sizeX, sizeY, firstX, firstY - 1); - buffer[column.offset + 0 * STRIDE] = column.loader.loadHighFrom(input); - buffer[column.offset + 1 * STRIDE] = column.loader.loadLowFrom(input); - buffer[column.offset + 2 * STRIDE] = column.loader.loadHighFrom(input); - } - // Now, the next coefficient, which will be loaded by loader, is #2. - } - - - /// Actual GPU 5/3 RDWT implementation. - /// @tparam CHECKED_LOADS true if boundaries must be checked when reading - /// @tparam CHECKED_WRITES true if boundaries must be checked when writing - /// @param in input image (5/3 transformed coefficients) - /// @param out output buffer (for reverse transformed image) - /// @param sizeX width of the output image - /// @param sizeY height of the output image - /// @param winSteps number of sliding window steps - template - __device__ void transform(const int * const in, int * const out, - const int sizeX, const int sizeY, - const int winSteps) { - // info about one main and one boundary column - RDWT53Column column, boundaryColumn; - - // index of first row to be transformed - const int firstY = blockIdx.y * WIN_SIZE_Y * winSteps; - - // some threads initialize boundary columns - boundaryColumn.clear(); - if(threadIdx.x < 3) { - // First 3 threads also handle boundary columns. Thread #0 gets right - // column #0, thread #1 get right column #1 and thread #2 left column. - const int colId = threadIdx.x + ((threadIdx.x != 2) ? WIN_SIZE_X : -3); - - // Thread initializes offset of the boundary column (in shared - // buffer), first 3 pixels of the column and a loader for this column. - initColumn(colId, in, sizeX, sizeY, boundaryColumn, firstY); - } - - // All threads initialize central columns. - initColumn(parityIdx(), in, sizeX, sizeY, column, firstY); - - // horizontally transform first 3 rows - horizontalTransform(3, 0); - - // writer of output pixels - initialize it - const int outX = blockIdx.x * WIN_SIZE_X + threadIdx.x; - VerticalDWTPixelWriter writer; - writer.init(sizeX, sizeY, outX, firstY); - - // offset of column (in transform buffer) saved by this thread - const int outputColumnOffset = buffer.getColumnOffset(threadIdx.x); - - // (Each iteration assumes that first 3 rows of transform buffer are - // already loaded with horizontally transformed pixels.) - for(int w = 0; w < winSteps; w++) { - // Load another WIN_SIZE_Y lines of this thread's column - // into the transform buffer. - loadWindowIntoColumn(in, column); - - // possibly load boundary columns - if(threadIdx.x < 3) { - loadWindowIntoColumn(in, boundaryColumn); - } - - // horizontally transform all newly loaded lines - horizontalTransform(WIN_SIZE_Y, 3); - - // Using 3 registers, remember current values of last 3 rows - // of transform buffer. These rows are transformed horizontally - // only and will be used in next iteration. - int last3Lines[3]; - last3Lines[0] = buffer[outputColumnOffset + (WIN_SIZE_Y + 0) * STRIDE]; - last3Lines[1] = buffer[outputColumnOffset + (WIN_SIZE_Y + 1) * STRIDE]; - last3Lines[2] = buffer[outputColumnOffset + (WIN_SIZE_Y + 2) * STRIDE]; - - // vertically transform all central columns - buffer.forEachVerticalOdd(outputColumnOffset, Reverse53Update()); - buffer.forEachVerticalEven(outputColumnOffset, Reverse53Predict()); - - // Save all results of current window. Results are in transform buffer - // at rows from #1 to #(1 + WIN_SIZE_Y). Other rows are invalid now. - // (They only served as a boundary for vertical RDWT.) - for(int i = 1; i < (1 + WIN_SIZE_Y); i++) { - writer.writeInto(out, buffer[outputColumnOffset + i * STRIDE]); - } - - // Use last 3 remembered lines as first 3 lines for next iteration. - // As expected, these lines are already horizontally transformed. - buffer[outputColumnOffset + 0 * STRIDE] = last3Lines[0]; - buffer[outputColumnOffset + 1 * STRIDE] = last3Lines[1]; - buffer[outputColumnOffset + 2 * STRIDE] = last3Lines[2]; - - // Wait for all writing threads before proceeding to loading new - // coeficients in next iteration. (Not to overwrite those which - // are not written yet.) - __syncthreads(); - } - } - - - public: - /// Main GPU 5/3 RDWT entry point. - /// @param in input image (5/3 transformed coefficients) - /// @param out output buffer (for reverse transformed image) - /// @param sizeX width of the output image - /// @param sizeY height of the output image - /// @param winSteps number of sliding window steps - __device__ static void run(const int * const input, int * const output, - const int sx, const int sy, const int steps) { - // prepare instance with buffer in shared memory - __shared__ RDWT53 rdwt53; - - // Compute limits of this threadblock's block of pixels and use them to - // determine, whether this threadblock will have to deal with boundary. - // (1 in next expressions is for radius of impulse response of 5/3 RDWT.) - const int maxX = (blockIdx.x + 1) * WIN_SIZE_X + 1; - const int maxY = (blockIdx.y + 1) * WIN_SIZE_Y * steps + 1; - const bool atRightBoudary = maxX >= sx; - const bool atBottomBoudary = maxY >= sy; - - // Select specialized version of code according to distance of this - // threadblock's pixels from image boundary. - if(atBottomBoudary) { - // near bottom boundary => check both writing and reading - rdwt53.transform(input, output, sx, sy, steps); - } else if(atRightBoudary) { - // near right boundary only => check writing only - rdwt53.transform(input, output, sx, sy, steps); - } else { - // no nearby boundary => check nothing - rdwt53.transform(input, output, sx, sy, steps); - } - } - - }; // end of class RDWT53 - - - - /// Main GPU 5/3 RDWT entry point. - /// @param in input image (5/3 transformed coefficients) - /// @param out output buffer (for reverse transformed image) - /// @param sizeX width of the output image - /// @param sizeY height of the output image - /// @param winSteps number of sliding window steps - template - __launch_bounds__(WIN_SX, CTMIN(SHM_SIZE/sizeof(RDWT53), 8)) - __global__ void rdwt53Kernel(const int * const in, int * const out, - const int sx, const int sy, const int steps) { - RDWT53::run(in, out, sx, sy, steps); - } - - - - /// Only computes optimal number of sliding window steps, - /// number of threadblocks and then lanches the 5/3 RDWT kernel. - /// @tparam WIN_SX width of sliding window - /// @tparam WIN_SY height of sliding window - /// @param in input image - /// @param out output buffer - /// @param sx width of the input image - /// @param sy height of the input image - template - void launchRDWT53Kernel (int * in, int * out, const int sx, const int sy) { - // compute optimal number of steps of each sliding window - const int steps = divRndUp(sy, 15 * WIN_SY); - - // prepare grid size - dim3 gSize(divRndUp(sx, WIN_SX), divRndUp(sy, WIN_SY * steps)); - - // finally transform this level - PERF_BEGIN - rdwt53Kernel<<>>(in, out, sx, sy, steps); - PERF_END(" RDWT53", sx, sy) - CudaDWTTester::checkLastKernelCall("RDWT 5/3 kernel"); - } - - - - /// Reverse 5/3 2D DWT. See common rules (above) for more details. - /// @param in Input DWT coefficients. Format described in common rules. - /// Will not be preserved (will be overwritten). - /// @param out output buffer on GPU - will contain original image - /// in normalized range [-128, 127]. - /// @param sizeX width of input image (in pixels) - /// @param sizeY height of input image (in pixels) - /// @param levels number of recursive DWT levels - void rdwt53(int * in, int * out, int sizeX, int sizeY, int levels) { - if(levels > 1) { - // let this function recursively reverse transform deeper levels first - const int llSizeX = divRndUp(sizeX, 2); - const int llSizeY = divRndUp(sizeY, 2); - rdwt53(in, out, llSizeX, llSizeY, levels - 1); - - // copy reverse transformed LL band from output back into the input - memCopy(in, out, llSizeX, llSizeY); - } - - // select right width of kernel for the size of the image - if(sizeX >= 960) { - launchRDWT53Kernel<192, 8>(in, out, sizeX, sizeY); - } else if (sizeX >= 480) { - launchRDWT53Kernel<128, 8>(in, out, sizeX, sizeY); - } else { - launchRDWT53Kernel<64, 8>(in, out, sizeX, sizeY); - } - } - - -} // end of namespace dwt_cuda diff --git a/examples/dwt2d/dwt_cuda/rdwt97.cu b/examples/dwt2d/dwt_cuda/rdwt97.cu deleted file mode 100755 index 40c5221..0000000 --- a/examples/dwt2d/dwt_cuda/rdwt97.cu +++ /dev/null @@ -1,363 +0,0 @@ -/// -/// @file rdwt97.cu -/// @brief CUDA implementation of reverse 9/7 2D DWT. -/// @author Martin Jirman (207962@mail.muni.cz) -/// @date 2011-02-03 21:59 -/// -/// -/// Copyright (c) 2011 Martin Jirman -/// All rights reserved. -/// -/// Redistribution and use in source and binary forms, with or without -/// modification, are permitted provided that the following conditions are met: -/// -/// * Redistributions of source code must retain the above copyright -/// notice, this list of conditions and the following disclaimer. -/// * Redistributions in binary form must reproduce the above copyright -/// notice, this list of conditions and the following disclaimer in the -/// documentation and/or other materials provided with the distribution. -/// -/// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -/// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -/// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -/// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -/// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -/// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -/// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -/// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -/// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -/// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -/// POSSIBILITY OF SUCH DAMAGE. -/// - - -#include "common.h" -#include "transform_buffer.h" -#include "io.h" - - -namespace dwt_cuda { - - - /// Wraps shared memory buffer and methods for computing 9/7 RDWT using - /// lifting schema and sliding window. - /// @tparam WIN_SIZE_X width of the sliding window - /// @tparam WIN_SIZE_Y height of the sliding window - template - class RDWT97 { - private: - - /// Info related to loading of one input column. - /// @tparam CHECKED true if boundary chould be checked, - /// false if there is no near boudnary - template - struct RDWT97Column { - /// laoder of input pxels for given column. - VerticalDWTBandLoader loader; - - /// Offset of loaded column in shared memory buffer. - int offset; - - /// Sets all fields to some values to avoid 'uninitialized' warnings. - __device__ void clear() { - loader.clear(); - offset = 0; - } - }; - - - /// Shared memory buffer used for 9/7 DWT transforms. - typedef TransformBuffer RDWT97Buffer; - - /// Shared buffer used for reverse 9/7 DWT. - RDWT97Buffer buffer; - - /// Difference between indices of two vertical neighbors in buffer. - enum { STRIDE = RDWT97Buffer::VERTICAL_STRIDE }; - - - /// Horizontal 9/7 RDWT on specified lines of transform buffer. - /// @param lines number of lines to be transformed - /// @param firstLine index of the first line to be transformed - __device__ void horizontalRDWT97(int lines, int firstLine) { - __syncthreads(); - buffer.scaleHorizontal(scale97Mul, scale97Div, firstLine, lines); - __syncthreads(); - buffer.forEachHorizontalEven(firstLine, lines, AddScaledSum(r97update2)); - __syncthreads(); - buffer.forEachHorizontalOdd(firstLine, lines, AddScaledSum(r97predict2)); - __syncthreads(); - buffer.forEachHorizontalEven(firstLine, lines, AddScaledSum(r97update1)); - __syncthreads(); - buffer.forEachHorizontalOdd(firstLine, lines, AddScaledSum(r97Predict1)); - __syncthreads(); - } - - - /// Initializes one column of shared transform buffer with 7 input pixels. - /// Those 7 pixels will not be transformed. Also initializes given loader. - /// @tparam CHECKED true if there are near image boundaries - /// @param colIndex index of column in shared transform buffer - /// @param input input image - /// @param sizeX width of the input image - /// @param sizeY height of the input image - /// @param column (uninitialized) info about loading one column - /// @param firstY index of first image row to be transformed - template - __device__ void initColumn(const int colIndex, const float * const input, - const int sizeX, const int sizeY, - RDWT97Column & column, - const int firstY) { - // coordinates of the first coefficient to be loaded - const int firstX = blockIdx.x * WIN_SIZE_X + colIndex; - - // offset of the column with index 'colIndex' in the transform buffer - column.offset = buffer.getColumnOffset(colIndex); - - if(blockIdx.y == 0) { - // topmost block - apply mirroring rules when loading first 7 rows - column.loader.init(sizeX, sizeY, firstX, firstY); - - // load pixels in mirrored way - buffer[column.offset + 3 * STRIDE] = column.loader.loadLowFrom(input); - buffer[column.offset + 4 * STRIDE] = - buffer[column.offset + 2 * STRIDE] = column.loader.loadHighFrom(input); - buffer[column.offset + 5 * STRIDE] = - buffer[column.offset + 1 * STRIDE] = column.loader.loadLowFrom(input); - buffer[column.offset + 6 * STRIDE] = - buffer[column.offset + 0 * STRIDE] = column.loader.loadHighFrom(input); - } else { - // non-topmost row - regular loading: - column.loader.init(sizeX, sizeY, firstX, firstY - 3); - buffer[column.offset + 0 * STRIDE] = column.loader.loadHighFrom(input); - buffer[column.offset + 1 * STRIDE] = column.loader.loadLowFrom(input); - buffer[column.offset + 2 * STRIDE] = column.loader.loadHighFrom(input); - buffer[column.offset + 3 * STRIDE] = column.loader.loadLowFrom(input); - buffer[column.offset + 4 * STRIDE] = column.loader.loadHighFrom(input); - buffer[column.offset + 5 * STRIDE] = column.loader.loadLowFrom(input); - buffer[column.offset + 6 * STRIDE] = column.loader.loadHighFrom(input); - } - // Now, the next coefficient, which will be loaded by loader, is #4. - } - - - /// Using given loader, it loads another WIN_SIZE_Y coefficients - /// into specified column. - /// @tparam CHECKED true if there are near image boundaries - /// @param col info about loaded column - /// @param input buffer with input coefficients - template - inline __device__ void loadWindowIntoColumn(RDWT97Column & col, - const float * const input) { - for(int i = 7; i < (7 + WIN_SIZE_Y); i += 2) { - buffer[col.offset + i * STRIDE] = col.loader.loadLowFrom(input); - buffer[col.offset + (i + 1) * STRIDE] = col.loader.loadHighFrom(input); - } - } - - - /// Actual GPU 9/7 RDWT sliding window lifting schema implementation. - /// @tparam CHECKED_LOADS true if loader should check boundaries - /// @tparam CHECKED_WRITES true if boundaries should be taken into account - /// when writing into output buffer - /// @param in input image (9/7 transformed coefficients) - /// @param out output buffer (for reverse transformed image) - /// @param sizeX width of the output image - /// @param sizeY height of the output image - /// @param winSteps number of steps of sliding window - template - __device__ void transform(const float * const in, float * const out, - const int sizeX, const int sizeY, - const int winSteps) { - // info about one main column and one boundary column - RDWT97Column column; - RDWT97Column boundaryColumn; - - // index of first image row to be transformed - const int firstY = blockIdx.y * WIN_SIZE_Y * winSteps; - - // initialize boundary columns - boundaryColumn.clear(); - if(threadIdx.x < 7) { - // each thread among first 7 ones gets index of one of boundary columns - const int colId = threadIdx.x + ((threadIdx.x < 4) ? WIN_SIZE_X : -7); - - // Thread initializes offset of the boundary column (in shared - // buffer), first 7 pixels of the column and a loader for this column. - initColumn(colId, in, sizeX, sizeY, boundaryColumn, firstY); - } - - // All threads initialize central columns. - initColumn(parityIdx(), in, sizeX, sizeY, column, firstY); - - // horizontally transform first 7 rows - horizontalRDWT97(7, 0); - - // writer of output pixels - initialize it - const int outputX = blockIdx.x * WIN_SIZE_X + threadIdx.x; - VerticalDWTPixelWriter writer; - writer.init(sizeX, sizeY, outputX, firstY); - - // offset of column (in transform buffer) saved by this thread - const int outColumnOffset = buffer.getColumnOffset(threadIdx.x); - - // (Each iteration assumes that first 7 rows of transform buffer are - // already loaded with horizontally transformed pixels.) - for(int w = 0; w < winSteps; w++) { - // Load another WIN_SIZE_Y lines of this thread's column - // into the transform buffer. - loadWindowIntoColumn(column, in); - - // possibly load boundary columns - if(threadIdx.x < 7) { - loadWindowIntoColumn(boundaryColumn, in); - } - - // horizontally transform all newly loaded lines - horizontalRDWT97(WIN_SIZE_Y, 7); - - // Using 7 registers, remember current values of last 7 rows - // of transform buffer. These rows are transformed horizontally - // only and will be used in next iteration. - float last7Lines[7]; - for(int i = 0; i < 7; i++) { - last7Lines[i] = buffer[outColumnOffset + (WIN_SIZE_Y + i) * STRIDE]; - } - - // vertically transform all central columns - buffer.scaleVertical(scale97Div, scale97Mul, outColumnOffset, - WIN_SIZE_Y + 7, 0); - buffer.forEachVerticalOdd(outColumnOffset, AddScaledSum(r97update2)); - buffer.forEachVerticalEven(outColumnOffset, AddScaledSum(r97predict2)); - buffer.forEachVerticalOdd(outColumnOffset, AddScaledSum(r97update1)); - buffer.forEachVerticalEven(outColumnOffset, AddScaledSum(r97Predict1)); - - // Save all results of current window. Results are in transform buffer - // at rows from #3 to #(3 + WIN_SIZE_Y). Other rows are invalid now. - // (They only served as a boundary for vertical RDWT.) - for(int i = 3; i < (3 + WIN_SIZE_Y); i++) { - writer.writeInto(out, buffer[outColumnOffset + i * STRIDE]); - } - - // Use last 7 remembered lines as first 7 lines for next iteration. - // As expected, these lines are already horizontally transformed. - for(int i = 0; i < 7; i++) { - buffer[outColumnOffset + i * STRIDE] = last7Lines[i]; - } - - // Wait for all writing threads before proceeding to loading new - // coeficients in next iteration. (Not to overwrite those which - // are not written yet.) - __syncthreads(); - } - } - - - public: - /// Main GPU 9/7 RDWT entry point. - /// @param in input image (9/7 transformed coefficients) - /// @param out output buffer (for reverse transformed image) - /// @param sizeX width of the output image - /// @param sizeY height of the output image - __device__ static void run(const float * const input, float * const output, - const int sx, const int sy, const int steps) { - // prepare instance with buffer in shared memory - __shared__ RDWT97 rdwt97; - - // Compute limits of this threadblock's block of pixels and use them to - // determine, whether this threadblock will have to deal with boundary. - // (3 in next expressions is for radius of impulse response of 9/7 RDWT.) - const int maxX = (blockIdx.x + 1) * WIN_SIZE_X + 3; - const int maxY = (blockIdx.y + 1) * WIN_SIZE_Y * steps + 3; - const bool atRightBoudary = maxX >= sx; - const bool atBottomBoudary = maxY >= sy; - - // Select specialized version of code according to distance of this - // threadblock's pixels from image boundary. - if(atBottomBoudary) { - // near bottom boundary => check both writing and reading - rdwt97.transform(input, output, sx, sy, steps); - } else if(atRightBoudary) { - // near right boundary only => check writing only - rdwt97.transform(input, output, sx, sy, steps); - } else { - // no nearby boundary => check nothing - rdwt97.transform(input, output, sx, sy, steps); - } - } - - }; // end of class RDWT97 - - - - /// Main GPU 9/7 RDWT entry point. - /// @param in input image (9/7 transformed coefficients) - /// @param out output buffer (for reverse transformed image) - /// @param sizeX width of the output image - /// @param sizeY height of the output image - template - __launch_bounds__(WIN_SX, CTMIN(SHM_SIZE/sizeof(RDWT97), 8)) - __global__ void rdwt97Kernel(const float * const in, float * const out, - const int sx, const int sy, const int steps) { - RDWT97::run(in, out, sx, sy, steps); - } - - - - /// Only computes optimal number of sliding window steps, - /// number of threadblocks and then lanches the 9/7 RDWT kernel. - /// @tparam WIN_SX width of sliding window - /// @tparam WIN_SY height of sliding window - /// @param in input image - /// @param out output buffer - /// @param sx width of the input image - /// @param sy height of the input image - template - void launchRDWT97Kernel (float * in, float * out, int sx, int sy) { - // compute optimal number of steps of each sliding window - const int steps = divRndUp(sy, 15 * WIN_SY); - - // prepare grid size - dim3 gSize(divRndUp(sx, WIN_SX), divRndUp(sy, WIN_SY * steps)); - - // finally launch kernel - PERF_BEGIN - rdwt97Kernel<<>>(in, out, sx, sy, steps); - PERF_END(" RDWT97", sx, sy) - CudaDWTTester::checkLastKernelCall("RDWT 9/7 kernel"); - } - - - - /// Reverse 9/7 2D DWT. See common rules (dwt.h) for more details. - /// @param in Input DWT coefficients. Format described in common rules. - /// Will not be preserved (will be overwritten). - /// @param out output buffer on GPU - will contain original image - /// in normalized range [-0.5, 0.5]. - /// @param sizeX width of input image (in pixels) - /// @param sizeY height of input image (in pixels) - /// @param levels number of recursive DWT levels - void rdwt97(float * in, float * out, int sizeX, int sizeY, int levels) { - if(levels > 1) { - // let this function recursively reverse transform deeper levels first - const int llSizeX = divRndUp(sizeX, 2); - const int llSizeY = divRndUp(sizeY, 2); - rdwt97(in, out, llSizeX, llSizeY, levels - 1); - - // copy reverse transformed LL band from output back into the input - memCopy(in, out, llSizeX, llSizeY); - } - - // select right width of kernel for the size of the image - if(sizeX >= 960) { - launchRDWT97Kernel<192, 8>(in, out, sizeX, sizeY); - } else if (sizeX >= 480) { - launchRDWT97Kernel<128, 6>(in, out, sizeX, sizeY); - } else { - launchRDWT97Kernel<64, 6>(in, out, sizeX, sizeY); - } - } - - - -} // end of namespace dwt_cuda diff --git a/examples/dwt2d/dwt_cuda/transform_buffer.h b/examples/dwt2d/dwt_cuda/transform_buffer.h deleted file mode 100644 index ba98b42..0000000 --- a/examples/dwt2d/dwt_cuda/transform_buffer.h +++ /dev/null @@ -1,338 +0,0 @@ -/// line 248 the index -/// @file transform_buffer.h -/// @brief Buffer with separated even and odd columns and related algorithms. -/// @author Martin Jirman (207962@mail.muni.cz) -/// @date 2011-01-20 18:33 -/// -/// -/// Copyright (c) 2011 Martin Jirman -/// All rights reserved. -/// -/// Redistribution and use in source and binary forms, with or without -/// modification, are permitted provided that the following conditions are met: -/// -/// * Redistributions of source code must retain the above copyright -/// notice, this list of conditions and the following disclaimer. -/// * Redistributions in binary form must reproduce the above copyright -/// notice, this list of conditions and the following disclaimer in the -/// documentation and/or other materials provided with the distribution. -/// -/// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -/// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -/// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -/// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE -/// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -/// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -/// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -/// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -/// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -/// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -/// POSSIBILITY OF SUCH DAMAGE. -/// - -#ifndef TRANSFORM_BUFFER_H -#define TRANSFORM_BUFFER_H - -namespace dwt_cuda { - -/// Buffer (in shared memory of GPU) where block of input image is stored, -/// but odd and even lines are separated. (Generates less bank conflicts when -/// using lifting schema.) All operations expect SIZE_X threads. -/// Also implements basic building blocks of lifting schema. -/// @tparam SIZE_X width of the buffer excluding two boundaries (Also -/// a number of threads participating on all operations.) -/// Must be divisible by 4. -/// @tparam SIZE_Y height of buffer (total number of lines) -/// @tparam BOUNDARY_X number of extra pixels at the left and right side -/// boundary is expected to be smaller than half SIZE_X -/// Must be divisible by 2. -template -class TransformBuffer { -public: - enum { - /// difference between pointers to two vertical neigbors - VERTICAL_STRIDE = BOUNDARY_X + (SIZE_X / 2) - }; - -private: - enum { -/// number of shared memory banks - needed for correct padding -#ifdef __CUDA_ARCH__ - SHM_BANKS = ((__CUDA_ARCH__ >= 200) ? 32 : 16), -#else - SHM_BANKS = 16, // for host code only - can be anything, won't be used -#endif - - /// size of one of two buffers (odd or even) - BUFFER_SIZE = VERTICAL_STRIDE * SIZE_Y, - - /// unused space between two buffers - PADDING = SHM_BANKS - ((BUFFER_SIZE + SHM_BANKS / 2) % SHM_BANKS), - - /// offset of the odd columns buffer from the beginning of data buffer - ODD_OFFSET = BUFFER_SIZE + PADDING, - }; - - /// buffer for both even and odd columns - T data[2 * BUFFER_SIZE + PADDING]; - - /// Applies specified function to all central elements while also passing - /// previous and next elements as parameters. - /// @param count count of central elements to apply function to - /// @param prevOffset offset of first central element - /// @param midOffset offset of first central element's predecessor - /// @param nextOffset offset of first central element's successor - /// @param function the function itself - template - __device__ void horizontalStep(const int count, const int prevOffset, - const int midOffset, const int nextOffset, - const FUNC &function) { - // number of unchecked iterations - const int STEPS = count / SIZE_X; - - // items remaining after last unchecked iteration - const int finalCount = count % SIZE_X; - - // offset of items processed in last (checked) iteration - const int finalOffset = count - finalCount; - - // all threads perform fixed number of iterations ... - for (int i = 0; i < STEPS; i++) { - // for(int i = 0; i < 3; i++) { - const T previous = data[prevOffset + i * SIZE_X + threadIdx.x]; - const T next = data[nextOffset + i * SIZE_X + threadIdx.x]; - T ¢er = data[midOffset + i * SIZE_X + threadIdx.x]; - // function(previous, center, (nextOffset + i*SIZE_X+threadIdx.x)); - function(previous, center, next); // the real one - } - - // ... but not all threads participate on final iteration - if (threadIdx.x < finalCount) { - const T previous = data[prevOffset + finalOffset + threadIdx.x]; - const T next = data[nextOffset + finalOffset + threadIdx.x]; - T ¢er = data[midOffset + finalOffset + threadIdx.x]; - // function(previous, center, (nextOffset+finalOffset+threadIdx.x)); - // kaixi - function(previous, center, next); // the real one - } - } - -public: - __device__ void getPrintData() { - // - for (int i = 0; i < 2 * BUFFER_SIZE + PADDING; i++) { - printf(" index: %d data: %f \n ", i, data[i]); - } - } - - /// Gets offset of the column with given index. Central columns have - /// indices from 0 to NUM_LINES - 1, left boundary columns have negative - /// indices and right boundary columns indices start with NUM_LINES. - /// @param columnIndex index of column to get pointer to - /// @return offset of the first item of column with specified index - __device__ int getColumnOffset(int columnIndex) { - columnIndex += BOUNDARY_X; // skip boundary - return columnIndex / 2 // select right column - + (columnIndex & 1) * ODD_OFFSET; // select odd or even buffer - } - - /// Provides access to data of the transform buffer. - /// @param index index of the item to work with - /// @return reference to item at given index - __device__ T &operator[](const int index) { return data[index]; } - - /// Applies specified function to all horizontally even elements in - /// specified lines. (Including even elements in boundaries except - /// first even element in first left boundary.) SIZE_X threads participate - /// and synchronization is needed before result can be used. - /// @param firstLine index of first line - /// @param numLines count of lines - /// @param func function to be applied on all even elements - /// parameters: previous (odd) element, the even - /// element itself and finally next (odd) element - template - __device__ void forEachHorizontalEven(const int firstLine, const int numLines, - const FUNC &func) { - // number of even elemens to apply function to - const int count = numLines * VERTICAL_STRIDE - 1; - // offset of first even element - const int centerOffset = firstLine * VERTICAL_STRIDE + 1; - // offset of odd predecessor of first even element - const int prevOffset = firstLine * VERTICAL_STRIDE + ODD_OFFSET; - // offset of odd successor of first even element - const int nextOffset = prevOffset + 1; - - // if(threadIdx.x == 0) { - - // printf("forEachHorizontalEven count %d, centerOffset %d prevOffset %d - // nextOffset %d \n", count, centerOffset, prevOffset, nextOffset); - // } - - // call generic horizontal step function - horizontalStep(count, prevOffset, centerOffset, nextOffset, func); - } - - /// Applies given function to all horizontally odd elements in specified - /// lines. (Including odd elements in boundaries except last odd element - /// in last right boundary.) SIZE_X threads participate and synchronization - /// is needed before result can be used. - /// @param firstLine index of first line - /// @param numLines count of lines - /// @param func function to be applied on all odd elements - /// parameters: previous (even) element, the odd - /// element itself and finally next (even) element - template - __device__ void forEachHorizontalOdd(const int firstLine, const int numLines, - const FUNC &func) { - // numbet of odd elements to apply function to - const int count = numLines * VERTICAL_STRIDE - 1; - // offset of even predecessor of first odd element - const int prevOffset = firstLine * VERTICAL_STRIDE; - // offset of first odd element - const int centerOffset = prevOffset + ODD_OFFSET; - // offset of even successor of first odd element - const int nextOffset = prevOffset + 1; - - // if(threadIdx.x == 0) { - // printf("forEachHorizontalOdd count %d, centerOffset %d prevOffset %d - // nextOffset %d \n", count, centerOffset, prevOffset, nextOffset); - // } - - // call generic horizontal step function - horizontalStep(count, prevOffset, centerOffset, nextOffset, func); - } - - /// Applies specified function to all even elements (except element #0) - /// of given column. Each thread takes care of one column, so there's - /// no need for synchronization. - /// @param columnOffset offset of thread's column - /// @param f function to be applied on all even elements - /// parameters: previous (odd) element, the even - /// element itself and finally next (odd) element - template - __device__ void forEachVerticalEven(const int columnOffset, const F &f) { - if (SIZE_Y > 3) { // makes no sense otherwise - const int steps = SIZE_Y / 2 - 1; - for (int i = 0; i < steps; i++) { - const int row = 2 + i * 2; - const T prev = data[columnOffset + (row - 1) * VERTICAL_STRIDE]; - const T next = data[columnOffset + (row + 1) * VERTICAL_STRIDE]; - f(prev, data[columnOffset + row * VERTICAL_STRIDE], next); - - //--------------- FOR TEST ----------------- - /* __syncthreads(); - if ((blockIdx.x * blockDim.x + threadIdx.x) == 0){ - diffOut[2500]++; - diffOut[diffOut[2500]] = 2;//data[columnOffset + - row * VERTICAL_STRIDE]; - } - __syncthreads(); - */ //--------------- FOR TEST ----------------- - } - } - } - - /// Applies specified function to all odd elements of given column. - /// Each thread takes care of one column, so there's no need for - /// synchronization. - /// @param columnOffset offset of thread's column - /// @param f function to be applied on all odd elements - /// parameters: previous (even) element, the odd - /// element itself and finally next (even) element - template - __device__ void forEachVerticalOdd(const int columnOffset, const F &f) { - const int steps = (SIZE_Y - 1) / 2; - for (int i = 0; i < steps; i++) { - const int row = i * 2 + 1; - const T prev = data[columnOffset + (row - 1) * VERTICAL_STRIDE]; - const T next = data[columnOffset + (row + 1) * VERTICAL_STRIDE]; - - f(prev, data[columnOffset + row * VERTICAL_STRIDE], next); - - //--------------- FOR TEST ----------------- - /* __syncthreads(); - if ((blockIdx.x * blockDim.x + threadIdx.x) == 0){ - diffOut[2500]++; - diffOut[diffOut[2500]] = 1; //data[columnOffset + - row * VERTICAL_STRIDE]; - } - - __syncthreads(); - */ //--------------- FOR TEST ----------------- - } - } - - /// Scales elements at specified lines. - /// @param evenScale scaling factor for horizontally even elements - /// @param oddScale scaling factor for horizontally odd elements - /// @param numLines number of lines, whose elements should be scaled - /// @param firstLine index of first line to scale elements in - __device__ void scaleHorizontal(const T evenScale, const T oddScale, - const int firstLine, const int numLines) { - const int offset = firstLine * VERTICAL_STRIDE; - const int count = numLines * VERTICAL_STRIDE; - const int steps = count / SIZE_X; - const int finalCount = count % SIZE_X; - const int finalOffset = count - finalCount; - - // printf("scaleHorizontal sizeX: %d offset %d, count, %d, steps, %d, - // finalCount %d, finalOffset %d \n", SIZE_X, offset, count, steps, - // finalCount, finalOffset); - - // run iterations, whete all threads participate - for (int i = 0; i < steps; i++) { - data[threadIdx.x + i * SIZE_X + offset] *= evenScale; - // if(threadIdx.x + i * SIZE_X + offset == 531) { - // printf("threadidx 531: %d \n", threadIdx.x); - // } - // if(threadIdx.x + i * SIZE_X + offset + ODD_OFFSET == 531) { - // printf("threadidx 531: %d \n", threadIdx.x); - // } - data[threadIdx.x + i * SIZE_X + offset + ODD_OFFSET] *= oddScale; - } - - // some threads also finish remaining unscaled items - if (threadIdx.x < finalCount) { - data[threadIdx.x + finalOffset + offset] *= evenScale; - // if(threadIdx.x + finalOffset + offset == 531) { - // printf("threadidx 531: %d \n", threadIdx.x); - // } - // if(threadIdx.x + finalOffset + offset + ODD_OFFSET == 531) { - // printf("threadidx 531: %d \n", threadIdx.x); - // } - data[threadIdx.x + finalOffset + offset + ODD_OFFSET] *= oddScale; - } - } - - /// Scales elements in specified column. - /// @param evenScale scaling factor for vertically even elements - /// @param oddScale scaling factor for vertically odd elements - /// @param columnOffset offset of the column to work with - /// @param numLines number of lines, whose elements should be scaled - /// @param firstLine index of first line to scale elements in - __device__ void scaleVertical(const T evenScale, const T oddScale, - const int columnOffset, const int numLines, - const int firstLine) { - for (int i = firstLine; i < (numLines + firstLine); i++) { - if (i & 1) { - data[columnOffset + i * VERTICAL_STRIDE] *= oddScale; - } else { - data[columnOffset + i * VERTICAL_STRIDE] *= evenScale; - } - } - } - - //****************For Test(Feb23), test inter parameters************* - __device__ int getVERTICAL_STRIDE() { return VERTICAL_STRIDE; } - __device__ int getSHM_BANKS() { return SHM_BANKS; } - __device__ int getBuffersize() { return BUFFER_SIZE; } - __device__ int getPADDING() { return PADDING; } - __device__ int getODD_OFFSET() { return ODD_OFFSET; } - - //****************For Test(Feb23), test inter parameters************* - -}; // end of class TransformBuffer - -} // namespace dwt_cuda - -#endif // TRANSFORM_BUFFER_H diff --git a/examples/dwt2d/main.cu b/examples/dwt2d/main.cu deleted file mode 100755 index 212d09e..0000000 --- a/examples/dwt2d/main.cu +++ /dev/null @@ -1,401 +0,0 @@ -/* - * Copyright (c) 2009, Jiri Matela - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE - * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE - * POSSIBILITY OF SUCH DAMAGE. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "common.h" -#include "components.h" -#include "dwt.h" - -struct dwt { - char * srcFilename; - char * outFilename; - unsigned char *srcImg; - int pixWidth; - int pixHeight; - int components; - int dwtLvls; -}; - -int getImg(char * srcFilename, unsigned char *srcImg, int inputSize) -{ - // printf("Loading ipnput: %s\n", srcFilename); - char *path = "../../data/dwt2d/"; - char *newSrc = NULL; - - if((newSrc = (char *)malloc(strlen(srcFilename)+strlen(path)+1)) != NULL) - { - newSrc[0] = '\0'; - strcat(newSrc, path); - strcat(newSrc, srcFilename); - srcFilename= newSrc; - } - printf("Loading ipnput: %s\n", srcFilename); - - //srcFilename = strcat("../../data/dwt2d/",srcFilename); - //read image - int i = open(srcFilename, O_RDONLY, 0644); - if (i == -1) { - error(0,errno,"cannot access %s", srcFilename); - return -1; - } - int ret = read(i, srcImg, inputSize); - printf("precteno %d, inputsize %d\n", ret, inputSize); - close(i); - - return 0; -} - - -void usage() { - printf("dwt [otpions] src_img.rgb \n\ - -d, --dimension\t\tdimensions of src img, e.g. 1920x1080\n\ - -c, --components\t\tnumber of color components, default 3\n\ - -b, --depth\t\t\tbit depth, default 8\n\ - -l, --level\t\t\tDWT level, default 3\n\ - -D, --device\t\t\tcuda device\n\ - -f, --forward\t\t\tforward transform\n\ - -r, --reverse\t\t\treverse transform\n\ - -9, --97\t\t\t9/7 transform\n\ - -5, --53\t\t\t5/3 transform\n\ - -w --write-visual\t\twrite output in visual (tiled) fashion instead of the linear\n"); -} - -template -void processDWT(struct dwt *d, int forward, int writeVisual) -{ - int componentSize = d->pixWidth*d->pixHeight*sizeof(T); - - T *c_r_out, *backup ; - cudaMalloc((void**)&c_r_out, componentSize); //< aligned component size - cudaCheckError("Alloc device memory"); - cudaMemset(c_r_out, 0, componentSize); - cudaCheckError("Memset device memory"); - - cudaMalloc((void**)&backup, componentSize); //< aligned component size - cudaCheckError("Alloc device memory"); - cudaMemset(backup, 0, componentSize); - cudaCheckError("Memset device memory"); - - if (d->components == 3) { - /* Alloc two more buffers for G and B */ - T *c_g_out, *c_b_out; - cudaMalloc((void**)&c_g_out, componentSize); //< aligned component size - cudaCheckError("Alloc device memory"); - cudaMemset(c_g_out, 0, componentSize); - cudaCheckError("Memset device memory"); - - cudaMalloc((void**)&c_b_out, componentSize); //< aligned component size - cudaCheckError("Alloc device memory"); - cudaMemset(c_b_out, 0, componentSize); - cudaCheckError("Memset device memory"); - - /* Load components */ - T *c_r, *c_g, *c_b; - cudaMalloc((void**)&c_r, componentSize); //< R, aligned component size - cudaCheckError("Alloc device memory"); - cudaMemset(c_r, 0, componentSize); - cudaCheckError("Memset device memory"); - - cudaMalloc((void**)&c_g, componentSize); //< G, aligned component size - cudaCheckError("Alloc device memory"); - cudaMemset(c_g, 0, componentSize); - cudaCheckError("Memset device memory"); - - cudaMalloc((void**)&c_b, componentSize); //< B, aligned component size - cudaCheckError("Alloc device memory"); - cudaMemset(c_b, 0, componentSize); - cudaCheckError("Memset device memory"); - - rgbToComponents(c_r, c_g, c_b, d->srcImg, d->pixWidth, d->pixHeight); - - - /* Compute DWT and always store into file */ - nStage2dDWT(c_r, c_r_out, backup, d->pixWidth, d->pixHeight, d->dwtLvls, forward); - nStage2dDWT(c_g, c_g_out, backup, d->pixWidth, d->pixHeight, d->dwtLvls, forward); - nStage2dDWT(c_b, c_b_out, backup, d->pixWidth, d->pixHeight, d->dwtLvls, forward); - - // -------test---------- - // T *h_r_out=(T*)malloc(componentSize); - // cudaMemcpy(h_r_out, c_g_out, componentSize, cudaMemcpyDeviceToHost); - // int ii; - // for(ii=0;iipixWidth) == 0) fprintf(stderr, "\n"); - // } - // -------test---------- - - - /* Store DWT to file */ - writeLinear(c_r_out, d->pixWidth, d->pixHeight, d->outFilename, ".r"); - // writeLinear(c_g_out, d->pixWidth, d->pixHeight, d->outFilename, ".g"); - // writeLinear(c_b_out, d->pixWidth, d->pixHeight, d->outFilename, ".b"); -#ifdef OUTPUT - if (writeVisual) { - writeNStage2DDWT(c_r_out, d->pixWidth, d->pixHeight, d->dwtLvls, d->outFilename, ".r"); - writeNStage2DDWT(c_g_out, d->pixWidth, d->pixHeight, d->dwtLvls, d->outFilename, ".g"); - writeNStage2DDWT(c_b_out, d->pixWidth, d->pixHeight, d->dwtLvls, d->outFilename, ".b"); - } else { - writeLinear(c_r_out, d->pixWidth, d->pixHeight, d->outFilename, ".r"); - writeLinear(c_g_out, d->pixWidth, d->pixHeight, d->outFilename, ".g"); - writeLinear(c_b_out, d->pixWidth, d->pixHeight, d->outFilename, ".b"); - } -#endif - - - cudaFree(c_r); - cudaCheckError("Cuda free"); - cudaFree(c_g); - cudaCheckError("Cuda free"); - cudaFree(c_b); - cudaCheckError("Cuda free"); - cudaFree(c_g_out); - cudaCheckError("Cuda free"); - cudaFree(c_b_out); - cudaCheckError("Cuda free"); - - } - else if (d->components == 1) { - //Load component - T *c_r; - cudaMalloc((void**)&(c_r), componentSize); //< R, aligned component size - cudaCheckError("Alloc device memory"); - cudaMemset(c_r, 0, componentSize); - cudaCheckError("Memset device memory"); - - bwToComponent(c_r, d->srcImg, d->pixWidth, d->pixHeight); - - // Compute DWT - nStage2dDWT(c_r, c_r_out, backup, d->pixWidth, d->pixHeight, d->dwtLvls, forward); - - // Store DWT to file -// #ifdef OUTPUT - if (writeVisual) { - writeNStage2DDWT(c_r_out, d->pixWidth, d->pixHeight, d->dwtLvls, d->outFilename, ".out"); - } else { - writeLinear(c_r_out, d->pixWidth, d->pixHeight, d->outFilename, ".lin.out"); - } -// #endif - cudaFree(c_r); - cudaCheckError("Cuda free"); - } - - cudaFree(c_r_out); - cudaCheckError("Cuda free device"); - cudaFree(backup); - cudaCheckError("Cuda free device"); -} - -int main(int argc, char **argv) -{ - int optindex = 0; - char ch; - struct option longopts[] = { - {"dimension", required_argument, 0, 'd'}, //dimensions of src img - {"components", required_argument, 0, 'c'}, //numger of components of src img - {"depth", required_argument, 0, 'b'}, //bit depth of src img - {"level", required_argument, 0, 'l'}, //level of dwt - {"device", required_argument, 0, 'D'}, //cuda device - {"forward", no_argument, 0, 'f'}, //forward transform - {"reverse", no_argument, 0, 'r'}, //reverse transform - {"97", no_argument, 0, '9'}, //9/7 transform - {"53", no_argument, 0, '5' }, //5/3transform - {"write-visual",no_argument, 0, 'w' }, //write output (subbands) in visual (tiled) order instead of linear - {"help", no_argument, 0, 'h'} - }; - - int pixWidth = 0; //= strlen(optarg))) { - usage(); - return -1; - } - pixHeight = atoi(pos+1); - break; - case 'c': - compCount = atoi(optarg); - break; - case 'b': - bitDepth = atoi(optarg); - break; - case 'l': - dwtLvls = atoi(optarg); - break; - case 'D': - device = atoi(optarg); - break; - case 'f': - forward = 1; - break; - case 'r': - forward = 0; - break; - case '9': - dwt97 = 1; - break; - case '5': - dwt97 = 0; - break; - case 'w': - writeVisual = 1; - break; - case 'h': - usage(); - return 0; - case '?': - return -1; - default : - usage(); - return -1; - } - } - argc -= optind; - argv += optind; - - if (argc == 0) { // at least one filename is expected - printf("Please supply src file name\n"); - usage(); - return -1; - } - - if (pixWidth <= 0 || pixHeight <=0) { - printf("Wrong or missing dimensions\n"); - usage(); - return -1; - } - - if (forward == 0) { - writeVisual = 0; //do not write visual when RDWT - } - - // device init - int devCount; - cudaSetDevice(0); - cudaGetDeviceCount(&devCount); - cudaCheckError("Get device count"); - if (devCount == 0) { - printf("No CUDA enabled device\n"); - return -1; - } - if (device < 0 || device > devCount -1) { - printf("Selected device %d is out of bound. Devices on your system are in range %d - %d\n", - device, 0, devCount -1); - return -1; - } - cudaDeviceProp devProp; - cudaGetDeviceProperties(&devProp, device); - cudaCheckError("Get device properties"); - // if (devProp.major < 1) { - // printf("Device %d does not support CUDA\n", device); - // return -1; - // } - printf("Using device %d: %s\n", device, devProp.name); - cudaSetDevice(device); - cudaCheckError("Set selected device"); - - struct dwt *d; - d = (struct dwt *)malloc(sizeof(struct dwt)); - d->srcImg = NULL; - d->pixWidth = pixWidth; - d->pixHeight = pixHeight; - d->components = compCount; - d->dwtLvls = dwtLvls; - - // file names - d->srcFilename = (char *)malloc(strlen(argv[0])); - strcpy(d->srcFilename, argv[0]); - if (argc == 1) { // only one filename supplyed - d->outFilename = (char *)malloc(strlen(d->srcFilename)+4); - strcpy(d->outFilename, d->srcFilename); - strcpy(d->outFilename+strlen(d->srcFilename), ".dwt"); - } else { - d->outFilename = strdup(argv[1]); - } - - //Input review - printf("Source file:\t\t%s\n", d->srcFilename); - printf(" Dimensions:\t\t%dx%d\n", pixWidth, pixHeight); - printf(" Components count:\t%d\n", compCount); - printf(" Bit depth:\t\t%d\n", bitDepth); - printf(" DWT levels:\t\t%d\n", dwtLvls); - printf(" Forward transform:\t%d\n", forward); - printf(" 9/7 transform:\t\t%d\n", dwt97); - - //data sizes - int inputSize = pixWidth*pixHeight*compCount; //srcImg, inputSize); - cudaCheckError("Alloc host memory"); - if (getImg(d->srcFilename, d->srcImg, inputSize) == -1) - return -1; - - /* DWT */ - if (forward == 1) { - if(dwt97 == 1 ) - processDWT(d, forward, writeVisual); - else // 5/3 - processDWT(d, forward, writeVisual); - } - else { // reverse - if(dwt97 == 1 ) - processDWT(d, forward, writeVisual); - else // 5/3 - processDWT(d, forward, writeVisual); - } - - //writeComponent(r_cuda, pixWidth, pixHeight, srcFilename, ".g"); - //writeComponent(g_wave_cuda, 512000, ".g"); - //writeComponent(g_cuda, componentSize, ".g"); - //writeComponent(b_wave_cuda, componentSize, ".b"); - cudaFreeHost(d->srcImg); - cudaCheckError("Cuda free host"); - - return 0; -} diff --git a/examples/dwt2d/run.sh b/examples/dwt2d/run.sh deleted file mode 100755 index ce51838..0000000 --- a/examples/dwt2d/run.sh +++ /dev/null @@ -1,8 +0,0 @@ -./dwt2d 4.bmp z.dwt -d 4x4 -f -5 -l 3 -# ./dwt2d 8.bmp -d 8x8 -f -5 -l 3 -# ./dwt2d 16.bmp -d 16x16 -f -5 -l 3 -# ./dwt2d 64.bmp -d 64x64 -f -5 -l 3 - -# ./dwt2d 192.bmp -d 192x192 -f -5 -l 3 -# ls -# ./dwt2d rgb.bmp -d 1024x1024 -f -5 -l 3 diff --git a/examples/dwt2d/run_cpu.sh b/examples/dwt2d/run_cpu.sh deleted file mode 100755 index 028379c..0000000 --- a/examples/dwt2d/run_cpu.sh +++ /dev/null @@ -1,7 +0,0 @@ -# ./dwt2d 192.bmp -d 192x192 -f -5 -l 3 -# ls -# ./dwt2d rgb.bmp -d 1024x1024 -f -5 -l 3 -# ./dwt2d 16.bmp -d 16x16 -f -9 -l 3\ -./dwt2d 4.bmp -d 4x4 -r -5 -l 3 -# ./dwt2d 4.bmp -d 4x4 -r -9 -l 3 -# ./dwt2d 8.bmp -d 8x8 -f -9 -l 3 diff --git a/examples/dwt2d/run_nvcc.sh b/examples/dwt2d/run_nvcc.sh deleted file mode 100644 index f197cc3..0000000 --- a/examples/dwt2d/run_nvcc.sh +++ /dev/null @@ -1,14 +0,0 @@ -# ./nvcc_dwt2d 192.bmp -d 192x192 -f -5 -l 3 -# ls -# ./nvcc_dwt2d rgb.bmp -d 1024x1024 -f -5 -l 3 -# ./nvcc_dwt2d 4.bmp -d 4x4 -f -9 -l 3 -./nvcc_dwt2d 4.bmp -d 4x4 -f -5 -l 3 -# ./nvcc_dwt2d 8.bmp -d 8x8 -f -9 -l 3 -# ./nvcc_dwt2d 16.bmp -d 16x16 -f -5 -l 3 -# ./nvcc_dwt2d 16.bmp -d 16x16 -r -5 -l 3 -# ./nvcc_dwt2d 16.bmp -d 16x16 -f -9 -l 3 -# ./nvcc_dwt2d 4.bmp -d 4x4 -r -9 -l 3 -# ./nvcc_dwt2d 64.bmp -d 64x64 -f -5 -l 3 -# ./nvcc_dwt2d 192.bmp -d 192x192 -f -5 -l 3 -# ls -# ./nvcc_dwt2d rgb.bmp -d 1024x1024 -f -5 -l 3 diff --git a/examples/dwt2d/test_compile_cpu.sh b/examples/dwt2d/test_compile_cpu.sh deleted file mode 100644 index c84f63c..0000000 --- a/examples/dwt2d/test_compile_cpu.sh +++ /dev/null @@ -1,51 +0,0 @@ - - -#!/bin/bash - -clang++ -I. -I/include -fno-strict-aliasing dwt_cuda/fdwt53.cu dwt_cuda/fdwt97.cu dwt_cuda/common.cu dwt_cuda/rdwt97.cu dwt_cuda/rdwt53.cu components.cu dwt.cu main.cu -c --cuda-path=/usr/local/cuda-10.1 --cuda-gpu-arch=sm_50 -I. -I/include -L/usr/local/cuda-10.1/lib64 -lcudart_static -ldl -lrt -pthread -save-temps -v - -export LD_LIBRARY_PATH=../../build/runtime:../../build/runtime/threadPool:$LD_LIBRARY_PATH - -../../build/compilation/kernelTranslator common-cuda-nvptx64-nvidia-cuda-sm_50.bc common.bc -../../build/compilation/kernelTranslator components-cuda-nvptx64-nvidia-cuda-sm_50.bc components.bc -../../build/compilation/kernelTranslator fdwt53-cuda-nvptx64-nvidia-cuda-sm_50.bc fdwt53.bc - -../../build/compilation/kernelTranslator dwt-cuda-nvptx64-nvidia-cuda-sm_50.bc dwt.bc - -../../build/compilation/hostTranslator main-host-x86_64-unknown-linux-gnu.bc host.bc -../../build/compilation/hostTranslator common-host-x86_64-unknown-linux-gnu.bc common_host.bc -../../build/compilation/hostTranslator components-host-x86_64-unknown-linux-gnu.bc components_host.bc -../../build/compilation/hostTranslator dwt-host-x86_64-unknown-linux-gnu.bc dwt_host.bc -../../build/compilation/hostTranslator fdwt53-host-x86_64-unknown-linux-gnu.bc fdwt53_host.bc - -../../build/compilation/hostTranslator fdwt97-host-x86_64-unknown-linux-gnu.bc fdwt97_host.bc -../../build/compilation/hostTranslator rdwt53-host-x86_64-unknown-linux-gnu.bc rdwt53_host.bc -../../build/compilation/hostTranslator rdwt97-host-x86_64-unknown-linux-gnu.bc rdwt97_host.bc -../../build/compilation/kernelTranslator fdwt97-cuda-nvptx64-nvidia-cuda-sm_50.bc fdwt97.bc -../../build/compilation/kernelTranslator rdwt97-cuda-nvptx64-nvidia-cuda-sm_50.bc rdwt97.bc -../../build/compilation/kernelTranslator rdwt53-cuda-nvptx64-nvidia-cuda-sm_50.bc rdwt53.bc - -llc --relocation-model=pic --filetype=obj common.bc -llc --relocation-model=pic --filetype=obj components.bc -llc --relocation-model=pic --filetype=obj fdwt53.bc - -llc --relocation-model=pic --filetype=obj dwt.bc - - -llc --relocation-model=pic --filetype=obj host.bc - -llc --relocation-model=pic --filetype=obj common_host.bc -llc --relocation-model=pic --filetype=obj components_host.bc -llc --relocation-model=pic --filetype=obj fdwt53_host.bc - -llc --relocation-model=pic --filetype=obj dwt_host.bc - - -llc --relocation-model=pic --filetype=obj fdwt97_host.bc -llc --relocation-model=pic --filetype=obj rdwt97_host.bc -llc --relocation-model=pic --filetype=obj rdwt53_host.bc -llc --relocation-model=pic --filetype=obj fdwt97.bc -llc --relocation-model=pic --filetype=obj rdwt97.bc -llc --relocation-model=pic --filetype=obj rdwt53.bc - -g++ -g -Wall -L../../build/runtime -L../../build/runtime/threadPool -o dwt2d -fPIC -no-pie common.o components.o dwt.o fdwt53.o fdwt97.o rdwt97.o rdwt53.o host.o common_host.o components_host.o dwt_host.o fdwt53_host.o fdwt97_host.o rdwt97_host.o rdwt53_host.o -lc -lx86Runtime -lthreadPool -lpthread diff --git a/examples/dwt2d/test_compile_nvcc.sh b/examples/dwt2d/test_compile_nvcc.sh deleted file mode 100755 index 3810261..0000000 --- a/examples/dwt2d/test_compile_nvcc.sh +++ /dev/null @@ -1,9 +0,0 @@ -/usr/local/cuda/bin/nvcc -arch sm_50 -I. -I/include -O2 --compiler-options -fno-strict-aliasing -c main.cu -o main.cu.o -/usr/local/cuda/bin/nvcc -arch sm_50 -I. -I/include -O2 --compiler-options -fno-strict-aliasing -c dwt.cu -o dwt.cu.o -/usr/local/cuda/bin/nvcc -arch sm_50 -I. -I/include -O2 --compiler-options -fno-strict-aliasing -c components.cu -o components.cu.o -/usr/local/cuda/bin/nvcc -arch sm_50 -I. -I/include -O2 --compiler-options -fno-strict-aliasing -c dwt_cuda/fdwt53.cu -o dwt_cuda/fdwt53.cu.o -/usr/local/cuda/bin/nvcc -arch sm_50 -I. -I/include -O2 --compiler-options -fno-strict-aliasing -c dwt_cuda/fdwt97.cu -o dwt_cuda/fdwt97.cu.o -/usr/local/cuda/bin/nvcc -arch sm_50 -I. -I/include -O2 --compiler-options -fno-strict-aliasing -c dwt_cuda/common.cu -o dwt_cuda/common.cu.o -/usr/local/cuda/bin/nvcc -arch sm_50 -I. -I/include -O2 --compiler-options -fno-strict-aliasing -c dwt_cuda/rdwt97.cu -o dwt_cuda/rdwt97.cu.o -/usr/local/cuda/bin/nvcc -arch sm_50 -I. -I/include -O2 --compiler-options -fno-strict-aliasing -c dwt_cuda/rdwt53.cu -o dwt_cuda/rdwt53.cu.o -g++ -fPIC -o nvcc_dwt2d main.cu.o dwt.cu.o components.cu.o dwt_cuda/fdwt53.cu.o dwt_cuda/fdwt97.cu.o dwt_cuda/common.cu.o dwt_cuda/rdwt97.cu.o dwt_cuda/rdwt53.cu.o -L/usr/local/cuda/lib64 -lcudart diff --git a/examples/gauss/gaussian-cuda-nvptx64-nvidia-cuda-sm_61.ll b/examples/gauss/gaussian-cuda-nvptx64-nvidia-cuda-sm_61.ll deleted file mode 100644 index e0f12f5..0000000 --- a/examples/gauss/gaussian-cuda-nvptx64-nvidia-cuda-sm_61.ll +++ /dev/null @@ -1,396 +0,0 @@ -; ModuleID = 'gaussian-cuda-nvptx64-nvidia-cuda-sm_61.bc' -source_filename = "gaussian.cu" -target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" -target triple = "nvptx64-nvidia-cuda" - -%struct.__cuda_builtin_threadIdx_t = type { i8 } -%struct.__cuda_builtin_blockIdx_t = type { i8 } -%struct.__cuda_builtin_blockDim_t = type { i8 } -%struct.cudaFuncAttributes = type { i64, i64, i64, i32, i32, i32, i32, i32, i32, i32 } - -$_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv = comdat any - -$_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv = comdat any - -$_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv = comdat any - -$_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_yEv = comdat any - -$_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_yEv = comdat any - -$_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_yEv = comdat any - -@threadIdx = extern_weak dso_local addrspace(1) global %struct.__cuda_builtin_threadIdx_t, align 1 -@blockIdx = extern_weak dso_local addrspace(1) global %struct.__cuda_builtin_blockIdx_t, align 1 -@blockDim = extern_weak dso_local addrspace(1) global %struct.__cuda_builtin_blockDim_t, align 1 - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaMalloc(i8** %p, i64 %s) #0 { -entry: - %p.addr = alloca i8**, align 8 - %s.addr = alloca i64, align 8 - store i8** %p, i8*** %p.addr, align 8 - store i64 %s, i64* %s.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaFuncGetAttributes(%struct.cudaFuncAttributes* %p, i8* %c) #0 { -entry: - %p.addr = alloca %struct.cudaFuncAttributes*, align 8 - %c.addr = alloca i8*, align 8 - store %struct.cudaFuncAttributes* %p, %struct.cudaFuncAttributes** %p.addr, align 8 - store i8* %c, i8** %c.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaDeviceGetAttribute(i32* %value, i32 %attr, i32 %device) #0 { -entry: - %value.addr = alloca i32*, align 8 - %attr.addr = alloca i32, align 4 - %device.addr = alloca i32, align 4 - store i32* %value, i32** %value.addr, align 8 - store i32 %attr, i32* %attr.addr, align 4 - store i32 %device, i32* %device.addr, align 4 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaGetDevice(i32* %device) #0 { -entry: - %device.addr = alloca i32*, align 8 - store i32* %device, i32** %device.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaOccupancyMaxActiveBlocksPerMultiprocessor(i32* %numBlocks, i8* %func, i32 %blockSize, i64 %dynamicSmemSize) #0 { -entry: - %numBlocks.addr = alloca i32*, align 8 - %func.addr = alloca i8*, align 8 - %blockSize.addr = alloca i32, align 4 - %dynamicSmemSize.addr = alloca i64, align 8 - store i32* %numBlocks, i32** %numBlocks.addr, align 8 - store i8* %func, i8** %func.addr, align 8 - store i32 %blockSize, i32* %blockSize.addr, align 4 - store i64 %dynamicSmemSize, i64* %dynamicSmemSize.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(i32* %numBlocks, i8* %func, i32 %blockSize, i64 %dynamicSmemSize, i32 %flags) #0 { -entry: - %numBlocks.addr = alloca i32*, align 8 - %func.addr = alloca i8*, align 8 - %blockSize.addr = alloca i32, align 4 - %dynamicSmemSize.addr = alloca i64, align 8 - %flags.addr = alloca i32, align 4 - store i32* %numBlocks, i32** %numBlocks.addr, align 8 - store i8* %func, i8** %func.addr, align 8 - store i32 %blockSize, i32* %blockSize.addr, align 4 - store i64 %dynamicSmemSize, i64* %dynamicSmemSize.addr, align 8 - store i32 %flags, i32* %flags.addr, align 4 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define dso_local void @_Z4Fan1PfS_ii(float* %m_cuda, float* %a_cuda, i32 %Size, i32 %t) #0 { -entry: - %m_cuda.addr = alloca float*, align 8 - %a_cuda.addr = alloca float*, align 8 - %Size.addr = alloca i32, align 4 - %t.addr = alloca i32, align 4 - store float* %m_cuda, float** %m_cuda.addr, align 8 - store float* %a_cuda, float** %a_cuda.addr, align 8 - store i32 %Size, i32* %Size.addr, align 4 - store i32 %t, i32* %t.addr, align 4 - %call = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #3 - %call1 = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #3 - %call2 = call i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv() #3 - %mul = mul i32 %call1, %call2 - %add = add i32 %call, %mul - %0 = load i32, i32* %Size.addr, align 4 - %sub = sub nsw i32 %0, 1 - %1 = load i32, i32* %t.addr, align 4 - %sub3 = sub nsw i32 %sub, %1 - %cmp = icmp uge i32 %add, %sub3 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - br label %return - -if.end: ; preds = %entry - %2 = load float*, float** %a_cuda.addr, align 8 - %3 = load i32, i32* %Size.addr, align 4 - %call4 = call i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv() #3 - %call5 = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #3 - %mul6 = mul i32 %call4, %call5 - %call7 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #3 - %add8 = add i32 %mul6, %call7 - %4 = load i32, i32* %t.addr, align 4 - %add9 = add i32 %add8, %4 - %add10 = add i32 %add9, 1 - %mul11 = mul i32 %3, %add10 - %idx.ext = zext i32 %mul11 to i64 - %add.ptr = getelementptr inbounds float, float* %2, i64 %idx.ext - %5 = load i32, i32* %t.addr, align 4 - %idx.ext12 = sext i32 %5 to i64 - %add.ptr13 = getelementptr inbounds float, float* %add.ptr, i64 %idx.ext12 - %6 = load float, float* %add.ptr13, align 4 - %7 = load float*, float** %a_cuda.addr, align 8 - %8 = load i32, i32* %Size.addr, align 4 - %9 = load i32, i32* %t.addr, align 4 - %mul14 = mul nsw i32 %8, %9 - %idx.ext15 = sext i32 %mul14 to i64 - %add.ptr16 = getelementptr inbounds float, float* %7, i64 %idx.ext15 - %10 = load i32, i32* %t.addr, align 4 - %idx.ext17 = sext i32 %10 to i64 - %add.ptr18 = getelementptr inbounds float, float* %add.ptr16, i64 %idx.ext17 - %11 = load float, float* %add.ptr18, align 4 - %div = fdiv float %6, %11 - %12 = load float*, float** %m_cuda.addr, align 8 - %13 = load i32, i32* %Size.addr, align 4 - %call19 = call i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv() #3 - %call20 = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #3 - %mul21 = mul i32 %call19, %call20 - %call22 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #3 - %add23 = add i32 %mul21, %call22 - %14 = load i32, i32* %t.addr, align 4 - %add24 = add i32 %add23, %14 - %add25 = add i32 %add24, 1 - %mul26 = mul i32 %13, %add25 - %idx.ext27 = zext i32 %mul26 to i64 - %add.ptr28 = getelementptr inbounds float, float* %12, i64 %idx.ext27 - %15 = load i32, i32* %t.addr, align 4 - %idx.ext29 = sext i32 %15 to i64 - %add.ptr30 = getelementptr inbounds float, float* %add.ptr28, i64 %idx.ext29 - store float %div, float* %add.ptr30, align 4 - br label %return - -return: ; preds = %if.end, %if.then - ret void -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() - ret i32 %0 -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() - ret i32 %0 -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() - ret i32 %0 -} - -; Function Attrs: convergent noinline nounwind optnone -define dso_local void @_Z4Fan2PfS_S_iii(float* %m_cuda, float* %a_cuda, float* %b_cuda, i32 %Size, i32 %j1, i32 %t) #0 { -entry: - %m_cuda.addr = alloca float*, align 8 - %a_cuda.addr = alloca float*, align 8 - %b_cuda.addr = alloca float*, align 8 - %Size.addr = alloca i32, align 4 - %j1.addr = alloca i32, align 4 - %t.addr = alloca i32, align 4 - %xidx = alloca i32, align 4 - %yidx = alloca i32, align 4 - store float* %m_cuda, float** %m_cuda.addr, align 8 - store float* %a_cuda, float** %a_cuda.addr, align 8 - store float* %b_cuda, float** %b_cuda.addr, align 8 - store i32 %Size, i32* %Size.addr, align 4 - store i32 %j1, i32* %j1.addr, align 4 - store i32 %t, i32* %t.addr, align 4 - %call = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #3 - %call1 = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #3 - %call2 = call i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv() #3 - %mul = mul i32 %call1, %call2 - %add = add i32 %call, %mul - %0 = load i32, i32* %Size.addr, align 4 - %sub = sub nsw i32 %0, 1 - %1 = load i32, i32* %t.addr, align 4 - %sub3 = sub nsw i32 %sub, %1 - %cmp = icmp uge i32 %add, %sub3 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - br label %if.end58 - -if.end: ; preds = %entry - %call4 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_yEv() #3 - %call5 = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_yEv() #3 - %call6 = call i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_yEv() #3 - %mul7 = mul i32 %call5, %call6 - %add8 = add i32 %call4, %mul7 - %2 = load i32, i32* %Size.addr, align 4 - %3 = load i32, i32* %t.addr, align 4 - %sub9 = sub nsw i32 %2, %3 - %cmp10 = icmp uge i32 %add8, %sub9 - br i1 %cmp10, label %if.then11, label %if.end12 - -if.then11: ; preds = %if.end - br label %if.end58 - -if.end12: ; preds = %if.end - %call13 = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #3 - %call14 = call i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv() #3 - %mul15 = mul i32 %call13, %call14 - %call16 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #3 - %add17 = add i32 %mul15, %call16 - store i32 %add17, i32* %xidx, align 4 - %call18 = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_yEv() #3 - %call19 = call i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_yEv() #3 - %mul20 = mul i32 %call18, %call19 - %call21 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_yEv() #3 - %add22 = add i32 %mul20, %call21 - store i32 %add22, i32* %yidx, align 4 - %4 = load float*, float** %m_cuda.addr, align 8 - %5 = load i32, i32* %Size.addr, align 4 - %6 = load i32, i32* %xidx, align 4 - %add23 = add nsw i32 %6, 1 - %7 = load i32, i32* %t.addr, align 4 - %add24 = add nsw i32 %add23, %7 - %mul25 = mul nsw i32 %5, %add24 - %8 = load i32, i32* %t.addr, align 4 - %add26 = add nsw i32 %mul25, %8 - %idxprom = sext i32 %add26 to i64 - %arrayidx = getelementptr inbounds float, float* %4, i64 %idxprom - %9 = load float, float* %arrayidx, align 4 - %10 = load float*, float** %a_cuda.addr, align 8 - %11 = load i32, i32* %Size.addr, align 4 - %12 = load i32, i32* %t.addr, align 4 - %mul27 = mul nsw i32 %11, %12 - %13 = load i32, i32* %yidx, align 4 - %14 = load i32, i32* %t.addr, align 4 - %add28 = add nsw i32 %13, %14 - %add29 = add nsw i32 %mul27, %add28 - %idxprom30 = sext i32 %add29 to i64 - %arrayidx31 = getelementptr inbounds float, float* %10, i64 %idxprom30 - %15 = load float, float* %arrayidx31, align 4 - %mul32 = fmul contract float %9, %15 - %16 = load float*, float** %a_cuda.addr, align 8 - %17 = load i32, i32* %Size.addr, align 4 - %18 = load i32, i32* %xidx, align 4 - %add33 = add nsw i32 %18, 1 - %19 = load i32, i32* %t.addr, align 4 - %add34 = add nsw i32 %add33, %19 - %mul35 = mul nsw i32 %17, %add34 - %20 = load i32, i32* %yidx, align 4 - %21 = load i32, i32* %t.addr, align 4 - %add36 = add nsw i32 %20, %21 - %add37 = add nsw i32 %mul35, %add36 - %idxprom38 = sext i32 %add37 to i64 - %arrayidx39 = getelementptr inbounds float, float* %16, i64 %idxprom38 - %22 = load float, float* %arrayidx39, align 4 - %sub40 = fsub contract float %22, %mul32 - store float %sub40, float* %arrayidx39, align 4 - %23 = load i32, i32* %yidx, align 4 - %cmp41 = icmp eq i32 %23, 0 - br i1 %cmp41, label %if.then42, label %if.end58 - -if.then42: ; preds = %if.end12 - %24 = load float*, float** %m_cuda.addr, align 8 - %25 = load i32, i32* %Size.addr, align 4 - %26 = load i32, i32* %xidx, align 4 - %add43 = add nsw i32 %26, 1 - %27 = load i32, i32* %t.addr, align 4 - %add44 = add nsw i32 %add43, %27 - %mul45 = mul nsw i32 %25, %add44 - %28 = load i32, i32* %yidx, align 4 - %29 = load i32, i32* %t.addr, align 4 - %add46 = add nsw i32 %28, %29 - %add47 = add nsw i32 %mul45, %add46 - %idxprom48 = sext i32 %add47 to i64 - %arrayidx49 = getelementptr inbounds float, float* %24, i64 %idxprom48 - %30 = load float, float* %arrayidx49, align 4 - %31 = load float*, float** %b_cuda.addr, align 8 - %32 = load i32, i32* %t.addr, align 4 - %idxprom50 = sext i32 %32 to i64 - %arrayidx51 = getelementptr inbounds float, float* %31, i64 %idxprom50 - %33 = load float, float* %arrayidx51, align 4 - %mul52 = fmul contract float %30, %33 - %34 = load float*, float** %b_cuda.addr, align 8 - %35 = load i32, i32* %xidx, align 4 - %add53 = add nsw i32 %35, 1 - %36 = load i32, i32* %t.addr, align 4 - %add54 = add nsw i32 %add53, %36 - %idxprom55 = sext i32 %add54 to i64 - %arrayidx56 = getelementptr inbounds float, float* %34, i64 %idxprom55 - %37 = load float, float* %arrayidx56, align 4 - %sub57 = fsub contract float %37, %mul52 - store float %sub57, float* %arrayidx56, align 4 - br label %if.end58 - -if.end58: ; preds = %if.then, %if.then11, %if.then42, %if.end12 - ret void -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_yEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.y() - ret i32 %0 -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_yEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y() - ret i32 %0 -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_yEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.y() - ret i32 %0 -} - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() #2 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() #2 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x() #2 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.tid.y() #2 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.y() #2 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.ntid.y() #2 - -attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_61" "target-features"="+ptx64,+sm_61" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { alwaysinline convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_61" "target-features"="+ptx64,+sm_61" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind readnone } -attributes #3 = { convergent nounwind } - -!llvm.module.flags = !{!0, !1, !2} -!nvvm.annotations = !{!3, !4, !5, !6, !5, !7, !7, !7, !7, !8, !8, !7} -!llvm.ident = !{!9} -!nvvmir.version = !{!10} - -!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]} -!1 = !{i32 1, !"wchar_size", i32 4} -!2 = !{i32 4, !"nvvm-reflect-ftz", i32 0} -!3 = !{void (float*, float*, i32, i32)* @_Z4Fan1PfS_ii, !"kernel", i32 1} -!4 = !{void (float*, float*, float*, i32, i32, i32)* @_Z4Fan2PfS_S_iii, !"kernel", i32 1} -!5 = !{null, !"align", i32 8} -!6 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080} -!7 = !{null, !"align", i32 16} -!8 = !{null, !"align", i32 16, !"align", i32 65552, !"align", i32 131088} -!9 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"} -!10 = !{i32 1, i32 4} diff --git a/examples/gauss/gaussian-host-x86_64-unknown-linux-gnu.ll b/examples/gauss/gaussian-host-x86_64-unknown-linux-gnu.ll deleted file mode 100644 index a3dae03..0000000 --- a/examples/gauss/gaussian-host-x86_64-unknown-linux-gnu.ll +++ /dev/null @@ -1,1551 +0,0 @@ -; ModuleID = 'gaussian-host-x86_64-unknown-linux-gnu.bc' -source_filename = "gaussian.cu" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } -%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } -%struct.timeval = type { i64, i64 } -%struct.timezone = type { i32, i32 } -%struct.cudaDeviceProp = type { [256 x i8], %struct.CUuuid_st, [8 x i8], i32, i64, i64, i32, i32, i64, i32, [3 x i32], [3 x i32], i32, i64, i32, i32, i64, i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, [2 x i32], [2 x i32], [3 x i32], [2 x i32], [3 x i32], [3 x i32], i32, [2 x i32], [3 x i32], [2 x i32], i32, [2 x i32], [3 x i32], [2 x i32], [3 x i32], i32, [2 x i32], i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i32, i32 } -%struct.CUuuid_st = type { [16 x i8] } -%struct.dim3 = type { i32, i32, i32 } -%struct.CUstream_st = type opaque - -$_ZSt3expf = comdat any - -$_ZN4dim3C2Ejjj = comdat any - -@Size = dso_local global i32 0, align 4 -@a = dso_local global float* null, align 8 -@b = dso_local global float* null, align 8 -@finalVec = dso_local global float* null, align 8 -@m = dso_local global float* null, align 8 -@fp = dso_local global %struct._IO_FILE* null, align 8 -@totalKernelTime = dso_local global i32 0, align 4 -@.str = private unnamed_addr constant [56 x i8] c"WG size of kernel 1 = %d, WG size of kernel 2= %d X %d\0A\00", align 1 -@.str.1 = private unnamed_addr constant [45 x i8] c"Usage: gaussian -f filename / -s size [-q]\0A\0A\00", align 1 -@.str.2 = private unnamed_addr constant [62 x i8] c"-q (quiet) suppresses printing the matrix and result values.\0A\00", align 1 -@.str.3 = private unnamed_addr constant [34 x i8] c"-f (filename) path of input file\0A\00", align 1 -@.str.4 = private unnamed_addr constant [66 x i8] c"-s (size) size of matrix. Create matrix and rhs in this program \0A\00", align 1 -@.str.5 = private unnamed_addr constant [68 x i8] c"The first line of the file contains the dimension of the matrix, n.\00", align 1 -@.str.6 = private unnamed_addr constant [43 x i8] c"The second line of the file is a newline.\0A\00", align 1 -@.str.7 = private unnamed_addr constant [64 x i8] c"The next n lines contain n tab separated values for the matrix.\00", align 1 -@.str.8 = private unnamed_addr constant [41 x i8] c"The next line of the file is a newline.\0A\00", align 1 -@.str.9 = private unnamed_addr constant [70 x i8] c"The next line of the file is a 1xn vector with tab separated values.\0A\00", align 1 -@.str.10 = private unnamed_addr constant [52 x i8] c"The next line of the file is a newline. (optional)\0A\00", align 1 -@.str.11 = private unnamed_addr constant [69 x i8] c"The final line of the file is the pre-computed solution. (optional)\0A\00", align 1 -@.str.12 = private unnamed_addr constant [23 x i8] c"Example: matrix4.txt:\0A\00", align 1 -@.str.13 = private unnamed_addr constant [3 x i8] c"4\0A\00", align 1 -@.str.14 = private unnamed_addr constant [2 x i8] c"\0A\00", align 1 -@.str.15 = private unnamed_addr constant [19 x i8] c"-0.6\09-0.5\090.7\090.3\0A\00", align 1 -@.str.16 = private unnamed_addr constant [19 x i8] c"-0.3\09-0.9\090.3\090.7\0A\00", align 1 -@.str.17 = private unnamed_addr constant [21 x i8] c"-0.4\09-0.5\09-0.3\09-0.8\0A\00", align 1 -@.str.18 = private unnamed_addr constant [18 x i8] c"0.0\09-0.1\090.2\090.9\0A\00", align 1 -@.str.19 = private unnamed_addr constant [24 x i8] c"-0.85\09-0.68\090.24\09-0.53\0A\00", align 1 -@.str.20 = private unnamed_addr constant [19 x i8] c"0.7\090.0\09-0.4\09-0.5\0A\00", align 1 -@.str.21 = private unnamed_addr constant [47 x i8] c"Create matrix internally in parse, size = %d \0A\00", align 1 -@.str.22 = private unnamed_addr constant [20 x i8] c"Read file from %s \0A\00", align 1 -@.str.23 = private unnamed_addr constant [15 x i8] c"Matrix m is: \0A\00", align 1 -@.str.24 = private unnamed_addr constant [15 x i8] c"Matrix a is: \0A\00", align 1 -@.str.25 = private unnamed_addr constant [14 x i8] c"Array b is: \0A\00", align 1 -@.str.26 = private unnamed_addr constant [25 x i8] c"The final solution is: \0A\00", align 1 -@.str.27 = private unnamed_addr constant [49 x i8] c"\0ATime total (including memory transfers)\09%f sec\0A\00", align 1 -@.str.28 = private unnamed_addr constant [31 x i8] c"Time for CUDA kernels:\09%f sec\0A\00", align 1 -@.str.29 = private unnamed_addr constant [23 x i8] c"Total Device found: %d\00", align 1 -@.str.30 = private unnamed_addr constant [22 x i8] c"\0ADevice Name \09\09 - %s \00", align 1 -@.str.31 = private unnamed_addr constant [40 x i8] c"\0A**************************************\00", align 1 -@.str.32 = private unnamed_addr constant [33 x i8] c"\0ATotal Global Memory\09\09\09 - %lu KB\00", align 1 -@.str.33 = private unnamed_addr constant [46 x i8] c"\0AShared memory available per block \09 - %lu KB\00", align 1 -@.str.34 = private unnamed_addr constant [45 x i8] c"\0ANumber of registers per thread block \09 - %d\00", align 1 -@.str.35 = private unnamed_addr constant [31 x i8] c"\0AWarp size in threads \09\09\09 - %d\00", align 1 -@.str.36 = private unnamed_addr constant [31 x i8] c"\0AMemory Pitch \09\09\09\09 - %zu bytes\00", align 1 -@.str.37 = private unnamed_addr constant [35 x i8] c"\0AMaximum threads per block \09\09 - %d\00", align 1 -@.str.38 = private unnamed_addr constant [47 x i8] c"\0AMaximum Thread Dimension (block) \09 - %d %d %d\00", align 1 -@.str.39 = private unnamed_addr constant [46 x i8] c"\0AMaximum Thread Dimension (grid) \09 - %d %d %d\00", align 1 -@.str.40 = private unnamed_addr constant [39 x i8] c"\0ATotal constant memory \09\09\09 - %zu bytes\00", align 1 -@.str.41 = private unnamed_addr constant [23 x i8] c"\0ACUDA ver \09\09\09\09 - %d.%d\00", align 1 -@.str.42 = private unnamed_addr constant [26 x i8] c"\0AClock rate \09\09\09\09 - %d KHz\00", align 1 -@.str.43 = private unnamed_addr constant [35 x i8] c"\0ATexture Alignment \09\09\09 - %zu bytes\00", align 1 -@.str.44 = private unnamed_addr constant [26 x i8] c"\0ADevice Overlap \09\09\09\09 - %s\00", align 1 -@.str.45 = private unnamed_addr constant [8 x i8] c"Allowed\00", align 1 -@.str.46 = private unnamed_addr constant [12 x i8] c"Not Allowed\00", align 1 -@.str.47 = private unnamed_addr constant [38 x i8] c"\0ANumber of Multi processors \09\09 - %d\0A\0A\00", align 1 -@.str.48 = private unnamed_addr constant [4 x i8] c"\0A%s\00", align 1 -@.str.49 = private unnamed_addr constant [22 x i8] c"The file name is: %s\0A\00", align 1 -@.str.50 = private unnamed_addr constant [2 x i8] c"r\00", align 1 -@.str.51 = private unnamed_addr constant [3 x i8] c"%d\00", align 1 -@.str.52 = private unnamed_addr constant [24 x i8] c"The input matrix a is:\0A\00", align 1 -@.str.53 = private unnamed_addr constant [23 x i8] c"The input array b is:\0A\00", align 1 -@.str.54 = private unnamed_addr constant [18 x i8] c"1d grid size: %d\0A\00", align 1 -@.str.55 = private unnamed_addr constant [14 x i8] c"BlockXY: %d \0A\00", align 1 -@.str.56 = private unnamed_addr constant [32 x i8] c"first grid size: %d second: %d\0A\00", align 1 -@.str.57 = private unnamed_addr constant [5 x i8] c"Fan2\00", align 1 -@.str.58 = private unnamed_addr constant [3 x i8] c"%f\00", align 1 -@.str.59 = private unnamed_addr constant [6 x i8] c"%.2f \00", align 1 -@.str.60 = private unnamed_addr constant [3 x i8] c"\0A\0A\00", align 1 -@stderr = external dso_local global %struct._IO_FILE*, align 8 -@.str.61 = private unnamed_addr constant [21 x i8] c"Cuda error: %s: %s.\0A\00", align 1 -@0 = private unnamed_addr constant [14 x i8] c"_Z4Fan1PfS_ii\00", align 1 -@1 = private unnamed_addr constant [17 x i8] c"_Z4Fan2PfS_S_iii\00", align 1 -@2 = private constant [16065 x i8] c"P\EDU\BA\01\00\10\00\B0>\00\00\00\00\00\00\02\00\01\01@\00\00\00h4\00\00\00\00\00\00\00\00\00\00\00\00\00\00\07\00\01\00=\00\00\00\00\00\00\00\00\00\00\00\11\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\7FELF\02\01\013\07\00\00\00\00\00\00\00\02\00\BE\00e\00\00\00\00\00\00\00\00\00\00\00\C03\00\00\00\00\00\00\C00\00\00\00\00\00\00=\05=\00@\008\00\03\00@\00\0C\00\01\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00.text._Z4Fan2PfS_S_iii\00.nv.info._Z4Fan2PfS_S_iii\00.nv.shared._Z4Fan2PfS_S_iii\00.nv.global\00.nv.constant0._Z4Fan2PfS_S_iii\00.text._Z4Fan1PfS_ii\00.nv.info._Z4Fan1PfS_ii\00.nv.shared._Z4Fan1PfS_ii\00.nv.constant0._Z4Fan1PfS_ii\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00_Z4Fan2PfS_S_iii\00.text._Z4Fan2PfS_S_iii\00.nv.info._Z4Fan2PfS_S_iii\00.nv.shared._Z4Fan2PfS_S_iii\00.nv.global\00threadIdx\00blockIdx\00blockDim\00.nv.constant0._Z4Fan2PfS_S_iii\00_param\00_Z4Fan1PfS_ii\00.text._Z4Fan1PfS_ii\00.nv.info._Z4Fan1PfS_ii\00.nv.shared._Z4Fan1PfS_ii\00$_Z4Fan1PfS_ii$__cuda_sm3x_div_rn_noftz_f32\00$_Z4Fan1PfS_ii$__cuda_sm3x_div_rn_noftz_f32_slowpath\00.nv.constant0._Z4Fan1PfS_ii\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00C\00\00\00\03\00\09\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\90\00\00\00\03\00\0B\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\9B\00\00\00\01\00\0B\00\01\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\A5\00\00\00\01\00\0B\00\02\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\AE\00\00\00\01\00\0B\00\00\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\B7\00\00\00\03\00\07\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\EB\00\00\00\03\00\0A\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00/\01\00\00\22\00\0A\00\D8\09\00\00\00\00\00\00`\01\00\00\00\00\00\00[\01\00\00\22\00\0A\008\0B\00\00\00\00\00\00H\08\00\00\00\00\00\00\90\01\00\00\03\00\08\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\002\00\00\00\12\10\09\00\00\00\00\00\00\00\00\00\80\14\00\00\00\00\00\00\DD\00\00\00\12\10\0A\00\00\00\00\00\00\00\00\00\80\13\00\00\00\00\00\00\04/\08\00\0C\00\00\00\13\00\00\00\04#\08\00\09\00\00\00\00\00\00\00\04\12\08\00\09\00\00\00\00\00\00\00\04\11\08\00\09\00\00\00\00\00\00\00\04#\08\00\08\00\00\00\00\00\00\00\04\12\08\00\08\00\00\00\00\00\00\00\04\11\08\00\08\00\00\00\00\00\00\00\04#\08\00\0C\00\00\00\00\00\00\00\04\12\08\00\0C\00\00\00\18\00\00\00\04\11\08\00\0C\00\00\00\18\00\00\00\04/\08\00\0B\00\00\00\0F\00\00\00\04#\08\00\0B\00\00\00\00\00\00\00\04\12\08\00\0B\00\00\000\00\00\00\04\11\08\00\0B\00\00\000\00\00\00\010\00\00\01*\00\00\04\0A\08\00\06\00\00\00@\01$\00\03\19$\00\04\17\0C\00\00\00\00\00\05\00 \00\00\F0\11\00\04\17\0C\00\00\00\00\00\04\00\1C\00\00\F0\11\00\04\17\0C\00\00\00\00\00\03\00\18\00\00\F0\11\00\04\17\0C\00\00\00\00\00\02\00\10\00\00\F0!\00\04\17\0C\00\00\00\00\00\01\00\08\00\00\F0!\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0!\00\03\1B\FF\00\04\1D\10\00\F8\04\00\00h\06\00\00\90\07\00\008\08\00\00\04\1C\04\00p\14\00\00\04\1E\04\00 \00\00\00\010\00\00\01*\00\00\04\0A\08\00\0A\00\00\00@\01\18\00\03\19\18\00\04\17\0C\00\00\00\00\00\03\00\14\00\00\F0\11\00\04\17\0C\00\00\00\00\00\02\00\10\00\00\F0\11\00\04\17\0C\00\00\00\00\00\01\00\08\00\00\F0!\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0!\00\03\1B\FF\00\04\1D\08\00\98\03\00\00\C8\05\00\00\04\1C\04\00\D0\09\00\00\04\1E\04\00p\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\EF\1F\E0\FD\03!\00\D3rd<3>;\0A\0Amov.u2\00\1B,e\00b;\0Acvta\8D\00\04%\00\13,\\\00\22ld\C8\00\02\18\00nrd2, [\CE\00\1E])\00\1F1)\00\01a0];\0Ast#\00\81[%SP+0],,\00\0A\16\00\128\16\00\222;\B6\00\01\D8\00a1, 999(\00\02g\00\00\EF\00\18[\9E\01\03M\00\AF1;\0Aret;\0A\0A}\D6\01\1A\FE\02FuncGetAttributes\E1\01\0D#\00\0E\EC\01\0F+\00\06\0F\F7\01\1B\1F1\F7\01Q\1F1\F7\01!\0E\D9\00\0F\02\02\0F\0E8\01\0F\0D\02\8DhDevice\B4\00\0E\0E\02\0E$\00\0F\0F\02\00/32,\00\0B\1F1,\00\18\1F2<\02\13\1F2<\02\1F\1D4<\02\1F2<\02\0C\1F2<\02\13\01_\00\04;\02\0F\D9\00\07\1D]4\00\1F14\00\06\0Fp\02\10\0E\9A\01\0Fq\02\12(32q\02\0B\15\00!12\16\00\09\86\02\1F3\86\02\15\1F3\86\02#2Get\CB\00\0E}\02\05\1B\00\04\DA\00\0F\1C\02\13?3[8W\04.\0F\1B\02\0D\1F3W\04\19\04\B3\01\0D\D0\00\0F\AA\01\06\0F\05\04W\F0\04OccupancyMaxActiveB3\08\FE\03sPerMultiprocessor\9F\01\0F;\00\16\0EB\06\0FC\00%\0EJ\04\0FC\00\1E\1F2\86\00/\1F3\88\02\13O4[32\89\02\1C\1D3\89\02\1F4\89\02\0C\1F4\89\02\19\133\89\02\0F\F1\00\1E\0F\BC\04\00\0FK\00$/2]w\07\00\0FL\00$\0F\1F\05\01\0F\98\00%\0F\A7\07\1D\097\05\186M\05\04,\00\2224-\00\183\CF\03\1F2\CF\03\15\1F2\CF\03L\9FWithFlags\D8\03(\05D\00\0E\E1\03\0FL\00'\0F\EA\030\0CL\00\1F2\98\008\1F3\98\008\1F4H\04\13O5[40\EC\08.\0FH\04\0D\1F5\EC\08\1C\0F\F9\00+\1F]\9C\040\0D\9A\01\0F\A5\040\0D:\02\0F\AE\041\0D\DB\02\0F\B7\041\0D|\03\0F\C0\04I\08-\00\1F3$\0A6\F6\0Dvisible .entry _Z4Fan1PfS_ii\96\04\00\90\00\09\1B\00\0Em\04\0F#\00\05\07D\04\00\A8\00\0F#\00\01\1F2#\00\08\0F\EE\07\1BO6[24\A6\03\15wpred %px\0A\10fH\01\18f\B8\03\03\9A\0A\0E\12\08/21\CB\03\0C\1F6\CB\03\1C\0E\F5\00\0FM\03\06\0EC\01\0F$\03\07\0E\92\01\0F\FB\02\07\0F\E1\01\02\13]\C8\00#to\DB\12\07+\04\02\C5\02\01\9E\0D\0A\1C\00\144\B4\02\0F;\00\03\145\16\03\0F;\00\00\116\1C\00\1F5H\03\02\1F6H\03\02\1F4H\03\09\04\16\00/201\03\02h%tid.x\15\00\00\BB\00\0A\1F2;\0A\02\03~\00\179~\00$3,@\0AS;\0Aneg\16\00\114\B7\0AV;\0Afma\CC\0A$5,\1A\00\132l\0A\0E\81\0A\175s\01(43s\01\01j\04\22ne\C1\003p3,!\00\02g\04\163g\04\1B6g\04\135O\04\185\B3\0D/21)\03\01/44)\03\02/45)\03\02/46)\03\03347,5\00\00$\00\09\1A\00\02u\01\1E7)\03349,\80\00\00&\00\07e\00/50w\02\03351,\1E\00\0Ce\00552,Q\00-51\F9\01\02\EE\0C\1A5\F9\01\02\F6\04\01\1C\00\0B\F9\01$4,/\01\01'\00\07\F9\01\136\F9\01\194Z\03(25\F7\0E\0C|\00\146\AF\00\08|\00$7,\1C\00\0B|\00$8,R\00\01'\00\07|\00\137|\00\1F8d\00\00\02J\01\0A0\0E\02I\05\01\1C\00\0Ad\00(31d\00'30d\00\128d\00)31\D9\02\00^\00+f6\D9\02\01y\04\02\1B\00\00\CD\00)f8\DA\02!31\DA\02\00h\08\0F\E8\06\02\B06:\0Aret;\0A\0A}\0A\00\00\00\00\00\00", section ".nv_fatbin", align 8 -@__cuda_fatbin_wrapper = internal constant { i32, i32, i8*, i8* } { i32 1180844977, i32 1, i8* getelementptr inbounds ([16065 x i8], [16065 x i8]* @2, i64 0, i64 0), i8* null }, section ".nvFatBinSegment", align 8 -@__cuda_gpubin_handle = internal global i8** null, align 8 -@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* bitcast (void (i8*)* @__cuda_module_ctor to void ()*), i8* null }] - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z13create_matrixPfi(float* %m, i32 %size) #0 { -entry: - %m.addr = alloca float*, align 8 - %size.addr = alloca i32, align 4 - %i = alloca i32, align 4 - %j = alloca i32, align 4 - %lamda = alloca float, align 4 - %saved_stack = alloca i8*, align 8 - %__vla_expr0 = alloca i64, align 8 - %coe_i = alloca float, align 4 - store float* %m, float** %m.addr, align 8 - store i32 %size, i32* %size.addr, align 4 - store float 0xBF847AE140000000, float* %lamda, align 4 - %0 = load i32, i32* %size.addr, align 4 - %mul = mul nsw i32 2, %0 - %sub = sub nsw i32 %mul, 1 - %1 = zext i32 %sub to i64 - %2 = call i8* @llvm.stacksave() - store i8* %2, i8** %saved_stack, align 8 - %vla = alloca float, i64 %1, align 16 - store i64 %1, i64* %__vla_expr0, align 8 - store float 0.000000e+00, float* %coe_i, align 4 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %3 = load i32, i32* %i, align 4 - %4 = load i32, i32* %size.addr, align 4 - %cmp = icmp slt i32 %3, %4 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %5 = load float, float* %lamda, align 4 - %6 = load i32, i32* %i, align 4 - %conv = sitofp i32 %6 to float - %mul1 = fmul contract float %5, %conv - %call = call float @_ZSt3expf(float %mul1) - %mul2 = fmul contract float 1.000000e+01, %call - store float %mul2, float* %coe_i, align 4 - %7 = load i32, i32* %size.addr, align 4 - %sub3 = sub nsw i32 %7, 1 - %8 = load i32, i32* %i, align 4 - %add = add nsw i32 %sub3, %8 - store i32 %add, i32* %j, align 4 - %9 = load float, float* %coe_i, align 4 - %10 = load i32, i32* %j, align 4 - %idxprom = sext i32 %10 to i64 - %arrayidx = getelementptr inbounds float, float* %vla, i64 %idxprom - store float %9, float* %arrayidx, align 4 - %11 = load i32, i32* %size.addr, align 4 - %sub4 = sub nsw i32 %11, 1 - %12 = load i32, i32* %i, align 4 - %sub5 = sub nsw i32 %sub4, %12 - store i32 %sub5, i32* %j, align 4 - %13 = load float, float* %coe_i, align 4 - %14 = load i32, i32* %j, align 4 - %idxprom6 = sext i32 %14 to i64 - %arrayidx7 = getelementptr inbounds float, float* %vla, i64 %idxprom6 - store float %13, float* %arrayidx7, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %15 = load i32, i32* %i, align 4 - %inc = add nsw i32 %15, 1 - store i32 %inc, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - store i32 0, i32* %i, align 4 - br label %for.cond8 - -for.cond8: ; preds = %for.inc26, %for.end - %16 = load i32, i32* %i, align 4 - %17 = load i32, i32* %size.addr, align 4 - %cmp9 = icmp slt i32 %16, %17 - br i1 %cmp9, label %for.body10, label %for.end28 - -for.body10: ; preds = %for.cond8 - store i32 0, i32* %j, align 4 - br label %for.cond11 - -for.cond11: ; preds = %for.inc23, %for.body10 - %18 = load i32, i32* %j, align 4 - %19 = load i32, i32* %size.addr, align 4 - %cmp12 = icmp slt i32 %18, %19 - br i1 %cmp12, label %for.body13, label %for.end25 - -for.body13: ; preds = %for.cond11 - %20 = load i32, i32* %size.addr, align 4 - %sub14 = sub nsw i32 %20, 1 - %21 = load i32, i32* %i, align 4 - %sub15 = sub nsw i32 %sub14, %21 - %22 = load i32, i32* %j, align 4 - %add16 = add nsw i32 %sub15, %22 - %idxprom17 = sext i32 %add16 to i64 - %arrayidx18 = getelementptr inbounds float, float* %vla, i64 %idxprom17 - %23 = load float, float* %arrayidx18, align 4 - %24 = load float*, float** %m.addr, align 8 - %25 = load i32, i32* %i, align 4 - %26 = load i32, i32* %size.addr, align 4 - %mul19 = mul nsw i32 %25, %26 - %27 = load i32, i32* %j, align 4 - %add20 = add nsw i32 %mul19, %27 - %idxprom21 = sext i32 %add20 to i64 - %arrayidx22 = getelementptr inbounds float, float* %24, i64 %idxprom21 - store float %23, float* %arrayidx22, align 4 - br label %for.inc23 - -for.inc23: ; preds = %for.body13 - %28 = load i32, i32* %j, align 4 - %inc24 = add nsw i32 %28, 1 - store i32 %inc24, i32* %j, align 4 - br label %for.cond11 - -for.end25: ; preds = %for.cond11 - br label %for.inc26 - -for.inc26: ; preds = %for.end25 - %29 = load i32, i32* %i, align 4 - %inc27 = add nsw i32 %29, 1 - store i32 %inc27, i32* %i, align 4 - br label %for.cond8 - -for.end28: ; preds = %for.cond8 - %30 = load i8*, i8** %saved_stack, align 8 - call void @llvm.stackrestore(i8* %30) - ret void -} - -; Function Attrs: nounwind -declare i8* @llvm.stacksave() #1 - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local float @_ZSt3expf(float %__x) #2 comdat { -entry: - %__x.addr = alloca float, align 4 - store float %__x, float* %__x.addr, align 4 - %0 = load float, float* %__x.addr, align 4 - %call = call float @expf(float %0) #1 - ret float %call -} - -; Function Attrs: nounwind -declare void @llvm.stackrestore(i8*) #1 - -; Function Attrs: noinline norecurse optnone uwtable -define dso_local i32 @main(i32 %argc, i8** %argv) #3 { -entry: - %retval = alloca i32, align 4 - %argc.addr = alloca i32, align 4 - %argv.addr = alloca i8**, align 8 - %verbose = alloca i32, align 4 - %i = alloca i32, align 4 - %j = alloca i32, align 4 - %flag = alloca i8, align 1 - %time_start = alloca %struct.timeval, align 8 - %time_end = alloca %struct.timeval, align 8 - %time_total = alloca i32, align 4 - store i32 0, i32* %retval, align 4 - store i32 %argc, i32* %argc.addr, align 4 - store i8** %argv, i8*** %argv.addr, align 8 - %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([56 x i8], [56 x i8]* @.str, i64 0, i64 0), i32 512, i32 1, i32 1) - store i32 1, i32* %verbose, align 4 - %0 = load i32, i32* %argc.addr, align 4 - %cmp = icmp slt i32 %0, 2 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([45 x i8], [45 x i8]* @.str.1, i64 0, i64 0)) - %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([62 x i8], [62 x i8]* @.str.2, i64 0, i64 0)) - %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([34 x i8], [34 x i8]* @.str.3, i64 0, i64 0)) - %call4 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([66 x i8], [66 x i8]* @.str.4, i64 0, i64 0)) - %call5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([68 x i8], [68 x i8]* @.str.5, i64 0, i64 0)) - %call6 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([43 x i8], [43 x i8]* @.str.6, i64 0, i64 0)) - %call7 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([64 x i8], [64 x i8]* @.str.7, i64 0, i64 0)) - %call8 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([41 x i8], [41 x i8]* @.str.8, i64 0, i64 0)) - %call9 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([70 x i8], [70 x i8]* @.str.9, i64 0, i64 0)) - %call10 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([52 x i8], [52 x i8]* @.str.10, i64 0, i64 0)) - %call11 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([69 x i8], [69 x i8]* @.str.11, i64 0, i64 0)) - %call12 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str.12, i64 0, i64 0)) - %call13 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.13, i64 0, i64 0)) - %call14 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.14, i64 0, i64 0)) - %call15 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([19 x i8], [19 x i8]* @.str.15, i64 0, i64 0)) - %call16 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([19 x i8], [19 x i8]* @.str.16, i64 0, i64 0)) - %call17 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.17, i64 0, i64 0)) - %call18 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str.18, i64 0, i64 0)) - %call19 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.14, i64 0, i64 0)) - %call20 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.19, i64 0, i64 0)) - %call21 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.14, i64 0, i64 0)) - %call22 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([19 x i8], [19 x i8]* @.str.20, i64 0, i64 0)) - call void @exit(i32 0) #9 - unreachable - -if.end: ; preds = %entry - %call23 = call i32 @cudaSetDevice(i32 0) - call void @_Z21PrintDevicePropertiesv() - store i32 1, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc61, %if.end - %1 = load i32, i32* %i, align 4 - %2 = load i32, i32* %argc.addr, align 4 - %cmp24 = icmp slt i32 %1, %2 - br i1 %cmp24, label %for.body, label %for.end63 - -for.body: ; preds = %for.cond - %3 = load i8**, i8*** %argv.addr, align 8 - %4 = load i32, i32* %i, align 4 - %idxprom = sext i32 %4 to i64 - %arrayidx = getelementptr inbounds i8*, i8** %3, i64 %idxprom - %5 = load i8*, i8** %arrayidx, align 8 - %arrayidx25 = getelementptr inbounds i8, i8* %5, i64 0 - %6 = load i8, i8* %arrayidx25, align 1 - %conv = sext i8 %6 to i32 - %cmp26 = icmp eq i32 %conv, 45 - br i1 %cmp26, label %if.then27, label %if.end60 - -if.then27: ; preds = %for.body - %7 = load i8**, i8*** %argv.addr, align 8 - %8 = load i32, i32* %i, align 4 - %idxprom28 = sext i32 %8 to i64 - %arrayidx29 = getelementptr inbounds i8*, i8** %7, i64 %idxprom28 - %9 = load i8*, i8** %arrayidx29, align 8 - %arrayidx30 = getelementptr inbounds i8, i8* %9, i64 1 - %10 = load i8, i8* %arrayidx30, align 1 - store i8 %10, i8* %flag, align 1 - %11 = load i8, i8* %flag, align 1 - %conv31 = sext i8 %11 to i32 - switch i32 %conv31, label %sw.epilog [ - i32 115, label %sw.bb - i32 102, label %sw.bb52 - i32 113, label %sw.bb59 - ] - -sw.bb: ; preds = %if.then27 - %12 = load i32, i32* %i, align 4 - %inc = add nsw i32 %12, 1 - store i32 %inc, i32* %i, align 4 - %13 = load i8**, i8*** %argv.addr, align 8 - %14 = load i32, i32* %i, align 4 - %idxprom32 = sext i32 %14 to i64 - %arrayidx33 = getelementptr inbounds i8*, i8** %13, i64 %idxprom32 - %15 = load i8*, i8** %arrayidx33, align 8 - %call34 = call i32 @atoi(i8* %15) #10 - store i32 %call34, i32* @Size, align 4 - %16 = load i32, i32* @Size, align 4 - %call35 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([47 x i8], [47 x i8]* @.str.21, i64 0, i64 0), i32 %16) - %17 = load i32, i32* @Size, align 4 - %18 = load i32, i32* @Size, align 4 - %mul = mul nsw i32 %17, %18 - %conv36 = sext i32 %mul to i64 - %mul37 = mul i64 %conv36, 4 - %call38 = call noalias i8* @malloc(i64 %mul37) #1 - %19 = bitcast i8* %call38 to float* - store float* %19, float** @a, align 8 - %20 = load float*, float** @a, align 8 - %21 = load i32, i32* @Size, align 4 - call void @_Z13create_matrixPfi(float* %20, i32 %21) - %22 = load i32, i32* @Size, align 4 - %conv39 = sext i32 %22 to i64 - %mul40 = mul i64 %conv39, 4 - %call41 = call noalias i8* @malloc(i64 %mul40) #1 - %23 = bitcast i8* %call41 to float* - store float* %23, float** @b, align 8 - store i32 0, i32* %j, align 4 - br label %for.cond42 - -for.cond42: ; preds = %for.inc, %sw.bb - %24 = load i32, i32* %j, align 4 - %25 = load i32, i32* @Size, align 4 - %cmp43 = icmp slt i32 %24, %25 - br i1 %cmp43, label %for.body44, label %for.end - -for.body44: ; preds = %for.cond42 - %26 = load float*, float** @b, align 8 - %27 = load i32, i32* %j, align 4 - %idxprom45 = sext i32 %27 to i64 - %arrayidx46 = getelementptr inbounds float, float* %26, i64 %idxprom45 - store float 1.000000e+00, float* %arrayidx46, align 4 - br label %for.inc - -for.inc: ; preds = %for.body44 - %28 = load i32, i32* %j, align 4 - %inc47 = add nsw i32 %28, 1 - store i32 %inc47, i32* %j, align 4 - br label %for.cond42 - -for.end: ; preds = %for.cond42 - %29 = load i32, i32* @Size, align 4 - %30 = load i32, i32* @Size, align 4 - %mul48 = mul nsw i32 %29, %30 - %conv49 = sext i32 %mul48 to i64 - %mul50 = mul i64 %conv49, 4 - %call51 = call noalias i8* @malloc(i64 %mul50) #1 - %31 = bitcast i8* %call51 to float* - store float* %31, float** @m, align 8 - br label %sw.epilog - -sw.bb52: ; preds = %if.then27 - %32 = load i32, i32* %i, align 4 - %inc53 = add nsw i32 %32, 1 - store i32 %inc53, i32* %i, align 4 - %33 = load i8**, i8*** %argv.addr, align 8 - %34 = load i32, i32* %i, align 4 - %idxprom54 = sext i32 %34 to i64 - %arrayidx55 = getelementptr inbounds i8*, i8** %33, i64 %idxprom54 - %35 = load i8*, i8** %arrayidx55, align 8 - %call56 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str.22, i64 0, i64 0), i8* %35) - %36 = load i8**, i8*** %argv.addr, align 8 - %37 = load i32, i32* %i, align 4 - %idxprom57 = sext i32 %37 to i64 - %arrayidx58 = getelementptr inbounds i8*, i8** %36, i64 %idxprom57 - %38 = load i8*, i8** %arrayidx58, align 8 - call void @_Z15InitProblemOncePc(i8* %38) - br label %sw.epilog - -sw.bb59: ; preds = %if.then27 - store i32 1, i32* %verbose, align 4 - br label %sw.epilog - -sw.epilog: ; preds = %if.then27, %sw.bb59, %sw.bb52, %for.end - br label %if.end60 - -if.end60: ; preds = %sw.epilog, %for.body - br label %for.inc61 - -for.inc61: ; preds = %if.end60 - %39 = load i32, i32* %i, align 4 - %inc62 = add nsw i32 %39, 1 - store i32 %inc62, i32* %i, align 4 - br label %for.cond - -for.end63: ; preds = %for.cond - call void @_Z10InitPerRunv() - %call64 = call i32 @gettimeofday(%struct.timeval* %time_start, %struct.timezone* null) #1 - call void @_Z10ForwardSubv() - %call65 = call i32 @gettimeofday(%struct.timeval* %time_end, %struct.timezone* null) #1 - %tv_sec = getelementptr inbounds %struct.timeval, %struct.timeval* %time_end, i32 0, i32 0 - %40 = load i64, i64* %tv_sec, align 8 - %mul66 = mul nsw i64 %40, 1000000 - %tv_usec = getelementptr inbounds %struct.timeval, %struct.timeval* %time_end, i32 0, i32 1 - %41 = load i64, i64* %tv_usec, align 8 - %add = add nsw i64 %mul66, %41 - %tv_sec67 = getelementptr inbounds %struct.timeval, %struct.timeval* %time_start, i32 0, i32 0 - %42 = load i64, i64* %tv_sec67, align 8 - %mul68 = mul nsw i64 %42, 1000000 - %tv_usec69 = getelementptr inbounds %struct.timeval, %struct.timeval* %time_start, i32 0, i32 1 - %43 = load i64, i64* %tv_usec69, align 8 - %add70 = add nsw i64 %mul68, %43 - %sub = sub nsw i64 %add, %add70 - %conv71 = trunc i64 %sub to i32 - store i32 %conv71, i32* %time_total, align 4 - %44 = load i32, i32* %verbose, align 4 - %tobool = icmp ne i32 %44, 0 - br i1 %tobool, label %if.then72, label %if.end76 - -if.then72: ; preds = %for.end63 - %call73 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str.23, i64 0, i64 0)) - %45 = load float*, float** @m, align 8 - %46 = load i32, i32* @Size, align 4 - %47 = load i32, i32* @Size, align 4 - call void @_Z8PrintMatPfii(float* %45, i32 %46, i32 %47) - %call74 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str.24, i64 0, i64 0)) - %48 = load float*, float** @a, align 8 - %49 = load i32, i32* @Size, align 4 - %50 = load i32, i32* @Size, align 4 - call void @_Z8PrintMatPfii(float* %48, i32 %49, i32 %50) - %call75 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.25, i64 0, i64 0)) - %51 = load float*, float** @b, align 8 - %52 = load i32, i32* @Size, align 4 - call void @_Z8PrintAryPfi(float* %51, i32 %52) - br label %if.end76 - -if.end76: ; preds = %if.then72, %for.end63 - call void @_Z7BackSubv() - %53 = load i32, i32* %verbose, align 4 - %tobool77 = icmp ne i32 %53, 0 - br i1 %tobool77, label %if.then78, label %if.end80 - -if.then78: ; preds = %if.end76 - %call79 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([25 x i8], [25 x i8]* @.str.26, i64 0, i64 0)) - %54 = load float*, float** @finalVec, align 8 - %55 = load i32, i32* @Size, align 4 - call void @_Z8PrintAryPfi(float* %54, i32 %55) - br label %if.end80 - -if.end80: ; preds = %if.then78, %if.end76 - %56 = load i32, i32* %time_total, align 4 - %conv81 = uitofp i32 %56 to double - %mul82 = fmul contract double %conv81, 0x3EB0C6F7A0B5ED8D - %call83 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([49 x i8], [49 x i8]* @.str.27, i64 0, i64 0), double %mul82) - %57 = load i32, i32* @totalKernelTime, align 4 - %conv84 = uitofp i32 %57 to double - %mul85 = fmul contract double %conv84, 0x3EB0C6F7A0B5ED8D - %call86 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str.28, i64 0, i64 0), double %mul85) - %58 = load float*, float** @m, align 8 - %59 = bitcast float* %58 to i8* - call void @free(i8* %59) #1 - %60 = load float*, float** @a, align 8 - %61 = bitcast float* %60 to i8* - call void @free(i8* %61) #1 - %62 = load float*, float** @b, align 8 - %63 = bitcast float* %62 to i8* - call void @free(i8* %63) #1 - %64 = load i32, i32* %retval, align 4 - ret i32 %64 -} - -declare dso_local i32 @printf(i8*, ...) #4 - -; Function Attrs: noreturn nounwind -declare dso_local void @exit(i32) #5 - -declare dso_local i32 @cudaSetDevice(i32) #4 - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z21PrintDevicePropertiesv() #0 { -entry: - %deviceProp = alloca %struct.cudaDeviceProp, align 8 - %nDevCount = alloca i32, align 4 - %nDeviceIdx = alloca i32, align 4 - store i32 0, i32* %nDevCount, align 4 - %call = call i32 @cudaGetDeviceCount(i32* %nDevCount) - %0 = load i32, i32* %nDevCount, align 4 - %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str.29, i64 0, i64 0), i32 %0) - store i32 0, i32* %nDeviceIdx, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %1 = load i32, i32* %nDeviceIdx, align 4 - %2 = load i32, i32* %nDevCount, align 4 - %cmp = icmp slt i32 %1, %2 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %3 = bitcast %struct.cudaDeviceProp* %deviceProp to i8* - call void @llvm.memset.p0i8.i64(i8* align 8 %3, i8 0, i64 712, i1 false) - %4 = load i32, i32* %nDeviceIdx, align 4 - %call2 = call i32 @cudaGetDeviceProperties(%struct.cudaDeviceProp* %deviceProp, i32 %4) - %cmp3 = icmp eq i32 0, %call2 - br i1 %cmp3, label %if.then, label %if.else - -if.then: ; preds = %for.body - %name = getelementptr inbounds %struct.cudaDeviceProp, %struct.cudaDeviceProp* %deviceProp, i32 0, i32 0 - %arraydecay = getelementptr inbounds [256 x i8], [256 x i8]* %name, i64 0, i64 0 - %call4 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str.30, i64 0, i64 0), i8* %arraydecay) - %call5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([40 x i8], [40 x i8]* @.str.31, i64 0, i64 0)) - %totalGlobalMem = getelementptr inbounds %struct.cudaDeviceProp, %struct.cudaDeviceProp* %deviceProp, i32 0, i32 4 - %5 = load i64, i64* %totalGlobalMem, align 8 - %div = udiv i64 %5, 1024 - %call6 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([33 x i8], [33 x i8]* @.str.32, i64 0, i64 0), i64 %div) - %sharedMemPerBlock = getelementptr inbounds %struct.cudaDeviceProp, %struct.cudaDeviceProp* %deviceProp, i32 0, i32 5 - %6 = load i64, i64* %sharedMemPerBlock, align 8 - %div7 = udiv i64 %6, 1024 - %call8 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([46 x i8], [46 x i8]* @.str.33, i64 0, i64 0), i64 %div7) - %regsPerBlock = getelementptr inbounds %struct.cudaDeviceProp, %struct.cudaDeviceProp* %deviceProp, i32 0, i32 6 - %7 = load i32, i32* %regsPerBlock, align 8 - %call9 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([45 x i8], [45 x i8]* @.str.34, i64 0, i64 0), i32 %7) - %warpSize = getelementptr inbounds %struct.cudaDeviceProp, %struct.cudaDeviceProp* %deviceProp, i32 0, i32 7 - %8 = load i32, i32* %warpSize, align 4 - %call10 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str.35, i64 0, i64 0), i32 %8) - %memPitch = getelementptr inbounds %struct.cudaDeviceProp, %struct.cudaDeviceProp* %deviceProp, i32 0, i32 8 - %9 = load i64, i64* %memPitch, align 8 - %call11 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str.36, i64 0, i64 0), i64 %9) - %maxThreadsPerBlock = getelementptr inbounds %struct.cudaDeviceProp, %struct.cudaDeviceProp* %deviceProp, i32 0, i32 9 - %10 = load i32, i32* %maxThreadsPerBlock, align 8 - %call12 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([35 x i8], [35 x i8]* @.str.37, i64 0, i64 0), i32 %10) - %maxThreadsDim = getelementptr inbounds %struct.cudaDeviceProp, %struct.cudaDeviceProp* %deviceProp, i32 0, i32 10 - %arrayidx = getelementptr inbounds [3 x i32], [3 x i32]* %maxThreadsDim, i64 0, i64 0 - %11 = load i32, i32* %arrayidx, align 4 - %maxThreadsDim13 = getelementptr inbounds %struct.cudaDeviceProp, %struct.cudaDeviceProp* %deviceProp, i32 0, i32 10 - %arrayidx14 = getelementptr inbounds [3 x i32], [3 x i32]* %maxThreadsDim13, i64 0, i64 1 - %12 = load i32, i32* %arrayidx14, align 4 - %maxThreadsDim15 = getelementptr inbounds %struct.cudaDeviceProp, %struct.cudaDeviceProp* %deviceProp, i32 0, i32 10 - %arrayidx16 = getelementptr inbounds [3 x i32], [3 x i32]* %maxThreadsDim15, i64 0, i64 2 - %13 = load i32, i32* %arrayidx16, align 4 - %call17 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([47 x i8], [47 x i8]* @.str.38, i64 0, i64 0), i32 %11, i32 %12, i32 %13) - %maxGridSize = getelementptr inbounds %struct.cudaDeviceProp, %struct.cudaDeviceProp* %deviceProp, i32 0, i32 11 - %arrayidx18 = getelementptr inbounds [3 x i32], [3 x i32]* %maxGridSize, i64 0, i64 0 - %14 = load i32, i32* %arrayidx18, align 8 - %maxGridSize19 = getelementptr inbounds %struct.cudaDeviceProp, %struct.cudaDeviceProp* %deviceProp, i32 0, i32 11 - %arrayidx20 = getelementptr inbounds [3 x i32], [3 x i32]* %maxGridSize19, i64 0, i64 1 - %15 = load i32, i32* %arrayidx20, align 4 - %maxGridSize21 = getelementptr inbounds %struct.cudaDeviceProp, %struct.cudaDeviceProp* %deviceProp, i32 0, i32 11 - %arrayidx22 = getelementptr inbounds [3 x i32], [3 x i32]* %maxGridSize21, i64 0, i64 2 - %16 = load i32, i32* %arrayidx22, align 8 - %call23 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([46 x i8], [46 x i8]* @.str.39, i64 0, i64 0), i32 %14, i32 %15, i32 %16) - %totalConstMem = getelementptr inbounds %struct.cudaDeviceProp, %struct.cudaDeviceProp* %deviceProp, i32 0, i32 13 - %17 = load i64, i64* %totalConstMem, align 8 - %call24 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([39 x i8], [39 x i8]* @.str.40, i64 0, i64 0), i64 %17) - %major = getelementptr inbounds %struct.cudaDeviceProp, %struct.cudaDeviceProp* %deviceProp, i32 0, i32 14 - %18 = load i32, i32* %major, align 8 - %minor = getelementptr inbounds %struct.cudaDeviceProp, %struct.cudaDeviceProp* %deviceProp, i32 0, i32 15 - %19 = load i32, i32* %minor, align 4 - %call25 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str.41, i64 0, i64 0), i32 %18, i32 %19) - %clockRate = getelementptr inbounds %struct.cudaDeviceProp, %struct.cudaDeviceProp* %deviceProp, i32 0, i32 12 - %20 = load i32, i32* %clockRate, align 4 - %call26 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([26 x i8], [26 x i8]* @.str.42, i64 0, i64 0), i32 %20) - %textureAlignment = getelementptr inbounds %struct.cudaDeviceProp, %struct.cudaDeviceProp* %deviceProp, i32 0, i32 16 - %21 = load i64, i64* %textureAlignment, align 8 - %call27 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([35 x i8], [35 x i8]* @.str.43, i64 0, i64 0), i64 %21) - %deviceOverlap = getelementptr inbounds %struct.cudaDeviceProp, %struct.cudaDeviceProp* %deviceProp, i32 0, i32 18 - %22 = load i32, i32* %deviceOverlap, align 8 - %tobool = icmp ne i32 %22, 0 - %23 = zext i1 %tobool to i64 - %cond = select i1 %tobool, i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str.45, i64 0, i64 0), i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.46, i64 0, i64 0) - %call28 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([26 x i8], [26 x i8]* @.str.44, i64 0, i64 0), i8* %cond) - %multiProcessorCount = getelementptr inbounds %struct.cudaDeviceProp, %struct.cudaDeviceProp* %deviceProp, i32 0, i32 19 - %24 = load i32, i32* %multiProcessorCount, align 4 - %call29 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([38 x i8], [38 x i8]* @.str.47, i64 0, i64 0), i32 %24) - br label %if.end - -if.else: ; preds = %for.body - %call30 = call i32 @cudaGetLastError() - %call31 = call i8* @cudaGetErrorString(i32 %call30) - %call32 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.48, i64 0, i64 0), i8* %call31) - br label %if.end - -if.end: ; preds = %if.else, %if.then - br label %for.inc - -for.inc: ; preds = %if.end - %25 = load i32, i32* %nDeviceIdx, align 4 - %inc = add nsw i32 %25, 1 - store i32 %inc, i32* %nDeviceIdx, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - ret void -} - -; Function Attrs: nounwind readonly -declare dso_local i32 @atoi(i8*) #6 - -; Function Attrs: nounwind -declare dso_local noalias i8* @malloc(i64) #7 - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z15InitProblemOncePc(i8* %filename) #0 { -entry: - %filename.addr = alloca i8*, align 8 - store i8* %filename, i8** %filename.addr, align 8 - %0 = load i8*, i8** %filename.addr, align 8 - %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str.49, i64 0, i64 0), i8* %0) - %1 = load i8*, i8** %filename.addr, align 8 - %call1 = call %struct._IO_FILE* @fopen(i8* %1, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.50, i64 0, i64 0)) - store %struct._IO_FILE* %call1, %struct._IO_FILE** @fp, align 8 - %2 = load %struct._IO_FILE*, %struct._IO_FILE** @fp, align 8 - %call2 = call i32 (%struct._IO_FILE*, i8*, ...) @fscanf(%struct._IO_FILE* %2, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.51, i64 0, i64 0), i32* @Size) - %3 = load i32, i32* @Size, align 4 - %4 = load i32, i32* @Size, align 4 - %mul = mul nsw i32 %3, %4 - %conv = sext i32 %mul to i64 - %mul3 = mul i64 %conv, 4 - %call4 = call noalias i8* @malloc(i64 %mul3) #1 - %5 = bitcast i8* %call4 to float* - store float* %5, float** @a, align 8 - %6 = load float*, float** @a, align 8 - %7 = load i32, i32* @Size, align 4 - %8 = load i32, i32* @Size, align 4 - call void @_Z7InitMatPfii(float* %6, i32 %7, i32 %8) - %call5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.52, i64 0, i64 0)) - %9 = load float*, float** @a, align 8 - %10 = load i32, i32* @Size, align 4 - %11 = load i32, i32* @Size, align 4 - call void @_Z8PrintMatPfii(float* %9, i32 %10, i32 %11) - %12 = load i32, i32* @Size, align 4 - %conv6 = sext i32 %12 to i64 - %mul7 = mul i64 %conv6, 4 - %call8 = call noalias i8* @malloc(i64 %mul7) #1 - %13 = bitcast i8* %call8 to float* - store float* %13, float** @b, align 8 - %14 = load float*, float** @b, align 8 - %15 = load i32, i32* @Size, align 4 - call void @_Z7InitAryPfi(float* %14, i32 %15) - %call9 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str.53, i64 0, i64 0)) - %16 = load float*, float** @b, align 8 - %17 = load i32, i32* @Size, align 4 - call void @_Z8PrintAryPfi(float* %16, i32 %17) - %18 = load i32, i32* @Size, align 4 - %19 = load i32, i32* @Size, align 4 - %mul10 = mul nsw i32 %18, %19 - %conv11 = sext i32 %mul10 to i64 - %mul12 = mul i64 %conv11, 4 - %call13 = call noalias i8* @malloc(i64 %mul12) #1 - %20 = bitcast i8* %call13 to float* - store float* %20, float** @m, align 8 - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define dso_local void @_Z10InitPerRunv() #2 { -entry: - %i = alloca i32, align 4 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = load i32, i32* %i, align 4 - %1 = load i32, i32* @Size, align 4 - %2 = load i32, i32* @Size, align 4 - %mul = mul nsw i32 %1, %2 - %cmp = icmp slt i32 %0, %mul - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %3 = load float*, float** @m, align 8 - %4 = load i32, i32* %i, align 4 - %idx.ext = sext i32 %4 to i64 - %add.ptr = getelementptr inbounds float, float* %3, i64 %idx.ext - store float 0.000000e+00, float* %add.ptr, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %5 = load i32, i32* %i, align 4 - %inc = add nsw i32 %5, 1 - store i32 %inc, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - ret void -} - -; Function Attrs: nounwind -declare dso_local i32 @gettimeofday(%struct.timeval*, %struct.timezone*) #7 - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z10ForwardSubv() #0 { -entry: - %t = alloca i32, align 4 - %m_cuda = alloca float*, align 8 - %a_cuda = alloca float*, align 8 - %b_cuda = alloca float*, align 8 - %A = alloca i32, align 4 - %B = alloca i32, align 4 - %C = alloca i32, align 4 - %D = alloca i32, align 4 - %E = alloca i32, align 4 - %F = alloca i32, align 4 - %block_size = alloca i32, align 4 - %grid_size = alloca i32, align 4 - %dimBlock = alloca %struct.dim3, align 4 - %dimGrid = alloca %struct.dim3, align 4 - %blockSize2d = alloca i32, align 4 - %gridSize2d = alloca i32, align 4 - %dimBlockXY = alloca %struct.dim3, align 4 - %dimGridXY = alloca %struct.dim3, align 4 - %time_start = alloca %struct.timeval, align 8 - %agg.tmp = alloca %struct.dim3, align 4 - %agg.tmp32 = alloca %struct.dim3, align 4 - %agg.tmp.coerce = alloca { i64, i32 }, align 4 - %agg.tmp32.coerce = alloca { i64, i32 }, align 4 - %agg.tmp36 = alloca %struct.dim3, align 4 - %agg.tmp37 = alloca %struct.dim3, align 4 - %agg.tmp36.coerce = alloca { i64, i32 }, align 4 - %agg.tmp37.coerce = alloca { i64, i32 }, align 4 - %time_end = alloca %struct.timeval, align 8 - store i32 1, i32* %A, align 4 - store i32 2, i32* %B, align 4 - store i32 3, i32* %C, align 4 - store i32 4, i32* %D, align 4 - store i32 5, i32* %E, align 4 - store i32 6, i32* %F, align 4 - %0 = bitcast float** %m_cuda to i8** - %1 = load i32, i32* @Size, align 4 - %2 = load i32, i32* @Size, align 4 - %mul = mul nsw i32 %1, %2 - %conv = sext i32 %mul to i64 - %mul1 = mul i64 %conv, 4 - %call = call i32 @cudaMalloc(i8** %0, i64 %mul1) - %3 = bitcast float** %a_cuda to i8** - %4 = load i32, i32* @Size, align 4 - %5 = load i32, i32* @Size, align 4 - %mul2 = mul nsw i32 %4, %5 - %conv3 = sext i32 %mul2 to i64 - %mul4 = mul i64 %conv3, 4 - %call5 = call i32 @cudaMalloc(i8** %3, i64 %mul4) - %6 = bitcast float** %b_cuda to i8** - %7 = load i32, i32* @Size, align 4 - %conv6 = sext i32 %7 to i64 - %mul7 = mul i64 %conv6, 4 - %call8 = call i32 @cudaMalloc(i8** %6, i64 %mul7) - %8 = load float*, float** %m_cuda, align 8 - %9 = bitcast float* %8 to i8* - %10 = load float*, float** @m, align 8 - %11 = bitcast float* %10 to i8* - %12 = load i32, i32* @Size, align 4 - %13 = load i32, i32* @Size, align 4 - %mul9 = mul nsw i32 %12, %13 - %conv10 = sext i32 %mul9 to i64 - %mul11 = mul i64 %conv10, 4 - %call12 = call i32 @cudaMemcpy(i8* %9, i8* %11, i64 %mul11, i32 1) - %14 = load float*, float** %a_cuda, align 8 - %15 = bitcast float* %14 to i8* - %16 = load float*, float** @a, align 8 - %17 = bitcast float* %16 to i8* - %18 = load i32, i32* @Size, align 4 - %19 = load i32, i32* @Size, align 4 - %mul13 = mul nsw i32 %18, %19 - %conv14 = sext i32 %mul13 to i64 - %mul15 = mul i64 %conv14, 4 - %call16 = call i32 @cudaMemcpy(i8* %15, i8* %17, i64 %mul15, i32 1) - %20 = load float*, float** %b_cuda, align 8 - %21 = bitcast float* %20 to i8* - %22 = load float*, float** @b, align 8 - %23 = bitcast float* %22 to i8* - %24 = load i32, i32* @Size, align 4 - %conv17 = sext i32 %24 to i64 - %mul18 = mul i64 %conv17, 4 - %call19 = call i32 @cudaMemcpy(i8* %21, i8* %23, i64 %mul18, i32 1) - store i32 512, i32* %block_size, align 4 - %25 = load i32, i32* @Size, align 4 - %26 = load i32, i32* %block_size, align 4 - %div = sdiv i32 %25, %26 - %27 = load i32, i32* @Size, align 4 - %28 = load i32, i32* %block_size, align 4 - %rem = srem i32 %27, %28 - %tobool = icmp ne i32 %rem, 0 - %lnot = xor i1 %tobool, true - %29 = zext i1 %lnot to i64 - %cond = select i1 %lnot, i32 0, i32 1 - %add = add nsw i32 %div, %cond - store i32 %add, i32* %grid_size, align 4 - %30 = load i32, i32* %grid_size, align 4 - %call20 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str.54, i64 0, i64 0), i32 %30) - %31 = load i32, i32* %block_size, align 4 - call void @_ZN4dim3C2Ejjj(%struct.dim3* %dimBlock, i32 %31, i32 1, i32 1) - %32 = load i32, i32* %grid_size, align 4 - call void @_ZN4dim3C2Ejjj(%struct.dim3* %dimGrid, i32 %32, i32 1, i32 1) - store i32 1, i32* %blockSize2d, align 4 - %33 = load i32, i32* @Size, align 4 - %34 = load i32, i32* %blockSize2d, align 4 - %div21 = sdiv i32 %33, %34 - %35 = load i32, i32* @Size, align 4 - %36 = load i32, i32* %blockSize2d, align 4 - %rem22 = srem i32 %35, %36 - %tobool23 = icmp ne i32 %rem22, 0 - %37 = zext i1 %tobool23 to i64 - %cond24 = select i1 %tobool23, i32 0, i32 1 - %tobool25 = icmp ne i32 %cond24, 0 - %lnot26 = xor i1 %tobool25, true - %conv27 = zext i1 %lnot26 to i32 - %add28 = add nsw i32 %div21, %conv27 - store i32 %add28, i32* %gridSize2d, align 4 - %38 = load i32, i32* %blockSize2d, align 4 - %39 = load i32, i32* %blockSize2d, align 4 - call void @_ZN4dim3C2Ejjj(%struct.dim3* %dimBlockXY, i32 %38, i32 %39, i32 1) - %40 = load i32, i32* %blockSize2d, align 4 - %call29 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.55, i64 0, i64 0), i32 %40) - %41 = load i32, i32* %gridSize2d, align 4 - %42 = load i32, i32* %gridSize2d, align 4 - call void @_ZN4dim3C2Ejjj(%struct.dim3* %dimGridXY, i32 %41, i32 %42, i32 1) - %43 = load i32, i32* %grid_size, align 4 - %44 = load i32, i32* %gridSize2d, align 4 - %call30 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([32 x i8], [32 x i8]* @.str.56, i64 0, i64 0), i32 %43, i32 %44) - %call31 = call i32 @gettimeofday(%struct.timeval* %time_start, %struct.timezone* null) #1 - store i32 0, i32* %t, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %45 = load i32, i32* %t, align 4 - %46 = load i32, i32* @Size, align 4 - %sub = sub nsw i32 %46, 1 - %cmp = icmp slt i32 %45, %sub - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %47 = bitcast %struct.dim3* %agg.tmp to i8* - %48 = bitcast %struct.dim3* %dimGrid to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %47, i8* align 4 %48, i64 12, i1 false) - %49 = bitcast %struct.dim3* %agg.tmp32 to i8* - %50 = bitcast %struct.dim3* %dimBlock to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %49, i8* align 4 %50, i64 12, i1 false) - %51 = bitcast { i64, i32 }* %agg.tmp.coerce to i8* - %52 = bitcast %struct.dim3* %agg.tmp to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %51, i8* align 4 %52, i64 12, i1 false) - %53 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 0 - %54 = load i64, i64* %53, align 4 - %55 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 1 - %56 = load i32, i32* %55, align 4 - %57 = bitcast { i64, i32 }* %agg.tmp32.coerce to i8* - %58 = bitcast %struct.dim3* %agg.tmp32 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %57, i8* align 4 %58, i64 12, i1 false) - %59 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp32.coerce, i32 0, i32 0 - %60 = load i64, i64* %59, align 4 - %61 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp32.coerce, i32 0, i32 1 - %62 = load i32, i32* %61, align 4 - %call33 = call i32 @__cudaPushCallConfiguration(i64 %54, i32 %56, i64 %60, i32 %62, i64 0, i8* null) - %tobool34 = icmp ne i32 %call33, 0 - br i1 %tobool34, label %kcall.end, label %kcall.configok - -kcall.configok: ; preds = %for.body - %63 = load float*, float** %m_cuda, align 8 - %64 = load float*, float** %a_cuda, align 8 - %65 = load i32, i32* @Size, align 4 - %66 = load i32, i32* %t, align 4 - call void @_Z4Fan1PfS_ii(float* %63, float* %64, i32 %65, i32 %66) - br label %kcall.end - -kcall.end: ; preds = %kcall.configok, %for.body - %call35 = call i32 @cudaDeviceSynchronize() - %67 = bitcast %struct.dim3* %agg.tmp36 to i8* - %68 = bitcast %struct.dim3* %dimGridXY to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %67, i8* align 4 %68, i64 12, i1 false) - %69 = bitcast %struct.dim3* %agg.tmp37 to i8* - %70 = bitcast %struct.dim3* %dimBlockXY to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %69, i8* align 4 %70, i64 12, i1 false) - %71 = bitcast { i64, i32 }* %agg.tmp36.coerce to i8* - %72 = bitcast %struct.dim3* %agg.tmp36 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %71, i8* align 4 %72, i64 12, i1 false) - %73 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp36.coerce, i32 0, i32 0 - %74 = load i64, i64* %73, align 4 - %75 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp36.coerce, i32 0, i32 1 - %76 = load i32, i32* %75, align 4 - %77 = bitcast { i64, i32 }* %agg.tmp37.coerce to i8* - %78 = bitcast %struct.dim3* %agg.tmp37 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %77, i8* align 4 %78, i64 12, i1 false) - %79 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp37.coerce, i32 0, i32 0 - %80 = load i64, i64* %79, align 4 - %81 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp37.coerce, i32 0, i32 1 - %82 = load i32, i32* %81, align 4 - %call38 = call i32 @__cudaPushCallConfiguration(i64 %74, i32 %76, i64 %80, i32 %82, i64 0, i8* null) - %tobool39 = icmp ne i32 %call38, 0 - br i1 %tobool39, label %kcall.end42, label %kcall.configok40 - -kcall.configok40: ; preds = %kcall.end - %83 = load float*, float** %m_cuda, align 8 - %84 = load float*, float** %a_cuda, align 8 - %85 = load float*, float** %b_cuda, align 8 - %86 = load i32, i32* @Size, align 4 - %87 = load i32, i32* @Size, align 4 - %88 = load i32, i32* %t, align 4 - %sub41 = sub nsw i32 %87, %88 - %89 = load i32, i32* %t, align 4 - call void @_Z4Fan2PfS_S_iii(float* %83, float* %84, float* %85, i32 %86, i32 %sub41, i32 %89) - br label %kcall.end42 - -kcall.end42: ; preds = %kcall.configok40, %kcall.end - %call43 = call i32 @cudaDeviceSynchronize() - call void @_Z14checkCUDAErrorPKc(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.57, i64 0, i64 0)) - br label %for.inc - -for.inc: ; preds = %kcall.end42 - %90 = load i32, i32* %t, align 4 - %inc = add nsw i32 %90, 1 - store i32 %inc, i32* %t, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %call44 = call i32 @gettimeofday(%struct.timeval* %time_end, %struct.timezone* null) #1 - %tv_sec = getelementptr inbounds %struct.timeval, %struct.timeval* %time_end, i32 0, i32 0 - %91 = load i64, i64* %tv_sec, align 8 - %mul45 = mul nsw i64 %91, 1000000 - %tv_usec = getelementptr inbounds %struct.timeval, %struct.timeval* %time_end, i32 0, i32 1 - %92 = load i64, i64* %tv_usec, align 8 - %add46 = add nsw i64 %mul45, %92 - %tv_sec47 = getelementptr inbounds %struct.timeval, %struct.timeval* %time_start, i32 0, i32 0 - %93 = load i64, i64* %tv_sec47, align 8 - %mul48 = mul nsw i64 %93, 1000000 - %tv_usec49 = getelementptr inbounds %struct.timeval, %struct.timeval* %time_start, i32 0, i32 1 - %94 = load i64, i64* %tv_usec49, align 8 - %add50 = add nsw i64 %mul48, %94 - %sub51 = sub nsw i64 %add46, %add50 - %conv52 = trunc i64 %sub51 to i32 - store i32 %conv52, i32* @totalKernelTime, align 4 - %95 = load float*, float** @m, align 8 - %96 = bitcast float* %95 to i8* - %97 = load float*, float** %m_cuda, align 8 - %98 = bitcast float* %97 to i8* - %99 = load i32, i32* @Size, align 4 - %100 = load i32, i32* @Size, align 4 - %mul53 = mul nsw i32 %99, %100 - %conv54 = sext i32 %mul53 to i64 - %mul55 = mul i64 %conv54, 4 - %call56 = call i32 @cudaMemcpy(i8* %96, i8* %98, i64 %mul55, i32 2) - %101 = load float*, float** @a, align 8 - %102 = bitcast float* %101 to i8* - %103 = load float*, float** %a_cuda, align 8 - %104 = bitcast float* %103 to i8* - %105 = load i32, i32* @Size, align 4 - %106 = load i32, i32* @Size, align 4 - %mul57 = mul nsw i32 %105, %106 - %conv58 = sext i32 %mul57 to i64 - %mul59 = mul i64 %conv58, 4 - %call60 = call i32 @cudaMemcpy(i8* %102, i8* %104, i64 %mul59, i32 2) - %107 = load float*, float** @b, align 8 - %108 = bitcast float* %107 to i8* - %109 = load float*, float** %b_cuda, align 8 - %110 = bitcast float* %109 to i8* - %111 = load i32, i32* @Size, align 4 - %conv61 = sext i32 %111 to i64 - %mul62 = mul i64 %conv61, 4 - %call63 = call i32 @cudaMemcpy(i8* %108, i8* %110, i64 %mul62, i32 2) - %112 = load float*, float** %m_cuda, align 8 - %113 = bitcast float* %112 to i8* - %call64 = call i32 @cudaFree(i8* %113) - %114 = load float*, float** %a_cuda, align 8 - %115 = bitcast float* %114 to i8* - %call65 = call i32 @cudaFree(i8* %115) - %116 = load float*, float** %b_cuda, align 8 - %117 = bitcast float* %116 to i8* - %call66 = call i32 @cudaFree(i8* %117) - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define dso_local void @_Z8PrintMatPfii(float* %ary, i32 %nrow, i32 %ncol) #2 { -entry: - %ary.addr = alloca float*, align 8 - %nrow.addr = alloca i32, align 4 - %ncol.addr = alloca i32, align 4 - %i = alloca i32, align 4 - %j = alloca i32, align 4 - store float* %ary, float** %ary.addr, align 8 - store i32 %nrow, i32* %nrow.addr, align 4 - store i32 %ncol, i32* %ncol.addr, align 4 - ret void -} - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z8PrintAryPfi(float* %ary, i32 %ary_size) #0 { -entry: - %ary.addr = alloca float*, align 8 - %ary_size.addr = alloca i32, align 4 - %i = alloca i32, align 4 - store float* %ary, float** %ary.addr, align 8 - store i32 %ary_size, i32* %ary_size.addr, align 4 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = load i32, i32* %i, align 4 - %1 = load i32, i32* %ary_size.addr, align 4 - %cmp = icmp slt i32 %0, %1 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %2 = load float*, float** %ary.addr, align 8 - %3 = load i32, i32* %i, align 4 - %idxprom = sext i32 %3 to i64 - %arrayidx = getelementptr inbounds float, float* %2, i64 %idxprom - %4 = load float, float* %arrayidx, align 4 - %conv = fpext float %4 to double - %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.59, i64 0, i64 0), double %conv) - br label %for.inc - -for.inc: ; preds = %for.body - %5 = load i32, i32* %i, align 4 - %inc = add nsw i32 %5, 1 - store i32 %inc, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.60, i64 0, i64 0)) - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define dso_local void @_Z7BackSubv() #2 { -entry: - %i = alloca i32, align 4 - %j = alloca i32, align 4 - %0 = load i32, i32* @Size, align 4 - %conv = sext i32 %0 to i64 - %mul = mul i64 %conv, 4 - %call = call noalias i8* @malloc(i64 %mul) #1 - %1 = bitcast i8* %call to float* - store float* %1, float** @finalVec, align 8 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc43, %entry - %2 = load i32, i32* %i, align 4 - %3 = load i32, i32* @Size, align 4 - %cmp = icmp slt i32 %2, %3 - br i1 %cmp, label %for.body, label %for.end45 - -for.body: ; preds = %for.cond - %4 = load float*, float** @b, align 8 - %5 = load i32, i32* @Size, align 4 - %6 = load i32, i32* %i, align 4 - %sub = sub nsw i32 %5, %6 - %sub1 = sub nsw i32 %sub, 1 - %idxprom = sext i32 %sub1 to i64 - %arrayidx = getelementptr inbounds float, float* %4, i64 %idxprom - %7 = load float, float* %arrayidx, align 4 - %8 = load float*, float** @finalVec, align 8 - %9 = load i32, i32* @Size, align 4 - %10 = load i32, i32* %i, align 4 - %sub2 = sub nsw i32 %9, %10 - %sub3 = sub nsw i32 %sub2, 1 - %idxprom4 = sext i32 %sub3 to i64 - %arrayidx5 = getelementptr inbounds float, float* %8, i64 %idxprom4 - store float %7, float* %arrayidx5, align 4 - store i32 0, i32* %j, align 4 - br label %for.cond6 - -for.cond6: ; preds = %for.inc, %for.body - %11 = load i32, i32* %j, align 4 - %12 = load i32, i32* %i, align 4 - %cmp7 = icmp slt i32 %11, %12 - br i1 %cmp7, label %for.body8, label %for.end - -for.body8: ; preds = %for.cond6 - %13 = load float*, float** @a, align 8 - %14 = load i32, i32* @Size, align 4 - %15 = load i32, i32* @Size, align 4 - %16 = load i32, i32* %i, align 4 - %sub9 = sub nsw i32 %15, %16 - %sub10 = sub nsw i32 %sub9, 1 - %mul11 = mul nsw i32 %14, %sub10 - %idx.ext = sext i32 %mul11 to i64 - %add.ptr = getelementptr inbounds float, float* %13, i64 %idx.ext - %17 = load i32, i32* @Size, align 4 - %18 = load i32, i32* %j, align 4 - %sub12 = sub nsw i32 %17, %18 - %sub13 = sub nsw i32 %sub12, 1 - %idx.ext14 = sext i32 %sub13 to i64 - %add.ptr15 = getelementptr inbounds float, float* %add.ptr, i64 %idx.ext14 - %19 = load float, float* %add.ptr15, align 4 - %20 = load float*, float** @finalVec, align 8 - %21 = load i32, i32* @Size, align 4 - %22 = load i32, i32* %j, align 4 - %sub16 = sub nsw i32 %21, %22 - %sub17 = sub nsw i32 %sub16, 1 - %idxprom18 = sext i32 %sub17 to i64 - %arrayidx19 = getelementptr inbounds float, float* %20, i64 %idxprom18 - %23 = load float, float* %arrayidx19, align 4 - %mul20 = fmul contract float %19, %23 - %24 = load float*, float** @finalVec, align 8 - %25 = load i32, i32* @Size, align 4 - %26 = load i32, i32* %i, align 4 - %sub21 = sub nsw i32 %25, %26 - %sub22 = sub nsw i32 %sub21, 1 - %idxprom23 = sext i32 %sub22 to i64 - %arrayidx24 = getelementptr inbounds float, float* %24, i64 %idxprom23 - %27 = load float, float* %arrayidx24, align 4 - %sub25 = fsub contract float %27, %mul20 - store float %sub25, float* %arrayidx24, align 4 - br label %for.inc - -for.inc: ; preds = %for.body8 - %28 = load i32, i32* %j, align 4 - %inc = add nsw i32 %28, 1 - store i32 %inc, i32* %j, align 4 - br label %for.cond6 - -for.end: ; preds = %for.cond6 - %29 = load float*, float** @finalVec, align 8 - %30 = load i32, i32* @Size, align 4 - %31 = load i32, i32* %i, align 4 - %sub26 = sub nsw i32 %30, %31 - %sub27 = sub nsw i32 %sub26, 1 - %idxprom28 = sext i32 %sub27 to i64 - %arrayidx29 = getelementptr inbounds float, float* %29, i64 %idxprom28 - %32 = load float, float* %arrayidx29, align 4 - %33 = load float*, float** @a, align 8 - %34 = load i32, i32* @Size, align 4 - %35 = load i32, i32* @Size, align 4 - %36 = load i32, i32* %i, align 4 - %sub30 = sub nsw i32 %35, %36 - %sub31 = sub nsw i32 %sub30, 1 - %mul32 = mul nsw i32 %34, %sub31 - %idx.ext33 = sext i32 %mul32 to i64 - %add.ptr34 = getelementptr inbounds float, float* %33, i64 %idx.ext33 - %37 = load i32, i32* @Size, align 4 - %38 = load i32, i32* %i, align 4 - %sub35 = sub nsw i32 %37, %38 - %sub36 = sub nsw i32 %sub35, 1 - %idx.ext37 = sext i32 %sub36 to i64 - %add.ptr38 = getelementptr inbounds float, float* %add.ptr34, i64 %idx.ext37 - %39 = load float, float* %add.ptr38, align 4 - %div = fdiv float %32, %39 - %40 = load float*, float** @finalVec, align 8 - %41 = load i32, i32* @Size, align 4 - %42 = load i32, i32* %i, align 4 - %sub39 = sub nsw i32 %41, %42 - %sub40 = sub nsw i32 %sub39, 1 - %idxprom41 = sext i32 %sub40 to i64 - %arrayidx42 = getelementptr inbounds float, float* %40, i64 %idxprom41 - store float %div, float* %arrayidx42, align 4 - br label %for.inc43 - -for.inc43: ; preds = %for.end - %43 = load i32, i32* %i, align 4 - %inc44 = add nsw i32 %43, 1 - store i32 %inc44, i32* %i, align 4 - br label %for.cond - -for.end45: ; preds = %for.cond - ret void -} - -; Function Attrs: nounwind -declare dso_local void @free(i8*) #7 - -declare dso_local i32 @cudaGetDeviceCount(i32*) #4 - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #8 - -declare dso_local i32 @cudaGetDeviceProperties(%struct.cudaDeviceProp*, i32) #4 - -declare dso_local i8* @cudaGetErrorString(i32) #4 - -declare dso_local i32 @cudaGetLastError() #4 - -declare dso_local %struct._IO_FILE* @fopen(i8*, i8*) #4 - -declare dso_local i32 @fscanf(%struct._IO_FILE*, i8*, ...) #4 - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z7InitMatPfii(float* %ary, i32 %nrow, i32 %ncol) #0 { -entry: - %ary.addr = alloca float*, align 8 - %nrow.addr = alloca i32, align 4 - %ncol.addr = alloca i32, align 4 - %i = alloca i32, align 4 - %j = alloca i32, align 4 - store float* %ary, float** %ary.addr, align 8 - store i32 %nrow, i32* %nrow.addr, align 4 - store i32 %ncol, i32* %ncol.addr, align 4 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc6, %entry - %0 = load i32, i32* %i, align 4 - %1 = load i32, i32* %nrow.addr, align 4 - %cmp = icmp slt i32 %0, %1 - br i1 %cmp, label %for.body, label %for.end8 - -for.body: ; preds = %for.cond - store i32 0, i32* %j, align 4 - br label %for.cond1 - -for.cond1: ; preds = %for.inc, %for.body - %2 = load i32, i32* %j, align 4 - %3 = load i32, i32* %ncol.addr, align 4 - %cmp2 = icmp slt i32 %2, %3 - br i1 %cmp2, label %for.body3, label %for.end - -for.body3: ; preds = %for.cond1 - %4 = load %struct._IO_FILE*, %struct._IO_FILE** @fp, align 8 - %5 = load float*, float** %ary.addr, align 8 - %6 = load i32, i32* @Size, align 4 - %7 = load i32, i32* %i, align 4 - %mul = mul nsw i32 %6, %7 - %idx.ext = sext i32 %mul to i64 - %add.ptr = getelementptr inbounds float, float* %5, i64 %idx.ext - %8 = load i32, i32* %j, align 4 - %idx.ext4 = sext i32 %8 to i64 - %add.ptr5 = getelementptr inbounds float, float* %add.ptr, i64 %idx.ext4 - %call = call i32 (%struct._IO_FILE*, i8*, ...) @fscanf(%struct._IO_FILE* %4, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.58, i64 0, i64 0), float* %add.ptr5) - br label %for.inc - -for.inc: ; preds = %for.body3 - %9 = load i32, i32* %j, align 4 - %inc = add nsw i32 %9, 1 - store i32 %inc, i32* %j, align 4 - br label %for.cond1 - -for.end: ; preds = %for.cond1 - br label %for.inc6 - -for.inc6: ; preds = %for.end - %10 = load i32, i32* %i, align 4 - %inc7 = add nsw i32 %10, 1 - store i32 %inc7, i32* %i, align 4 - br label %for.cond - -for.end8: ; preds = %for.cond - ret void -} - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z7InitAryPfi(float* %ary, i32 %ary_size) #0 { -entry: - %ary.addr = alloca float*, align 8 - %ary_size.addr = alloca i32, align 4 - %i = alloca i32, align 4 - store float* %ary, float** %ary.addr, align 8 - store i32 %ary_size, i32* %ary_size.addr, align 4 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = load i32, i32* %i, align 4 - %1 = load i32, i32* %ary_size.addr, align 4 - %cmp = icmp slt i32 %0, %1 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %2 = load %struct._IO_FILE*, %struct._IO_FILE** @fp, align 8 - %3 = load float*, float** %ary.addr, align 8 - %4 = load i32, i32* %i, align 4 - %idxprom = sext i32 %4 to i64 - %arrayidx = getelementptr inbounds float, float* %3, i64 %idxprom - %call = call i32 (%struct._IO_FILE*, i8*, ...) @fscanf(%struct._IO_FILE* %2, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.58, i64 0, i64 0), float* %arrayidx) - br label %for.inc - -for.inc: ; preds = %for.body - %5 = load i32, i32* %i, align 4 - %inc = add nsw i32 %5, 1 - store i32 %inc, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - ret void -} - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z4Fan1PfS_ii(float* %m_cuda, float* %a_cuda, i32 %Size, i32 %t) #0 { -entry: - %m_cuda.addr = alloca float*, align 8 - %a_cuda.addr = alloca float*, align 8 - %Size.addr = alloca i32, align 4 - %t.addr = alloca i32, align 4 - %grid_dim = alloca %struct.dim3, align 8 - %block_dim = alloca %struct.dim3, align 8 - %shmem_size = alloca i64, align 8 - %stream = alloca i8*, align 8 - %grid_dim.coerce = alloca { i64, i32 }, align 8 - %block_dim.coerce = alloca { i64, i32 }, align 8 - store float* %m_cuda, float** %m_cuda.addr, align 8 - store float* %a_cuda, float** %a_cuda.addr, align 8 - store i32 %Size, i32* %Size.addr, align 4 - store i32 %t, i32* %t.addr, align 4 - %kernel_args = alloca i8*, i64 4, align 16 - %0 = bitcast float** %m_cuda.addr to i8* - %1 = getelementptr i8*, i8** %kernel_args, i32 0 - store i8* %0, i8** %1 - %2 = bitcast float** %a_cuda.addr to i8* - %3 = getelementptr i8*, i8** %kernel_args, i32 1 - store i8* %2, i8** %3 - %4 = bitcast i32* %Size.addr to i8* - %5 = getelementptr i8*, i8** %kernel_args, i32 2 - store i8* %4, i8** %5 - %6 = bitcast i32* %t.addr to i8* - %7 = getelementptr i8*, i8** %kernel_args, i32 3 - store i8* %6, i8** %7 - %8 = call i32 @__cudaPopCallConfiguration(%struct.dim3* %grid_dim, %struct.dim3* %block_dim, i64* %shmem_size, i8** %stream) - %9 = load i64, i64* %shmem_size, align 8 - %10 = load i8*, i8** %stream, align 8 - %11 = bitcast { i64, i32 }* %grid_dim.coerce to i8* - %12 = bitcast %struct.dim3* %grid_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %11, i8* align 8 %12, i64 12, i1 false) - %13 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 0 - %14 = load i64, i64* %13, align 8 - %15 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 1 - %16 = load i32, i32* %15, align 8 - %17 = bitcast { i64, i32 }* %block_dim.coerce to i8* - %18 = bitcast %struct.dim3* %block_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %17, i8* align 8 %18, i64 12, i1 false) - %19 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 0 - %20 = load i64, i64* %19, align 8 - %21 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 1 - %22 = load i32, i32* %21, align 8 - %23 = bitcast i8* %10 to %struct.CUstream_st* - %call = call i32 @cudaLaunchKernel(i8* bitcast (void (float*, float*, i32, i32)* @_Z4Fan1PfS_ii to i8*), i64 %14, i32 %16, i64 %20, i32 %22, i8** %kernel_args, i64 %9, %struct.CUstream_st* %23) - br label %setup.end - -setup.end: ; preds = %entry - ret void -} - -declare dso_local i32 @__cudaPopCallConfiguration(%struct.dim3*, %struct.dim3*, i64*, i8**) - -declare dso_local i32 @cudaLaunchKernel(i8*, i64, i32, i64, i32, i8**, i64, %struct.CUstream_st*) - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #8 - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z4Fan2PfS_S_iii(float* %m_cuda, float* %a_cuda, float* %b_cuda, i32 %Size, i32 %j1, i32 %t) #0 { -entry: - %m_cuda.addr = alloca float*, align 8 - %a_cuda.addr = alloca float*, align 8 - %b_cuda.addr = alloca float*, align 8 - %Size.addr = alloca i32, align 4 - %j1.addr = alloca i32, align 4 - %t.addr = alloca i32, align 4 - %grid_dim = alloca %struct.dim3, align 8 - %block_dim = alloca %struct.dim3, align 8 - %shmem_size = alloca i64, align 8 - %stream = alloca i8*, align 8 - %grid_dim.coerce = alloca { i64, i32 }, align 8 - %block_dim.coerce = alloca { i64, i32 }, align 8 - store float* %m_cuda, float** %m_cuda.addr, align 8 - store float* %a_cuda, float** %a_cuda.addr, align 8 - store float* %b_cuda, float** %b_cuda.addr, align 8 - store i32 %Size, i32* %Size.addr, align 4 - store i32 %j1, i32* %j1.addr, align 4 - store i32 %t, i32* %t.addr, align 4 - %kernel_args = alloca i8*, i64 6, align 16 - %0 = bitcast float** %m_cuda.addr to i8* - %1 = getelementptr i8*, i8** %kernel_args, i32 0 - store i8* %0, i8** %1 - %2 = bitcast float** %a_cuda.addr to i8* - %3 = getelementptr i8*, i8** %kernel_args, i32 1 - store i8* %2, i8** %3 - %4 = bitcast float** %b_cuda.addr to i8* - %5 = getelementptr i8*, i8** %kernel_args, i32 2 - store i8* %4, i8** %5 - %6 = bitcast i32* %Size.addr to i8* - %7 = getelementptr i8*, i8** %kernel_args, i32 3 - store i8* %6, i8** %7 - %8 = bitcast i32* %j1.addr to i8* - %9 = getelementptr i8*, i8** %kernel_args, i32 4 - store i8* %8, i8** %9 - %10 = bitcast i32* %t.addr to i8* - %11 = getelementptr i8*, i8** %kernel_args, i32 5 - store i8* %10, i8** %11 - %12 = call i32 @__cudaPopCallConfiguration(%struct.dim3* %grid_dim, %struct.dim3* %block_dim, i64* %shmem_size, i8** %stream) - %13 = load i64, i64* %shmem_size, align 8 - %14 = load i8*, i8** %stream, align 8 - %15 = bitcast { i64, i32 }* %grid_dim.coerce to i8* - %16 = bitcast %struct.dim3* %grid_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %15, i8* align 8 %16, i64 12, i1 false) - %17 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 0 - %18 = load i64, i64* %17, align 8 - %19 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 1 - %20 = load i32, i32* %19, align 8 - %21 = bitcast { i64, i32 }* %block_dim.coerce to i8* - %22 = bitcast %struct.dim3* %block_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %21, i8* align 8 %22, i64 12, i1 false) - %23 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 0 - %24 = load i64, i64* %23, align 8 - %25 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 1 - %26 = load i32, i32* %25, align 8 - %27 = bitcast i8* %14 to %struct.CUstream_st* - %call = call i32 @cudaLaunchKernel(i8* bitcast (void (float*, float*, float*, i32, i32, i32)* @_Z4Fan2PfS_S_iii to i8*), i64 %18, i32 %20, i64 %24, i32 %26, i8** %kernel_args, i64 %13, %struct.CUstream_st* %27) - br label %setup.end - -setup.end: ; preds = %entry - ret void -} - -declare dso_local i32 @cudaMalloc(i8**, i64) #4 - -declare dso_local i32 @cudaMemcpy(i8*, i8*, i64, i32) #4 - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN4dim3C2Ejjj(%struct.dim3* %this, i32 %vx, i32 %vy, i32 %vz) unnamed_addr #2 comdat align 2 { -entry: - %this.addr = alloca %struct.dim3*, align 8 - %vx.addr = alloca i32, align 4 - %vy.addr = alloca i32, align 4 - %vz.addr = alloca i32, align 4 - store %struct.dim3* %this, %struct.dim3** %this.addr, align 8 - store i32 %vx, i32* %vx.addr, align 4 - store i32 %vy, i32* %vy.addr, align 4 - store i32 %vz, i32* %vz.addr, align 4 - %this1 = load %struct.dim3*, %struct.dim3** %this.addr, align 8 - %x = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 0 - %0 = load i32, i32* %vx.addr, align 4 - store i32 %0, i32* %x, align 4 - %y = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 1 - %1 = load i32, i32* %vy.addr, align 4 - store i32 %1, i32* %y, align 4 - %z = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 2 - %2 = load i32, i32* %vz.addr, align 4 - store i32 %2, i32* %z, align 4 - ret void -} - -declare dso_local i32 @__cudaPushCallConfiguration(i64, i32, i64, i32, i64, i8*) #4 - -declare dso_local i32 @cudaDeviceSynchronize() #4 - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z14checkCUDAErrorPKc(i8* %msg) #0 { -entry: - %msg.addr = alloca i8*, align 8 - %err = alloca i32, align 4 - store i8* %msg, i8** %msg.addr, align 8 - %call = call i32 @cudaGetLastError() - store i32 %call, i32* %err, align 4 - %0 = load i32, i32* %err, align 4 - %cmp = icmp ne i32 0, %0 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - %1 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %2 = load i8*, i8** %msg.addr, align 8 - %3 = load i32, i32* %err, align 4 - %call1 = call i8* @cudaGetErrorString(i32 %3) - %call2 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.61, i64 0, i64 0), i8* %2, i8* %call1) - call void @exit(i32 1) #9 - unreachable - -if.end: ; preds = %entry - ret void -} - -declare dso_local i32 @cudaFree(i8*) #4 - -declare dso_local i32 @fprintf(%struct._IO_FILE*, i8*, ...) #4 - -; Function Attrs: nounwind -declare dso_local float @expf(float) #7 - -define internal void @__cuda_register_globals(i8** %0) { -entry: - %1 = call i32 @__cudaRegisterFunction(i8** %0, i8* bitcast (void (float*, float*, i32, i32)* @_Z4Fan1PfS_ii to i8*), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @0, i64 0, i64 0), i8* getelementptr inbounds ([14 x i8], [14 x i8]* @0, i64 0, i64 0), i32 -1, i8* null, i8* null, i8* null, i8* null, i32* null) - %2 = call i32 @__cudaRegisterFunction(i8** %0, i8* bitcast (void (float*, float*, float*, i32, i32, i32)* @_Z4Fan2PfS_S_iii to i8*), i8* getelementptr inbounds ([17 x i8], [17 x i8]* @1, i64 0, i64 0), i8* getelementptr inbounds ([17 x i8], [17 x i8]* @1, i64 0, i64 0), i32 -1, i8* null, i8* null, i8* null, i8* null, i32* null) - ret void -} - -declare dso_local i32 @__cudaRegisterFunction(i8**, i8*, i8*, i8*, i32, i8*, i8*, i8*, i8*, i32*) - -declare dso_local i32 @__cudaRegisterVar(i8**, i8*, i8*, i8*, i32, i32, i32, i32) - -declare dso_local i8** @__cudaRegisterFatBinary(i8*) - -define internal void @__cuda_module_ctor(i8* %0) { -entry: - %1 = call i8** @__cudaRegisterFatBinary(i8* bitcast ({ i32, i32, i8*, i8* }* @__cuda_fatbin_wrapper to i8*)) - store i8** %1, i8*** @__cuda_gpubin_handle, align 8 - call void @__cuda_register_globals(i8** %1) - call void @__cudaRegisterFatBinaryEnd(i8** %1) - %2 = call i32 @atexit(void (i8*)* @__cuda_module_dtor) - ret void -} - -declare dso_local void @__cudaRegisterFatBinaryEnd(i8**) - -declare dso_local void @__cudaUnregisterFatBinary(i8**) - -define internal void @__cuda_module_dtor(i8* %0) { -entry: - %1 = load i8**, i8*** @__cuda_gpubin_handle, align 8 - call void @__cudaUnregisterFatBinary(i8** %1) - ret void -} - -declare dso_local i32 @atexit(void (i8*)*) - -attributes #0 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind } -attributes #2 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #3 = { noinline norecurse optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #4 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #5 = { noreturn nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #6 = { nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #7 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #8 = { argmemonly nounwind willreturn } -attributes #9 = { noreturn nounwind } -attributes #10 = { nounwind readonly } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]} -!1 = !{i32 1, !"wchar_size", i32 4} -!2 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"} diff --git a/examples/gauss/gaussian.cu b/examples/gauss/gaussian.cu deleted file mode 100644 index 637d900..0000000 --- a/examples/gauss/gaussian.cu +++ /dev/null @@ -1,522 +0,0 @@ -/*----------------------------------------------------------- - ** gaussian.cu -- The program is to solve a linear system Ax = b - ** by using Gaussian Elimination. The algorithm on page 101 - ** ("Foundations of Parallel Programming") is used. - ** The sequential version is gaussian.c. This parallel - ** implementation converts three independent for() loops - ** into three Fans. Use the data file ge_3.dat to verify - ** the correction of the output. - ** - ** Written by Andreas Kura, 02/15/95 - ** Modified by Chong-wei Xu, 04/20/95 - ** Modified by Chris Gregg for CUDA, 07/20/2009 - **----------------------------------------------------------- - */ -#include "cuda_runtime.h" -#include -#include -#include -#include -#include - -#ifdef TIMING -#include "timing.h" -#endif - -#ifdef RD_WG_SIZE_0_0 -#define MAXBLOCKSIZE RD_WG_SIZE_0_0 -#elif defined(RD_WG_SIZE_0) -#define MAXBLOCKSIZE RD_WG_SIZE_0 -#elif defined(RD_WG_SIZE) -#define MAXBLOCKSIZE RD_WG_SIZE -#else -#define MAXBLOCKSIZE 512 -#endif - -// 2D defines. Go from specific to general -#ifdef RD_WG_SIZE_1_0 -#define BLOCK_SIZE_XY RD_WG_SIZE_1_0 -#elif defined(RD_WG_SIZE_1) -#define BLOCK_SIZE_XY RD_WG_SIZE_1 -#elif defined(RD_WG_SIZE) -#define BLOCK_SIZE_XY RD_WG_SIZE -#else -#define BLOCK_SIZE_XY 1 -#endif - -#ifdef TIMING -struct timeval tv; -struct timeval tv_total_start, tv_total_end; -struct timeval tv_h2d_start, tv_h2d_end; -struct timeval tv_d2h_start, tv_d2h_end; -struct timeval tv_kernel_start, tv_kernel_end; -struct timeval tv_mem_alloc_start, tv_mem_alloc_end; -struct timeval tv_close_start, tv_close_end; -float init_time = 0, mem_alloc_time = 0, h2d_time = 0, kernel_time = 0, - d2h_time = 0, close_time = 0, total_time = 0; -#endif - -int Size; -float *a, *b, *finalVec; -float *m; - -FILE *fp; - -void InitProblemOnce(char *filename); -void InitPerRun(); -void ForwardSub(); -void BackSub(); -__global__ void Fan1(float *m, float *a, int Size, int t); -__global__ void Fan2(float *m, float *a, float *b, int Size, int j1, int t); -void InitMat(float *ary, int nrow, int ncol); -void InitAry(float *ary, int ary_size); -void PrintMat(float *ary, int nrow, int ncolumn); -void PrintAry(float *ary, int ary_size); -void PrintDeviceProperties(); -void checkCUDAError(const char *msg); - -unsigned int totalKernelTime = 0; - -// create both matrix and right hand side, Ke Wang 2013/08/12 11:51:06 -void create_matrix(float *m, int size) { - int i, j; - float lamda = -0.01; - float coe[2 * size - 1]; - float coe_i = 0.0; - - for (i = 0; i < size; i++) { - coe_i = 10 * exp(lamda * i); - j = size - 1 + i; - coe[j] = coe_i; - j = size - 1 - i; - coe[j] = coe_i; - } - - for (i = 0; i < size; i++) { - for (j = 0; j < size; j++) { - m[i * size + j] = coe[size - 1 - i + j]; - } - } -} - -int main(int argc, char *argv[]) { - printf("WG size of kernel 1 = %d, WG size of kernel 2= %d X %d\n", - MAXBLOCKSIZE, BLOCK_SIZE_XY, BLOCK_SIZE_XY); - int verbose = 1; - int i, j; - char flag; - if (argc < 2) { - printf("Usage: gaussian -f filename / -s size [-q]\n\n"); - printf("-q (quiet) suppresses printing the matrix and result values.\n"); - printf("-f (filename) path of input file\n"); - printf( - "-s (size) size of matrix. Create matrix and rhs in this program \n"); - printf( - "The first line of the file contains the dimension of the matrix, n."); - printf("The second line of the file is a newline.\n"); - printf("The next n lines contain n tab separated values for the matrix."); - printf("The next line of the file is a newline.\n"); - printf("The next line of the file is a 1xn vector with tab separated " - "values.\n"); - printf("The next line of the file is a newline. (optional)\n"); - printf("The final line of the file is the pre-computed solution. " - "(optional)\n"); - printf("Example: matrix4.txt:\n"); - printf("4\n"); - printf("\n"); - printf("-0.6 -0.5 0.7 0.3\n"); - printf("-0.3 -0.9 0.3 0.7\n"); - printf("-0.4 -0.5 -0.3 -0.8\n"); - printf("0.0 -0.1 0.2 0.9\n"); - printf("\n"); - printf("-0.85 -0.68 0.24 -0.53\n"); - printf("\n"); - printf("0.7 0.0 -0.4 -0.5\n"); - exit(0); - } - - cudaSetDevice(0); - - PrintDeviceProperties(); - // char filename[100]; - // sprintf(filename,"matrices/matrix%d.txt",size); - - for (i = 1; i < argc; i++) { - if (argv[i][0] == '-') { // flag - flag = argv[i][1]; - switch (flag) { - case 's': // platform - i++; - Size = atoi(argv[i]); - printf("Create matrix internally in parse, size = %d \n", Size); - - a = (float *)malloc(Size * Size * sizeof(float)); - create_matrix(a, Size); - - b = (float *)malloc(Size * sizeof(float)); - for (j = 0; j < Size; j++) - b[j] = 1.0; - - m = (float *)malloc(Size * Size * sizeof(float)); - break; - case 'f': // platform - i++; - printf("Read file from %s \n", argv[i]); - InitProblemOnce(argv[i]); - break; - case 'q': // quiet - verbose = 1; - break; - } - } - } - - // InitProblemOnce(filename); - - InitPerRun(); - // begin timing - struct timeval time_start; - gettimeofday(&time_start, NULL); - - // run kernels - ForwardSub(); - - // end timing - struct timeval time_end; - gettimeofday(&time_end, NULL); - unsigned int time_total = (time_end.tv_sec * 1000000 + time_end.tv_usec) - - (time_start.tv_sec * 1000000 + time_start.tv_usec); - - if (verbose) { - printf("Matrix m is: \n"); - PrintMat(m, Size, Size); - - printf("Matrix a is: \n"); - PrintMat(a, Size, Size); - - printf("Array b is: \n"); - PrintAry(b, Size); - } - BackSub(); - if (verbose) { - printf("The final solution is: \n"); - PrintAry(finalVec, Size); - } - printf("\nTime total (including memory transfers)\t%f sec\n", - time_total * 1e-6); - printf("Time for CUDA kernels:\t%f sec\n", totalKernelTime * 1e-6); - - /*printf("%d,%d\n",size,time_total); - fprintf(stderr,"%d,%d\n",size,time_total);*/ - - free(m); - free(a); - free(b); - -#ifdef TIMING - printf("Exec: %f\n", kernel_time); -#endif -} -/*------------------------------------------------------ - ** PrintDeviceProperties - **----------------------------------------------------- - */ -void PrintDeviceProperties() { - cudaDeviceProp deviceProp; - int nDevCount = 0; - - cudaGetDeviceCount(&nDevCount); - printf("Total Device found: %d", nDevCount); - for (int nDeviceIdx = 0; nDeviceIdx < nDevCount; ++nDeviceIdx) { - memset(&deviceProp, 0, sizeof(deviceProp)); - if (cudaSuccess == cudaGetDeviceProperties(&deviceProp, nDeviceIdx)) { - printf("\nDevice Name \t\t - %s ", deviceProp.name); - printf("\n**************************************"); - printf("\nTotal Global Memory\t\t\t - %lu KB", - deviceProp.totalGlobalMem / 1024); - printf("\nShared memory available per block \t - %lu KB", - deviceProp.sharedMemPerBlock / 1024); - printf("\nNumber of registers per thread block \t - %d", - deviceProp.regsPerBlock); - printf("\nWarp size in threads \t\t\t - %d", deviceProp.warpSize); - printf("\nMemory Pitch \t\t\t\t - %zu bytes", deviceProp.memPitch); - printf("\nMaximum threads per block \t\t - %d", - deviceProp.maxThreadsPerBlock); - printf("\nMaximum Thread Dimension (block) \t - %d %d %d", - deviceProp.maxThreadsDim[0], deviceProp.maxThreadsDim[1], - deviceProp.maxThreadsDim[2]); - printf("\nMaximum Thread Dimension (grid) \t - %d %d %d", - deviceProp.maxGridSize[0], deviceProp.maxGridSize[1], - deviceProp.maxGridSize[2]); - printf("\nTotal constant memory \t\t\t - %zu bytes", - deviceProp.totalConstMem); - printf("\nCUDA ver \t\t\t\t - %d.%d", deviceProp.major, deviceProp.minor); - printf("\nClock rate \t\t\t\t - %d KHz", deviceProp.clockRate); - printf("\nTexture Alignment \t\t\t - %zu bytes", - deviceProp.textureAlignment); - printf("\nDevice Overlap \t\t\t\t - %s", - deviceProp.deviceOverlap ? "Allowed" : "Not Allowed"); - printf("\nNumber of Multi processors \t\t - %d\n\n", - deviceProp.multiProcessorCount); - } else - printf("\n%s", cudaGetErrorString(cudaGetLastError())); - } -} - -/*------------------------------------------------------ - ** InitProblemOnce -- Initialize all of matrices and - ** vectors by opening a data file specified by the user. - ** - ** We used dynamic array *a, *b, and *m to allocate - ** the memory storages. - **------------------------------------------------------ - */ -void InitProblemOnce(char *filename) { - // char *filename = argv[1]; - - // printf("Enter the data file name: "); - // scanf("%s", filename); - printf("The file name is: %s\n", filename); - - fp = fopen(filename, "r"); - - fscanf(fp, "%d", &Size); - - a = (float *)malloc(Size * Size * sizeof(float)); - - InitMat(a, Size, Size); - printf("The input matrix a is:\n"); - PrintMat(a, Size, Size); - b = (float *)malloc(Size * sizeof(float)); - - InitAry(b, Size); - printf("The input array b is:\n"); - PrintAry(b, Size); - - m = (float *)malloc(Size * Size * sizeof(float)); -} - -/*------------------------------------------------------ - ** InitPerRun() -- Initialize the contents of the - ** multipier matrix **m - **------------------------------------------------------ - */ -void InitPerRun() { - int i; - for (i = 0; i < Size * Size; i++) - *(m + i) = 0.0; -} - -/*------------------------------------------------------- - ** Fan1() -- Calculate multiplier matrix - ** Pay attention to the index. Index i give the range - ** which starts from 0 to range-1. The real values of - ** the index should be adjust and related with the value - ** of t which is defined on the ForwardSub(). - **------------------------------------------------------- - */ -__global__ void Fan1(float *m_cuda, float *a_cuda, int Size, int t) { - // if(threadIdx.x + blockIdx.x * blockDim.x >= Size-1-t) { - // printf("blockIDx.x: %d, threadIdx.x: %d, Size: %d, t:%d, - // Size-1-t: %d\n",blockIdx.x,threadIdx.x,Size,t,Size-1-t); - // } - - if (threadIdx.x + blockIdx.x * blockDim.x >= Size - 1 - t) - return; - *(m_cuda + Size * (blockDim.x * blockIdx.x + threadIdx.x + t + 1) + t) = - *(a_cuda + Size * (blockDim.x * blockIdx.x + threadIdx.x + t + 1) + t) / - *(a_cuda + Size * t + t); -} - -/*------------------------------------------------------- - ** Fan2() -- Modify the matrix A into LUD - **------------------------------------------------------- - */ - -__global__ void Fan2(float *m_cuda, float *a_cuda, float *b_cuda, int Size, - int j1, int t) { - if (threadIdx.x + blockIdx.x * blockDim.x >= Size - 1 - t) - return; - if (threadIdx.y + blockIdx.y * blockDim.y >= Size - t) - return; - - int xidx = blockIdx.x * blockDim.x + threadIdx.x; - int yidx = blockIdx.y * blockDim.y + threadIdx.y; - // printf("blockIdx.x: %d, threadIdx.x: %d, blockIdx.y: %d, threadIdx.y: %d, - // blockDim.x: %d, blockDim.y: - // %d\n",blockIdx.x,threadIdx.x,blockIdx.y,threadIdx.y,blockDim.x,blockDim.y); - - a_cuda[Size * (xidx + 1 + t) + (yidx + t)] -= - m_cuda[Size * (xidx + 1 + t) + t] * a_cuda[Size * t + (yidx + t)]; - // a_cuda[xidx+1+t][yidx+t] -= m_cuda[xidx+1+t][t] * a_cuda[t][yidx+t]; - if (yidx == 0) { - // printf("blockIdx.x:%d,threadIdx.x:%d,blockIdx.y:%d,threadIdx.y:%d,blockDim.x:%d,blockDim.y:%d\n",blockIdx.x,threadIdx.x,blockIdx.y,threadIdx.y,blockDim.x,blockDim.y); - // printf("xidx:%d,yidx:%d\n",xidx,yidx); - b_cuda[xidx + 1 + t] -= - m_cuda[Size * (xidx + 1 + t) + (yidx + t)] * b_cuda[t]; - } -} - -/*------------------------------------------------------ - ** ForwardSub() -- Forward substitution of Gaussian - ** elimination. - **------------------------------------------------------ - */ -void ForwardSub() { - int t; - float *m_cuda, *a_cuda, *b_cuda; - - int A = 1; - int B = 2; - int C = 3; - int D = 4; - int E = 5; - int F = 6; - // printf("blockIDx.x: %d, threadIdx.x: %d, Size: %d, t: %d, Size-1-t: %d\n", - // A, B, C, D, E); printf("blockIdx.x: %d, threadIdx.x: %d, blockIdx.y: %d, - // threadIdx.y: %d, blockDim.x: %d, blockDim.y: %d\n", A , B, C, D, E, F); - - // allocate memory on GPU - cudaMalloc((void **)&m_cuda, Size * Size * sizeof(float)); - - cudaMalloc((void **)&a_cuda, Size * Size * sizeof(float)); - - cudaMalloc((void **)&b_cuda, Size * sizeof(float)); - - // copy memory to GPU - cudaMemcpy(m_cuda, m, Size * Size * sizeof(float), cudaMemcpyHostToDevice); - cudaMemcpy(a_cuda, a, Size * Size * sizeof(float), cudaMemcpyHostToDevice); - cudaMemcpy(b_cuda, b, Size * sizeof(float), cudaMemcpyHostToDevice); - - int block_size, grid_size; - - block_size = MAXBLOCKSIZE; - grid_size = (Size / block_size) + (!(Size % block_size) ? 0 : 1); - printf("1d grid size: %d\n", grid_size); - - dim3 dimBlock(block_size); - dim3 dimGrid(grid_size); - // dim3 dimGrid( (N/dimBlock.x) + (!(N%dimBlock.x)?0:1) ); - - int blockSize2d, gridSize2d; - blockSize2d = BLOCK_SIZE_XY; - gridSize2d = (Size / blockSize2d) + (!(Size % blockSize2d ? 0 : 1)); - - dim3 dimBlockXY(blockSize2d, blockSize2d); - - printf("BlockXY: %d \n", blockSize2d); - dim3 dimGridXY(gridSize2d, gridSize2d); - -#ifdef TIMING - gettimeofday(&tv_kernel_start, NULL); -#endif - printf("first grid size: %d second: %d\n", grid_size, gridSize2d); - // begin timing kernels - struct timeval time_start; - gettimeofday(&time_start, NULL); - for (t = 0; t < (Size - 1); t++) { - Fan1<<>>(m_cuda, a_cuda, Size, t); - cudaDeviceSynchronize(); - Fan2<<>>(m_cuda, a_cuda, b_cuda, Size, Size - t, t); - cudaDeviceSynchronize(); - checkCUDAError("Fan2"); - } - // end timing kernels - struct timeval time_end; - gettimeofday(&time_end, NULL); - totalKernelTime = (time_end.tv_sec * 1000000 + time_end.tv_usec) - - (time_start.tv_sec * 1000000 + time_start.tv_usec); - -#ifdef TIMING - tvsub(&time_end, &tv_kernel_start, &tv); - kernel_time += tv.tv_sec * 1000.0 + (float)tv.tv_usec / 1000.0; -#endif - - // copy memory back to CPU - cudaMemcpy(m, m_cuda, Size * Size * sizeof(float), cudaMemcpyDeviceToHost); - cudaMemcpy(a, a_cuda, Size * Size * sizeof(float), cudaMemcpyDeviceToHost); - cudaMemcpy(b, b_cuda, Size * sizeof(float), cudaMemcpyDeviceToHost); - cudaFree(m_cuda); - cudaFree(a_cuda); - cudaFree(b_cuda); -} - -/*------------------------------------------------------ - ** BackSub() -- Backward substitution - **------------------------------------------------------ - */ - -void BackSub() { - // create a new vector to hold the final answer - finalVec = (float *)malloc(Size * sizeof(float)); - // solve "bottom up" - int i, j; - for (i = 0; i < Size; i++) { - finalVec[Size - i - 1] = b[Size - i - 1]; - for (j = 0; j < i; j++) { - finalVec[Size - i - 1] -= *(a + Size * (Size - i - 1) + (Size - j - 1)) * - finalVec[Size - j - 1]; - } - finalVec[Size - i - 1] = - finalVec[Size - i - 1] / *(a + Size * (Size - i - 1) + (Size - i - 1)); - } -} - -void InitMat(float *ary, int nrow, int ncol) { - int i, j; - - for (i = 0; i < nrow; i++) { - for (j = 0; j < ncol; j++) { - fscanf(fp, "%f", ary + Size * i + j); - } - } -} - -/*------------------------------------------------------ - ** PrintMat() -- Print the contents of the matrix - **------------------------------------------------------ - */ -void PrintMat(float *ary, int nrow, int ncol) { - return; - int i, j; - - for (i = 0; i < nrow; i++) { - for (j = 0; j < ncol; j++) { - printf("%8.2f ", *(ary + Size * i + j)); - } - printf("\n"); - } - printf("\n"); -} - -/*------------------------------------------------------ - ** InitAry() -- Initialize the array (vector) by reading - ** data from the data file - **------------------------------------------------------ - */ -void InitAry(float *ary, int ary_size) { - int i; - - for (i = 0; i < ary_size; i++) { - fscanf(fp, "%f", &ary[i]); - } -} - -/*------------------------------------------------------ - ** PrintAry() -- Print the contents of the array (vector) - **------------------------------------------------------ - */ -void PrintAry(float *ary, int ary_size) { - int i; - for (i = 0; i < ary_size; i++) { - printf("%.2f ", ary[i]); - } - printf("\n\n"); -} -void checkCUDAError(const char *msg) { - cudaError_t err = cudaGetLastError(); - if (cudaSuccess != err) { - fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString(err)); - exit(EXIT_FAILURE); - } -} diff --git a/examples/gauss/run.sh b/examples/gauss/run.sh deleted file mode 100755 index e689c70..0000000 --- a/examples/gauss/run.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash -set -e -llvm-as gaussian-cuda-nvptx64-nvidia-cuda-sm_61.ll -llvm-as gaussian-host-x86_64-unknown-linux-gnu.ll -../../build/compilation/kernelTranslator gaussian-cuda-nvptx64-nvidia-cuda-sm_61.bc kernel.bc -../../build/compilation/hostTranslator gaussian-host-x86_64-unknown-linux-gnu.bc host.bc - -llc --relocation-model=pic --filetype=obj kernel.bc -llc --relocation-model=pic --filetype=obj host.bc - -g++ -Wall -L../../build/runtime \ - -L../../build/runtime/threadPool \ - -o gaussian -fPIC -no-pie host.o kernel.o -lc -lx86Runtime -lthreadPool -lpthread - -export LD_LIBRARY_PATH=../../build/runtime:../../build/runtime/threadPool:$LD_LIBRARY_PATH -./gaussian -f ../../rodinia-data/gaussian/matrix4.txt >> res.log - -if grep -q "0.70 0.00 -0.40 -0.50" res.log; then - echo "Pass" -else - echo "Error result" - exit 1 -fi diff --git a/examples/heartwall/AVI/avilib.c b/examples/heartwall/AVI/avilib.c deleted file mode 100644 index 11f4008..0000000 --- a/examples/heartwall/AVI/avilib.c +++ /dev/null @@ -1,1829 +0,0 @@ -#ifdef __cplusplus -extern "C" { -#endif - -/* - * avilib.c - * - * Copyright (C) Thomas Östreich - June 2001 - * multiple audio track support Copyright (C) 2002 Thomas Östreich - * - * Original code: - * Copyright (C) 1999 Rainer Johanni - * - * This file is part of transcode, a linux video stream processing tool - * - * transcode is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * transcode is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include "avilib.h" -//#include - -#define INFO_LIST - -/* The following variable indicates the kind of error */ - -long AVI_errno; - -#define MAX_INFO_STRLEN 64 -static char id_str[MAX_INFO_STRLEN]; - -#define FRAME_RATE_SCALE 1000000 - -#ifndef PACKAGE -#define PACKAGE "my" -#define VERSION "0.00" -#endif - -#ifndef O_BINARY -/* win32 wants a binary flag to open(); this sets it to null - on platforms that don't have it. */ -#define O_BINARY 0 -#endif - -/******************************************************************* - * * - * Utilities for writing an AVI File * - * * - *******************************************************************/ - -static size_t avi_read(int fd, char *buf, size_t len) { - size_t n = 0; - size_t r = 0; - - while (r < len) { - n = read(fd, buf + r, len - r); - - if (n <= 0) - return r; - r += n; - } - - return r; -} - -static size_t avi_write(int fd, char *buf, size_t len) { - size_t n = 0; - size_t r = 0; - - while (r < len) { - n = write(fd, buf + r, len - r); - if (n < 0) - return n; - - r += n; - } - return r; -} - -/* HEADERBYTES: The number of bytes to reserve for the header */ - -#define HEADERBYTES 2048 - -/* AVI_MAX_LEN: The maximum length of an AVI file, we stay a bit below - the 2GB limit (Remember: 2*10^9 is smaller than 2 GB) */ - -#define AVI_MAX_LEN (UINT_MAX - (1 << 20) * 16 - HEADERBYTES) - -#define PAD_EVEN(x) (((x) + 1) & ~1) - -/* Copy n into dst as a 4 byte, little endian number. - Should also work on big endian machines */ - -static void long2str(unsigned char *dst, int n) { - dst[0] = (n)&0xff; - dst[1] = (n >> 8) & 0xff; - dst[2] = (n >> 16) & 0xff; - dst[3] = (n >> 24) & 0xff; -} - -/* Convert a string of 4 or 2 bytes to a number, - also working on big endian machines */ - -static unsigned long str2ulong(unsigned char *str) { - return (str[0] | (str[1] << 8) | (str[2] << 16) | (str[3] << 24)); -} -static unsigned long str2ushort(unsigned char *str) { - return (str[0] | (str[1] << 8)); -} - -/* Calculate audio sample size from number of bits and number of channels. - This may have to be adjusted for eg. 12 bits and stereo */ - -static int avi_sampsize(avi_t *AVI, int j) { - int s; - s = ((AVI->track[j].a_bits + 7) / 8) * AVI->track[j].a_chans; - // if(s==0) s=1; /* avoid possible zero divisions */ - if (s < 4) - s = 4; /* avoid possible zero divisions */ - return s; -} - -/* Add a chunk (=tag and data) to the AVI file, - returns -1 on write error, 0 on success */ - -static int avi_add_chunk(avi_t *AVI, unsigned char *tag, unsigned char *data, - int length) { - unsigned char c[8]; - - /* Copy tag and length int c, so that we need only 1 write system call - for these two values */ - - memcpy(c, tag, 4); - long2str(c + 4, length); - - /* Output tag, length and data, restore previous position - if the write fails */ - - length = PAD_EVEN(length); - - if (avi_write(AVI->fdes, (char *)c, 8) != 8 || - avi_write(AVI->fdes, (char *)data, length) != length) { - lseek(AVI->fdes, AVI->pos, SEEK_SET); - AVI_errno = AVI_ERR_WRITE; - return -1; - } - - /* Update file position */ - - AVI->pos += 8 + length; - - // fprintf(stderr, "pos=%lu %s\n", AVI->pos, tag); - - return 0; -} - -static int avi_add_index_entry(avi_t *AVI, unsigned char *tag, long flags, - unsigned long pos, unsigned long len) { - void *ptr; - - if (AVI->n_idx >= AVI->max_idx) { - ptr = realloc((void *)AVI->idx, (AVI->max_idx + 4096) * 16); - - if (ptr == 0) { - AVI_errno = AVI_ERR_NO_MEM; - return -1; - } - AVI->max_idx += 4096; - AVI->idx = (unsigned char((*)[16]))ptr; - } - - /* Add index entry */ - - // fprintf(stderr, "INDEX %s %ld %lu %lu\n", tag, flags, pos, len); - - memcpy(AVI->idx[AVI->n_idx], tag, 4); - long2str(AVI->idx[AVI->n_idx] + 4, flags); - long2str(AVI->idx[AVI->n_idx] + 8, pos); - long2str(AVI->idx[AVI->n_idx] + 12, len); - - /* Update counter */ - - AVI->n_idx++; - - if (len > AVI->max_len) - AVI->max_len = len; - - return 0; -} - -/* - AVI_open_output_file: Open an AVI File and write a bunch - of zero bytes as space for the header. - - returns a pointer to avi_t on success, a zero pointer on error -*/ - -avi_t *AVI_open_output_file(char *filename) { - avi_t *AVI; - int i; - - int mask = 0; - - unsigned char AVI_header[HEADERBYTES]; - - /* Allocate the avi_t struct and zero it */ - - AVI = (avi_t *)malloc(sizeof(avi_t)); - if (AVI == 0) { - AVI_errno = AVI_ERR_NO_MEM; - return 0; - } - memset((void *)AVI, 0, sizeof(avi_t)); - - /* Since Linux needs a long time when deleting big files, - we do not truncate the file when we open it. - Instead it is truncated when the AVI file is closed */ - - /* mask = umask (0); - umask (mask);*/ - - AVI->fdes = open(filename, O_RDWR | O_CREAT | O_BINARY, 0644 & ~mask); - if (AVI->fdes < 0) { - AVI_errno = AVI_ERR_OPEN; - free(AVI); - return 0; - } - - /* Write out HEADERBYTES bytes, the header will go here - when we are finished with writing */ - - for (i = 0; i < HEADERBYTES; i++) - AVI_header[i] = 0; - i = avi_write(AVI->fdes, (char *)AVI_header, HEADERBYTES); - if (i != HEADERBYTES) { - close(AVI->fdes); - AVI_errno = AVI_ERR_WRITE; - free(AVI); - return 0; - } - - AVI->pos = HEADERBYTES; - AVI->mode = AVI_MODE_WRITE; /* open for writing */ - - // init - AVI->anum = 0; - AVI->aptr = 0; - - return AVI; -} - -void AVI_set_video(avi_t *AVI, int width, int height, double fps, - char *compressor) { - /* may only be called if file is open for writing */ - - if (AVI->mode == AVI_MODE_READ) - return; - - AVI->width = width; - AVI->height = height; - AVI->fps = fps; - - if (strncmp(compressor, "RGB", 3) == 0) { - memset(AVI->compressor, 0, 4); - } else { - memcpy(AVI->compressor, compressor, 4); - } - - AVI->compressor[4] = 0; - - avi_update_header(AVI); -} - -void AVI_set_audio(avi_t *AVI, int channels, long rate, int bits, int format, - long mp3rate) { - /* may only be called if file is open for writing */ - - if (AVI->mode == AVI_MODE_READ) - return; - - // inc audio tracks - AVI->aptr = AVI->anum; - ++AVI->anum; - - if (AVI->anum > AVI_MAX_TRACKS) { - fprintf(stderr, "error - only %d audio tracks supported\n", AVI_MAX_TRACKS); - exit(1); - } - - AVI->track[AVI->aptr].a_chans = channels; - AVI->track[AVI->aptr].a_rate = rate; - AVI->track[AVI->aptr].a_bits = bits; - AVI->track[AVI->aptr].a_fmt = format; - AVI->track[AVI->aptr].mp3rate = mp3rate; - - avi_update_header(AVI); -} - -#define OUT4CC(s) \ - if (nhb <= HEADERBYTES - 4) \ - memcpy(AVI_header + nhb, s, 4); \ - nhb += 4 - -#define OUTLONG(n) \ - if (nhb <= HEADERBYTES - 4) \ - long2str(AVI_header + nhb, n); \ - nhb += 4 - -#define OUTSHRT(n) \ - if (nhb <= HEADERBYTES - 2) { \ - AVI_header[nhb] = (n)&0xff; \ - AVI_header[nhb + 1] = (n >> 8) & 0xff; \ - } \ - nhb += 2 - -// ThOe write preliminary AVI file header: 0 frames, max vid/aud size -int avi_update_header(avi_t *AVI) { - int njunk, sampsize, hasIndex, ms_per_frame, frate, flag; - int movi_len, hdrl_start, strl_start, j; - unsigned char AVI_header[HEADERBYTES]; - long nhb; - - // assume max size - movi_len = AVI_MAX_LEN - HEADERBYTES + 4; - - // assume index will be written - hasIndex = 1; - - if (AVI->fps < 0.001) { - frate = 0; - ms_per_frame = 0; - } else { - frate = (int)(FRAME_RATE_SCALE * AVI->fps + 0.5); - ms_per_frame = (int)(1000000 / AVI->fps + 0.5); - } - - /* Prepare the file header */ - - nhb = 0; - - /* The RIFF header */ - - OUT4CC("RIFF"); - OUTLONG(movi_len); // assume max size - OUT4CC("AVI "); - - /* Start the header list */ - - OUT4CC("LIST"); - OUTLONG(0); /* Length of list in bytes, don't know yet */ - hdrl_start = nhb; /* Store start position */ - OUT4CC("hdrl"); - - /* The main AVI header */ - - /* The Flags in AVI File header */ - -#define AVIF_HASINDEX 0x00000010 /* Index at end of file */ -#define AVIF_MUSTUSEINDEX 0x00000020 -#define AVIF_ISINTERLEAVED 0x00000100 -#define AVIF_TRUSTCKTYPE 0x00000800 /* Use CKType to find key frames */ -#define AVIF_WASCAPTUREFILE 0x00010000 -#define AVIF_COPYRIGHTED 0x00020000 - - OUT4CC("avih"); - OUTLONG(56); /* # of bytes to follow */ - OUTLONG(ms_per_frame); /* Microseconds per frame */ - // ThOe ->0 - // OUTLONG(10000000); /* MaxBytesPerSec, I hope this will never - // be used */ - OUTLONG(0); - OUTLONG(0); /* PaddingGranularity (whatever that might be) */ - /* Other sources call it 'reserved' */ - flag = AVIF_ISINTERLEAVED; - if (hasIndex) - flag |= AVIF_HASINDEX; - if (hasIndex && AVI->must_use_index) - flag |= AVIF_MUSTUSEINDEX; - OUTLONG(flag); /* Flags */ - OUTLONG(0); // no frames yet - OUTLONG(0); /* InitialFrames */ - - OUTLONG(AVI->anum + 1); - - OUTLONG(0); /* SuggestedBufferSize */ - OUTLONG(AVI->width); /* Width */ - OUTLONG(AVI->height); /* Height */ - /* MS calls the following 'reserved': */ - OUTLONG(0); /* TimeScale: Unit used to measure time */ - OUTLONG(0); /* DataRate: Data rate of playback */ - OUTLONG(0); /* StartTime: Starting time of AVI data */ - OUTLONG(0); /* DataLength: Size of AVI data chunk */ - - /* Start the video stream list ---------------------------------- */ - - OUT4CC("LIST"); - OUTLONG(0); /* Length of list in bytes, don't know yet */ - strl_start = nhb; /* Store start position */ - OUT4CC("strl"); - - /* The video stream header */ - - OUT4CC("strh"); - OUTLONG(56); /* # of bytes to follow */ - OUT4CC("vids"); /* Type */ - OUT4CC(AVI->compressor); /* Handler */ - OUTLONG(0); /* Flags */ - OUTLONG(0); /* Reserved, MS says: wPriority, wLanguage */ - OUTLONG(0); /* InitialFrames */ - OUTLONG(FRAME_RATE_SCALE); /* Scale */ - OUTLONG(frate); /* Rate: Rate/Scale == samples/second */ - OUTLONG(0); /* Start */ - OUTLONG(0); // no frames yet - OUTLONG(0); /* SuggestedBufferSize */ - OUTLONG(-1); /* Quality */ - OUTLONG(0); /* SampleSize */ - OUTLONG(0); /* Frame */ - OUTLONG(0); /* Frame */ - // OUTLONG(0); /* Frame */ - // OUTLONG(0); /* Frame */ - - /* The video stream format */ - - OUT4CC("strf"); - OUTLONG(40); /* # of bytes to follow */ - OUTLONG(40); /* Size */ - OUTLONG(AVI->width); /* Width */ - OUTLONG(AVI->height); /* Height */ - OUTSHRT(1); - OUTSHRT(24); /* Planes, Count */ - OUT4CC(AVI->compressor); /* Compression */ - // ThOe (*3) - OUTLONG(AVI->width * AVI->height * 3); /* SizeImage (in bytes?) */ - OUTLONG(0); /* XPelsPerMeter */ - OUTLONG(0); /* YPelsPerMeter */ - OUTLONG(0); /* ClrUsed: Number of colors used */ - OUTLONG(0); /* ClrImportant: Number of colors important */ - - /* Finish stream list, i.e. put number of bytes in the list to proper pos */ - - long2str(AVI_header + strl_start - 4, nhb - strl_start); - - /* Start the audio stream list ---------------------------------- */ - - for (j = 0; j < AVI->anum; ++j) { - - sampsize = avi_sampsize(AVI, j); - - OUT4CC("LIST"); - OUTLONG(0); /* Length of list in bytes, don't know yet */ - strl_start = nhb; /* Store start position */ - OUT4CC("strl"); - - /* The audio stream header */ - - OUT4CC("strh"); - OUTLONG(56); /* # of bytes to follow */ - OUT4CC("auds"); - - // ----------- - // ThOe - OUTLONG(0); /* Format (Optionally) */ - // ----------- - - OUTLONG(0); /* Flags */ - OUTLONG(0); /* Reserved, MS says: wPriority, wLanguage */ - OUTLONG(0); /* InitialFrames */ - - // ThOe /4 - OUTLONG(sampsize / 4); /* Scale */ - OUTLONG(1000 * AVI->track[j].mp3rate / 8); - OUTLONG(0); /* Start */ - OUTLONG(4 * AVI->track[j].audio_bytes / sampsize); /* Length */ - OUTLONG(0); /* SuggestedBufferSize */ - OUTLONG(-1); /* Quality */ - - // ThOe /4 - OUTLONG(sampsize / 4); /* SampleSize */ - - OUTLONG(0); /* Frame */ - OUTLONG(0); /* Frame */ - // OUTLONG(0); /* Frame */ - // OUTLONG(0); /* Frame */ - - /* The audio stream format */ - - OUT4CC("strf"); - OUTLONG(16); /* # of bytes to follow */ - OUTSHRT(AVI->track[j].a_fmt); /* Format */ - OUTSHRT(AVI->track[j].a_chans); /* Number of channels */ - OUTLONG(AVI->track[j].a_rate); /* SamplesPerSec */ - // ThOe - OUTLONG(1000 * AVI->track[j].mp3rate / 8); - // ThOe (/4) - - OUTSHRT(sampsize / 4); /* BlockAlign */ - - OUTSHRT(AVI->track[j].a_bits); /* BitsPerSample */ - - /* Finish stream list, i.e. put number of bytes in the list to proper pos */ - - long2str(AVI_header + strl_start - 4, nhb - strl_start); - } - - /* Finish header list */ - - long2str(AVI_header + hdrl_start - 4, nhb - hdrl_start); - - /* Calculate the needed amount of junk bytes, output junk */ - - njunk = HEADERBYTES - nhb - 8 - 12; - - /* Safety first: if njunk <= 0, somebody has played with - HEADERBYTES without knowing what (s)he did. - This is a fatal error */ - - if (njunk <= 0) { - fprintf(stderr, "AVI_close_output_file: # of header bytes too small\n"); - exit(1); - } - - OUT4CC("JUNK"); - OUTLONG(njunk); - memset(AVI_header + nhb, 0, njunk); - - // 11/14/01 added id string - - if (njunk > strlen(id_str) + 8) { - sprintf(id_str, "%s-%s", PACKAGE, VERSION); - memcpy(AVI_header + nhb, id_str, strlen(id_str)); - } - - nhb += njunk; - - /* Start the movi list */ - - OUT4CC("LIST"); - OUTLONG(movi_len); /* Length of list in bytes */ - OUT4CC("movi"); - - /* Output the header, truncate the file to the number of bytes - actually written, report an error if someting goes wrong */ - - if (lseek(AVI->fdes, 0, SEEK_SET) < 0 || - avi_write(AVI->fdes, (char *)AVI_header, HEADERBYTES) != HEADERBYTES || - lseek(AVI->fdes, AVI->pos, SEEK_SET) < 0) { - AVI_errno = AVI_ERR_CLOSE; - return -1; - } - - return 0; -} - -/* - Write the header of an AVI file and close it. - returns 0 on success, -1 on write error. -*/ - -static int avi_close_output_file(avi_t *AVI) { - - int ret, njunk, sampsize, hasIndex, ms_per_frame, frate, idxerror, flag; - unsigned long movi_len; - int hdrl_start, strl_start, j; - unsigned char AVI_header[HEADERBYTES]; - long nhb; - -#ifdef INFO_LIST - long info_len; -// time_t calptr; -#endif - - /* Calculate length of movi list */ - - movi_len = AVI->pos - HEADERBYTES + 4; - - /* Try to ouput the index entries. This may fail e.g. if no space - is left on device. We will report this as an error, but we still - try to write the header correctly (so that the file still may be - readable in the most cases */ - - idxerror = 0; - // fprintf(stderr, "pos=%lu, index_len=%ld \n", AVI->pos, - // AVI->n_idx*16); - ret = avi_add_chunk(AVI, (unsigned char *)"idx1", - (unsigned char *)((void *)AVI->idx), AVI->n_idx * 16); - hasIndex = (ret == 0); - // fprintf(stderr, "pos=%lu, index_len=%d\n", AVI->pos, hasIndex); - - if (ret) { - idxerror = 1; - AVI_errno = AVI_ERR_WRITE_INDEX; - } - - /* Calculate Microseconds per frame */ - - if (AVI->fps < 0.001) { - frate = 0; - ms_per_frame = 0; - } else { - frate = (int)(FRAME_RATE_SCALE * AVI->fps + 0.5); - ms_per_frame = (int)(1000000 / AVI->fps + 0.5); - } - - /* Prepare the file header */ - - nhb = 0; - - /* The RIFF header */ - - OUT4CC("RIFF"); - OUTLONG(AVI->pos - 8); /* # of bytes to follow */ - OUT4CC("AVI "); - - /* Start the header list */ - - OUT4CC("LIST"); - OUTLONG(0); /* Length of list in bytes, don't know yet */ - hdrl_start = nhb; /* Store start position */ - OUT4CC("hdrl"); - - /* The main AVI header */ - - /* The Flags in AVI File header */ - -#define AVIF_HASINDEX 0x00000010 /* Index at end of file */ -#define AVIF_MUSTUSEINDEX 0x00000020 -#define AVIF_ISINTERLEAVED 0x00000100 -#define AVIF_TRUSTCKTYPE 0x00000800 /* Use CKType to find key frames */ -#define AVIF_WASCAPTUREFILE 0x00010000 -#define AVIF_COPYRIGHTED 0x00020000 - - OUT4CC("avih"); - OUTLONG(56); /* # of bytes to follow */ - OUTLONG(ms_per_frame); /* Microseconds per frame */ - // ThOe ->0 - // OUTLONG(10000000); /* MaxBytesPerSec, I hope this will never - // be used */ - OUTLONG(0); - OUTLONG(0); /* PaddingGranularity (whatever that might be) */ - /* Other sources call it 'reserved' */ - flag = AVIF_ISINTERLEAVED; - if (hasIndex) - flag |= AVIF_HASINDEX; - if (hasIndex && AVI->must_use_index) - flag |= AVIF_MUSTUSEINDEX; - OUTLONG(flag); /* Flags */ - OUTLONG(AVI->video_frames); /* TotalFrames */ - OUTLONG(0); /* InitialFrames */ - - OUTLONG(AVI->anum + 1); - // if (AVI->track[0].audio_bytes) - // { OUTLONG(2); } /* Streams */ - // else - // { OUTLONG(1); } /* Streams */ - - OUTLONG(0); /* SuggestedBufferSize */ - OUTLONG(AVI->width); /* Width */ - OUTLONG(AVI->height); /* Height */ - /* MS calls the following 'reserved': */ - OUTLONG(0); /* TimeScale: Unit used to measure time */ - OUTLONG(0); /* DataRate: Data rate of playback */ - OUTLONG(0); /* StartTime: Starting time of AVI data */ - OUTLONG(0); /* DataLength: Size of AVI data chunk */ - - /* Start the video stream list ---------------------------------- */ - - OUT4CC("LIST"); - OUTLONG(0); /* Length of list in bytes, don't know yet */ - strl_start = nhb; /* Store start position */ - OUT4CC("strl"); - - /* The video stream header */ - - OUT4CC("strh"); - OUTLONG(56); /* # of bytes to follow */ - OUT4CC("vids"); /* Type */ - OUT4CC(AVI->compressor); /* Handler */ - OUTLONG(0); /* Flags */ - OUTLONG(0); /* Reserved, MS says: wPriority, wLanguage */ - OUTLONG(0); /* InitialFrames */ - OUTLONG(FRAME_RATE_SCALE); /* Scale */ - OUTLONG(frate); /* Rate: Rate/Scale == samples/second */ - OUTLONG(0); /* Start */ - OUTLONG(AVI->video_frames); /* Length */ - OUTLONG(0); /* SuggestedBufferSize */ - OUTLONG(-1); /* Quality */ - OUTLONG(0); /* SampleSize */ - OUTLONG(0); /* Frame */ - OUTLONG(0); /* Frame */ - // OUTLONG(0); /* Frame */ - // OUTLONG(0); /* Frame */ - - /* The video stream format */ - - OUT4CC("strf"); - OUTLONG(40); /* # of bytes to follow */ - OUTLONG(40); /* Size */ - OUTLONG(AVI->width); /* Width */ - OUTLONG(AVI->height); /* Height */ - OUTSHRT(1); - OUTSHRT(24); /* Planes, Count */ - OUT4CC(AVI->compressor); /* Compression */ - // ThOe (*3) - OUTLONG(AVI->width * AVI->height * 3); /* SizeImage (in bytes?) */ - OUTLONG(0); /* XPelsPerMeter */ - OUTLONG(0); /* YPelsPerMeter */ - OUTLONG(0); /* ClrUsed: Number of colors used */ - OUTLONG(0); /* ClrImportant: Number of colors important */ - - /* Finish stream list, i.e. put number of bytes in the list to proper pos */ - - long2str(AVI_header + strl_start - 4, nhb - strl_start); - - /* Start the audio stream list ---------------------------------- */ - - for (j = 0; j < AVI->anum; ++j) { - - // if (AVI->track[j].a_chans && AVI->track[j].audio_bytes) - { - - sampsize = avi_sampsize(AVI, j); - - OUT4CC("LIST"); - OUTLONG(0); /* Length of list in bytes, don't know yet */ - strl_start = nhb; /* Store start position */ - OUT4CC("strl"); - - /* The audio stream header */ - - OUT4CC("strh"); - OUTLONG(56); /* # of bytes to follow */ - OUT4CC("auds"); - - // ----------- - // ThOe - OUTLONG(0); /* Format (Optionally) */ - // ----------- - - OUTLONG(0); /* Flags */ - OUTLONG(0); /* Reserved, MS says: wPriority, wLanguage */ - OUTLONG(0); /* InitialFrames */ - - // ThOe /4 - OUTLONG(sampsize / 4); /* Scale */ - OUTLONG(1000 * AVI->track[j].mp3rate / 8); - OUTLONG(0); /* Start */ - OUTLONG(4 * AVI->track[j].audio_bytes / sampsize); /* Length */ - OUTLONG(0); /* SuggestedBufferSize */ - OUTLONG(-1); /* Quality */ - - // ThOe /4 - OUTLONG(sampsize / 4); /* SampleSize */ - - OUTLONG(0); /* Frame */ - OUTLONG(0); /* Frame */ - // OUTLONG(0); /* Frame */ - // OUTLONG(0); /* Frame */ - - /* The audio stream format */ - - OUT4CC("strf"); - OUTLONG(16); /* # of bytes to follow */ - OUTSHRT(AVI->track[j].a_fmt); /* Format */ - OUTSHRT(AVI->track[j].a_chans); /* Number of channels */ - OUTLONG(AVI->track[j].a_rate); /* SamplesPerSec */ - // ThOe - OUTLONG(1000 * AVI->track[j].mp3rate / 8); - // ThOe (/4) - - OUTSHRT(sampsize / 4); /* BlockAlign */ - - OUTSHRT(AVI->track[j].a_bits); /* BitsPerSample */ - - /* Finish stream list, i.e. put number of bytes in the list to proper pos - */ - } - long2str(AVI_header + strl_start - 4, nhb - strl_start); - } - - /* Finish header list */ - - long2str(AVI_header + hdrl_start - 4, nhb - hdrl_start); - - // add INFO list --- (0.6.0pre4) - -#ifdef INFO_LIST - OUT4CC("LIST"); - - // FIXME - info_len = MAX_INFO_STRLEN + 12; - OUTLONG(info_len); - OUT4CC("INFO"); - - // OUT4CC ("INAM"); - // OUTLONG(MAX_INFO_STRLEN); - - // sprintf(id_str, "\t"); - // memset(AVI_header+nhb, 0, MAX_INFO_STRLEN); - // memcpy(AVI_header+nhb, id_str, strlen(id_str)); - // nhb += MAX_INFO_STRLEN; - - OUT4CC("ISFT"); - OUTLONG(MAX_INFO_STRLEN); - - sprintf(id_str, "%s-%s", PACKAGE, VERSION); - memset(AVI_header + nhb, 0, MAX_INFO_STRLEN); - memcpy(AVI_header + nhb, id_str, strlen(id_str)); - nhb += MAX_INFO_STRLEN; - -// OUT4CC ("ICMT"); -// OUTLONG(MAX_INFO_STRLEN); - -// calptr=time(NULL); -// sprintf(id_str, "\t%s %s", ctime(&calptr), ""); -// memset(AVI_header+nhb, 0, MAX_INFO_STRLEN); -// memcpy(AVI_header+nhb, id_str, 25); -// nhb += MAX_INFO_STRLEN; -#endif - - // ---------------------------- - - /* Calculate the needed amount of junk bytes, output junk */ - - njunk = HEADERBYTES - nhb - 8 - 12; - - /* Safety first: if njunk <= 0, somebody has played with - HEADERBYTES without knowing what (s)he did. - This is a fatal error */ - - if (njunk <= 0) { - fprintf(stderr, "AVI_close_output_file: # of header bytes too small\n"); - exit(1); - } - - OUT4CC("JUNK"); - OUTLONG(njunk); - memset(AVI_header + nhb, 0, njunk); - - nhb += njunk; - - /* Start the movi list */ - - OUT4CC("LIST"); - OUTLONG(movi_len); /* Length of list in bytes */ - OUT4CC("movi"); - - /* Output the header, truncate the file to the number of bytes - actually written, report an error if someting goes wrong */ - - if (lseek(AVI->fdes, 0, SEEK_SET) < 0 || - avi_write(AVI->fdes, (char *)AVI_header, HEADERBYTES) != HEADERBYTES - //|| ftruncate(AVI->fdes,AVI->pos)<0 - ) { - AVI_errno = AVI_ERR_CLOSE; - return -1; - } - - if (idxerror) - return -1; - - return 0; -} - -/* - AVI_write_data: - Add video or audio data to the file; - - Return values: - 0 No error; - -1 Error, AVI_errno is set appropriatly; - -*/ - -static int avi_write_data(avi_t *AVI, char *data, unsigned long length, - int audio, int keyframe) { - int n; - - unsigned char astr[5]; - - /* Check for maximum file length */ - - if ((AVI->pos + 8 + length + 8 + (AVI->n_idx + 1) * 16) > AVI_MAX_LEN) { - AVI_errno = AVI_ERR_SIZELIM; - return -1; - } - - /* Add index entry */ - - // set tag for current audio track - sprintf((char *)astr, "0%1dwb", AVI->aptr + 1); - - if (audio) - n = avi_add_index_entry(AVI, astr, 0x00, AVI->pos, length); - else - n = avi_add_index_entry(AVI, (unsigned char *)"00db", - ((keyframe) ? 0x10 : 0x0), AVI->pos, length); - - if (n) - return -1; - - /* Output tag and data */ - - if (audio) - n = avi_add_chunk(AVI, (unsigned char *)astr, (unsigned char *)data, - length); - else - n = avi_add_chunk(AVI, (unsigned char *)"00db", (unsigned char *)data, - length); - - if (n) - return -1; - - return 0; -} - -int AVI_write_frame(avi_t *AVI, char *data, long bytes, int keyframe) { - unsigned long pos; - - if (AVI->mode == AVI_MODE_READ) { - AVI_errno = AVI_ERR_NOT_PERM; - return -1; - } - - pos = AVI->pos; - - if (avi_write_data(AVI, data, bytes, 0, keyframe)) - return -1; - - AVI->last_pos = pos; - AVI->last_len = bytes; - AVI->video_frames++; - return 0; -} - -int AVI_dup_frame(avi_t *AVI) { - if (AVI->mode == AVI_MODE_READ) { - AVI_errno = AVI_ERR_NOT_PERM; - return -1; - } - - if (AVI->last_pos == 0) - return 0; /* No previous real frame */ - if (avi_add_index_entry(AVI, (unsigned char *)"00db", 0x10, AVI->last_pos, - AVI->last_len)) - return -1; - AVI->video_frames++; - AVI->must_use_index = 1; - return 0; -} - -int AVI_write_audio(avi_t *AVI, char *data, long bytes) { - if (AVI->mode == AVI_MODE_READ) { - AVI_errno = AVI_ERR_NOT_PERM; - return -1; - } - - if (avi_write_data(AVI, data, bytes, 1, 0)) - return -1; - AVI->track[AVI->aptr].audio_bytes += bytes; - return 0; -} - -int AVI_append_audio(avi_t *AVI, char *data, long bytes) { - - long i, length, pos; - unsigned char c[4]; - - if (AVI->mode == AVI_MODE_READ) { - AVI_errno = AVI_ERR_NOT_PERM; - return -1; - } - - // update last index entry: - - --AVI->n_idx; - length = str2ulong(AVI->idx[AVI->n_idx] + 12); - pos = str2ulong(AVI->idx[AVI->n_idx] + 8); - - // update; - long2str(AVI->idx[AVI->n_idx] + 12, length + bytes); - - ++AVI->n_idx; - - AVI->track[AVI->aptr].audio_bytes += bytes; - - // update chunk header - lseek(AVI->fdes, pos + 4, SEEK_SET); - long2str(c, length + bytes); - avi_write(AVI->fdes, (char *)c, 4); - - lseek(AVI->fdes, pos + 8 + length, SEEK_SET); - - i = PAD_EVEN(length + bytes); - - bytes = i - length; - avi_write(AVI->fdes, data, bytes); - AVI->pos = pos + 8 + i; - - return 0; -} - -long AVI_bytes_remain(avi_t *AVI) { - if (AVI->mode == AVI_MODE_READ) - return 0; - - return (AVI_MAX_LEN - (AVI->pos + 8 + 16 * AVI->n_idx)); -} - -long AVI_bytes_written(avi_t *AVI) { - if (AVI->mode == AVI_MODE_READ) - return 0; - - return (AVI->pos + 8 + 16 * AVI->n_idx); -} - -int AVI_set_audio_track(avi_t *AVI, int track) { - - if (track < 0 || track + 1 > AVI->anum) - return (-1); - - // this info is not written to file anyway - AVI->aptr = track; - return 0; -} - -int AVI_get_audio_track(avi_t *AVI) { return (AVI->aptr); } - -/******************************************************************* - * * - * Utilities for reading video and audio from an AVI File * - * * - *******************************************************************/ - -int AVI_close(avi_t *AVI) { - int ret; - - /* If the file was open for writing, the header and index still have - to be written */ - - if (AVI->mode == AVI_MODE_WRITE) - ret = avi_close_output_file(AVI); - else - ret = 0; - - /* Even if there happened an error, we first clean up */ - - close(AVI->fdes); - if (AVI->idx) - free(AVI->idx); - if (AVI->video_index) - free(AVI->video_index); - // FIXME - // if(AVI->audio_index) free(AVI->audio_index); - free(AVI); - - return ret; -} - -#define ERR_EXIT(x) \ - { \ - AVI_close(AVI); \ - AVI_errno = x; \ - return 0; \ - } - -avi_t *AVI_open_input_file(char *filename, int getIndex) { - avi_t *AVI = NULL; - - /* Create avi_t structure */ - - AVI = (avi_t *)malloc(sizeof(avi_t)); - if (AVI == NULL) { - AVI_errno = AVI_ERR_NO_MEM; - return 0; - } - memset((void *)AVI, 0, sizeof(avi_t)); - - AVI->mode = AVI_MODE_READ; /* open for reading */ - - /* Open the file */ - - AVI->fdes = open(filename, O_RDONLY | O_BINARY); - if (AVI->fdes < 0) { - AVI_errno = AVI_ERR_OPEN; - free(AVI); - return 0; - } - - avi_parse_input_file(AVI, getIndex); - - AVI->aptr = 0; // reset - - return AVI; -} - -avi_t *AVI_open_fd(int fd, int getIndex) { - avi_t *AVI = NULL; - - /* Create avi_t structure */ - - AVI = (avi_t *)malloc(sizeof(avi_t)); - if (AVI == NULL) { - AVI_errno = AVI_ERR_NO_MEM; - return 0; - } - memset((void *)AVI, 0, sizeof(avi_t)); - - AVI->mode = AVI_MODE_READ; /* open for reading */ - - // file alread open - AVI->fdes = fd; - - avi_parse_input_file(AVI, getIndex); - - AVI->aptr = 0; // reset - - return AVI; -} - -int avi_parse_input_file(avi_t *AVI, int getIndex) { - long i, n, rate, scale, idx_type; - unsigned char *hdrl_data; - long header_offset = 0, hdrl_len = 0; - long nvi, nai[AVI_MAX_TRACKS], ioff; - long tot[AVI_MAX_TRACKS]; - int j; - int lasttag = 0; - int vids_strh_seen = 0; - int vids_strf_seen = 0; - int auds_strh_seen = 0; - // int auds_strf_seen = 0; - int num_stream = 0; - char data[256]; - - /* Read first 12 bytes and check that this is an AVI file */ - - if (avi_read(AVI->fdes, data, 12) != 12) - ERR_EXIT(AVI_ERR_READ) - - if (strncasecmp(data, "RIFF", 4) != 0 || - strncasecmp(data + 8, "AVI ", 4) != 0) - ERR_EXIT(AVI_ERR_NO_AVI) - - /* Go through the AVI file and extract the header list, - the start position of the 'movi' list and an optionally - present idx1 tag */ - - hdrl_data = 0; - - while (1) { - if (avi_read(AVI->fdes, data, 8) != 8) - break; /* We assume it's EOF */ - - n = str2ulong((unsigned char *)data + 4); - n = PAD_EVEN(n); - - if (strncasecmp(data, "LIST", 4) == 0) { - if (avi_read(AVI->fdes, data, 4) != 4) - ERR_EXIT(AVI_ERR_READ) - n -= 4; - if (strncasecmp(data, "hdrl", 4) == 0) { - hdrl_len = n; - hdrl_data = (unsigned char *)malloc(n); - if (hdrl_data == 0) - ERR_EXIT(AVI_ERR_NO_MEM); - - // offset of header - - header_offset = lseek(AVI->fdes, 0, SEEK_CUR); - - if (avi_read(AVI->fdes, (char *)hdrl_data, n) != n) - ERR_EXIT(AVI_ERR_READ) - } else if (strncasecmp(data, "movi", 4) == 0) { - AVI->movi_start = lseek(AVI->fdes, 0, SEEK_CUR); - lseek(AVI->fdes, n, SEEK_CUR); - } else - lseek(AVI->fdes, n, SEEK_CUR); - } else if (strncasecmp(data, "idx1", 4) == 0) { - /* n must be a multiple of 16, but the reading does not - break if this is not the case */ - - AVI->n_idx = AVI->max_idx = n / 16; - AVI->idx = (unsigned char((*)[16]))malloc(n); - if (AVI->idx == 0) - ERR_EXIT(AVI_ERR_NO_MEM) - if (avi_read(AVI->fdes, (char *)AVI->idx, n) != n) - ERR_EXIT(AVI_ERR_READ) - } else - lseek(AVI->fdes, n, SEEK_CUR); - } - - if (!hdrl_data) - ERR_EXIT(AVI_ERR_NO_HDRL) - if (!AVI->movi_start) - ERR_EXIT(AVI_ERR_NO_MOVI) - - /* Interpret the header list */ - - for (i = 0; i < hdrl_len;) { - /* List tags are completly ignored */ - - if (strncasecmp((char *)hdrl_data + i, "LIST", 4) == 0) { - i += 12; - continue; - } - - n = str2ulong(hdrl_data + i + 4); - n = PAD_EVEN(n); - - /* Interpret the tag and its args */ - - if (strncasecmp((char *)hdrl_data + i, "strh", 4) == 0) { - i += 8; - if (strncasecmp((char *)hdrl_data + i, "vids", 4) == 0 && - !vids_strh_seen) { - memcpy(AVI->compressor, hdrl_data + i + 4, 4); - AVI->compressor[4] = 0; - - // ThOe - AVI->v_codech_off = header_offset + i + 4; - - scale = str2ulong((unsigned char *)hdrl_data + i + 20); - rate = str2ulong(hdrl_data + i + 24); - if (scale != 0) - AVI->fps = (double)rate / (double)scale; - AVI->video_frames = str2ulong(hdrl_data + i + 32); - AVI->video_strn = num_stream; - AVI->max_len = 0; - vids_strh_seen = 1; - lasttag = 1; /* vids */ - } else if (strncasecmp((char *)hdrl_data + i, "auds", 4) == 0 && - !auds_strh_seen) { - - // inc audio tracks - AVI->aptr = AVI->anum; - ++AVI->anum; - - if (AVI->anum > AVI_MAX_TRACKS) { - fprintf(stderr, "error - only %d audio tracks supported\n", - AVI_MAX_TRACKS); - return (-1); - } - - AVI->track[AVI->aptr].audio_bytes = - str2ulong(hdrl_data + i + 32) * avi_sampsize(AVI, 0); - AVI->track[AVI->aptr].audio_strn = num_stream; - // auds_strh_seen = 1; - lasttag = 2; /* auds */ - - // ThOe - AVI->track[AVI->aptr].a_codech_off = header_offset + i; - - } else - lasttag = 0; - num_stream++; - } else if (strncasecmp((char *)hdrl_data + i, "strf", 4) == 0) { - i += 8; - if (lasttag == 1) { - AVI->width = str2ulong(hdrl_data + i + 4); - AVI->height = str2ulong(hdrl_data + i + 8); - vids_strf_seen = 1; - // ThOe - AVI->v_codecf_off = header_offset + i + 16; - - memcpy(AVI->compressor2, hdrl_data + i + 16, 4); - AVI->compressor2[4] = 0; - - } else if (lasttag == 2) { - AVI->track[AVI->aptr].a_fmt = str2ushort(hdrl_data + i); - - // ThOe - AVI->track[AVI->aptr].a_codecf_off = header_offset + i; - - AVI->track[AVI->aptr].a_chans = str2ushort(hdrl_data + i + 2); - AVI->track[AVI->aptr].a_rate = str2ulong(hdrl_data + i + 4); - // ThOe: read mp3bitrate - AVI->track[AVI->aptr].mp3rate = 8 * str2ulong(hdrl_data + i + 8) / 1000; - //: ThOe - AVI->track[AVI->aptr].a_bits = str2ushort(hdrl_data + i + 14); - // auds_strf_seen = 1; - } - lasttag = 0; - } else { - i += 8; - lasttag = 0; - } - - i += n; - } - - free(hdrl_data); - - if (!vids_strh_seen || !vids_strf_seen) - ERR_EXIT(AVI_ERR_NO_VIDS) - - AVI->video_tag[0] = AVI->video_strn / 10 + '0'; - AVI->video_tag[1] = AVI->video_strn % 10 + '0'; - AVI->video_tag[2] = 'd'; - AVI->video_tag[3] = 'b'; - - /* Audio tag is set to "99wb" if no audio present */ - if (!AVI->track[0].a_chans) - AVI->track[0].audio_strn = 99; - - for (j = 0; j < AVI->anum; ++j) { - AVI->track[j].audio_tag[0] = (j + 1) / 10 + '0'; - AVI->track[j].audio_tag[1] = (j + 1) % 10 + '0'; - AVI->track[j].audio_tag[2] = 'w'; - AVI->track[j].audio_tag[3] = 'b'; - } - - lseek(AVI->fdes, AVI->movi_start, SEEK_SET); - - /* get index if wanted */ - - if (!getIndex) - return (0); - - /* if the file has an idx1, check if this is relative - to the start of the file or to the start of the movi list */ - - idx_type = 0; - - if (AVI->idx) { - long pos, len; - - /* Search the first videoframe in the idx1 and look where - it is in the file */ - - for (i = 0; i < AVI->n_idx; i++) - if (strncasecmp((char *)AVI->idx[i], (char *)AVI->video_tag, 3) == 0) - break; - if (i >= AVI->n_idx) - ERR_EXIT(AVI_ERR_NO_VIDS) - - pos = str2ulong(AVI->idx[i] + 8); - len = str2ulong(AVI->idx[i] + 12); - - lseek(AVI->fdes, pos, SEEK_SET); - if (avi_read(AVI->fdes, data, 8) != 8) - ERR_EXIT(AVI_ERR_READ) - if (strncasecmp((char *)data, (char *)AVI->idx[i], 4) == 0 && - str2ulong((unsigned char *)data + 4) == len) { - idx_type = 1; /* Index from start of file */ - } else { - lseek(AVI->fdes, pos + AVI->movi_start - 4, SEEK_SET); - if (avi_read(AVI->fdes, data, 8) != 8) - ERR_EXIT(AVI_ERR_READ) - if (strncasecmp((char *)data, (char *)AVI->idx[i], 4) == 0 && - str2ulong((unsigned char *)data + 4) == len) { - idx_type = 2; /* Index from start of movi list */ - } - } - /* idx_type remains 0 if neither of the two tests above succeeds */ - } - - if (idx_type == 0) { - /* we must search through the file to get the index */ - - lseek(AVI->fdes, AVI->movi_start, SEEK_SET); - - AVI->n_idx = 0; - - while (1) { - if (avi_read(AVI->fdes, data, 8) != 8) - break; - n = str2ulong((unsigned char *)data + 4); - - /* The movi list may contain sub-lists, ignore them */ - - if (strncasecmp(data, "LIST", 4) == 0) { - lseek(AVI->fdes, 4, SEEK_CUR); - continue; - } - - /* Check if we got a tag ##db, ##dc or ##wb */ - - if (((data[2] == 'd' || data[2] == 'D') && - (data[3] == 'b' || data[3] == 'B' || data[3] == 'c' || - data[3] == 'C')) || - ((data[2] == 'w' || data[2] == 'W') && - (data[3] == 'b' || data[3] == 'B'))) { - avi_add_index_entry(AVI, (unsigned char *)data, 0, - lseek(AVI->fdes, 0, SEEK_CUR) - 8, n); - } - - lseek(AVI->fdes, PAD_EVEN(n), SEEK_CUR); - } - idx_type = 1; - } - - /* Now generate the video index and audio index arrays */ - - nvi = 0; - for (j = 0; j < AVI->anum; ++j) - nai[j] = 0; - - for (i = 0; i < AVI->n_idx; i++) { - - if (strncasecmp((char *)AVI->idx[i], (char *)AVI->video_tag, 3) == 0) - nvi++; - - for (j = 0; j < AVI->anum; ++j) - if (strncasecmp((char *)AVI->idx[i], AVI->track[j].audio_tag, 4) == 0) - nai[j]++; - } - - AVI->video_frames = nvi; - for (j = 0; j < AVI->anum; ++j) - AVI->track[j].audio_chunks = nai[j]; - - // fprintf(stderr, "chunks = %ld %d %s\n", AVI->track[0].audio_chunks, - // AVI->anum, AVI->track[0].audio_tag); - - if (AVI->video_frames == 0) - ERR_EXIT(AVI_ERR_NO_VIDS); - AVI->video_index = - (video_index_entry *)malloc(nvi * sizeof(video_index_entry)); - if (AVI->video_index == 0) - ERR_EXIT(AVI_ERR_NO_MEM); - - for (j = 0; j < AVI->anum; ++j) { - if (AVI->track[j].audio_chunks) { - AVI->track[j].audio_index = - (audio_index_entry *)malloc(nai[j] * sizeof(audio_index_entry)); - if (AVI->track[j].audio_index == 0) - ERR_EXIT(AVI_ERR_NO_MEM); - } - } - - nvi = 0; - for (j = 0; j < AVI->anum; ++j) - nai[j] = tot[j] = 0; - - ioff = idx_type == 1 ? 8 : AVI->movi_start + 4; - - for (i = 0; i < AVI->n_idx; i++) { - - // video - if (strncasecmp((char *)AVI->idx[i], (char *)AVI->video_tag, 3) == 0) { - AVI->video_index[nvi].key = str2ulong(AVI->idx[i] + 4); - AVI->video_index[nvi].pos = str2ulong(AVI->idx[i] + 8) + ioff; - AVI->video_index[nvi].len = str2ulong(AVI->idx[i] + 12); - nvi++; - } - - // audio - for (j = 0; j < AVI->anum; ++j) { - - if (strncasecmp((char *)AVI->idx[i], AVI->track[j].audio_tag, 4) == 0) { - AVI->track[j].audio_index[nai[j]].pos = - str2ulong(AVI->idx[i] + 8) + ioff; - AVI->track[j].audio_index[nai[j]].len = str2ulong(AVI->idx[i] + 12); - AVI->track[j].audio_index[nai[j]].tot = tot[j]; - tot[j] += AVI->track[j].audio_index[nai[j]].len; - nai[j]++; - } - } - } - - for (j = 0; j < AVI->anum; ++j) - AVI->track[j].audio_bytes = tot[j]; - - /* Reposition the file */ - - lseek(AVI->fdes, AVI->movi_start, SEEK_SET); - AVI->video_pos = 0; - - return (0); -} - -long AVI_video_frames(avi_t *AVI) { return AVI->video_frames; } -int AVI_video_width(avi_t *AVI) { return AVI->width; } -int AVI_video_height(avi_t *AVI) { return AVI->height; } -double AVI_frame_rate(avi_t *AVI) { return AVI->fps; } -char *AVI_video_compressor(avi_t *AVI) { return AVI->compressor2; } - -long AVI_max_video_chunk(avi_t *AVI) { return AVI->max_len; } - -int AVI_audio_tracks(avi_t *AVI) { return (AVI->anum); } - -int AVI_audio_channels(avi_t *AVI) { return AVI->track[AVI->aptr].a_chans; } - -long AVI_audio_mp3rate(avi_t *AVI) { return AVI->track[AVI->aptr].mp3rate; } - -int AVI_audio_bits(avi_t *AVI) { return AVI->track[AVI->aptr].a_bits; } - -int AVI_audio_format(avi_t *AVI) { return AVI->track[AVI->aptr].a_fmt; } - -long AVI_audio_rate(avi_t *AVI) { return AVI->track[AVI->aptr].a_rate; } - -long AVI_audio_bytes(avi_t *AVI) { return AVI->track[AVI->aptr].audio_bytes; } - -long AVI_audio_chunks(avi_t *AVI) { return AVI->track[AVI->aptr].audio_chunks; } - -long AVI_audio_codech_offset(avi_t *AVI) { - return AVI->track[AVI->aptr].a_codech_off; -} - -long AVI_audio_codecf_offset(avi_t *AVI) { - return AVI->track[AVI->aptr].a_codecf_off; -} - -long AVI_video_codech_offset(avi_t *AVI) { return AVI->v_codech_off; } - -long AVI_video_codecf_offset(avi_t *AVI) { return AVI->v_codecf_off; } - -long AVI_frame_size(avi_t *AVI, long frame) { - if (AVI->mode == AVI_MODE_WRITE) { - AVI_errno = AVI_ERR_NOT_PERM; - return -1; - } - if (!AVI->video_index) { - AVI_errno = AVI_ERR_NO_IDX; - return -1; - } - - if (frame < 0 || frame >= AVI->video_frames) - return 0; - return (AVI->video_index[frame].len); -} - -long AVI_audio_size(avi_t *AVI, long frame) { - if (AVI->mode == AVI_MODE_WRITE) { - AVI_errno = AVI_ERR_NOT_PERM; - return -1; - } - if (!AVI->track[AVI->aptr].audio_index) { - AVI_errno = AVI_ERR_NO_IDX; - return -1; - } - - if (frame < 0 || frame >= AVI->track[AVI->aptr].audio_chunks) - return 0; - return (AVI->track[AVI->aptr].audio_index[frame].len); -} - -long AVI_get_video_position(avi_t *AVI, long frame) { - if (AVI->mode == AVI_MODE_WRITE) { - AVI_errno = AVI_ERR_NOT_PERM; - return -1; - } - if (!AVI->video_index) { - AVI_errno = AVI_ERR_NO_IDX; - return -1; - } - - if (frame < 0 || frame >= AVI->video_frames) - return 0; - return (AVI->video_index[frame].pos); -} - -int AVI_seek_start(avi_t *AVI) { - if (AVI->mode == AVI_MODE_WRITE) { - AVI_errno = AVI_ERR_NOT_PERM; - return -1; - } - - lseek(AVI->fdes, AVI->movi_start, SEEK_SET); - AVI->video_pos = 0; - return 0; -} - -int AVI_set_video_position(avi_t *AVI, long frame) { - if (AVI->mode == AVI_MODE_WRITE) { - AVI_errno = AVI_ERR_NOT_PERM; - return -1; - } - if (!AVI->video_index) { - AVI_errno = AVI_ERR_NO_IDX; - return -1; - } - - if (frame < 0) - frame = 0; - AVI->video_pos = frame; - return 0; -} - -int AVI_set_audio_bitrate(avi_t *AVI, long bitrate) { - if (AVI->mode == AVI_MODE_READ) { - AVI_errno = AVI_ERR_NOT_PERM; - return -1; - } - - AVI->track[AVI->aptr].mp3rate = bitrate; - return 0; -} - -long AVI_read_frame(avi_t *AVI, char *vidbuf, int *keyframe) { - long n; - if (AVI->mode == AVI_MODE_WRITE) { - AVI_errno = AVI_ERR_NOT_PERM; - return -1; - } - if (!AVI->video_index) { - AVI_errno = AVI_ERR_NO_IDX; - return -1; - } - if (AVI->video_pos < 0 || AVI->video_pos >= AVI->video_frames) - return -1; - n = AVI->video_index[AVI->video_pos].len; - *keyframe = (AVI->video_index[AVI->video_pos].key == 0x10) ? 1 : 0; - lseek(AVI->fdes, AVI->video_index[AVI->video_pos].pos, SEEK_SET); - if (avi_read(AVI->fdes, vidbuf, n) != n) { - AVI_errno = AVI_ERR_READ; - return -1; - } - AVI->video_pos++; - return n; -} - -int AVI_set_audio_position(avi_t *AVI, long byte) { - long n0, n1, n; - - if (AVI->mode == AVI_MODE_WRITE) { - AVI_errno = AVI_ERR_NOT_PERM; - return -1; - } - if (!AVI->track[AVI->aptr].audio_index) { - AVI_errno = AVI_ERR_NO_IDX; - return -1; - } - - if (byte < 0) - byte = 0; - - /* Binary search in the audio chunks */ - - n0 = 0; - n1 = AVI->track[AVI->aptr].audio_chunks; - - while (n0 < n1 - 1) { - n = (n0 + n1) / 2; - if (AVI->track[AVI->aptr].audio_index[n].tot > byte) - n1 = n; - else - n0 = n; - } - - AVI->track[AVI->aptr].audio_posc = n0; - AVI->track[AVI->aptr].audio_posb = - byte - AVI->track[AVI->aptr].audio_index[n0].tot; - - return 0; -} - -long AVI_read_audio(avi_t *AVI, char *audbuf, long bytes) { - long nr, pos, left, todo; - - if (AVI->mode == AVI_MODE_WRITE) { - AVI_errno = AVI_ERR_NOT_PERM; - return -1; - } - if (!AVI->track[AVI->aptr].audio_index) { - AVI_errno = AVI_ERR_NO_IDX; - return -1; - } - - nr = 0; /* total number of bytes read */ - - while (bytes > 0) { - left = AVI->track[AVI->aptr] - .audio_index[AVI->track[AVI->aptr].audio_posc] - .len - - AVI->track[AVI->aptr].audio_posb; - if (left == 0) { - if (AVI->track[AVI->aptr].audio_posc >= - AVI->track[AVI->aptr].audio_chunks - 1) - return nr; - AVI->track[AVI->aptr].audio_posc++; - AVI->track[AVI->aptr].audio_posb = 0; - continue; - } - if (bytes < left) - todo = bytes; - else - todo = left; - pos = AVI->track[AVI->aptr] - .audio_index[AVI->track[AVI->aptr].audio_posc] - .pos + - AVI->track[AVI->aptr].audio_posb; - lseek(AVI->fdes, pos, SEEK_SET); - if (avi_read(AVI->fdes, audbuf + nr, todo) != todo) { - AVI_errno = AVI_ERR_READ; - return -1; - } - bytes -= todo; - nr += todo; - AVI->track[AVI->aptr].audio_posb += todo; - } - - return nr; -} - -/* AVI_read_data: Special routine for reading the next audio or video chunk - without having an index of the file. */ - -int AVI_read_data(avi_t *AVI, char *vidbuf, long max_vidbuf, char *audbuf, - long max_audbuf, long *len) { - - /* - * Return codes: - * - * 1 = video data read - * 2 = audio data read - * 0 = reached EOF - * -1 = video buffer too small - * -2 = audio buffer too small - */ - - int n; - char data[8]; - - if (AVI->mode == AVI_MODE_WRITE) - return 0; - - while (1) { - /* Read tag and length */ - - if (avi_read(AVI->fdes, data, 8) != 8) - return 0; - - /* if we got a list tag, ignore it */ - - if (strncasecmp(data, "LIST", 4) == 0) { - lseek(AVI->fdes, 4, SEEK_CUR); - continue; - } - - n = PAD_EVEN(str2ulong((unsigned char *)data + 4)); - - if (strncasecmp(data, AVI->video_tag, 3) == 0) { - *len = n; - AVI->video_pos++; - if (n > max_vidbuf) { - lseek(AVI->fdes, n, SEEK_CUR); - return -1; - } - if (avi_read(AVI->fdes, vidbuf, n) != n) - return 0; - return 1; - } else if (strncasecmp(data, AVI->track[AVI->aptr].audio_tag, 4) == 0) { - *len = n; - if (n > max_audbuf) { - lseek(AVI->fdes, n, SEEK_CUR); - return -2; - } - if (avi_read(AVI->fdes, audbuf, n) != n) - return 0; - return 2; - break; - } else if (lseek(AVI->fdes, n, SEEK_CUR) < 0) - return 0; - } -} - -/* AVI_print_error: Print most recent error (similar to perror) */ - -char *(avi_errors[]) = { - /* 0 */ (char *)"avilib - No Error", - /* 1 */ (char *)"avilib - AVI file size limit reached", - /* 2 */ (char *)"avilib - Error opening AVI file", - /* 3 */ (char *)"avilib - Error reading from AVI file", - /* 4 */ (char *)"avilib - Error writing to AVI file", - /* 5 */ (char *)"avilib - Error writing index (file may still be useable)", - /* 6 */ (char *)"avilib - Error closing AVI file", - /* 7 */ (char *)"avilib - Operation (read/write) not permitted", - /* 8 */ (char *)"avilib - Out of memory (malloc failed)", - /* 9 */ (char *)"avilib - Not an AVI file", - /* 10 */ (char *)"avilib - AVI file has no header list (corrupted?)", - /* 11 */ (char *)"avilib - AVI file has no MOVI list (corrupted?)", - /* 12 */ (char *)"avilib - AVI file has no video data", - /* 13 */ (char *)"avilib - operation needs an index", - /* 14 */ (char *)"avilib - Unkown Error"}; -static int num_avi_errors = sizeof(avi_errors) / sizeof(char *); - -static char error_string[4096]; - -void AVI_print_error(char *str) { - int aerrno; - - aerrno = (AVI_errno >= 0 && AVI_errno < num_avi_errors) ? AVI_errno - : num_avi_errors - 1; - - fprintf(stderr, "%s: %s\n", str, avi_errors[aerrno]); - - /* for the following errors, perror should report a more detailed reason: */ - - if (AVI_errno == AVI_ERR_OPEN || AVI_errno == AVI_ERR_READ || - AVI_errno == AVI_ERR_WRITE || AVI_errno == AVI_ERR_WRITE_INDEX || - AVI_errno == AVI_ERR_CLOSE) { - perror("REASON"); - } -} - -char *AVI_strerror() { - int aerrno; - - aerrno = (AVI_errno >= 0 && AVI_errno < num_avi_errors) ? AVI_errno - : num_avi_errors - 1; - - if (AVI_errno == AVI_ERR_OPEN || AVI_errno == AVI_ERR_READ || - AVI_errno == AVI_ERR_WRITE || AVI_errno == AVI_ERR_WRITE_INDEX || - AVI_errno == AVI_ERR_CLOSE) { - sprintf(error_string, "%s - %s", avi_errors[aerrno], strerror(errno)); - return error_string; - } else { - return avi_errors[aerrno]; - } -} - -uint64_t AVI_max_size() { return ((uint64_t)AVI_MAX_LEN); } - -#ifdef __cplusplus -} -#endif diff --git a/examples/heartwall/AVI/avilib.h b/examples/heartwall/AVI/avilib.h deleted file mode 100644 index 57d2a97..0000000 --- a/examples/heartwall/AVI/avilib.h +++ /dev/null @@ -1,317 +0,0 @@ -#ifdef __cplusplus -extern "C" { -#endif - -/* - * avilib.h - * - * Copyright (C) Thomas Östreich - June 2001 - * multiple audio track support Copyright (C) 2002 Thomas Östreich - * - * Original code: - * Copyright (C) 1999 Rainer Johanni - * - * This file is part of transcode, a linux video stream processing tool - * - * transcode is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * transcode is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. - * - */ - -#include -#include -#include -#include -#include -// #include -#include -#include -#include -#include -#include - -#ifndef AVILIB_H -#define AVILIB_H - -#define AVI_MAX_TRACKS 8 - -typedef struct { - unsigned long key; - unsigned long pos; - unsigned long len; -} video_index_entry; - -typedef struct { - unsigned long pos; - unsigned long len; - unsigned long tot; -} audio_index_entry; - -typedef struct track_s { - - long a_fmt; /* Audio format, see #defines below */ - long a_chans; /* Audio channels, 0 for no audio */ - long a_rate; /* Rate in Hz */ - long a_bits; /* bits per audio sample */ - long mp3rate; /* mp3 bitrate kbs*/ - - long audio_strn; /* Audio stream number */ - long audio_bytes; /* Total number of bytes of audio data */ - long audio_chunks; /* Chunks of audio data in the file */ - - char audio_tag[4]; /* Tag of audio data */ - long audio_posc; /* Audio position: chunk */ - long audio_posb; /* Audio position: byte within chunk */ - - long a_codech_off; /* absolut offset of audio codec information */ - long a_codecf_off; /* absolut offset of audio codec information */ - - audio_index_entry *audio_index; - -} track_t; - -typedef struct { - - long fdes; /* File descriptor of AVI file */ - long mode; /* 0 for reading, 1 for writing */ - - long width; /* Width of a video frame */ - long height; /* Height of a video frame */ - double fps; /* Frames per second */ - char compressor[8]; /* Type of compressor, 4 bytes + padding for 0 byte */ - char compressor2[8]; /* Type of compressor, 4 bytes + padding for 0 byte */ - long video_strn; /* Video stream number */ - long video_frames; /* Number of video frames */ - char video_tag[4]; /* Tag of video data */ - long video_pos; /* Number of next frame to be read - (if index present) */ - - unsigned long max_len; /* maximum video chunk present */ - - track_t track[AVI_MAX_TRACKS]; // up to AVI_MAX_TRACKS audio tracks supported - - unsigned long pos; /* position in file */ - long n_idx; /* number of index entries actually filled */ - long max_idx; /* number of index entries actually allocated */ - - long v_codech_off; /* absolut offset of video codec (strh) info */ - long v_codecf_off; /* absolut offset of video codec (strf) info */ - - unsigned char (*idx)[16]; /* index entries (AVI idx1 tag) */ - video_index_entry *video_index; - - unsigned long last_pos; /* Position of last frame written */ - unsigned long last_len; /* Length of last frame written */ - int must_use_index; /* Flag if frames are duplicated */ - unsigned long movi_start; - - int anum; // total number of audio tracks - int aptr; // current audio working track - -} avi_t; - -#define AVI_MODE_WRITE 0 -#define AVI_MODE_READ 1 - -/* The error codes delivered by avi_open_input_file */ - -#define AVI_ERR_SIZELIM \ - 1 /* The write of the data would exceed \ - the maximum size of the AVI file. \ - This is more a warning than an \ - error since the file may be closed safely */ - -#define AVI_ERR_OPEN \ - 2 /* Error opening the AVI file - wrong path \ - name or file nor readable/writable \ - */ - -#define AVI_ERR_READ 3 /* Error reading from AVI File */ - -#define AVI_ERR_WRITE \ - 4 /* Error writing to AVI File, \ - disk full ??? */ - -#define AVI_ERR_WRITE_INDEX \ - 5 /* Could not write index to AVI file \ - during close, file may still be \ - usable */ - -#define AVI_ERR_CLOSE \ - 6 /* Could not write header to AVI file \ - or not truncate the file during \ - close, file is most probably corrupted */ - -#define AVI_ERR_NOT_PERM \ - 7 /* Operation not permitted: \ - trying to read from a file open \ - for writing or vice versa */ - -#define AVI_ERR_NO_MEM 8 /* malloc failed */ - -#define AVI_ERR_NO_AVI 9 /* Not an AVI file */ - -#define AVI_ERR_NO_HDRL \ - 10 /* AVI file has no has no header list, \ - corrupted ??? */ - -#define AVI_ERR_NO_MOVI \ - 11 /* AVI file has no has no MOVI list, \ - corrupted ??? */ - -#define AVI_ERR_NO_VIDS 12 /* AVI file contains no video data */ - -#define AVI_ERR_NO_IDX \ - 13 /* The file has been opened with \ - getIndex==0, but an operation has \ - been performed that needs an index */ - -/* Possible Audio formats */ - -#ifndef WAVE_FORMAT_PCM -#define WAVE_FORMAT_UNKNOWN (0x0000) -#define WAVE_FORMAT_PCM (0x0001) -#define WAVE_FORMAT_ADPCM (0x0002) -#define WAVE_FORMAT_IBM_CVSD (0x0005) -#define WAVE_FORMAT_ALAW (0x0006) -#define WAVE_FORMAT_MULAW (0x0007) -#define WAVE_FORMAT_OKI_ADPCM (0x0010) -#define WAVE_FORMAT_DVI_ADPCM (0x0011) -#define WAVE_FORMAT_DIGISTD (0x0015) -#define WAVE_FORMAT_DIGIFIX (0x0016) -#define WAVE_FORMAT_YAMAHA_ADPCM (0x0020) -#define WAVE_FORMAT_DSP_TRUESPEECH (0x0022) -#define WAVE_FORMAT_GSM610 (0x0031) -#define IBM_FORMAT_MULAW (0x0101) -#define IBM_FORMAT_ALAW (0x0102) -#define IBM_FORMAT_ADPCM (0x0103) -#endif - -avi_t *AVI_open_output_file(char *filename); -void AVI_set_video(avi_t *AVI, int width, int height, double fps, - char *compressor); -void AVI_set_audio(avi_t *AVI, int channels, long rate, int bits, int format, - long mp3rate); -int AVI_write_frame(avi_t *AVI, char *data, long bytes, int keyframe); -int AVI_dup_frame(avi_t *AVI); -int AVI_write_audio(avi_t *AVI, char *data, long bytes); -int AVI_append_audio(avi_t *AVI, char *data, long bytes); -long AVI_bytes_remain(avi_t *AVI); -int AVI_close(avi_t *AVI); -long AVI_bytes_written(avi_t *AVI); - -avi_t *AVI_open_input_file(char *filename, int getIndex); -avi_t *AVI_open_fd(int fd, int getIndex); -int avi_parse_input_file(avi_t *AVI, int getIndex); -long AVI_audio_mp3rate(avi_t *AVI); -long AVI_video_frames(avi_t *AVI); -int AVI_video_width(avi_t *AVI); -int AVI_video_height(avi_t *AVI); -double AVI_frame_rate(avi_t *AVI); -char *AVI_video_compressor(avi_t *AVI); - -int AVI_audio_channels(avi_t *AVI); -int AVI_audio_bits(avi_t *AVI); -int AVI_audio_format(avi_t *AVI); -long AVI_audio_rate(avi_t *AVI); -long AVI_audio_bytes(avi_t *AVI); -long AVI_audio_chunks(avi_t *AVI); - -long AVI_max_video_chunk(avi_t *AVI); - -long AVI_frame_size(avi_t *AVI, long frame); -long AVI_audio_size(avi_t *AVI, long frame); -int AVI_seek_start(avi_t *AVI); -int AVI_set_video_position(avi_t *AVI, long frame); -long AVI_get_video_position(avi_t *AVI, long frame); -long AVI_read_frame(avi_t *AVI, char *vidbuf, int *keyframe); - -int AVI_set_audio_position(avi_t *AVI, long byte); -int AVI_set_audio_bitrate(avi_t *AVI, long bitrate); - -long AVI_read_audio(avi_t *AVI, char *audbuf, long bytes); - -long AVI_audio_codech_offset(avi_t *AVI); -long AVI_audio_codecf_offset(avi_t *AVI); -long AVI_video_codech_offset(avi_t *AVI); -long AVI_video_codecf_offset(avi_t *AVI); - -int AVI_read_data(avi_t *AVI, char *vidbuf, long max_vidbuf, char *audbuf, - long max_audbuf, long *len); - -void AVI_print_error(char *str); -char *AVI_strerror(); -char *AVI_syserror(); - -int AVI_scan(char *name); -int AVI_dump(char *name, int mode); - -char *AVI_codec2str(short cc); -int AVI_file_check(char *import_file); - -void AVI_info(avi_t *avifile); -uint64_t AVI_max_size(); -int avi_update_header(avi_t *AVI); - -int AVI_set_audio_track(avi_t *AVI, int track); -int AVI_get_audio_track(avi_t *AVI); -int AVI_audio_tracks(avi_t *AVI); - -struct riff_struct { - unsigned char id[4]; /* RIFF */ - unsigned long len; - unsigned char wave_id[4]; /* WAVE */ -}; - -struct chunk_struct { - unsigned char id[4]; - unsigned long len; -}; - -struct common_struct { - unsigned short wFormatTag; - unsigned short wChannels; - unsigned long dwSamplesPerSec; - unsigned long dwAvgBytesPerSec; - unsigned short wBlockAlign; - unsigned short wBitsPerSample; /* Only for PCM */ -}; - -struct wave_header { - struct riff_struct riff; - struct chunk_struct format; - struct common_struct common; - struct chunk_struct data; -}; - -struct AVIStreamHeader { - long fccType; - long fccHandler; - long dwFlags; - long dwPriority; - long dwInitialFrames; - long dwScale; - long dwRate; - long dwStart; - long dwLength; - long dwSuggestedBufferSize; - long dwQuality; - long dwSampleSize; -}; - -#endif - -#ifdef __cplusplus -} -#endif diff --git a/examples/heartwall/AVI/avimod.c b/examples/heartwall/AVI/avimod.c deleted file mode 100644 index 64d1edb..0000000 --- a/examples/heartwall/AVI/avimod.c +++ /dev/null @@ -1,130 +0,0 @@ -// #ifdef __cplusplus -// extern "C" { -// #endif - -//=============================================================================================================================================================================================================== -// DEFINE / INCLUDE -//=============================================================================================================================================================================================================== -#include "avimod.h" - -//=============================================================================================================================================================================================================== -// FUNCTIONS -//=============================================================================================================================================================================================================== - -// Flips the specified image and crops it to the specified dimensions -// If scaled == true, all values are scaled to the range [0.0, 1.0 -fp *chop_flip_image(char *image, int height, int width, int cropped, int scaled, - int converted) { - - // fixed dimensions for cropping or not cropping, square vertices starting - // from initial point in top left corner going down and right - int top; - int bottom; - int left; - int right; - if (cropped == 1) { - top = 0; - bottom = 0; - left = 0; - right = 0; - } else { - top = 0; - bottom = height - 1; - left = 0; - right = width - 1; - } - - // dimensions of new cropped image - int height_new = bottom - top + 1; - int width_new = right - left + 1; - - // counters - int i, j; - - // allocate memory for cropped/flipped frame - fp *result = (fp *)malloc(height_new * width_new * sizeof(fp)); - - // crop/flip and scale frame - fp temp; - if (scaled) { - fp scale = 1.0 / 255.0; - for (i = 0; i < height_new; i++) { // rows - for (j = 0; j < width_new; j++) { // colums - temp = - (fp)image[((height - 1 - (i + top)) * width) + (j + left)] * scale; - if (temp < 0) { - result[i * width_new + j] = temp + 256; - } else { - result[i * width_new + j] = temp; - } - } - } - } else { - for (i = 0; i < height_new; i++) { // rows - for (j = 0; j < width_new; j++) { // colums - temp = (fp)image[((height - 1 - (i + top)) * width) + (j + left)]; - if (temp < 0) { - result[i * width_new + j] = temp + 256; - } else { - result[i * width_new + j] = temp; - } - } - } - } - - // convert storage method (from row-major to column-major) - fp *result_converted = (fp *)malloc(height_new * width_new * sizeof(fp)); - if (converted == 1) { - for (i = 0; i < width_new; i++) { // rows - for (j = 0; j < height_new; j++) { // colums - result_converted[i * height_new + j] = result[j * width_new + i]; - } - } - } else { - result_converted = result; - } - free(result); - - // return - return result_converted; -} - -// Returns the specified frame from the specified video file -// If cropped == true, the frame is cropped to pre-determined dimensions -// (hardcoded to the boundaries of the blood vessel in the test video) -// If scaled == true, all values are scaled to the range [0.0, 1.0] -fp *get_frame(avi_t *cell_file, int frame_num, int cropped, int scaled, - int converted) { - - // variable - int dummy; - int width = AVI_video_width(cell_file); - int height = AVI_video_height(cell_file); - int status; - - // There are 600 frames in this file (i.e. frame_num = 600 causes an error) - AVI_set_video_position(cell_file, frame_num); - - // Read in the frame from the AVI - char *image_buf = (char *)malloc(width * height * sizeof(char)); - status = AVI_read_frame(cell_file, image_buf, &dummy); - if (status == -1) { - AVI_print_error((char *)"Error with AVI_read_frame"); - exit(-1); - } - - // The image is read in upside-down, so we need to flip it - fp *image_chopped; - image_chopped = - chop_flip_image(image_buf, height, width, cropped, scaled, converted); - - // free image buffer - free(image_buf); - - // return - return image_chopped; -} - -// #ifdef __cplusplus -// } -// #endif diff --git a/examples/heartwall/AVI/avimod.h b/examples/heartwall/AVI/avimod.h deleted file mode 100644 index f912014..0000000 --- a/examples/heartwall/AVI/avimod.h +++ /dev/null @@ -1,24 +0,0 @@ -#ifdef __cplusplus -extern "C" { -#endif - -//=============================================================================================================================================================================================================== -// DEFINE / INCLUDE -//=============================================================================================================================================================================================================== -#define fp float - -#include "avilib.h" - -//=============================================================================================================================================================================================================== -// DEFINE / INCLUDE -//=============================================================================================================================================================================================================== - -fp *chop_flip_image(char *image, int height, int width, int cropped, int scaled, - int converted); - -fp *get_frame(avi_t *cell_file, int frame_num, int cropped, int scaled, - int converted); - -#ifdef __cplusplus -} -#endif diff --git a/examples/heartwall/define.c b/examples/heartwall/define.c deleted file mode 100644 index 6603910..0000000 --- a/examples/heartwall/define.c +++ /dev/null @@ -1,396 +0,0 @@ -//=============================================================================================================================================================================================================== -//=============================================================================================================================================================================================================== -// DEFINE / INCLUDE -//=============================================================================================================================================================================================================== -//=============================================================================================================================================================================================================== - -#define fp float - -/* #define NUMBER_THREADS 512 */ -#ifdef RD_WG_SIZE_0_0 -#define NUMBER_THREADS RD_WG_SIZE_0_0 -#elif defined(RD_WG_SIZE_0) -#define NUMBER_THREADS RD_WG_SIZE_0 -#elif defined(RD_WG_SIZE) -#define NUMBER_THREADS RD_WG_SIZE -#else -#define NUMBER_THREADS 256 -#endif - -#define ENDO_POINTS 20 -#define EPI_POINTS 31 -#define ALL_POINTS 51 - -//=============================================================================================================================================================================================================== -//=============================================================================================================================================================================================================== -// PARAMS_COMMON_CHANGE STRUCT -//=============================================================================================================================================================================================================== -//=============================================================================================================================================================================================================== - -typedef struct params_common_change { - - //====================================================================================================================================================== - // FRAME - //====================================================================================================================================================== - - fp *d_frame; - int frame_no; - -} params_common_change; - -//=============================================================================================================================================================================================================== -//=============================================================================================================================================================================================================== -// PARAMS_COMMON STRUCTURE -//=============================================================================================================================================================================================================== -//=============================================================================================================================================================================================================== - -typedef struct params_common { - - //====================================================================================================================================================== - // HARDCODED INPUTS FROM MATLAB - //====================================================================================================================================================== - - //==================================================================================================== - // CONSTANTS - //==================================================================================================== - - int sSize; - int tSize; - int maxMove; - fp alpha; - - //==================================================================================================== - // FRAME - //==================================================================================================== - - int no_frames; - int frame_rows; - int frame_cols; - int frame_elem; - int frame_mem; - - //==================================================================================================== - // ENDO POINTS - //==================================================================================================== - - int endoPoints; - int endo_mem; - - int *endoRow; - int *endoCol; - int *tEndoRowLoc; - int *tEndoColLoc; - - int *d_endoRow; - int *d_endoCol; - int *d_tEndoRowLoc; - int *d_tEndoColLoc; - - fp *d_endoT; - - //==================================================================================================== - // EPI POINTS - //==================================================================================================== - int epiPoints; - int epi_mem; - - int *epiRow; - int *epiCol; - int *tEpiRowLoc; - int *tEpiColLoc; - - int *d_epiRow; - int *d_epiCol; - int *d_tEpiRowLoc; - int *d_tEpiColLoc; - - fp *d_epiT; - - //==================================================================================================== - // ALL POINTS - //==================================================================================================== - - int allPoints; - - //====================================================================================================================================================== - // RIGHT TEMPLATE FROM TEMPLATE ARRAY - //====================================================================================================================================================== - - int in_rows; - int in_cols; - int in_elem; - int in_mem; - - //====================================================================================================================================================== - // AREA AROUND POINT FROM FRAME - //====================================================================================================================================================== - - int in2_rows; - int in2_cols; - int in2_elem; - int in2_mem; - - //====================================================================================================================================================== - // CONVOLUTION - //====================================================================================================================================================== - - int conv_rows; - int conv_cols; - int conv_elem; - int conv_mem; - int ioffset; - int joffset; - - //====================================================================================================================================================== - // CUMULATIVE SUM 1 - //====================================================================================================================================================== - - //==================================================================================================== - // PAD ARRAY, VERTICAL CUMULATIVE SUM - //==================================================================================================== - - int in2_pad_add_rows; - int in2_pad_add_cols; - int in2_pad_cumv_rows; - int in2_pad_cumv_cols; - int in2_pad_cumv_elem; - int in2_pad_cumv_mem; - - //==================================================================================================== - // SELECTION - //==================================================================================================== - - int in2_pad_cumv_sel_rows; - int in2_pad_cumv_sel_cols; - int in2_pad_cumv_sel_elem; - int in2_pad_cumv_sel_mem; - int in2_pad_cumv_sel_rowlow; - int in2_pad_cumv_sel_rowhig; - int in2_pad_cumv_sel_collow; - int in2_pad_cumv_sel_colhig; - - //==================================================================================================== - // SELECTION 2, SUBTRACTION, HORIZONTAL CUMULATIVE SUM - //==================================================================================================== - - int in2_pad_cumv_sel2_rowlow; - int in2_pad_cumv_sel2_rowhig; - int in2_pad_cumv_sel2_collow; - int in2_pad_cumv_sel2_colhig; - int in2_sub_cumh_rows; - int in2_sub_cumh_cols; - int in2_sub_cumh_elem; - int in2_sub_cumh_mem; - - //==================================================================================================== - // SELECTION - //==================================================================================================== - - int in2_sub_cumh_sel_rows; - int in2_sub_cumh_sel_cols; - int in2_sub_cumh_sel_elem; - int in2_sub_cumh_sel_mem; - int in2_sub_cumh_sel_rowlow; - int in2_sub_cumh_sel_rowhig; - int in2_sub_cumh_sel_collow; - int in2_sub_cumh_sel_colhig; - - //==================================================================================================== - // SELECTION 2, SUBTRACTION - //==================================================================================================== - - int in2_sub_cumh_sel2_rowlow; - int in2_sub_cumh_sel2_rowhig; - int in2_sub_cumh_sel2_collow; - int in2_sub_cumh_sel2_colhig; - int in2_sub2_rows; - int in2_sub2_cols; - int in2_sub2_elem; - int in2_sub2_mem; - - //====================================================================================================================================================== - // CUMULATIVE SUM 2 - //====================================================================================================================================================== - - //==================================================================================================== - // MULTIPLICATION - //==================================================================================================== - - int in2_sqr_rows; - int in2_sqr_cols; - int in2_sqr_elem; - int in2_sqr_mem; - - //==================================================================================================== - // SELECTION 2, SUBTRACTION - //==================================================================================================== - - int in2_sqr_sub2_rows; - int in2_sqr_sub2_cols; - int in2_sqr_sub2_elem; - int in2_sqr_sub2_mem; - - //====================================================================================================================================================== - // FINAL - //====================================================================================================================================================== - - int in_sqr_rows; - int in_sqr_cols; - int in_sqr_elem; - int in_sqr_mem; - - //====================================================================================================================================================== - // TEMPLATE MASK CREATE - //====================================================================================================================================================== - - int tMask_rows; - int tMask_cols; - int tMask_elem; - int tMask_mem; - - //====================================================================================================================================================== - // POINT MASK INITIALIZE - //====================================================================================================================================================== - - int mask_rows; - int mask_cols; - int mask_elem; - int mask_mem; - - //====================================================================================================================================================== - // MASK CONVOLUTION - //====================================================================================================================================================== - - int mask_conv_rows; - int mask_conv_cols; - int mask_conv_elem; - int mask_conv_mem; - int mask_conv_ioffset; - int mask_conv_joffset; - -} params_common; - -//=============================================================================================================================================================================================================== -//=============================================================================================================================================================================================================== -// PARAMS_UNIQUE STRUCTURE -//=============================================================================================================================================================================================================== -//=============================================================================================================================================================================================================== - -typedef struct params_unique { - - //====================================================================================================================================================== - // POINT NUMBER - //====================================================================================================================================================== - - int *d_Row; - int *d_Col; - int *d_tRowLoc; - int *d_tColLoc; - fp *d_T; - - //====================================================================================================================================================== - // POINT NUMBER - //====================================================================================================================================================== - - int point_no; - - //====================================================================================================================================================== - // RIGHT TEMPLATE FROM TEMPLATE ARRAY - //====================================================================================================================================================== - - int in_pointer; - - //====================================================================================================================================================== - // AREA AROUND POINT FROM FRAME - //====================================================================================================================================================== - - fp *d_in2; - - //====================================================================================================================================================== - // CONVOLUTION - //====================================================================================================================================================== - - fp *d_conv; - fp *d_in_mod; - - //====================================================================================================================================================== - // CUMULATIVE SUM - //====================================================================================================================================================== - - //==================================================================================================== - // PAD ARRAY, VERTICAL CUMULATIVE SUM - //==================================================================================================== - - fp *d_in2_pad_cumv; - - //==================================================================================================== - // SELECTION - //==================================================================================================== - - fp *d_in2_pad_cumv_sel; - - //==================================================================================================== - // SELECTION 2, SUBTRACTION, HORIZONTAL CUMULATIVE SUM - //==================================================================================================== - - fp *d_in2_sub_cumh; - - //==================================================================================================== - // SELECTION - //==================================================================================================== - - fp *d_in2_sub_cumh_sel; - - //==================================================================================================== - // SELECTION 2, SUBTRACTION - //==================================================================================================== - - fp *d_in2_sub2; - - //====================================================================================================================================================== - // CUMULATIVE SUM 2 - //====================================================================================================================================================== - - //==================================================================================================== - // MULTIPLICATION - //==================================================================================================== - - fp *d_in2_sqr; - - //==================================================================================================== - // SELECTION 2, SUBTRACTION - //==================================================================================================== - - fp *d_in2_sqr_sub2; - - //====================================================================================================================================================== - // FINAL - //====================================================================================================================================================== - - fp *d_in_sqr; - - //====================================================================================================================================================== - // TEMPLATE MASK - //====================================================================================================================================================== - - fp *d_tMask; - - //====================================================================================================================================================== - // POINT MASK INITIALIZE - //====================================================================================================================================================== - - fp *d_mask; - - //====================================================================================================================================================== - // MASK CONVOLUTION - //====================================================================================================================================================== - - fp *d_mask_conv; - -} params_unique; - -//=============================================================================================================================================================================================================== -//=============================================================================================================================================================================================================== -// END OF STRUCTURE -//=============================================================================================================================================================================================================== -//=============================================================================================================================================================================================================== diff --git a/examples/heartwall/kernel.cu b/examples/heartwall/kernel.cu deleted file mode 100755 index b9d1945..0000000 --- a/examples/heartwall/kernel.cu +++ /dev/null @@ -1,1239 +0,0 @@ -__global__ void kernel() { - - //====================================================================================================================================================== - // COMMON VARIABLES - //====================================================================================================================================================== - - fp *d_in; - int rot_row; - int rot_col; - int in2_rowlow; - int in2_collow; - int ic; - int jc; - int jp1; - int ja1, ja2; - int ip1; - int ia1, ia2; - int ja, jb; - int ia, ib; - float s; - int i; - int j; - int row; - int col; - int ori_row; - int ori_col; - int position; - float sum; - int pos_ori; - float temp; - float temp2; - int location; - int cent; - int tMask_row; - int tMask_col; - float largest_value_current = 0; - float largest_value = 0; - int largest_coordinate_current = 0; - int largest_coordinate = 0; - float fin_max_val = 0; - int fin_max_coo = 0; - int largest_row; - int largest_col; - int offset_row; - int offset_col; - float in_partial_sum[51]; // WATCH THIS !!! HARDCODED VALUE - float in_sqr_partial_sum[51]; // WATCH THIS !!! HARDCODED VALUE - float in_final_sum; - float in_sqr_final_sum; - float mean; - float mean_sqr; - float variance; - float deviation; - float denomT; - float par_max_val[131]; // WATCH THIS !!! HARDCODED VALUE - int par_max_coo[131]; // WATCH THIS !!! HARDCODED VALUE - int pointer; - float d_in_mod_temp[2601]; - int ori_pointer; - int loc_pointer; - - //====================================================================================================================================================== - // THREAD PARAMETERS - //====================================================================================================================================================== - - int bx = blockIdx.x; // get current horizontal block index (0-n) - int tx = threadIdx.x; // get current horizontal thread index (0-n) - int ei_new = tx; - - //=============================================================================================================================================================================================================== - //=============================================================================================================================================================================================================== - // GENERATE TEMPLATE - //=============================================================================================================================================================================================================== - //=============================================================================================================================================================================================================== - printf("phase1\n"); - // generate templates based on the first frame only - //====================================================================================================================================================== - // GET POINTER TO TEMPLATE FOR THE POINT - //====================================================================================================================================================== - - // pointers to: current template for current point - d_in = &d_unique[bx].d_T[d_unique[bx].in_pointer]; - - //=============================================================================================================================================================================================================== - //=============================================================================================================================================================================================================== - // PROCESS POINTS - //=============================================================================================================================================================================================================== - //=============================================================================================================================================================================================================== - printf("phase2\n"); - // process points in all frames except for the first one - - //====================================================================================================================================================== - // SELECTION - //====================================================================================================================================================== - - in2_rowlow = - d_unique[bx].d_Row[d_unique[bx].point_no] - d_common.sSize; // (1 to n+1) - in2_collow = d_unique[bx].d_Col[d_unique[bx].point_no] - d_common.sSize; - - // work - ei_new = tx; - while (ei_new < d_common.in2_elem) { - - // figure out row/col location in new matrix - row = (ei_new + 1) % d_common.in2_rows - 1; // (0-n) row - col = (ei_new + 1) / d_common.in2_rows + 1 - 1; // (0-n) column - if ((ei_new + 1) % d_common.in2_rows == 0) { - row = d_common.in2_rows - 1; - col = col - 1; - } - - // figure out corresponding location in old matrix and copy values to new - // matrix - ori_row = row + in2_rowlow - 1; - ori_col = col + in2_collow - 1; - d_unique[bx].d_in2[ei_new] = - d_common_change.d_frame[ori_col * d_common.frame_rows + ori_row]; - - // go for second round - ei_new = ei_new + NUMBER_THREADS; - } - - //====================================================================================================================================================== - // SYNCHRONIZE THREADS - //====================================================================================================================================================== - - __syncthreads(); - - //====================================================================================================================================================== - // CONVOLUTION - //====================================================================================================================================================== - - //==================================================================================================== - // ROTATION - //==================================================================================================== - - // variables - d_in = &d_unique[bx].d_T[d_unique[bx].in_pointer]; - - // work - ei_new = tx; - while (ei_new < d_common.in_elem) { - - // figure out row/col location in padded array - row = (ei_new + 1) % d_common.in_rows - 1; // (0-n) row - col = (ei_new + 1) / d_common.in_rows + 1 - 1; // (0-n) column - if ((ei_new + 1) % d_common.in_rows == 0) { - row = d_common.in_rows - 1; - col = col - 1; - } - - // execution - rot_row = (d_common.in_rows - 1) - row; - rot_col = (d_common.in_rows - 1) - col; - d_in_mod_temp[ei_new] = d_in[rot_col * d_common.in_rows + rot_row]; - - // go for second round - ei_new = ei_new + NUMBER_THREADS; - } - - //==================================================================================================== - // SYNCHRONIZE THREADS - //==================================================================================================== - - __syncthreads(); - - //==================================================================================================== - // ACTUAL CONVOLUTION - //==================================================================================================== - - // work - ei_new = tx; - while (ei_new < d_common.conv_elem) { - - // figure out row/col location in array - ic = (ei_new + 1) % d_common.conv_rows; // (1-n) - jc = (ei_new + 1) / d_common.conv_rows + 1; // (1-n) - if ((ei_new + 1) % d_common.conv_rows == 0) { - ic = d_common.conv_rows; - jc = jc - 1; - } - - // - j = jc + d_common.joffset; - jp1 = j + 1; - if (d_common.in2_cols < jp1) { - ja1 = jp1 - d_common.in2_cols; - } else { - ja1 = 1; - } - if (d_common.in_cols < j) { - ja2 = d_common.in_cols; - } else { - ja2 = j; - } - - i = ic + d_common.ioffset; - ip1 = i + 1; - - if (d_common.in2_rows < ip1) { - ia1 = ip1 - d_common.in2_rows; - } else { - ia1 = 1; - } - if (d_common.in_rows < i) { - ia2 = d_common.in_rows; - } else { - ia2 = i; - } - - s = 0; - - for (ja = ja1; ja <= ja2; ja++) { - jb = jp1 - ja; - for (ia = ia1; ia <= ia2; ia++) { - ib = ip1 - ia; - s = s + d_in_mod_temp[d_common.in_rows * (ja - 1) + ia - 1] * - d_unique[bx].d_in2[d_common.in2_rows * (jb - 1) + ib - 1]; - } - } - - // d_unique[bx].d_conv[d_common.conv_rows*(jc-1)+ic-1] = s; - d_unique[bx].d_conv[ei_new] = s; - - // go for second round - ei_new = ei_new + NUMBER_THREADS; - } - - //====================================================================================================================================================== - // SYNCHRONIZE THREADS - //====================================================================================================================================================== - - __syncthreads(); - - //====================================================================================================================================================== - // CUMULATIVE SUM - //====================================================================================================================================================== - - //==================================================================================================== - // PAD ARRAY, VERTICAL CUMULATIVE SUM - //==================================================================================================== - - //================================================== - // PADD ARRAY - //================================================== - - // work - ei_new = tx; - while (ei_new < d_common.in2_pad_cumv_elem) { - - // figure out row/col location in padded array - row = (ei_new + 1) % d_common.in2_pad_cumv_rows - 1; // (0-n) row - col = (ei_new + 1) / d_common.in2_pad_cumv_rows + 1 - 1; // (0-n) column - if ((ei_new + 1) % d_common.in2_pad_cumv_rows == 0) { - row = d_common.in2_pad_cumv_rows - 1; - col = col - 1; - } - - // execution - if (row > (d_common.in2_pad_add_rows - - 1) && // do if has numbers in original array - row < (d_common.in2_pad_add_rows + d_common.in2_rows) && - col > (d_common.in2_pad_add_cols - 1) && - col < (d_common.in2_pad_add_cols + d_common.in2_cols)) { - ori_row = row - d_common.in2_pad_add_rows; - ori_col = col - d_common.in2_pad_add_cols; - d_unique[bx].d_in2_pad_cumv[ei_new] = - d_unique[bx].d_in2[ori_col * d_common.in2_rows + ori_row]; - } else { // do if otherwise - d_unique[bx].d_in2_pad_cumv[ei_new] = 0; - } - - // go for second round - ei_new = ei_new + NUMBER_THREADS; - } - - //================================================== - // SYNCHRONIZE THREADS - //================================================== - - __syncthreads(); - - //================================================== - // VERTICAL CUMULATIVE SUM - //================================================== - - // work - ei_new = tx; - while (ei_new < d_common.in2_pad_cumv_cols) { - - // figure out column position - pos_ori = ei_new * d_common.in2_pad_cumv_rows; - - // variables - sum = 0; - - // loop through all rows - for (position = pos_ori; position < pos_ori + d_common.in2_pad_cumv_rows; - position = position + 1) { - d_unique[bx].d_in2_pad_cumv[position] = - d_unique[bx].d_in2_pad_cumv[position] + sum; - sum = d_unique[bx].d_in2_pad_cumv[position]; - } - - // go for second round - ei_new = ei_new + NUMBER_THREADS; - } - - //==================================================================================================== - // SYNCHRONIZE THREADS - //==================================================================================================== - - __syncthreads(); - - //==================================================================================================== - // SELECTION - //==================================================================================================== - - // work - ei_new = tx; - while (ei_new < d_common.in2_pad_cumv_sel_elem) { - - // figure out row/col location in new matrix - row = (ei_new + 1) % d_common.in2_pad_cumv_sel_rows - 1; // (0-n) row - col = (ei_new + 1) / d_common.in2_pad_cumv_sel_rows + 1 - 1; // (0-n) column - if ((ei_new + 1) % d_common.in2_pad_cumv_sel_rows == 0) { - row = d_common.in2_pad_cumv_sel_rows - 1; - col = col - 1; - } - - // figure out corresponding location in old matrix and copy values to new - // matrix - ori_row = row + d_common.in2_pad_cumv_sel_rowlow - 1; - ori_col = col + d_common.in2_pad_cumv_sel_collow - 1; - d_unique[bx].d_in2_pad_cumv_sel[ei_new] = - d_unique[bx] - .d_in2_pad_cumv[ori_col * d_common.in2_pad_cumv_rows + ori_row]; - - // go for second round - ei_new = ei_new + NUMBER_THREADS; - } - - //==================================================================================================== - // SYNCHRONIZE THREADS - //==================================================================================================== - - __syncthreads(); - - //==================================================================================================== - // SELECTION 2, SUBTRACTION, HORIZONTAL CUMULATIVE SUM - //==================================================================================================== - - //================================================== - // SELECTION 2 - //================================================== - - // work - ei_new = tx; - while (ei_new < d_common.in2_sub_cumh_elem) { - - // figure out row/col location in new matrix - row = (ei_new + 1) % d_common.in2_sub_cumh_rows - 1; // (0-n) row - col = (ei_new + 1) / d_common.in2_sub_cumh_rows + 1 - 1; // (0-n) column - if ((ei_new + 1) % d_common.in2_sub_cumh_rows == 0) { - row = d_common.in2_sub_cumh_rows - 1; - col = col - 1; - } - - // figure out corresponding location in old matrix and copy values to new - // matrix - ori_row = row + d_common.in2_pad_cumv_sel2_rowlow - 1; - ori_col = col + d_common.in2_pad_cumv_sel2_collow - 1; - d_unique[bx].d_in2_sub_cumh[ei_new] = - d_unique[bx] - .d_in2_pad_cumv[ori_col * d_common.in2_pad_cumv_rows + ori_row]; - - // go for second round - ei_new = ei_new + NUMBER_THREADS; - } - - //================================================== - // SYNCHRONIZE THREADS - //================================================== - - __syncthreads(); - - //================================================== - // SUBTRACTION - //================================================== - - // work - ei_new = tx; - while (ei_new < d_common.in2_sub_cumh_elem) { - - // subtract - d_unique[bx].d_in2_sub_cumh[ei_new] = - d_unique[bx].d_in2_pad_cumv_sel[ei_new] - - d_unique[bx].d_in2_sub_cumh[ei_new]; - - // go for second round - ei_new = ei_new + NUMBER_THREADS; - } - - //================================================== - // SYNCHRONIZE THREADS - //================================================== - - __syncthreads(); - - //================================================== - // HORIZONTAL CUMULATIVE SUM - //================================================== - - // work - ei_new = tx; - while (ei_new < d_common.in2_sub_cumh_rows) { - - // figure out row position - pos_ori = ei_new; - - // variables - sum = 0; - - // loop through all rows - for (position = pos_ori; position < pos_ori + d_common.in2_sub_cumh_elem; - position = position + d_common.in2_sub_cumh_rows) { - d_unique[bx].d_in2_sub_cumh[position] = - d_unique[bx].d_in2_sub_cumh[position] + sum; - sum = d_unique[bx].d_in2_sub_cumh[position]; - } - - // go for second round - ei_new = ei_new + NUMBER_THREADS; - } - - //==================================================================================================== - // SYNCHRONIZE THREADS - //==================================================================================================== - - __syncthreads(); - - //==================================================================================================== - // SELECTION - //==================================================================================================== - - // work - ei_new = tx; - while (ei_new < d_common.in2_sub_cumh_sel_elem) { - - // figure out row/col location in new matrix - row = (ei_new + 1) % d_common.in2_sub_cumh_sel_rows - 1; // (0-n) row - col = (ei_new + 1) / d_common.in2_sub_cumh_sel_rows + 1 - 1; // (0-n) column - if ((ei_new + 1) % d_common.in2_sub_cumh_sel_rows == 0) { - row = d_common.in2_sub_cumh_sel_rows - 1; - col = col - 1; - } - - // figure out corresponding location in old matrix and copy values to new - // matrix - ori_row = row + d_common.in2_sub_cumh_sel_rowlow - 1; - ori_col = col + d_common.in2_sub_cumh_sel_collow - 1; - d_unique[bx].d_in2_sub_cumh_sel[ei_new] = - d_unique[bx] - .d_in2_sub_cumh[ori_col * d_common.in2_sub_cumh_rows + ori_row]; - - // go for second round - ei_new = ei_new + NUMBER_THREADS; - } - - //==================================================================================================== - // SYNCHRONIZE THREADS - //==================================================================================================== - - __syncthreads(); - - //==================================================================================================== - // SELECTION 2, SUBTRACTION - //==================================================================================================== - - //================================================== - // SELECTION 2 - //================================================== - - // work - ei_new = tx; - while (ei_new < d_common.in2_sub2_elem) { - - // figure out row/col location in new matrix - row = (ei_new + 1) % d_common.in2_sub2_rows - 1; // (0-n) row - col = (ei_new + 1) / d_common.in2_sub2_rows + 1 - 1; // (0-n) column - if ((ei_new + 1) % d_common.in2_sub2_rows == 0) { - row = d_common.in2_sub2_rows - 1; - col = col - 1; - } - - // figure out corresponding location in old matrix and copy values to new - // matrix - ori_row = row + d_common.in2_sub_cumh_sel2_rowlow - 1; - ori_col = col + d_common.in2_sub_cumh_sel2_collow - 1; - d_unique[bx].d_in2_sub2[ei_new] = - d_unique[bx] - .d_in2_sub_cumh[ori_col * d_common.in2_sub_cumh_rows + ori_row]; - - // go for second round - ei_new = ei_new + NUMBER_THREADS; - } - - //================================================== - // SYNCHRONIZE THREADS - //================================================== - - __syncthreads(); - - //================================================== - // SUBTRACTION - //================================================== - - // work - ei_new = tx; - while (ei_new < d_common.in2_sub2_elem) { - - // subtract - d_unique[bx].d_in2_sub2[ei_new] = d_unique[bx].d_in2_sub_cumh_sel[ei_new] - - d_unique[bx].d_in2_sub2[ei_new]; - - // go for second round - ei_new = ei_new + NUMBER_THREADS; - } - - //====================================================================================================================================================== - // SYNCHRONIZE THREADS - //====================================================================================================================================================== - - __syncthreads(); - - //====================================================================================================================================================== - // CUMULATIVE SUM 2 - //====================================================================================================================================================== - - //==================================================================================================== - // MULTIPLICATION - //==================================================================================================== - - // work - ei_new = tx; - while (ei_new < d_common.in2_sqr_elem) { - - temp = d_unique[bx].d_in2[ei_new]; - d_unique[bx].d_in2_sqr[ei_new] = temp * temp; - - // go for second round - ei_new = ei_new + NUMBER_THREADS; - } - - //==================================================================================================== - // SYNCHRONIZE THREADS - //==================================================================================================== - - __syncthreads(); - - //==================================================================================================== - // PAD ARRAY, VERTICAL CUMULATIVE SUM - //==================================================================================================== - - //================================================== - // PAD ARRAY - //================================================== - - // work - ei_new = tx; - while (ei_new < d_common.in2_pad_cumv_elem) { - - // figure out row/col location in padded array - row = (ei_new + 1) % d_common.in2_pad_cumv_rows - 1; // (0-n) row - col = (ei_new + 1) / d_common.in2_pad_cumv_rows + 1 - 1; // (0-n) column - if ((ei_new + 1) % d_common.in2_pad_cumv_rows == 0) { - row = d_common.in2_pad_cumv_rows - 1; - col = col - 1; - } - - // execution - if (row > (d_common.in2_pad_add_rows - - 1) && // do if has numbers in original array - row < (d_common.in2_pad_add_rows + d_common.in2_sqr_rows) && - col > (d_common.in2_pad_add_cols - 1) && - col < (d_common.in2_pad_add_cols + d_common.in2_sqr_cols)) { - ori_row = row - d_common.in2_pad_add_rows; - ori_col = col - d_common.in2_pad_add_cols; - d_unique[bx].d_in2_pad_cumv[ei_new] = - d_unique[bx].d_in2_sqr[ori_col * d_common.in2_sqr_rows + ori_row]; - } else { // do if otherwise - d_unique[bx].d_in2_pad_cumv[ei_new] = 0; - } - - // go for second round - ei_new = ei_new + NUMBER_THREADS; - } - - //================================================== - // SYNCHRONIZE THREADS - //================================================== - - __syncthreads(); - - //================================================== - // VERTICAL CUMULATIVE SUM - //================================================== - - // work - ei_new = tx; - while (ei_new < d_common.in2_pad_cumv_cols) { - - // figure out column position - pos_ori = ei_new * d_common.in2_pad_cumv_rows; - - // variables - sum = 0; - - // loop through all rows - for (position = pos_ori; position < pos_ori + d_common.in2_pad_cumv_rows; - position = position + 1) { - d_unique[bx].d_in2_pad_cumv[position] = - d_unique[bx].d_in2_pad_cumv[position] + sum; - sum = d_unique[bx].d_in2_pad_cumv[position]; - } - - // go for second round - ei_new = ei_new + NUMBER_THREADS; - } - - //==================================================================================================== - // SYNCHRONIZE THREADS - //==================================================================================================== - - __syncthreads(); - - //==================================================================================================== - // SELECTION - //==================================================================================================== - - // work - ei_new = tx; - while (ei_new < d_common.in2_pad_cumv_sel_elem) { - - // figure out row/col location in new matrix - row = (ei_new + 1) % d_common.in2_pad_cumv_sel_rows - 1; // (0-n) row - col = (ei_new + 1) / d_common.in2_pad_cumv_sel_rows + 1 - 1; // (0-n) column - if ((ei_new + 1) % d_common.in2_pad_cumv_sel_rows == 0) { - row = d_common.in2_pad_cumv_sel_rows - 1; - col = col - 1; - } - - // figure out corresponding location in old matrix and copy values to new - // matrix - ori_row = row + d_common.in2_pad_cumv_sel_rowlow - 1; - ori_col = col + d_common.in2_pad_cumv_sel_collow - 1; - d_unique[bx].d_in2_pad_cumv_sel[ei_new] = - d_unique[bx] - .d_in2_pad_cumv[ori_col * d_common.in2_pad_cumv_rows + ori_row]; - - // go for second round - ei_new = ei_new + NUMBER_THREADS; - } - - //==================================================================================================== - // SYNCHRONIZE THREADS - //==================================================================================================== - - __syncthreads(); - - //==================================================================================================== - // SELECTION 2, SUBTRACTION, HORIZONTAL CUMULATIVE SUM - //==================================================================================================== - - //================================================== - // SELECTION 2 - //================================================== - - // work - ei_new = tx; - while (ei_new < d_common.in2_sub_cumh_elem) { - - // figure out row/col location in new matrix - row = (ei_new + 1) % d_common.in2_sub_cumh_rows - 1; // (0-n) row - col = (ei_new + 1) / d_common.in2_sub_cumh_rows + 1 - 1; // (0-n) column - if ((ei_new + 1) % d_common.in2_sub_cumh_rows == 0) { - row = d_common.in2_sub_cumh_rows - 1; - col = col - 1; - } - - // figure out corresponding location in old matrix and copy values to new - // matrix - ori_row = row + d_common.in2_pad_cumv_sel2_rowlow - 1; - ori_col = col + d_common.in2_pad_cumv_sel2_collow - 1; - d_unique[bx].d_in2_sub_cumh[ei_new] = - d_unique[bx] - .d_in2_pad_cumv[ori_col * d_common.in2_pad_cumv_rows + ori_row]; - - // go for second round - ei_new = ei_new + NUMBER_THREADS; - } - - //================================================== - // SYNCHRONIZE THREADS - //================================================== - - __syncthreads(); - - //================================================== - // SUBTRACTION - //================================================== - - // work - ei_new = tx; - while (ei_new < d_common.in2_sub_cumh_elem) { - - // subtract - d_unique[bx].d_in2_sub_cumh[ei_new] = - d_unique[bx].d_in2_pad_cumv_sel[ei_new] - - d_unique[bx].d_in2_sub_cumh[ei_new]; - - // go for second round - ei_new = ei_new + NUMBER_THREADS; - } - - //================================================== - // HORIZONTAL CUMULATIVE SUM - //================================================== - - // work - ei_new = tx; - while (ei_new < d_common.in2_sub_cumh_rows) { - - // figure out row position - pos_ori = ei_new; - - // variables - sum = 0; - - // loop through all rows - for (position = pos_ori; position < pos_ori + d_common.in2_sub_cumh_elem; - position = position + d_common.in2_sub_cumh_rows) { - d_unique[bx].d_in2_sub_cumh[position] = - d_unique[bx].d_in2_sub_cumh[position] + sum; - sum = d_unique[bx].d_in2_sub_cumh[position]; - } - - // go for second round - ei_new = ei_new + NUMBER_THREADS; - } - - //==================================================================================================== - // SYNCHRONIZE THREADS - //==================================================================================================== - - __syncthreads(); - - //==================================================================================================== - // SELECTION - //==================================================================================================== - - // work - ei_new = tx; - while (ei_new < d_common.in2_sub_cumh_sel_elem) { - - // figure out row/col location in new matrix - row = (ei_new + 1) % d_common.in2_sub_cumh_sel_rows - 1; // (0-n) row - col = (ei_new + 1) / d_common.in2_sub_cumh_sel_rows + 1 - 1; // (0-n) column - if ((ei_new + 1) % d_common.in2_sub_cumh_sel_rows == 0) { - row = d_common.in2_sub_cumh_sel_rows - 1; - col = col - 1; - } - - // figure out corresponding location in old matrix and copy values to new - // matrix - ori_row = row + d_common.in2_sub_cumh_sel_rowlow - 1; - ori_col = col + d_common.in2_sub_cumh_sel_collow - 1; - d_unique[bx].d_in2_sub_cumh_sel[ei_new] = - d_unique[bx] - .d_in2_sub_cumh[ori_col * d_common.in2_sub_cumh_rows + ori_row]; - - // go for second round - ei_new = ei_new + NUMBER_THREADS; - } - - //==================================================================================================== - // SYNCHRONIZE THREADS - //==================================================================================================== - - __syncthreads(); - - //==================================================================================================== - // SELECTION 2, SUBTRACTION - //==================================================================================================== - - //================================================== - // SELECTION 2 - //================================================== - - // work - ei_new = tx; - while (ei_new < d_common.in2_sub2_elem) { - - // figure out row/col location in new matrix - row = (ei_new + 1) % d_common.in2_sub2_rows - 1; // (0-n) row - col = (ei_new + 1) / d_common.in2_sub2_rows + 1 - 1; // (0-n) column - if ((ei_new + 1) % d_common.in2_sub2_rows == 0) { - row = d_common.in2_sub2_rows - 1; - col = col - 1; - } - - // figure out corresponding location in old matrix and copy values to new - // matrix - ori_row = row + d_common.in2_sub_cumh_sel2_rowlow - 1; - ori_col = col + d_common.in2_sub_cumh_sel2_collow - 1; - d_unique[bx].d_in2_sqr_sub2[ei_new] = - d_unique[bx] - .d_in2_sub_cumh[ori_col * d_common.in2_sub_cumh_rows + ori_row]; - - // go for second round - ei_new = ei_new + NUMBER_THREADS; - } - - //================================================== - // SYNCHRONIZE THREADS - //================================================== - - __syncthreads(); - - //================================================== - // SUBTRACTION - //================================================== - - // work - ei_new = tx; - while (ei_new < d_common.in2_sub2_elem) { - - // subtract - d_unique[bx].d_in2_sqr_sub2[ei_new] = - d_unique[bx].d_in2_sub_cumh_sel[ei_new] - - d_unique[bx].d_in2_sqr_sub2[ei_new]; - - // go for second round - ei_new = ei_new + NUMBER_THREADS; - } - - //====================================================================================================================================================== - // SYNCHRONIZE THREADS - //====================================================================================================================================================== - - __syncthreads(); - - //====================================================================================================================================================== - // FINAL - //====================================================================================================================================================== - - //==================================================================================================== - // DENOMINATOR A SAVE RESULT IN CUMULATIVE SUM A2 - //==================================================================================================== - - // work - ei_new = tx; - while (ei_new < d_common.in2_sub2_elem) { - - temp = d_unique[bx].d_in2_sub2[ei_new]; - temp2 = - d_unique[bx].d_in2_sqr_sub2[ei_new] - (temp * temp / d_common.in_elem); - if (temp2 < 0) { - temp2 = 0; - } - d_unique[bx].d_in2_sqr_sub2[ei_new] = sqrt(temp2); - - // go for second round - ei_new = ei_new + NUMBER_THREADS; - } - - //==================================================================================================== - // SYNCHRONIZE THREADS - //==================================================================================================== - - __syncthreads(); - - //==================================================================================================== - // MULTIPLICATION - //==================================================================================================== - - // work - ei_new = tx; - while (ei_new < d_common.in_sqr_elem) { - - temp = d_in[ei_new]; - d_unique[bx].d_in_sqr[ei_new] = temp * temp; - - // go for second round - ei_new = ei_new + NUMBER_THREADS; - } - - //==================================================================================================== - // SYNCHRONIZE THREADS - //==================================================================================================== - - __syncthreads(); - - //==================================================================================================== - // IN SUM - //==================================================================================================== - - // work - ei_new = tx; - while (ei_new < d_common.in_cols) { - - sum = 0; - for (i = 0; i < d_common.in_rows; i++) { - - sum = sum + d_in[ei_new * d_common.in_rows + i]; - } - in_partial_sum[ei_new] = sum; - - // go for second round - ei_new = ei_new + NUMBER_THREADS; - } - - //==================================================================================================== - // SYNCHRONIZE THREADS - //==================================================================================================== - - __syncthreads(); - - //==================================================================================================== - // IN_SQR SUM - //==================================================================================================== - - ei_new = tx; - while (ei_new < d_common.in_sqr_rows) { - - sum = 0; - for (i = 0; i < d_common.in_sqr_cols; i++) { - - sum = sum + d_unique[bx].d_in_sqr[ei_new + d_common.in_sqr_rows * i]; - } - in_sqr_partial_sum[ei_new] = sum; - - // go for second round - ei_new = ei_new + NUMBER_THREADS; - } - - //==================================================================================================== - // SYNCHRONIZE THREADS - //==================================================================================================== - - __syncthreads(); - - //==================================================================================================== - // FINAL SUMMATION - //==================================================================================================== - - in_final_sum = 0; - for (i = 0; i < d_common.in_cols; i++) { - in_final_sum = in_final_sum + in_partial_sum[i]; - } - - { - - in_sqr_final_sum = 0; - for (i = 0; i < d_common.in_sqr_cols; i++) { - in_sqr_final_sum = in_sqr_final_sum + in_sqr_partial_sum[i]; - } - - //==================================================================================================== - // SYNCHRONIZE THREADS - //==================================================================================================== - - __syncthreads(); - - //==================================================================================================== - // DENOMINATOR T - //==================================================================================================== - - mean = in_final_sum / - d_common.in_elem; // gets mean (average) value of element in ROI - mean_sqr = mean * mean; - variance = (in_sqr_final_sum / d_common.in_elem) - - mean_sqr; // gets variance of ROI - deviation = sqrt(variance); // gets standard deviation of ROI - - denomT = sqrt(float(d_common.in_elem - 1)) * deviation; - - //==================================================================================================== - // SYNCHRONIZE THREADS - //==================================================================================================== - - __syncthreads(); - - //==================================================================================================== - // DENOMINATOR SAVE RESULT IN CUMULATIVE SUM A2 - //==================================================================================================== - - // work - ei_new = tx; - while (ei_new < d_common.in2_sub2_elem) { - - d_unique[bx].d_in2_sqr_sub2[ei_new] = - d_unique[bx].d_in2_sqr_sub2[ei_new] * denomT; - - // go for second round - ei_new = ei_new + NUMBER_THREADS; - } - - //==================================================================================================== - // SYNCHRONIZE THREADS - //==================================================================================================== - - __syncthreads(); - - //==================================================================================================== - // NUMERATOR SAVE RESULT IN CONVOLUTION - //==================================================================================================== - - // work - ei_new = tx; - while (ei_new < d_common.conv_elem) { - - d_unique[bx].d_conv[ei_new] = - d_unique[bx].d_conv[ei_new] - - d_unique[bx].d_in2_sub2[ei_new] * in_final_sum / d_common.in_elem; - - // go for second round - ei_new = ei_new + NUMBER_THREADS; - } - - //==================================================================================================== - // SYNCHRONIZE THREADS - //==================================================================================================== - - __syncthreads(); - - //==================================================================================================== - // CORRELATION SAVE RESULT IN CUMULATIVE SUM A2 - //==================================================================================================== - - // work - ei_new = tx; - while (ei_new < d_common.in2_sub2_elem) { - - d_unique[bx].d_in2_sqr_sub2[ei_new] = - d_unique[bx].d_conv[ei_new] / d_unique[bx].d_in2_sqr_sub2[ei_new]; - - // go for second round - ei_new = ei_new + NUMBER_THREADS; - } - - //====================================================================================================================================================== - // SYNCHRONIZE THREADS - //====================================================================================================================================================== - - __syncthreads(); - - //====================================================================================================================================================== - // TEMPLATE MASK CREATE - //====================================================================================================================================================== - - cent = d_common.sSize + d_common.tSize + 1; - if (d_common_change.frame_no == 0) { - tMask_row = cent + d_unique[bx].d_Row[d_unique[bx].point_no] - - d_unique[bx].d_Row[d_unique[bx].point_no] - 1; - tMask_col = cent + d_unique[bx].d_Col[d_unique[bx].point_no] - - d_unique[bx].d_Col[d_unique[bx].point_no] - 1; - } else { - pointer = d_common_change.frame_no - 1 + - d_unique[bx].point_no * d_common.no_frames; - tMask_row = cent + d_unique[bx].d_tRowLoc[pointer] - - d_unique[bx].d_Row[d_unique[bx].point_no] - 1; - tMask_col = cent + d_unique[bx].d_tColLoc[pointer] - - d_unique[bx].d_Col[d_unique[bx].point_no] - 1; - } - - // work - ei_new = tx; - while (ei_new < d_common.tMask_elem) { - - location = tMask_col * d_common.tMask_rows + tMask_row; - - if (ei_new == location) { - d_unique[bx].d_tMask[ei_new] = 1; - } else { - d_unique[bx].d_tMask[ei_new] = 0; - } - - // go for second round - ei_new = ei_new + NUMBER_THREADS; - } - - //====================================================================================================================================================== - // SYNCHRONIZE THREADS - //====================================================================================================================================================== - - __syncthreads(); - - //====================================================================================================================================================== - // MASK CONVOLUTION - //====================================================================================================================================================== - - // work - ei_new = tx; - while (ei_new < d_common.mask_conv_elem) { - - // figure out row/col location in array - ic = (ei_new + 1) % d_common.mask_conv_rows; // (1-n) - jc = (ei_new + 1) / d_common.mask_conv_rows + 1; // (1-n) - if ((ei_new + 1) % d_common.mask_conv_rows == 0) { - ic = d_common.mask_conv_rows; - jc = jc - 1; - } - - // - j = jc + d_common.mask_conv_joffset; - jp1 = j + 1; - if (d_common.mask_cols < jp1) { - ja1 = jp1 - d_common.mask_cols; - } else { - ja1 = 1; - } - if (d_common.tMask_cols < j) { - ja2 = d_common.tMask_cols; - } else { - ja2 = j; - } - - i = ic + d_common.mask_conv_ioffset; - ip1 = i + 1; - - if (d_common.mask_rows < ip1) { - ia1 = ip1 - d_common.mask_rows; - } else { - ia1 = 1; - } - if (d_common.tMask_rows < i) { - ia2 = d_common.tMask_rows; - } else { - ia2 = i; - } - - s = 0; - - for (ja = ja1; ja <= ja2; ja++) { - jb = jp1 - ja; - for (ia = ia1; ia <= ia2; ia++) { - ib = ip1 - ia; - s = s + - d_unique[bx].d_tMask[d_common.tMask_rows * (ja - 1) + ia - 1] * 1; - } - } - - // //d_unique[bx].d_mask_conv[d_common.mask_conv_rows*(jc-1)+ic-1] = s; - d_unique[bx].d_mask_conv[ei_new] = - d_unique[bx].d_in2_sqr_sub2[ei_new] * s; - - // go for second round - ei_new = ei_new + NUMBER_THREADS; - } - - //====================================================================================================================================================== - // SYNCHRONIZE THREADS - //====================================================================================================================================================== - - __syncthreads(); - - //====================================================================================================================================================== - // MAXIMUM VALUE - //====================================================================================================================================================== - - //==================================================================================================== - // INITIAL SEARCH - //==================================================================================================== - - ei_new = tx; - while (ei_new < d_common.mask_conv_rows) { - - for (i = 0; i < d_common.mask_conv_cols; i++) { - largest_coordinate_current = ei_new * d_common.mask_conv_rows + i; - largest_value_current = - abs(d_unique[bx].d_mask_conv[largest_coordinate_current]); - if (largest_value_current > largest_value) { - largest_coordinate = largest_coordinate_current; - largest_value = largest_value_current; - } - } - par_max_coo[ei_new] = largest_coordinate; - par_max_val[ei_new] = largest_value; - - // go for second round - ei_new = ei_new + NUMBER_THREADS; - } - - //==================================================================================================== - // SYNCHRONIZE THREADS - //==================================================================================================== - - __syncthreads(); - - //==================================================================================================== - // FINAL SEARCH - //==================================================================================================== - - for (i = 0; i < d_common.mask_conv_rows; i++) { - if (par_max_val[i] > fin_max_val) { - fin_max_val = par_max_val[i]; - fin_max_coo = par_max_coo[i]; - } - } - - // convert coordinate to row/col form - largest_row = (fin_max_coo + 1) % d_common.mask_conv_rows - 1; // (0-n) row - largest_col = (fin_max_coo + 1) / d_common.mask_conv_rows; // (0-n) column - if ((fin_max_coo + 1) % d_common.mask_conv_rows == 0) { - largest_row = d_common.mask_conv_rows - 1; - largest_col = largest_col - 1; - } - - // calculate offset - largest_row = largest_row + 1; // compensate to match MATLAB format (1-n) - largest_col = largest_col + 1; // compensate to match MATLAB format (1-n) - offset_row = - largest_row - d_common.in_rows - (d_common.sSize - d_common.tSize); - offset_col = - largest_col - d_common.in_cols - (d_common.sSize - d_common.tSize); - pointer = - d_common_change.frame_no + d_unique[bx].point_no * d_common.no_frames; - d_unique[bx].d_tRowLoc[pointer] = - d_unique[bx].d_Row[d_unique[bx].point_no] + offset_row; - d_unique[bx].d_tColLoc[pointer] = - d_unique[bx].d_Col[d_unique[bx].point_no] + offset_col; - - //====================================================================================================================================================== - // SYNCHRONIZE THREADS - //====================================================================================================================================================== - - __syncthreads(); - } diff --git a/examples/heartwall/main.cu b/examples/heartwall/main.cu deleted file mode 100644 index ca80319..0000000 --- a/examples/heartwall/main.cu +++ /dev/null @@ -1,795 +0,0 @@ -//=============================================================================================================================================================================================================== -//=============================================================================================================================================================================================================== -// DEFINE / INCLUDE -//=============================================================================================================================================================================================================== -//=============================================================================================================================================================================================================== - -//====================================================================================================================================================== -// LIBRARIES -//====================================================================================================================================================== - -#include -#include -#include - -#include -#include -#include - -//====================================================================================================================================================== -// STRUCTURES, GLOBAL STRUCTURE VARIABLES -//====================================================================================================================================================== - -#include "define.c" - -params_common_change common_change; -__constant__ params_common_change d_common_change; - -params_common common; -__constant__ params_common d_common; - -params_unique unique[ALL_POINTS]; // cannot determine size dynamically so choose - // more than usually needed -__constant__ params_unique d_unique[ALL_POINTS]; - -//====================================================================================================================================================== -// KERNEL CODE -//====================================================================================================================================================== - -#include "kernel.cu" - -// WRITE DATA FUNCTION -//===============================================================================================================================================================================================================200 - -void write_data(char *filename, int frameNo, int frames_processed, - int endoPoints, int *input_a, int *input_b, int epiPoints, - int *input_2a, int *input_2b) { - - //================================================================================80 - // VARIABLES - //================================================================================80 - - FILE *fid; - int i, j; - char c; - - //================================================================================80 - // OPEN FILE FOR READING - //================================================================================80 - - fid = fopen(filename, "w+"); - if (fid == NULL) { - printf("The file was not opened for writing\n"); - return; - } - - //================================================================================80 - // WRITE VALUES TO THE FILE - //================================================================================80 - fprintf(fid, "Total AVI Frames: %d\n", frameNo); - fprintf(fid, "Frames Processed: %d\n", frames_processed); - fprintf(fid, "endoPoints: %d\n", endoPoints); - fprintf(fid, "epiPoints: %d", epiPoints); - for (j = 0; j < frames_processed; j++) { - fprintf(fid, "\n---Frame %d---", j); - fprintf(fid, "\n--endo--\n", j); - for (i = 0; i < endoPoints; i++) { - fprintf(fid, "%d\t", input_a[j + i * frameNo]); - } - fprintf(fid, "\n"); - for (i = 0; i < endoPoints; i++) { - // if(input_b[j*size+i] > 2000) input_b[j*size+i]=0; - fprintf(fid, "%d\t", input_b[j + i * frameNo]); - } - fprintf(fid, "\n--epi--\n", j); - for (i = 0; i < epiPoints; i++) { - // if(input_2a[j*size_2+i] > 2000) input_2a[j*size_2+i]=0; - fprintf(fid, "%d\t", input_2a[j + i * frameNo]); - } - fprintf(fid, "\n"); - for (i = 0; i < epiPoints; i++) { - // if(input_2b[j*size_2+i] > 2000) input_2b[j*size_2+i]=0; - fprintf(fid, "%d\t", input_2b[j + i * frameNo]); - } - } - // ================================================================================80 - // CLOSE FILE - // ================================================================================80 - - fclose(fid); -} - -//=============================================================================================================================================================================================================== -//=============================================================================================================================================================================================================== -// MAIN FUNCTION -//=============================================================================================================================================================================================================== -//=============================================================================================================================================================================================================== -int main(int argc, char *argv[]) { - cudaSetDevice(0); - printf("WG size of kernel = %d \n", NUMBER_THREADS); - //====================================================================================================================================================== - // VARIABLES - //====================================================================================================================================================== - - // CUDA kernel execution parameters - dim3 threads; - dim3 blocks; - - // counter - int i; - int frames_processed; - - // frames - char *video_file_name; - avi_t *frames; - fp *frame; - - //====================================================================================================================================================== - // FRAME - //====================================================================================================================================================== - - if (argc != 3) { - printf("ERROR: usage: heartwall \n"); - exit(1); - } - - // open movie file - video_file_name = argv[1]; - frames = (avi_t *)AVI_open_input_file(video_file_name, 1); // added casting - if (frames == NULL) { - AVI_print_error((char *)"Error with AVI_open_input_file"); - return -1; - } - - // common - common.no_frames = AVI_video_frames(frames); - common.frame_rows = AVI_video_height(frames); - common.frame_cols = AVI_video_width(frames); - common.frame_elem = common.frame_rows * common.frame_cols; - common.frame_mem = sizeof(fp) * common.frame_elem; - - // pointers - cudaMalloc((void **)&common_change.d_frame, common.frame_mem); - - //====================================================================================================================================================== - // CHECK INPUT ARGUMENTS - //====================================================================================================================================================== - - frames_processed = atoi(argv[2]); - if (frames_processed < 0 || frames_processed > common.no_frames) { - printf("ERROR: %d is an incorrect number of frames specified, select in " - "the range of 0-%d\n", - frames_processed, common.no_frames); - return 0; - } - - //====================================================================================================================================================== - // HARDCODED INPUTS FROM MATLAB - //====================================================================================================================================================== - - //==================================================================================================== - // CONSTANTS - //==================================================================================================== - - common.sSize = 40; - common.tSize = 25; - common.maxMove = 10; - common.alpha = 0.87; - - //==================================================================================================== - // ENDO POINTS - //==================================================================================================== - - common.endoPoints = ENDO_POINTS; - common.endo_mem = sizeof(int) * common.endoPoints; - - common.endoRow = (int *)malloc(common.endo_mem); - common.endoRow[0] = 369; - common.endoRow[1] = 400; - common.endoRow[2] = 429; - common.endoRow[3] = 452; - common.endoRow[4] = 476; - common.endoRow[5] = 486; - common.endoRow[6] = 479; - common.endoRow[7] = 458; - common.endoRow[8] = 433; - common.endoRow[9] = 404; - common.endoRow[10] = 374; - common.endoRow[11] = 346; - common.endoRow[12] = 318; - common.endoRow[13] = 294; - common.endoRow[14] = 277; - common.endoRow[15] = 269; - common.endoRow[16] = 275; - common.endoRow[17] = 287; - common.endoRow[18] = 311; - common.endoRow[19] = 339; - cudaMalloc((void **)&common.d_endoRow, common.endo_mem); - cudaMemcpy(common.d_endoRow, common.endoRow, common.endo_mem, - cudaMemcpyHostToDevice); - - common.endoCol = (int *)malloc(common.endo_mem); - common.endoCol[0] = 408; - common.endoCol[1] = 406; - common.endoCol[2] = 397; - common.endoCol[3] = 383; - common.endoCol[4] = 354; - common.endoCol[5] = 322; - common.endoCol[6] = 294; - common.endoCol[7] = 270; - common.endoCol[8] = 250; - common.endoCol[9] = 237; - common.endoCol[10] = 235; - common.endoCol[11] = 241; - common.endoCol[12] = 254; - common.endoCol[13] = 273; - common.endoCol[14] = 300; - common.endoCol[15] = 328; - common.endoCol[16] = 356; - common.endoCol[17] = 383; - common.endoCol[18] = 401; - common.endoCol[19] = 411; - cudaMalloc((void **)&common.d_endoCol, common.endo_mem); - cudaMemcpy(common.d_endoCol, common.endoCol, common.endo_mem, - cudaMemcpyHostToDevice); - - common.tEndoRowLoc = (int *)malloc(common.endo_mem * common.no_frames); - cudaMalloc((void **)&common.d_tEndoRowLoc, - common.endo_mem * common.no_frames); - - common.tEndoColLoc = (int *)malloc(common.endo_mem * common.no_frames); - cudaMalloc((void **)&common.d_tEndoColLoc, - common.endo_mem * common.no_frames); - - //==================================================================================================== - // EPI POINTS - //==================================================================================================== - - common.epiPoints = EPI_POINTS; - common.epi_mem = sizeof(int) * common.epiPoints; - - common.epiRow = (int *)malloc(common.epi_mem); - common.epiRow[0] = 390; - common.epiRow[1] = 419; - common.epiRow[2] = 448; - common.epiRow[3] = 474; - common.epiRow[4] = 501; - common.epiRow[5] = 519; - common.epiRow[6] = 535; - common.epiRow[7] = 542; - common.epiRow[8] = 543; - common.epiRow[9] = 538; - common.epiRow[10] = 528; - common.epiRow[11] = 511; - common.epiRow[12] = 491; - common.epiRow[13] = 466; - common.epiRow[14] = 438; - common.epiRow[15] = 406; - common.epiRow[16] = 376; - common.epiRow[17] = 347; - common.epiRow[18] = 318; - common.epiRow[19] = 291; - common.epiRow[20] = 275; - common.epiRow[21] = 259; - common.epiRow[22] = 256; - common.epiRow[23] = 252; - common.epiRow[24] = 252; - common.epiRow[25] = 257; - common.epiRow[26] = 266; - common.epiRow[27] = 283; - common.epiRow[28] = 305; - common.epiRow[29] = 331; - common.epiRow[30] = 360; - cudaMalloc((void **)&common.d_epiRow, common.epi_mem); - cudaMemcpy(common.d_epiRow, common.epiRow, common.epi_mem, - cudaMemcpyHostToDevice); - - common.epiCol = (int *)malloc(common.epi_mem); - common.epiCol[0] = 457; - common.epiCol[1] = 454; - common.epiCol[2] = 446; - common.epiCol[3] = 431; - common.epiCol[4] = 411; - common.epiCol[5] = 388; - common.epiCol[6] = 361; - common.epiCol[7] = 331; - common.epiCol[8] = 301; - common.epiCol[9] = 273; - common.epiCol[10] = 243; - common.epiCol[11] = 218; - common.epiCol[12] = 196; - common.epiCol[13] = 178; - common.epiCol[14] = 166; - common.epiCol[15] = 157; - common.epiCol[16] = 155; - common.epiCol[17] = 165; - common.epiCol[18] = 177; - common.epiCol[19] = 197; - common.epiCol[20] = 218; - common.epiCol[21] = 248; - common.epiCol[22] = 276; - common.epiCol[23] = 304; - common.epiCol[24] = 333; - common.epiCol[25] = 361; - common.epiCol[26] = 391; - common.epiCol[27] = 415; - common.epiCol[28] = 434; - common.epiCol[29] = 448; - common.epiCol[30] = 455; - cudaMalloc((void **)&common.d_epiCol, common.epi_mem); - cudaMemcpy(common.d_epiCol, common.epiCol, common.epi_mem, - cudaMemcpyHostToDevice); - - common.tEpiRowLoc = (int *)malloc(common.epi_mem * common.no_frames); - cudaMalloc((void **)&common.d_tEpiRowLoc, common.epi_mem * common.no_frames); - - common.tEpiColLoc = (int *)malloc(common.epi_mem * common.no_frames); - cudaMalloc((void **)&common.d_tEpiColLoc, common.epi_mem * common.no_frames); - - //==================================================================================================== - // ALL POINTS - //==================================================================================================== - - common.allPoints = ALL_POINTS; - - //====================================================================================================================================================== - // TEMPLATE SIZES - //====================================================================================================================================================== - - // common - common.in_rows = common.tSize + 1 + common.tSize; - common.in_cols = common.in_rows; - common.in_elem = common.in_rows * common.in_cols; - common.in_mem = sizeof(fp) * common.in_elem; - - //====================================================================================================================================================== - // CREATE ARRAY OF TEMPLATES FOR ALL POINTS - //====================================================================================================================================================== - - // common - cudaMalloc((void **)&common.d_endoT, common.in_mem * common.endoPoints); - cudaMalloc((void **)&common.d_epiT, common.in_mem * common.epiPoints); - - //====================================================================================================================================================== - // SPECIFIC TO ENDO OR EPI TO BE SET HERE - //====================================================================================================================================================== - - for (i = 0; i < common.endoPoints; i++) { - unique[i].point_no = i; - unique[i].d_Row = common.d_endoRow; - unique[i].d_Col = common.d_endoCol; - unique[i].d_tRowLoc = common.d_tEndoRowLoc; - unique[i].d_tColLoc = common.d_tEndoColLoc; - unique[i].d_T = common.d_endoT; - } - for (i = common.endoPoints; i < common.allPoints; i++) { - unique[i].point_no = i - common.endoPoints; - unique[i].d_Row = common.d_epiRow; - unique[i].d_Col = common.d_epiCol; - unique[i].d_tRowLoc = common.d_tEpiRowLoc; - unique[i].d_tColLoc = common.d_tEpiColLoc; - unique[i].d_T = common.d_epiT; - } - - //====================================================================================================================================================== - // RIGHT TEMPLATE FROM TEMPLATE ARRAY - //====================================================================================================================================================== - - // pointers - for (i = 0; i < common.allPoints; i++) { - unique[i].in_pointer = unique[i].point_no * common.in_elem; - } - - //====================================================================================================================================================== - // AREA AROUND POINT FROM FRAME - //====================================================================================================================================================== - - // common - common.in2_rows = 2 * common.sSize + 1; - common.in2_cols = 2 * common.sSize + 1; - common.in2_elem = common.in2_rows * common.in2_cols; - common.in2_mem = sizeof(float) * common.in2_elem; - - // pointers - for (i = 0; i < common.allPoints; i++) { - cudaMalloc((void **)&unique[i].d_in2, common.in2_mem); - } - - //====================================================================================================================================================== - // CONVOLUTION - //====================================================================================================================================================== - - // common - common.conv_rows = - common.in_rows + common.in2_rows - 1; // number of rows in I - common.conv_cols = - common.in_cols + common.in2_cols - 1; // number of columns in I - common.conv_elem = common.conv_rows * common.conv_cols; // number of elements - common.conv_mem = sizeof(float) * common.conv_elem; - common.ioffset = 0; - common.joffset = 0; - - // pointers - for (i = 0; i < common.allPoints; i++) { - cudaMalloc((void **)&unique[i].d_conv, common.conv_mem); - } - - //====================================================================================================================================================== - // CUMULATIVE SUM - //====================================================================================================================================================== - - //==================================================================================================== - // PADDING OF ARRAY, VERTICAL CUMULATIVE SUM - //==================================================================================================== - - // common - common.in2_pad_add_rows = common.in_rows; - common.in2_pad_add_cols = common.in_cols; - - common.in2_pad_cumv_rows = common.in2_rows + 2 * common.in2_pad_add_rows; - common.in2_pad_cumv_cols = common.in2_cols + 2 * common.in2_pad_add_cols; - common.in2_pad_cumv_elem = - common.in2_pad_cumv_rows * common.in2_pad_cumv_cols; - common.in2_pad_cumv_mem = sizeof(float) * common.in2_pad_cumv_elem; - - // pointers - for (i = 0; i < common.allPoints; i++) { - cudaMalloc((void **)&unique[i].d_in2_pad_cumv, common.in2_pad_cumv_mem); - } - - //==================================================================================================== - // SELECTION - //==================================================================================================== - - // common - common.in2_pad_cumv_sel_rowlow = 1 + common.in_rows; // (1 to n+1) - common.in2_pad_cumv_sel_rowhig = common.in2_pad_cumv_rows - 1; - common.in2_pad_cumv_sel_collow = 1; - common.in2_pad_cumv_sel_colhig = common.in2_pad_cumv_cols; - common.in2_pad_cumv_sel_rows = - common.in2_pad_cumv_sel_rowhig - common.in2_pad_cumv_sel_rowlow + 1; - common.in2_pad_cumv_sel_cols = - common.in2_pad_cumv_sel_colhig - common.in2_pad_cumv_sel_collow + 1; - common.in2_pad_cumv_sel_elem = - common.in2_pad_cumv_sel_rows * common.in2_pad_cumv_sel_cols; - common.in2_pad_cumv_sel_mem = sizeof(float) * common.in2_pad_cumv_sel_elem; - - // pointers - for (i = 0; i < common.allPoints; i++) { - cudaMalloc((void **)&unique[i].d_in2_pad_cumv_sel, - common.in2_pad_cumv_sel_mem); - } - - //==================================================================================================== - // SELECTION 2, SUBTRACTION, HORIZONTAL CUMULATIVE SUM - //==================================================================================================== - - // common - common.in2_pad_cumv_sel2_rowlow = 1; - common.in2_pad_cumv_sel2_rowhig = - common.in2_pad_cumv_rows - common.in_rows - 1; - common.in2_pad_cumv_sel2_collow = 1; - common.in2_pad_cumv_sel2_colhig = common.in2_pad_cumv_cols; - common.in2_sub_cumh_rows = - common.in2_pad_cumv_sel2_rowhig - common.in2_pad_cumv_sel2_rowlow + 1; - common.in2_sub_cumh_cols = - common.in2_pad_cumv_sel2_colhig - common.in2_pad_cumv_sel2_collow + 1; - common.in2_sub_cumh_elem = - common.in2_sub_cumh_rows * common.in2_sub_cumh_cols; - common.in2_sub_cumh_mem = sizeof(float) * common.in2_sub_cumh_elem; - - // pointers - for (i = 0; i < common.allPoints; i++) { - cudaMalloc((void **)&unique[i].d_in2_sub_cumh, common.in2_sub_cumh_mem); - } - - //==================================================================================================== - // SELECTION - //==================================================================================================== - - // common - common.in2_sub_cumh_sel_rowlow = 1; - common.in2_sub_cumh_sel_rowhig = common.in2_sub_cumh_rows; - common.in2_sub_cumh_sel_collow = 1 + common.in_cols; - common.in2_sub_cumh_sel_colhig = common.in2_sub_cumh_cols - 1; - common.in2_sub_cumh_sel_rows = - common.in2_sub_cumh_sel_rowhig - common.in2_sub_cumh_sel_rowlow + 1; - common.in2_sub_cumh_sel_cols = - common.in2_sub_cumh_sel_colhig - common.in2_sub_cumh_sel_collow + 1; - common.in2_sub_cumh_sel_elem = - common.in2_sub_cumh_sel_rows * common.in2_sub_cumh_sel_cols; - common.in2_sub_cumh_sel_mem = sizeof(float) * common.in2_sub_cumh_sel_elem; - - // pointers - for (i = 0; i < common.allPoints; i++) { - cudaMalloc((void **)&unique[i].d_in2_sub_cumh_sel, - common.in2_sub_cumh_sel_mem); - } - - //==================================================================================================== - // SELECTION 2, SUBTRACTION - //==================================================================================================== - - // common - common.in2_sub_cumh_sel2_rowlow = 1; - common.in2_sub_cumh_sel2_rowhig = common.in2_sub_cumh_rows; - common.in2_sub_cumh_sel2_collow = 1; - common.in2_sub_cumh_sel2_colhig = - common.in2_sub_cumh_cols - common.in_cols - 1; - common.in2_sub2_rows = - common.in2_sub_cumh_sel2_rowhig - common.in2_sub_cumh_sel2_rowlow + 1; - common.in2_sub2_cols = - common.in2_sub_cumh_sel2_colhig - common.in2_sub_cumh_sel2_collow + 1; - common.in2_sub2_elem = common.in2_sub2_rows * common.in2_sub2_cols; - common.in2_sub2_mem = sizeof(float) * common.in2_sub2_elem; - - // pointers - for (i = 0; i < common.allPoints; i++) { - cudaMalloc((void **)&unique[i].d_in2_sub2, common.in2_sub2_mem); - } - - //====================================================================================================================================================== - // CUMULATIVE SUM 2 - //====================================================================================================================================================== - - //==================================================================================================== - // MULTIPLICATION - //==================================================================================================== - - // common - common.in2_sqr_rows = common.in2_rows; - common.in2_sqr_cols = common.in2_cols; - common.in2_sqr_elem = common.in2_elem; - common.in2_sqr_mem = common.in2_mem; - - // pointers - for (i = 0; i < common.allPoints; i++) { - cudaMalloc((void **)&unique[i].d_in2_sqr, common.in2_sqr_mem); - } - - //==================================================================================================== - // SELECTION 2, SUBTRACTION - //==================================================================================================== - - // common - common.in2_sqr_sub2_rows = common.in2_sub2_rows; - common.in2_sqr_sub2_cols = common.in2_sub2_cols; - common.in2_sqr_sub2_elem = common.in2_sub2_elem; - common.in2_sqr_sub2_mem = common.in2_sub2_mem; - - // pointers - for (i = 0; i < common.allPoints; i++) { - cudaMalloc((void **)&unique[i].d_in2_sqr_sub2, common.in2_sqr_sub2_mem); - } - - //====================================================================================================================================================== - // FINAL - //====================================================================================================================================================== - - // common - common.in_sqr_rows = common.in_rows; - common.in_sqr_cols = common.in_cols; - common.in_sqr_elem = common.in_elem; - common.in_sqr_mem = common.in_mem; - - // pointers - for (i = 0; i < common.allPoints; i++) { - cudaMalloc((void **)&unique[i].d_in_sqr, common.in_sqr_mem); - } - - //====================================================================================================================================================== - // TEMPLATE MASK CREATE - //====================================================================================================================================================== - - // common - common.tMask_rows = common.in_rows + (common.sSize + 1 + common.sSize) - 1; - common.tMask_cols = common.tMask_rows; - common.tMask_elem = common.tMask_rows * common.tMask_cols; - common.tMask_mem = sizeof(float) * common.tMask_elem; - - // pointers - for (i = 0; i < common.allPoints; i++) { - cudaMalloc((void **)&unique[i].d_tMask, common.tMask_mem); - } - - //====================================================================================================================================================== - // POINT MASK INITIALIZE - //====================================================================================================================================================== - - // common - common.mask_rows = common.maxMove; - common.mask_cols = common.mask_rows; - common.mask_elem = common.mask_rows * common.mask_cols; - common.mask_mem = sizeof(float) * common.mask_elem; - - //====================================================================================================================================================== - // MASK CONVOLUTION - //====================================================================================================================================================== - - // common - common.mask_conv_rows = common.tMask_rows; // number of rows in I - common.mask_conv_cols = common.tMask_cols; // number of columns in I - common.mask_conv_elem = - common.mask_conv_rows * common.mask_conv_cols; // number of elements - common.mask_conv_mem = sizeof(float) * common.mask_conv_elem; - common.mask_conv_ioffset = (common.mask_rows - 1) / 2; - if ((common.mask_rows - 1) % 2 > 0.5) { - common.mask_conv_ioffset = common.mask_conv_ioffset + 1; - } - common.mask_conv_joffset = (common.mask_cols - 1) / 2; - if ((common.mask_cols - 1) % 2 > 0.5) { - common.mask_conv_joffset = common.mask_conv_joffset + 1; - } - - // pointers - for (i = 0; i < common.allPoints; i++) { - cudaMalloc((void **)&unique[i].d_mask_conv, common.mask_conv_mem); - } - - //====================================================================================================================================================== - // KERNEL - //====================================================================================================================================================== - - //==================================================================================================== - // THREAD BLOCK - //==================================================================================================== - - // All kernels operations within kernel use same max size of threads. Size of - // block size is set to the size appropriate for max size operation (on padded - // matrix). Other use subsets of that. - threads.x = NUMBER_THREADS; // define the number of threads in the block - threads.y = 1; - blocks.x = common.allPoints; // define the number of blocks in the grid - blocks.y = 1; - - //==================================================================================================== - // COPY ARGUMENTS - //==================================================================================================== - - cudaMemcpyToSymbol(d_common, &common, sizeof(params_common)); - cudaMemcpyToSymbol(d_unique, &unique, sizeof(params_unique) * ALL_POINTS); - - //==================================================================================================== - // PRINT FRAME PROGRESS START - //==================================================================================================== - - printf("frame progress: "); - fflush(NULL); - - //==================================================================================================== - // LAUNCH - //==================================================================================================== - - for (common_change.frame_no = 0; common_change.frame_no < frames_processed; - common_change.frame_no++) { - printf("get frame\n"); - // Extract a cropped version of the first frame from the video file - frame = get_frame( - frames, // pointer to video file - common_change.frame_no, // number of frame that needs to be returned - 0, // cropped? - 0, // scaled? - 1); // converted - printf("memcpy\n"); - // copy frame to GPU memory - cudaMemcpy(common_change.d_frame, frame, common.frame_mem, - cudaMemcpyHostToDevice); - printf("toSymbol\n"); - cudaMemcpyToSymbol(d_common_change, &common_change, - sizeof(params_common_change)); - - // launch GPU kernel - printf("launch\n"); - kernel<<<1, 32>>>(); - cudaDeviceSynchronize(); - printf("return\n"); - // free frame after each loop iteration, since AVI library allocates memory - // for every frame fetched - printf("free\n"); - free(frame); - - // print frame progress - printf("%d ", common_change.frame_no); - fflush(NULL); - } - - //==================================================================================================== - // PRINT FRAME PROGRESS END - //==================================================================================================== - - printf("\n"); - fflush(NULL); - - //==================================================================================================== - // OUTPUT - //==================================================================================================== - - cudaMemcpy(common.tEndoRowLoc, common.d_tEndoRowLoc, - common.endo_mem * common.no_frames, cudaMemcpyDeviceToHost); - cudaMemcpy(common.tEndoColLoc, common.d_tEndoColLoc, - common.endo_mem * common.no_frames, cudaMemcpyDeviceToHost); - - cudaMemcpy(common.tEpiRowLoc, common.d_tEpiRowLoc, - common.epi_mem * common.no_frames, cudaMemcpyDeviceToHost); - cudaMemcpy(common.tEpiColLoc, common.d_tEpiColLoc, - common.epi_mem * common.no_frames, cudaMemcpyDeviceToHost); - -#ifdef OUTPUT - - //==================================================50 - // DUMP DATA TO FILE - //==================================================50 - write_data("result.txt", common.no_frames, frames_processed, - common.endoPoints, common.tEndoRowLoc, common.tEndoColLoc, - common.epiPoints, common.tEpiRowLoc, common.tEpiColLoc); - - //==================================================50 - // End - //==================================================50 - -#endif - - //====================================================================================================================================================== - // DEALLOCATION - //====================================================================================================================================================== - - //==================================================================================================== - // COMMON - //==================================================================================================== - - // frame - cudaFree(common_change.d_frame); - - // endo points - free(common.endoRow); - free(common.endoCol); - free(common.tEndoRowLoc); - free(common.tEndoColLoc); - - cudaFree(common.d_endoRow); - cudaFree(common.d_endoCol); - cudaFree(common.d_tEndoRowLoc); - cudaFree(common.d_tEndoColLoc); - - cudaFree(common.d_endoT); - - // epi points - free(common.epiRow); - free(common.epiCol); - free(common.tEpiRowLoc); - free(common.tEpiColLoc); - - cudaFree(common.d_epiRow); - cudaFree(common.d_epiCol); - cudaFree(common.d_tEpiRowLoc); - cudaFree(common.d_tEpiColLoc); - - cudaFree(common.d_epiT); - - //==================================================================================================== - // POINTERS - //==================================================================================================== - - for (i = 0; i < common.allPoints; i++) { - cudaFree(unique[i].d_in2); - - cudaFree(unique[i].d_conv); - cudaFree(unique[i].d_in2_pad_cumv); - cudaFree(unique[i].d_in2_pad_cumv_sel); - cudaFree(unique[i].d_in2_sub_cumh); - cudaFree(unique[i].d_in2_sub_cumh_sel); - cudaFree(unique[i].d_in2_sub2); - cudaFree(unique[i].d_in2_sqr); - cudaFree(unique[i].d_in2_sqr_sub2); - cudaFree(unique[i].d_in_sqr); - - cudaFree(unique[i].d_tMask); - cudaFree(unique[i].d_mask_conv); - } -} - -//=============================================================================================================================================================================================================== -//=============================================================================================================================================================================================================== -// MAIN FUNCTION -//=============================================================================================================================================================================================================== -//=============================================================================================================================================================================================================== diff --git a/examples/heartwall/run.sh b/examples/heartwall/run.sh deleted file mode 100644 index 53465a2..0000000 --- a/examples/heartwall/run.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash - -cd AVI; make; cd ..; - -clang++ -DOUTPUT main.cu -I./AVI --cuda-path=/usr/local/cuda-10.1 --cuda-gpu-arch=sm_61 -L/usr/local/cuda-10.1/lib64 -lcudart_static -ldl -lrt -pthread -save-temps -v - - -/home/robinhan/repo/open_source_template/build/compilation/kernelTranslator main-cuda-nvptx64-nvidia-cuda-sm_61.bc kernel.bc -/home/robinhan/repo/open_source_template/build/compilation/hostTranslator main-host-x86_64-unknown-linux-gnu.bc host.bc - -llc --relocation-model=pic --filetype=obj kernel.bc -llc --relocation-model=pic --filetype=obj host.bc - - -g++ -Wall -L/home/robinhan/repo/open_source_template/build/runtime -L/home/robinhan/repo/open_source_template/build/runtime/threadPool -o heartwall -fPIC -no-pie host.o kernel.o ./AVI/avilib.o ./AVI/avimod.o -lc -lx86Runtime -lthreadPool -lpthread - -./heartwall /home/robinhan/repo/open_source_template/runtime/examples/rodinia-data/heartwall/test.avi 20 diff --git a/examples/heartwall/setdevice.cu b/examples/heartwall/setdevice.cu deleted file mode 100755 index d27bb48..0000000 --- a/examples/heartwall/setdevice.cu +++ /dev/null @@ -1,5 +0,0 @@ -//////////////////////////////////////////////////////////////////////////////// -// Set Device -//////////////////////////////////////////////////////////////////////////////// - -void setdevice(void) { cudaSetDevice(0); } diff --git a/examples/hotspot/hotspot-cuda-nvptx64-nvidia-cuda-sm_61.ll b/examples/hotspot/hotspot-cuda-nvptx64-nvidia-cuda-sm_61.ll deleted file mode 100644 index 90f6f17..0000000 --- a/examples/hotspot/hotspot-cuda-nvptx64-nvidia-cuda-sm_61.ll +++ /dev/null @@ -1,719 +0,0 @@ -; ModuleID = 'hotspot-cuda-nvptx64-nvidia-cuda-sm_61.bc' -source_filename = "hotspot.cu" -target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" -target triple = "nvptx64-nvidia-cuda" - -%struct.__cuda_builtin_blockIdx_t = type { i8 } -%struct.__cuda_builtin_threadIdx_t = type { i8 } -%struct.cudaFuncAttributes = type { i64, i64, i64, i32, i32, i32, i32, i32, i32, i32 } - -$_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv = comdat any - -$_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_yEv = comdat any - -$_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv = comdat any - -$_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_yEv = comdat any - -@_ZZ14calculate_tempiPfS_S_iiiiffffffE12temp_on_cuda = internal addrspace(3) global [16 x [16 x float]] undef, align 4 -@_ZZ14calculate_tempiPfS_S_iiiiffffffE13power_on_cuda = internal addrspace(3) global [16 x [16 x float]] undef, align 4 -@_ZZ14calculate_tempiPfS_S_iiiiffffffE6temp_t = internal addrspace(3) global [16 x [16 x float]] undef, align 4 -@blockIdx = extern_weak dso_local addrspace(1) global %struct.__cuda_builtin_blockIdx_t, align 1 -@threadIdx = extern_weak dso_local addrspace(1) global %struct.__cuda_builtin_threadIdx_t, align 1 - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaMalloc(i8** %p, i64 %s) #0 { -entry: - %p.addr = alloca i8**, align 8 - %s.addr = alloca i64, align 8 - store i8** %p, i8*** %p.addr, align 8 - store i64 %s, i64* %s.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaFuncGetAttributes(%struct.cudaFuncAttributes* %p, i8* %c) #0 { -entry: - %p.addr = alloca %struct.cudaFuncAttributes*, align 8 - %c.addr = alloca i8*, align 8 - store %struct.cudaFuncAttributes* %p, %struct.cudaFuncAttributes** %p.addr, align 8 - store i8* %c, i8** %c.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaDeviceGetAttribute(i32* %value, i32 %attr, i32 %device) #0 { -entry: - %value.addr = alloca i32*, align 8 - %attr.addr = alloca i32, align 4 - %device.addr = alloca i32, align 4 - store i32* %value, i32** %value.addr, align 8 - store i32 %attr, i32* %attr.addr, align 4 - store i32 %device, i32* %device.addr, align 4 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaGetDevice(i32* %device) #0 { -entry: - %device.addr = alloca i32*, align 8 - store i32* %device, i32** %device.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaOccupancyMaxActiveBlocksPerMultiprocessor(i32* %numBlocks, i8* %func, i32 %blockSize, i64 %dynamicSmemSize) #0 { -entry: - %numBlocks.addr = alloca i32*, align 8 - %func.addr = alloca i8*, align 8 - %blockSize.addr = alloca i32, align 4 - %dynamicSmemSize.addr = alloca i64, align 8 - store i32* %numBlocks, i32** %numBlocks.addr, align 8 - store i8* %func, i8** %func.addr, align 8 - store i32 %blockSize, i32* %blockSize.addr, align 4 - store i64 %dynamicSmemSize, i64* %dynamicSmemSize.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(i32* %numBlocks, i8* %func, i32 %blockSize, i64 %dynamicSmemSize, i32 %flags) #0 { -entry: - %numBlocks.addr = alloca i32*, align 8 - %func.addr = alloca i8*, align 8 - %blockSize.addr = alloca i32, align 4 - %dynamicSmemSize.addr = alloca i64, align 8 - %flags.addr = alloca i32, align 4 - store i32* %numBlocks, i32** %numBlocks.addr, align 8 - store i8* %func, i8** %func.addr, align 8 - store i32 %blockSize, i32* %blockSize.addr, align 4 - store i64 %dynamicSmemSize, i64* %dynamicSmemSize.addr, align 8 - store i32 %flags, i32* %flags.addr, align 4 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define dso_local void @_Z14calculate_tempiPfS_S_iiiiffffff(i32 %iteration, float* %power, float* %temp_src, float* %temp_dst, i32 %grid_cols, i32 %grid_rows, i32 %border_cols, i32 %border_rows, float %Cap, float %Rx, float %Ry, float %Rz, float %step, float %time_elapsed) #0 { -entry: - %iteration.addr = alloca i32, align 4 - %power.addr = alloca float*, align 8 - %temp_src.addr = alloca float*, align 8 - %temp_dst.addr = alloca float*, align 8 - %grid_cols.addr = alloca i32, align 4 - %grid_rows.addr = alloca i32, align 4 - %border_cols.addr = alloca i32, align 4 - %border_rows.addr = alloca i32, align 4 - %Cap.addr = alloca float, align 4 - %Rx.addr = alloca float, align 4 - %Ry.addr = alloca float, align 4 - %Rz.addr = alloca float, align 4 - %step.addr = alloca float, align 4 - %time_elapsed.addr = alloca float, align 4 - %amb_temp = alloca float, align 4 - %step_div_Cap = alloca float, align 4 - %Rx_1 = alloca float, align 4 - %Ry_1 = alloca float, align 4 - %Rz_1 = alloca float, align 4 - %bx = alloca i32, align 4 - %by = alloca i32, align 4 - %tx = alloca i32, align 4 - %ty = alloca i32, align 4 - %small_block_rows = alloca i32, align 4 - %small_block_cols = alloca i32, align 4 - %blkY = alloca i32, align 4 - %blkX = alloca i32, align 4 - %blkYmax = alloca i32, align 4 - %blkXmax = alloca i32, align 4 - %yidx = alloca i32, align 4 - %xidx = alloca i32, align 4 - %loadYidx = alloca i32, align 4 - %loadXidx = alloca i32, align 4 - %index = alloca i32, align 4 - %validYmin = alloca i32, align 4 - %validYmax = alloca i32, align 4 - %validXmin = alloca i32, align 4 - %validXmax = alloca i32, align 4 - %N = alloca i32, align 4 - %S = alloca i32, align 4 - %W = alloca i32, align 4 - %E = alloca i32, align 4 - %computed = alloca i8, align 1 - %i = alloca i32, align 4 - store i32 %iteration, i32* %iteration.addr, align 4 - store float* %power, float** %power.addr, align 8 - store float* %temp_src, float** %temp_src.addr, align 8 - store float* %temp_dst, float** %temp_dst.addr, align 8 - store i32 %grid_cols, i32* %grid_cols.addr, align 4 - store i32 %grid_rows, i32* %grid_rows.addr, align 4 - store i32 %border_cols, i32* %border_cols.addr, align 4 - store i32 %border_rows, i32* %border_rows.addr, align 4 - store float %Cap, float* %Cap.addr, align 4 - store float %Rx, float* %Rx.addr, align 4 - store float %Ry, float* %Ry.addr, align 4 - store float %Rz, float* %Rz.addr, align 4 - store float %step, float* %step.addr, align 4 - store float %time_elapsed, float* %time_elapsed.addr, align 4 - store float 8.000000e+01, float* %amb_temp, align 4 - %call = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #2 - store i32 %call, i32* %bx, align 4 - %call1 = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_yEv() #2 - store i32 %call1, i32* %by, align 4 - %call2 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - store i32 %call2, i32* %tx, align 4 - %call3 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_yEv() #2 - store i32 %call3, i32* %ty, align 4 - %0 = load float, float* %step.addr, align 4 - %1 = load float, float* %Cap.addr, align 4 - %div = fdiv float %0, %1 - store float %div, float* %step_div_Cap, align 4 - %2 = load float, float* %Rx.addr, align 4 - %div4 = fdiv float 1.000000e+00, %2 - store float %div4, float* %Rx_1, align 4 - %3 = load float, float* %Ry.addr, align 4 - %div5 = fdiv float 1.000000e+00, %3 - store float %div5, float* %Ry_1, align 4 - %4 = load float, float* %Rz.addr, align 4 - %div6 = fdiv float 1.000000e+00, %4 - store float %div6, float* %Rz_1, align 4 - %5 = load i32, i32* %iteration.addr, align 4 - %mul = mul nsw i32 %5, 2 - %sub = sub nsw i32 16, %mul - store i32 %sub, i32* %small_block_rows, align 4 - %6 = load i32, i32* %iteration.addr, align 4 - %mul7 = mul nsw i32 %6, 2 - %sub8 = sub nsw i32 16, %mul7 - store i32 %sub8, i32* %small_block_cols, align 4 - %7 = load i32, i32* %small_block_rows, align 4 - %8 = load i32, i32* %by, align 4 - %mul9 = mul nsw i32 %7, %8 - %9 = load i32, i32* %border_rows.addr, align 4 - %sub10 = sub nsw i32 %mul9, %9 - store i32 %sub10, i32* %blkY, align 4 - %10 = load i32, i32* %small_block_cols, align 4 - %11 = load i32, i32* %bx, align 4 - %mul11 = mul nsw i32 %10, %11 - %12 = load i32, i32* %border_cols.addr, align 4 - %sub12 = sub nsw i32 %mul11, %12 - store i32 %sub12, i32* %blkX, align 4 - %13 = load i32, i32* %blkY, align 4 - %add = add nsw i32 %13, 16 - %sub13 = sub nsw i32 %add, 1 - store i32 %sub13, i32* %blkYmax, align 4 - %14 = load i32, i32* %blkX, align 4 - %add14 = add nsw i32 %14, 16 - %sub15 = sub nsw i32 %add14, 1 - store i32 %sub15, i32* %blkXmax, align 4 - %15 = load i32, i32* %blkY, align 4 - %16 = load i32, i32* %ty, align 4 - %add16 = add nsw i32 %15, %16 - store i32 %add16, i32* %yidx, align 4 - %17 = load i32, i32* %blkX, align 4 - %18 = load i32, i32* %tx, align 4 - %add17 = add nsw i32 %17, %18 - store i32 %add17, i32* %xidx, align 4 - %19 = load i32, i32* %yidx, align 4 - store i32 %19, i32* %loadYidx, align 4 - %20 = load i32, i32* %xidx, align 4 - store i32 %20, i32* %loadXidx, align 4 - %21 = load i32, i32* %grid_cols.addr, align 4 - %22 = load i32, i32* %loadYidx, align 4 - %mul18 = mul nsw i32 %21, %22 - %23 = load i32, i32* %loadXidx, align 4 - %add19 = add nsw i32 %mul18, %23 - store i32 %add19, i32* %index, align 4 - %24 = load i32, i32* %loadYidx, align 4 - %cmp = icmp sge i32 %24, 0 - br i1 %cmp, label %land.lhs.true, label %if.end - -land.lhs.true: ; preds = %entry - %25 = load i32, i32* %loadYidx, align 4 - %26 = load i32, i32* %grid_rows.addr, align 4 - %sub20 = sub nsw i32 %26, 1 - %cmp21 = icmp sle i32 %25, %sub20 - br i1 %cmp21, label %land.lhs.true22, label %if.end - -land.lhs.true22: ; preds = %land.lhs.true - %27 = load i32, i32* %loadXidx, align 4 - %cmp23 = icmp sge i32 %27, 0 - br i1 %cmp23, label %land.lhs.true24, label %if.end - -land.lhs.true24: ; preds = %land.lhs.true22 - %28 = load i32, i32* %loadXidx, align 4 - %29 = load i32, i32* %grid_cols.addr, align 4 - %sub25 = sub nsw i32 %29, 1 - %cmp26 = icmp sle i32 %28, %sub25 - br i1 %cmp26, label %if.then, label %if.end - -if.then: ; preds = %land.lhs.true24 - %30 = load float*, float** %temp_src.addr, align 8 - %31 = load i32, i32* %index, align 4 - %idxprom = sext i32 %31 to i64 - %arrayidx = getelementptr inbounds float, float* %30, i64 %idxprom - %32 = load float, float* %arrayidx, align 4 - %33 = load i32, i32* %ty, align 4 - %idxprom27 = sext i32 %33 to i64 - %arrayidx28 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ14calculate_tempiPfS_S_iiiiffffffE12temp_on_cuda to [16 x [16 x float]]*), i64 0, i64 %idxprom27 - %34 = load i32, i32* %tx, align 4 - %idxprom29 = sext i32 %34 to i64 - %arrayidx30 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx28, i64 0, i64 %idxprom29 - store float %32, float* %arrayidx30, align 4 - %35 = load float*, float** %power.addr, align 8 - %36 = load i32, i32* %index, align 4 - %idxprom31 = sext i32 %36 to i64 - %arrayidx32 = getelementptr inbounds float, float* %35, i64 %idxprom31 - %37 = load float, float* %arrayidx32, align 4 - %38 = load i32, i32* %ty, align 4 - %idxprom33 = sext i32 %38 to i64 - %arrayidx34 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ14calculate_tempiPfS_S_iiiiffffffE13power_on_cuda to [16 x [16 x float]]*), i64 0, i64 %idxprom33 - %39 = load i32, i32* %tx, align 4 - %idxprom35 = sext i32 %39 to i64 - %arrayidx36 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx34, i64 0, i64 %idxprom35 - store float %37, float* %arrayidx36, align 4 - br label %if.end - -if.end: ; preds = %if.then, %land.lhs.true24, %land.lhs.true22, %land.lhs.true, %entry - call void @llvm.nvvm.barrier0() - %40 = load i32, i32* %blkY, align 4 - %cmp37 = icmp slt i32 %40, 0 - br i1 %cmp37, label %cond.true, label %cond.false - -cond.true: ; preds = %if.end - %41 = load i32, i32* %blkY, align 4 - %sub38 = sub nsw i32 0, %41 - br label %cond.end - -cond.false: ; preds = %if.end - br label %cond.end - -cond.end: ; preds = %cond.false, %cond.true - %cond = phi i32 [ %sub38, %cond.true ], [ 0, %cond.false ] - store i32 %cond, i32* %validYmin, align 4 - %42 = load i32, i32* %blkYmax, align 4 - %43 = load i32, i32* %grid_rows.addr, align 4 - %sub39 = sub nsw i32 %43, 1 - %cmp40 = icmp sgt i32 %42, %sub39 - br i1 %cmp40, label %cond.true41, label %cond.false45 - -cond.true41: ; preds = %cond.end - %44 = load i32, i32* %blkYmax, align 4 - %45 = load i32, i32* %grid_rows.addr, align 4 - %sub42 = sub nsw i32 %44, %45 - %add43 = add nsw i32 %sub42, 1 - %sub44 = sub nsw i32 15, %add43 - br label %cond.end46 - -cond.false45: ; preds = %cond.end - br label %cond.end46 - -cond.end46: ; preds = %cond.false45, %cond.true41 - %cond47 = phi i32 [ %sub44, %cond.true41 ], [ 15, %cond.false45 ] - store i32 %cond47, i32* %validYmax, align 4 - %46 = load i32, i32* %blkX, align 4 - %cmp48 = icmp slt i32 %46, 0 - br i1 %cmp48, label %cond.true49, label %cond.false51 - -cond.true49: ; preds = %cond.end46 - %47 = load i32, i32* %blkX, align 4 - %sub50 = sub nsw i32 0, %47 - br label %cond.end52 - -cond.false51: ; preds = %cond.end46 - br label %cond.end52 - -cond.end52: ; preds = %cond.false51, %cond.true49 - %cond53 = phi i32 [ %sub50, %cond.true49 ], [ 0, %cond.false51 ] - store i32 %cond53, i32* %validXmin, align 4 - %48 = load i32, i32* %blkXmax, align 4 - %49 = load i32, i32* %grid_cols.addr, align 4 - %sub54 = sub nsw i32 %49, 1 - %cmp55 = icmp sgt i32 %48, %sub54 - br i1 %cmp55, label %cond.true56, label %cond.false60 - -cond.true56: ; preds = %cond.end52 - %50 = load i32, i32* %blkXmax, align 4 - %51 = load i32, i32* %grid_cols.addr, align 4 - %sub57 = sub nsw i32 %50, %51 - %add58 = add nsw i32 %sub57, 1 - %sub59 = sub nsw i32 15, %add58 - br label %cond.end61 - -cond.false60: ; preds = %cond.end52 - br label %cond.end61 - -cond.end61: ; preds = %cond.false60, %cond.true56 - %cond62 = phi i32 [ %sub59, %cond.true56 ], [ 15, %cond.false60 ] - store i32 %cond62, i32* %validXmax, align 4 - %52 = load i32, i32* %ty, align 4 - %sub63 = sub nsw i32 %52, 1 - store i32 %sub63, i32* %N, align 4 - %53 = load i32, i32* %ty, align 4 - %add64 = add nsw i32 %53, 1 - store i32 %add64, i32* %S, align 4 - %54 = load i32, i32* %tx, align 4 - %sub65 = sub nsw i32 %54, 1 - store i32 %sub65, i32* %W, align 4 - %55 = load i32, i32* %tx, align 4 - %add66 = add nsw i32 %55, 1 - store i32 %add66, i32* %E, align 4 - %56 = load i32, i32* %N, align 4 - %57 = load i32, i32* %validYmin, align 4 - %cmp67 = icmp slt i32 %56, %57 - br i1 %cmp67, label %cond.true68, label %cond.false69 - -cond.true68: ; preds = %cond.end61 - %58 = load i32, i32* %validYmin, align 4 - br label %cond.end70 - -cond.false69: ; preds = %cond.end61 - %59 = load i32, i32* %N, align 4 - br label %cond.end70 - -cond.end70: ; preds = %cond.false69, %cond.true68 - %cond71 = phi i32 [ %58, %cond.true68 ], [ %59, %cond.false69 ] - store i32 %cond71, i32* %N, align 4 - %60 = load i32, i32* %S, align 4 - %61 = load i32, i32* %validYmax, align 4 - %cmp72 = icmp sgt i32 %60, %61 - br i1 %cmp72, label %cond.true73, label %cond.false74 - -cond.true73: ; preds = %cond.end70 - %62 = load i32, i32* %validYmax, align 4 - br label %cond.end75 - -cond.false74: ; preds = %cond.end70 - %63 = load i32, i32* %S, align 4 - br label %cond.end75 - -cond.end75: ; preds = %cond.false74, %cond.true73 - %cond76 = phi i32 [ %62, %cond.true73 ], [ %63, %cond.false74 ] - store i32 %cond76, i32* %S, align 4 - %64 = load i32, i32* %W, align 4 - %65 = load i32, i32* %validXmin, align 4 - %cmp77 = icmp slt i32 %64, %65 - br i1 %cmp77, label %cond.true78, label %cond.false79 - -cond.true78: ; preds = %cond.end75 - %66 = load i32, i32* %validXmin, align 4 - br label %cond.end80 - -cond.false79: ; preds = %cond.end75 - %67 = load i32, i32* %W, align 4 - br label %cond.end80 - -cond.end80: ; preds = %cond.false79, %cond.true78 - %cond81 = phi i32 [ %66, %cond.true78 ], [ %67, %cond.false79 ] - store i32 %cond81, i32* %W, align 4 - %68 = load i32, i32* %E, align 4 - %69 = load i32, i32* %validXmax, align 4 - %cmp82 = icmp sgt i32 %68, %69 - br i1 %cmp82, label %cond.true83, label %cond.false84 - -cond.true83: ; preds = %cond.end80 - %70 = load i32, i32* %validXmax, align 4 - br label %cond.end85 - -cond.false84: ; preds = %cond.end80 - %71 = load i32, i32* %E, align 4 - br label %cond.end85 - -cond.end85: ; preds = %cond.false84, %cond.true83 - %cond86 = phi i32 [ %70, %cond.true83 ], [ %71, %cond.false84 ] - store i32 %cond86, i32* %E, align 4 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %cond.end85 - %72 = load i32, i32* %i, align 4 - %73 = load i32, i32* %iteration.addr, align 4 - %cmp87 = icmp slt i32 %72, %73 - br i1 %cmp87, label %for.body, label %for.end - -for.body: ; preds = %for.cond - store i8 0, i8* %computed, align 1 - %74 = load i32, i32* %tx, align 4 - %75 = load i32, i32* %i, align 4 - %add88 = add nsw i32 %75, 1 - %cmp89 = icmp sge i32 %74, %add88 - br i1 %cmp89, label %land.lhs.true90, label %if.end175 - -land.lhs.true90: ; preds = %for.body - %76 = load i32, i32* %tx, align 4 - %77 = load i32, i32* %i, align 4 - %sub91 = sub nsw i32 16, %77 - %sub92 = sub nsw i32 %sub91, 2 - %cmp93 = icmp sle i32 %76, %sub92 - br i1 %cmp93, label %land.lhs.true94, label %if.end175 - -land.lhs.true94: ; preds = %land.lhs.true90 - %78 = load i32, i32* %ty, align 4 - %79 = load i32, i32* %i, align 4 - %add95 = add nsw i32 %79, 1 - %cmp96 = icmp sge i32 %78, %add95 - br i1 %cmp96, label %land.lhs.true97, label %if.end175 - -land.lhs.true97: ; preds = %land.lhs.true94 - %80 = load i32, i32* %ty, align 4 - %81 = load i32, i32* %i, align 4 - %sub98 = sub nsw i32 16, %81 - %sub99 = sub nsw i32 %sub98, 2 - %cmp100 = icmp sle i32 %80, %sub99 - br i1 %cmp100, label %land.lhs.true101, label %if.end175 - -land.lhs.true101: ; preds = %land.lhs.true97 - %82 = load i32, i32* %tx, align 4 - %83 = load i32, i32* %validXmin, align 4 - %cmp102 = icmp sge i32 %82, %83 - br i1 %cmp102, label %land.lhs.true103, label %if.end175 - -land.lhs.true103: ; preds = %land.lhs.true101 - %84 = load i32, i32* %tx, align 4 - %85 = load i32, i32* %validXmax, align 4 - %cmp104 = icmp sle i32 %84, %85 - br i1 %cmp104, label %land.lhs.true105, label %if.end175 - -land.lhs.true105: ; preds = %land.lhs.true103 - %86 = load i32, i32* %ty, align 4 - %87 = load i32, i32* %validYmin, align 4 - %cmp106 = icmp sge i32 %86, %87 - br i1 %cmp106, label %land.lhs.true107, label %if.end175 - -land.lhs.true107: ; preds = %land.lhs.true105 - %88 = load i32, i32* %ty, align 4 - %89 = load i32, i32* %validYmax, align 4 - %cmp108 = icmp sle i32 %88, %89 - br i1 %cmp108, label %if.then109, label %if.end175 - -if.then109: ; preds = %land.lhs.true107 - store i8 1, i8* %computed, align 1 - %90 = load i32, i32* %ty, align 4 - %idxprom110 = sext i32 %90 to i64 - %arrayidx111 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ14calculate_tempiPfS_S_iiiiffffffE12temp_on_cuda to [16 x [16 x float]]*), i64 0, i64 %idxprom110 - %91 = load i32, i32* %tx, align 4 - %idxprom112 = sext i32 %91 to i64 - %arrayidx113 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx111, i64 0, i64 %idxprom112 - %92 = load float, float* %arrayidx113, align 4 - %conv = fpext float %92 to double - %93 = load float, float* %step_div_Cap, align 4 - %conv114 = fpext float %93 to double - %94 = load i32, i32* %ty, align 4 - %idxprom115 = sext i32 %94 to i64 - %arrayidx116 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ14calculate_tempiPfS_S_iiiiffffffE13power_on_cuda to [16 x [16 x float]]*), i64 0, i64 %idxprom115 - %95 = load i32, i32* %tx, align 4 - %idxprom117 = sext i32 %95 to i64 - %arrayidx118 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx116, i64 0, i64 %idxprom117 - %96 = load float, float* %arrayidx118, align 4 - %conv119 = fpext float %96 to double - %97 = load i32, i32* %S, align 4 - %idxprom120 = sext i32 %97 to i64 - %arrayidx121 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ14calculate_tempiPfS_S_iiiiffffffE12temp_on_cuda to [16 x [16 x float]]*), i64 0, i64 %idxprom120 - %98 = load i32, i32* %tx, align 4 - %idxprom122 = sext i32 %98 to i64 - %arrayidx123 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx121, i64 0, i64 %idxprom122 - %99 = load float, float* %arrayidx123, align 4 - %100 = load i32, i32* %N, align 4 - %idxprom124 = sext i32 %100 to i64 - %arrayidx125 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ14calculate_tempiPfS_S_iiiiffffffE12temp_on_cuda to [16 x [16 x float]]*), i64 0, i64 %idxprom124 - %101 = load i32, i32* %tx, align 4 - %idxprom126 = sext i32 %101 to i64 - %arrayidx127 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx125, i64 0, i64 %idxprom126 - %102 = load float, float* %arrayidx127, align 4 - %add128 = fadd contract float %99, %102 - %conv129 = fpext float %add128 to double - %103 = load i32, i32* %ty, align 4 - %idxprom130 = sext i32 %103 to i64 - %arrayidx131 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ14calculate_tempiPfS_S_iiiiffffffE12temp_on_cuda to [16 x [16 x float]]*), i64 0, i64 %idxprom130 - %104 = load i32, i32* %tx, align 4 - %idxprom132 = sext i32 %104 to i64 - %arrayidx133 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx131, i64 0, i64 %idxprom132 - %105 = load float, float* %arrayidx133, align 4 - %conv134 = fpext float %105 to double - %mul135 = fmul contract double 2.000000e+00, %conv134 - %sub136 = fsub contract double %conv129, %mul135 - %106 = load float, float* %Ry_1, align 4 - %conv137 = fpext float %106 to double - %mul138 = fmul contract double %sub136, %conv137 - %add139 = fadd contract double %conv119, %mul138 - %107 = load i32, i32* %ty, align 4 - %idxprom140 = sext i32 %107 to i64 - %arrayidx141 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ14calculate_tempiPfS_S_iiiiffffffE12temp_on_cuda to [16 x [16 x float]]*), i64 0, i64 %idxprom140 - %108 = load i32, i32* %E, align 4 - %idxprom142 = sext i32 %108 to i64 - %arrayidx143 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx141, i64 0, i64 %idxprom142 - %109 = load float, float* %arrayidx143, align 4 - %110 = load i32, i32* %ty, align 4 - %idxprom144 = sext i32 %110 to i64 - %arrayidx145 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ14calculate_tempiPfS_S_iiiiffffffE12temp_on_cuda to [16 x [16 x float]]*), i64 0, i64 %idxprom144 - %111 = load i32, i32* %W, align 4 - %idxprom146 = sext i32 %111 to i64 - %arrayidx147 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx145, i64 0, i64 %idxprom146 - %112 = load float, float* %arrayidx147, align 4 - %add148 = fadd contract float %109, %112 - %conv149 = fpext float %add148 to double - %113 = load i32, i32* %ty, align 4 - %idxprom150 = sext i32 %113 to i64 - %arrayidx151 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ14calculate_tempiPfS_S_iiiiffffffE12temp_on_cuda to [16 x [16 x float]]*), i64 0, i64 %idxprom150 - %114 = load i32, i32* %tx, align 4 - %idxprom152 = sext i32 %114 to i64 - %arrayidx153 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx151, i64 0, i64 %idxprom152 - %115 = load float, float* %arrayidx153, align 4 - %conv154 = fpext float %115 to double - %mul155 = fmul contract double 2.000000e+00, %conv154 - %sub156 = fsub contract double %conv149, %mul155 - %116 = load float, float* %Rx_1, align 4 - %conv157 = fpext float %116 to double - %mul158 = fmul contract double %sub156, %conv157 - %add159 = fadd contract double %add139, %mul158 - %117 = load float, float* %amb_temp, align 4 - %118 = load i32, i32* %ty, align 4 - %idxprom160 = sext i32 %118 to i64 - %arrayidx161 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ14calculate_tempiPfS_S_iiiiffffffE12temp_on_cuda to [16 x [16 x float]]*), i64 0, i64 %idxprom160 - %119 = load i32, i32* %tx, align 4 - %idxprom162 = sext i32 %119 to i64 - %arrayidx163 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx161, i64 0, i64 %idxprom162 - %120 = load float, float* %arrayidx163, align 4 - %sub164 = fsub contract float %117, %120 - %121 = load float, float* %Rz_1, align 4 - %mul165 = fmul contract float %sub164, %121 - %conv166 = fpext float %mul165 to double - %add167 = fadd contract double %add159, %conv166 - %mul168 = fmul contract double %conv114, %add167 - %add169 = fadd contract double %conv, %mul168 - %conv170 = fptrunc double %add169 to float - %122 = load i32, i32* %ty, align 4 - %idxprom171 = sext i32 %122 to i64 - %arrayidx172 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ14calculate_tempiPfS_S_iiiiffffffE6temp_t to [16 x [16 x float]]*), i64 0, i64 %idxprom171 - %123 = load i32, i32* %tx, align 4 - %idxprom173 = sext i32 %123 to i64 - %arrayidx174 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx172, i64 0, i64 %idxprom173 - store float %conv170, float* %arrayidx174, align 4 - br label %if.end175 - -if.end175: ; preds = %if.then109, %land.lhs.true107, %land.lhs.true105, %land.lhs.true103, %land.lhs.true101, %land.lhs.true97, %land.lhs.true94, %land.lhs.true90, %for.body - call void @llvm.nvvm.barrier0() - %124 = load i32, i32* %i, align 4 - %125 = load i32, i32* %iteration.addr, align 4 - %sub176 = sub nsw i32 %125, 1 - %cmp177 = icmp eq i32 %124, %sub176 - br i1 %cmp177, label %if.then178, label %if.end179 - -if.then178: ; preds = %if.end175 - br label %for.end - -if.end179: ; preds = %if.end175 - %126 = load i8, i8* %computed, align 1 - %tobool = trunc i8 %126 to i1 - br i1 %tobool, label %if.then180, label %if.end189 - -if.then180: ; preds = %if.end179 - %127 = load i32, i32* %ty, align 4 - %idxprom181 = sext i32 %127 to i64 - %arrayidx182 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ14calculate_tempiPfS_S_iiiiffffffE6temp_t to [16 x [16 x float]]*), i64 0, i64 %idxprom181 - %128 = load i32, i32* %tx, align 4 - %idxprom183 = sext i32 %128 to i64 - %arrayidx184 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx182, i64 0, i64 %idxprom183 - %129 = load float, float* %arrayidx184, align 4 - %130 = load i32, i32* %ty, align 4 - %idxprom185 = sext i32 %130 to i64 - %arrayidx186 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ14calculate_tempiPfS_S_iiiiffffffE12temp_on_cuda to [16 x [16 x float]]*), i64 0, i64 %idxprom185 - %131 = load i32, i32* %tx, align 4 - %idxprom187 = sext i32 %131 to i64 - %arrayidx188 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx186, i64 0, i64 %idxprom187 - store float %129, float* %arrayidx188, align 4 - br label %if.end189 - -if.end189: ; preds = %if.then180, %if.end179 - call void @llvm.nvvm.barrier0() - br label %for.inc - -for.inc: ; preds = %if.end189 - %132 = load i32, i32* %i, align 4 - %inc = add nsw i32 %132, 1 - store i32 %inc, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %if.then178, %for.cond - %133 = load i8, i8* %computed, align 1 - %tobool190 = trunc i8 %133 to i1 - br i1 %tobool190, label %if.then191, label %if.end198 - -if.then191: ; preds = %for.end - %134 = load i32, i32* %ty, align 4 - %idxprom192 = sext i32 %134 to i64 - %arrayidx193 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ14calculate_tempiPfS_S_iiiiffffffE6temp_t to [16 x [16 x float]]*), i64 0, i64 %idxprom192 - %135 = load i32, i32* %tx, align 4 - %idxprom194 = sext i32 %135 to i64 - %arrayidx195 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx193, i64 0, i64 %idxprom194 - %136 = load float, float* %arrayidx195, align 4 - %137 = load float*, float** %temp_dst.addr, align 8 - %138 = load i32, i32* %index, align 4 - %idxprom196 = sext i32 %138 to i64 - %arrayidx197 = getelementptr inbounds float, float* %137, i64 %idxprom196 - store float %136, float* %arrayidx197, align 4 - br label %if.end198 - -if.end198: ; preds = %if.then191, %for.end - ret void -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() - ret i32 %0 -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_yEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y() - ret i32 %0 -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() - ret i32 %0 -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_yEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.y() - ret i32 %0 -} - -; Function Attrs: convergent nounwind -declare void @llvm.nvvm.barrier0() #2 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() #3 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.y() #3 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() #3 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.tid.y() #3 - -attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_61" "target-features"="+ptx64,+sm_61" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { alwaysinline convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_61" "target-features"="+ptx64,+sm_61" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { convergent nounwind } -attributes #3 = { nounwind readnone } - -!llvm.module.flags = !{!0, !1, !2} -!nvvm.annotations = !{!3, !4, !5, !4, !6, !6, !6, !6, !7, !7, !6} -!llvm.ident = !{!8} -!nvvmir.version = !{!9} - -!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]} -!1 = !{i32 1, !"wchar_size", i32 4} -!2 = !{i32 4, !"nvvm-reflect-ftz", i32 0} -!3 = !{void (i32, float*, float*, float*, i32, i32, i32, i32, float, float, float, float, float, float)* @_Z14calculate_tempiPfS_S_iiiiffffff, !"kernel", i32 1} -!4 = !{null, !"align", i32 8} -!5 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080} -!6 = !{null, !"align", i32 16} -!7 = !{null, !"align", i32 16, !"align", i32 65552, !"align", i32 131088} -!8 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"} -!9 = !{i32 1, i32 4} diff --git a/examples/hotspot/hotspot-host-x86_64-unknown-linux-gnu.ll b/examples/hotspot/hotspot-host-x86_64-unknown-linux-gnu.ll deleted file mode 100644 index 76aac61..0000000 --- a/examples/hotspot/hotspot-host-x86_64-unknown-linux-gnu.ll +++ /dev/null @@ -1,1022 +0,0 @@ -; ModuleID = 'hotspot-host-x86_64-unknown-linux-gnu.bc' -source_filename = "hotspot.cu" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } -%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } -%struct.dim3 = type { i32, i32, i32 } -%struct.CUstream_st = type opaque - -$_ZN4dim3C2Ejjj = comdat any - -@t_chip = dso_local global float 0x3F40624DE0000000, align 4 -@chip_height = dso_local global float 0x3F90624DE0000000, align 4 -@chip_width = dso_local global float 0x3F90624DE0000000, align 4 -@amb_temp = dso_local global float 8.000000e+01, align 4 -@stderr = external dso_local global %struct._IO_FILE*, align 8 -@.str = private unnamed_addr constant [11 x i8] c"error: %s\0A\00", align 1 -@.str.1 = private unnamed_addr constant [2 x i8] c"w\00", align 1 -@.str.2 = private unnamed_addr constant [25 x i8] c"The file was not opened\0A\00", align 1 -@.str.3 = private unnamed_addr constant [7 x i8] c"%d\09%g\0A\00", align 1 -@.str.4 = private unnamed_addr constant [2 x i8] c"r\00", align 1 -@.str.5 = private unnamed_addr constant [25 x i8] c"not enough lines in file\00", align 1 -@.str.6 = private unnamed_addr constant [3 x i8] c"%f\00", align 1 -@.str.7 = private unnamed_addr constant [20 x i8] c"invalid file format\00", align 1 -@.str.8 = private unnamed_addr constant [100 x i8] c"Usage: %s \0A\00", align 1 -@.str.9 = private unnamed_addr constant [78 x i8] c"\09 - number of rows/cols in the grid (positive integer)\0A\00", align 1 -@.str.10 = private unnamed_addr constant [53 x i8] c"\09 - pyramid heigh(positive integer)\0A\00", align 1 -@.str.11 = private unnamed_addr constant [38 x i8] c"\09 - number of iterations\0A\00", align 1 -@.str.12 = private unnamed_addr constant [89 x i8] c"\09 - name of the file containing the initial temperature values of each cell\0A\00", align 1 -@.str.13 = private unnamed_addr constant [86 x i8] c"\09 - name of the file containing the dissipated power values of each cell\0A\00", align 1 -@.str.14 = private unnamed_addr constant [42 x i8] c"\09 - name of the output file\0A\00", align 1 -@.str.15 = private unnamed_addr constant [29 x i8] c"WG size of kernel = %d X %d\0A\00", align 1 -@.str.16 = private unnamed_addr constant [26 x i8] c"unable to allocate memory\00", align 1 -@.str.17 = private unnamed_addr constant [94 x i8] c"pyramidHeight: %d\0AgridSize: [%d, %d]\0Aborder:[%d, %d]\0AblockGrid:[%d, %d]\0AtargetBlock:[%d, %d]\0A\00", align 1 -@.str.18 = private unnamed_addr constant [43 x i8] c"Start computing the transient temperature\0A\00", align 1 -@.str.19 = private unnamed_addr constant [19 x i8] c"Ending simulation\0A\00", align 1 -@0 = private unnamed_addr constant [36 x i8] c"_Z14calculate_tempiPfS_S_iiiiffffff\00", align 1 -@1 = private constant [35409 x i8] c"P\EDU\BA\01\00\10\00@\8A\00\00\00\00\00\00\02\00\01\01@\00\00\00\A8v\00\00\00\00\00\00\00\00\00\00\00\00\00\00\07\00\01\00=\00\00\00\00\00\00\00\00\00\00\00\11\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\7FELF\02\01\013\07\00\00\00\00\00\00\00\02\00\BE\00e\00\00\00\00\00\00\00\00\00\00\00\00v\00\00\00\00\00\00\80s\00\00\00\00\00\00=\05=\00@\008\00\03\00@\00\0A\00\01\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00.text._Z14calculate_tempiPfS_S_iiiiffffff\00.nv.info._Z14calculate_tempiPfS_S_iiiiffffff\00.nv.shared._Z14calculate_tempiPfS_S_iiiiffffff\00.nv.global\00.nv.constant0._Z14calculate_tempiPfS_S_iiiiffffff\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00_Z14calculate_tempiPfS_S_iiiiffffff\00.text._Z14calculate_tempiPfS_S_iiiiffffff\00.nv.info._Z14calculate_tempiPfS_S_iiiiffffff\00.nv.shared._Z14calculate_tempiPfS_S_iiiiffffff\00.nv.global\00blockIdx\00threadIdx\00$_Z14calculate_tempiPfS_S_iiiiffffff$__cuda_sm20_rcp_rn_f32_slowpath\00$_Z14calculate_tempiPfS_S_iiiiffffff$__cuda_sm3x_div_rn_noftz_f32\00$_Z14calculate_tempiPfS_S_iiiiffffff$__cuda_sm3x_div_rn_noftz_f32_slowpath\00$___ZZ14calculate_tempiPfS_S_iiiiffffffE12temp_on_cuda__196\00$___ZZ14calculate_tempiPfS_S_iiiiffffffE13power_on_cuda__198\00$___ZZ14calculate_tempiPfS_S_iiiiffffffE6temp_t__200\00.nv.constant0._Z14calculate_tempiPfS_S_iiiiffffff\00_param\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00V\00\00\00\03\00\07\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\AD\00\00\00\03\00\08\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\DC\00\00\00\03\00\09\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\E7\00\00\00\01\00\09\00\00\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\F0\00\00\00\01\00\09\00\01\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\FA\00\00\00\22\00\07\00`]\00\00\00\00\00\00 \04\00\00\00\00\00\00?\01\00\00\22\00\07\00\80a\00\00\00\00\00\00`\01\00\00\00\00\00\00\81\01\00\00\22\00\07\00\E0b\00\00\00\00\00\00`\08\00\00\00\00\00\00z\02\00\00\03\00\06\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\002\00\00\00\12\10\07\00\00\00\00\00\00\00\00\00@k\00\00\00\00\00\00\04/\08\00\0A\00\00\00\17\00\00\00\04#\08\00\08\00\00\00\00\00\00\00\04\12\08\00\08\00\00\00\00\00\00\00\04\11\08\00\08\00\00\00\00\00\00\00\04#\08\00\07\00\00\00\00\00\00\00\04\12\08\00\07\00\00\00\00\00\00\00\04\11\08\00\07\00\00\00\00\00\00\00\04#\08\00\06\00\00\00\00\00\00\00\04\12\08\00\06\00\00\00\00\00\00\00\04\11\08\00\06\00\00\00\00\00\00\00\04#\08\00\0A\00\00\00\00\00\00\00\04\12\08\00\0A\00\00\00\C0\00\00\00\04\11\08\00\0A\00\00\00\C0\00\00\00\010\00\00\01*\00\00\04\0A\08\00\09\00\00\00@\01H\00\03\19H\00\04\17\0C\00\00\00\00\00\0D\00D\00\00\F0\11\00\04\17\0C\00\00\00\00\00\0C\00@\00\00\F0\11\00\04\17\0C\00\00\00\00\00\0B\00<\00\00\F0\11\00\04\17\0C\00\00\00\00\00\0A\008\00\00\F0\11\00\04\17\0C\00\00\00\00\00\09\004\00\00\F0\11\00\04\17\0C\00\00\00\00\00\08\000\00\00\F0\11\00\04\17\0C\00\00\00\00\00\07\00,\00\00\F0\11\00\04\17\0C\00\00\00\00\00\06\00(\00\00\F0\11\00\04\17\0C\00\00\00\00\00\05\00$\00\00\F0\11\00\04\17\0C\00\00\00\00\00\04\00 \00\00\F0\11\00\04\17\0C\00\00\00\00\00\03\00\18\00\00\F0!\00\04\17\0C\00\00\00\00\00\02\00\10\00\00\F0!\00\04\17\0C\00\00\00\00\00\01\00\08\00\00\F0!\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0\11\00\03\1B\FF\00\04\1D\08\00h\09\00\00\D8\09\00\00\04\1C\04\00X]\00\00\04\1E\04\00\B0\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\EF\1F\E0\FD\03!\00\D3rd<3>;\0A\0Amov.u2\00\1B,e\00b;\0Acvta\8D\00\04%\00\13,\\\00\22ld\C8\00\02\18\00nrd2, [\CE\00\1E])\00\1F1)\00\01a0];\0Ast#\00\81[%SP+0],,\00\0A\16\00\128\16\00\222;\B6\00\01\D8\00a1, 999(\00\02g\00\00\EF\00\18[\9E\01\03M\00\AF1;\0Aret;\0A\0A}\D6\01\1A\FE\02FuncGetAttributes\E1\01\0D#\00\0E\EC\01\0F+\00\06\0F\F7\01\1B\1F1\F7\01Q\1F1\F7\01!\0E\D9\00\0F\02\02\0F\0E8\01\0F\0D\02\8DhDevice\B4\00\0E\0E\02\0E$\00\0F\0F\02\00/32,\00\0B\1F1,\00\18\1F2<\02\13\1F2<\02\1F\1D4<\02\1F2<\02\0C\1F2<\02\13\01_\00\04;\02\0F\D9\00\07\1D]4\00\1F14\00\06\0Fp\02\10\0E\9A\01\0Fq\02\12(32q\02\0B\15\00!12\16\00\09\86\02\1F3\86\02\15\1F3\86\02#2Get\CB\00\0E}\02\05\1B\00\04\DA\00\0F\1C\02\13?3[8W\04.\0F\1B\02\0D\1F3W\04\19\04\B3\01\0D\D0\00\0F\AA\01\06\0F\05\04W\F0\04OccupancyMaxActiveBV\08\FE\03sPerMultiprocessor\9F\01\0F;\00\16\0EB\06\0FC\00%\0EJ\04\0FC\00\1E\1F2\86\00/\1F3\88\02\13O4[32\89\02\1C\1D3\89\02\1F4\89\02\0C\1F4\89\02\19\133\89\02\0F\F1\00\1E\0F\BC\04\00\0FK\00$/2]w\07\00\0FL\00$\0F\1F\05\01\0F\98\00%\0F\A7\07\1D\097\05\186M\05\04,\00\2224-\00\183\CF\03\1F2\CF\03\15\1F2\CF\03L\9FWithFlags\D8\03(\05D\00\0E\E1\03\0FL\00'\0F\EA\030\0CL\00\1F2\98\008\1F3\98\008\1F4H\04\13O5[40\EC\08.\0FH\04\0D\1F5\EC\08\1C\0F\F9\00+\1F]\9C\040\0D\9A\01\0F\A5\040\0D:\02\0F\AE\041\0D\DB\02\0F\B7\041\0D|\03\0F\C0\04I\08-\00\1F3$\0A6\F1\1Evisible .entry _Z14calculate_tempiPfS_S_iiiif\01\00\06\AC\04\00x\00\0F1\00\10\0E\99\04\00\DF\00\0F9\00\17\1F19\00%\1F29\00%/3,\E4\00$\1F49\00%\1F59\00%\1F69\00%\1679\00\1Ff9\00\1A\1F89\00%\1F99\00%/10:\00&\1F1:\00&\1F2:\00&\0F\9A\0A\14O6[19\9B\0A\16\A6pred %p<25U\06\8516 %rs<7\12\00\00\A8\00k%f<36>\8A\06'14%\00\00Z\00]fd<15\9F\06 88\A0\06P\09.shao\00\03\BB\00\124\BB\00\1FZ\01\01\0F0E12\18\00@_on_G\05o[1024]R\00,o3powerS\009\116\A4\00\14tK\00\0F\8F\07\08\1F6\8F\07\12\02v\01O6, [$\02\19\1D]B\00\1F5B\00\1B\1E2B\00\1F4B\00\1B\1E1B\00\1F3B\00\1B\1E0B\00\1F2B\00\1A\1E9A\00\1F1A\00\1A\1E8p\08\1F2I\01\1B\1F7B\00\00\0FI\01\1B\1F6B\00\00\0FI\01\1B\1F5B\00\00\0FI\01\1B\0F\CD\09\01\0F\84\00\1B\1F3f\09\00\0F\84\00\1B\0F\A8\09\01\0F\CE\01\1B\1F1\08\01\00\0FB\00\1B#0]\B4\03#to\8C\19\04\8A\00\144H\09\01\1F\00\0A\1C\00\115\1C\00\1F4;\00\05\146\B0\09\0F;\00\00\117\1C\00\1F6;\00\05\148\01\0A\0F;\00\00\119\1C\00\1A8\DA\09\03y\0E\0F3\0A\03\1A9\16\00\033\0A/d74\0A\03\1F54\0A\03\0Bx\0A\123E\00\1B2b\0A\134\89\00\1B4\17\00\02\\\00\122\\\00\15f\17\00\01\A1\00*f1\16\00\115r\00+f2\16\00\01q\00*f3\16\00\116p\00+f4\16\00\01o\00\1Bfn\00\126n\00\196\D4\0F\DA6, 1117782016\B5\00\137\FA\00\0A1\00\00\BA\01zctaid.x/\00\139/\00\197/\00\158/\00\1By/\00\03A\01\198/\00\00\DD\01\1Et\\\00#10X\01\199\BB\0B\130.\00\1Cy.\00\02o\01!30\18\03\02!\05%7,\1C\01\07\16\00%8,\8A\01\83;\0Adiv.rn\1A\00\229,5\00:%f8G\01\127\89\01\179F\00510,\BB\01V;\0ArcpG\00!11\E2\01\1A0D\00\128\B7\01(11E\00%2,\EA\01\0CE\00\113.\00\0B\12\02\128\E6\01(13E\00%4,\19\02\0CE\00!5,!\00\0BE\00\03\83\02\115E\00\03X\01$1,V\03S;\0Ashl\10\09332,\1D\00\09\E6\15A33, ;\02Bub.s\12\00#4,\18\00\005\00\0C\9C\01\02\95\03(34q\00\1F5q\00\03#6,\1D\00\191_\00\177_\00\0B\B4\02\131\07\18(37_\00&8,}\00\08\18\00%9,\8C\02\83;\0Amul.lod\00340,9\00\00'\00\074\00541,\D3\03\08\95\00542,7\00\1C4\9B\18\03r\04(42I\00&3,\B3\00\08\18\00%4,P\03\0C\95\00#5,9\00\00'\00\084\00%6,\7F\04\09\95\00&7,7\00\0C*\01\122S\03\184*\01648,\B3\00T;\0AaddJ\00#9,\1F\00,15H\00\02m\03\184&\01650,f\00\08H\00351,\1F\00\0EH\00\02\19\02(51H\00\192\90\00\06\18\00&3,\EB\03\09`\00#4,7\00\00%\00\0Bb\00\03\CB\05\185;\01)55\AA\00\06\18\00&6,{\04\09\C2\00#7,7\00\00%\00\0Cb\00\02\E9\01\185T\01658,\80\00\0B0\00\03/\06(580\00&9,N\00\0C0\00\02l\01\185l\01\146l\01\193H\02661,e\00\0BI\02362,9\00\00(\00\085\00\05X\01\1A4X\01664,8\00\0CX\01\02\C7\06(r6X\01)65\7F\00rsetp.ltN\003p1,\22\00\F2\0C0;\0A@%p1 bra LBB6_5;\0Abra.uni\10\0021;\0A\08\00\17:[\00\196[\00\06\18\00\04\E3\05\1A3\BC\02368,\1E\00#-1\8A\00\14g\8A\00#2,Q\00\00'\00\01\8D\00\1F2\8D\00\07\132\8D\00\182\8D\00\1992\01\0A\E8\00#3,\22\00\02\E8\00\1F3[\00\07\133[\00\173[\00)70[\00\06\18\00\181\F1\01\06\E8\00372,\1E\00\0E\E8\00#4,Q\00\00'\00\01\8D\00\1F4\8D\00\07\134\8D\00\124\8D\00\03\93\0A\05\8E\00\03\8A\03\02u\00&d1h\02\148\BE\05\032\00$2,!\00\132\A8\00\03\19\00$3,R\00\01'\00\08\C0\06\01\DC\01\00#\00\0Ae\00\194\F0\03\08e\00$5,!\00\176\96\0E rdJ\00\0F~\0F \03s\0A\02'\0F\05L\00\02\AF\0A*16\C7\00(8,\1D\00\195\B1\00\199?\04\07\B1\00\132\F6\00\1C9\16\01\00\B3\0B\06V\00\1822\08\00\1D\00\02\A7\07'6;\93\01\142\1E\05\198-\01/23\92\01\05\00\A2\0D\03!\00\0B|\00$5,Q\00\01'\00\08\92\01\227,\82\00\1A5e\00\196\92\01\07y\01\00\22\0A\0F\A5\10!\0Fz\01\02\132\0E\01\1A7\C8\00$9,\84\00\0A\93\01\01\12\0A\066\00\189\B2\00/31\93\01\05\02\D9\08-d3\A9\02\02d\08\04V\00)32\93\01\2233\93\01\1B7>\03\135>\03\D85:\0Abar.sync 0\BF\03\193\D4\06\0A\A8\03#5,\22\00!-1\A6\03\165\A6\03\0Ch\00\136h\00\1863\04\195\\\00Tneg.sN\18\00\1E\00\09\9E\1F#39:\17\09W\00\138W\00'7:,\00!74\C4\04\0D=\00/74>\00\04*8:9\1C\012\00\0B7\06\155\8E\17\06\1C\01\05V\02\1A2\F3\04\0F\DB\05\04\117\E9\01\187\DB\05#leK\01#6,Q\00\00'\00\01M\01\166M\01+10\B9\00\139\B9\00\179N\01/80\8E\00\03(81\8E\00\06o\09382,\1E\00\00<\00\08\A8\00#3,\1F\00)14F\01\03g\0A\0C\F5\06\05\F6\06\190\85\01\1296\09\0D@\00/79A\00\06\181A\00\224,4\00\0C\88\01\03\9E\0A\08\E2\00\193\16\09\0A\A4\02\01\F0\04\168\A4\02\177W\01\0D\CB\00\044\07\1815\07\1A8u\09\06\A7\02\01\FF\02\1B8\E4\00\023\0A\0CY\00\04\A5\06(13\E3\00\1D8\A9\02\02?\00/84@\00\06\184@\00\01/\02\1D1\C1\0B\02g\0F(r6#\01\06\AB\02\09U\0C/87\9E\07\03\118\AB\02\1F8\AB\02\02#8,Q\00\00'\00\01T\01\178T\01\1C6\BB\00\04b\04'15T\01/90\90\00\03\199.\08\06\AD\02392,\1E\00\00<\00\09U\03\01\13\00\0F\AD\02\00\02`\08\0C\8E\00\147\8E\00\196\89\01\0F\AD\02\00\02\1F\03\1F9A\00\06\09s\04\228,4\00\0C\8A\01\02\02\0B\081\0B\1F9\0D\0C\05394,\1F\00\1D-\D1\01\02\B3\0A\189\F5\02/95H\00\05#6,\1F\00\0CG\00\03#\11\189\19\02/97:\0C\04398,\1F\00\0D\8F\00\02T\10)r9\D7\00\1F9H\00\04C100, \00\0C\90\00\138P%\180q\06%10\FA\06\196\1B\02F102,\8C\05\04\E7\03\04\95\02$9,<\00\01*\00\01\97\02\179\97\02\0D\C8\01\04\FC\05(18\97\02\09b\00\08\0E\02\01\14\05\0CE\00$20E\00\08\89\05\07|\0A\1F8F\00\01/10G\00\05\192B\05\01\8D\11\005\00\0F\0E\02\03)11\1D\01\05W\02*726\01&4,:\05\0C\CB\03\02\D5\0A#10\98\00\124\E1\0C\1608\01,22\AC\00\04\AD\05\182\E5\0C)12e\00\08\F3\00\02\C7\05\0DG\00\04C\0C\182j\05\1A1\C5\00\0EG\00\1F3G\00\06\09X\05\131<\06\1F4\01\03\03)14!\01\05I\03*76:\01&6,Q\05\0Cp\02\03\9D\01\2205\07\00\126:\01\07\1C\0E\1D2\90\0D\04\C4\05(24\F3\00\195e\00\08\F3\00\02[\00\0DG\00\04J\09\182P\05)16\C5\00\0EG\00\1F6G\00\06\09\09\05\02\1F\0C?145\F3\03\03)17!\01\05<\04*80:\01&8,\01\05\0Dt\02$2,=\00\01+\00\02:\01\07\C9\0E,28\AC\00\04t\05(27\F3\00\0Ae\00\08\F3\00\02\18\07\0DG\00\046\09\182\F3\03)19\C5\00\0EG\00\1F9G\00\06\1997\0A\02K\03/46\E5\04\03\1A2\E8\07\120g\0F\0A*\00\03\D8\03\1C0r\00\143 \04(30\B9\00\06h\04\1A8)\05\05\E5\0E\0F\B7\02\00$3,;\00\01)\00\02}\01\07\EB\0F,46}\00\04\F1\03\143\9E\09\02\93\1E\151\C1\00\148\86\00\02\ED\06\19s\98\04)12\18\06\07\A5\00\193\BE\00\071\06\03\BE\03\00!\00\1D1\8C\11$4,V\00\01*\00\02\C0\00\07\1E\10,40\C0\00\04\D4\09\193j\04/15\98\00\05\196\98\00\08\8F\05\127e\08\06\96\08\121\FF\0E\1316\03,16}\0A\02\F0\0F\141\07\00\03\EC\02\1F5\AF\00\09\05\\\11\193\AF\00\199\EE\07\07y\05\1F0G\01\05\02\A2\0F\01!\00\0EG\01$6,V\00\01*\00\02\98\00\1F6\98\00\09\04\BE\04\193\BE\04/22\98\00\05\1F3G\01\05-24G\01\02\D1\0F#12,\06\1D3G\01$7,m\00\01.\00\02\AF\00\1F7\AF\00\09\04v\0A\193&\05/26\F6\01\04.27\EC\05\06\01\14$8,=\00\01+\00\02\7F\00\1F8\7F\00\09\04^\05)36\7F\00\1F8\7F\00\05\1E91\05\06\FE\00$9,=\00\01+\00\02\7F\00\1F9\7F\00\09\041\05\1931\05/30\AD\01\04.31Z\09\05\FE\00\03\D9\04\133\07\00\02,\02/20\7F\00\09\04\\\09\193i\05/32\7F\00\05\1E3\A3\08\06\F4\14$1,=\00\01+\00\02\7F\00\1F1\7F\00\09\04\A1\05)39\B2\04\00$\0C\0F\B2\04\03\192z\11\1F4,\12\05/35\A5\137\02.\1A:d35\DC\11$7,\83\00\0B+\12(8,6\00\197\B1\00\0F\BE\13\06\02q\0F-d3\BE\13\02g\0E\04V\00(40B\13\128B\13\2241f\1F\00L$\03\1B\00\12d\1E\1C\188.\00%9,h\1C\0D/\00!2,\22\00\09D\01/42p\138\02P\0B+d4\DD\00'4,\1D\00*37\1D\00\03\07 \1E4\FA\00#20\FA\00\1E5\CB\00\01\D3\1C(20w\01)46\AD\0A\08w\01\02\C6\01\1D4\F1\13\194\C6\01\1B4\9A\00'9,$\00\0A\9A\00\131\9A\00\199\D6\14\1A5i\0C\07;\15\02\B9\1A\01!\00\0A\82\00(52\82\00*51\1D\00'3,$\00\0A\82\00\122\82\00#53e\06\05\A9\1D\00\E9\05\02\A2\00\00&\00\0D9\01\114\18\00a3;\0Afma5\00\02\9A,P\02J0dC0\01\00\01\1A\00\08\D4\15\01\AA#\03C\1E\0D`\00!6,\22\00\0C`\00&7,f\00\01#\00)d3:\01\194\F5\09\08:\01$5,!\00\0A3\03(563\03(55\A1\00\135\1D\01\0Al\18)57\94\0B\08e\00\03E\00\1D7e\00\199e\00\08j\03#26e\00\1D9\82\01#7,\85\00\00&\00\0D\22\01\118\18\00\1C7\22\01\1F9\82\01\0C\09|\00%8,\0A \0E\15\04\010\00,28a\00\02L \04h\00\02U\00\187U\00\05;\04\04\EB\11\05\D2\00\01l\06\01!\00\0Bo\04\05\A9\06#88&\1C\064\00%2,:\00.31\A6\00\02\1F\00\03o\01\07\A6\00'3,\AD\00-12\C6\00%4,\CB\04\05-\00\02^\00\02{\00\03)\00\133\1B\00\194\EE\04\00\B5\1C\0F\B0(\19\0F\E6\04\02\126\CD\03)60+\02\02\DE\1C\04\1D\00\0B\E6\04\00\1A\1D\06$\00\1A4\9E\19\126\0B\18\1C3M\0F\144g\0C/40\0D\18\04?134s\0C\04\09\14!\07\FD\11\02i\04\01\1F\00\05I\14\14n\8D\0C\03\D0\09\133\DC\0F\02D\0F\172\0A\0E,42\A3\00\04\8D\0C;41:\1A\00\04r\09\134\E7\0B\108\D9\07\06\85\0A\01$\13$ndQ+\01s\00\17sb\0C\22eq\1B\00\11p\D0\04\01!\00\00\EA\08\10!\11\00\07\91\00\0C\99\15$43w\00'3:\FB\03\1F6\02\1C\06\00H\1E\03!\00\0A\02\1C/66+\021\03\AA&\0A\F5\05\02S\1E\05\1D\00\09\FB\1B\1F6=\08\06\02\80\1D-d6=\08\00\B4\1D\06V\00\187'\06\02 \02\00#\00\0A:\09/72:\097\2273x\00\0B\95\00'4,\1D\00*65\1D\00(5,$\00\0A\0F\03\2275\0F\03\0D\DB\01\05Y\1E\1A4\0F\03\0A&\00\04i\0C\194i\0C/37\97\0D\05\02\DC\05\01!\00\1F1\01\10\04/38\01\10\05'46\DD\02\1F5\DD\02\05#6,\1F\00\0E\DD\02\02\F8\02\166\DD\02\08;\0F\0Cw\00$47w\00\187\DD\02/76\DD\02\05\00\DB\1A\03!\00\0A\DD\02/78\DD\020\2279^\00\198\DC\01\00\F7\1A\07\1D\00\09\1A\0B\1F8E\1D\06\02\FD\1A-d8E\1D\00*\1A\06V\00\188\EE\1F\133\E7\07883]\D8\1E\00\C5\19\04\93)\09}\00\1F5\D9\1E\05\02<\19\01!\00\0B}\00$7,R\00\01'\00\09\A8\02\2287\A8\02\0D\00!\04n\0D\C048:\0Aret;\0A\0A}\0A\00\00\00", section ".nv_fatbin", align 8 -@__cuda_fatbin_wrapper = internal constant { i32, i32, i8*, i8* } { i32 1180844977, i32 1, i8* getelementptr inbounds ([35409 x i8], [35409 x i8]* @1, i64 0, i64 0), i8* null }, section ".nvFatBinSegment", align 8 -@__cuda_gpubin_handle = internal global i8** null, align 8 -@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* bitcast (void (i8*)* @__cuda_module_ctor to void ()*), i8* null }] - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z5fatalPc(i8* %s) #0 { -entry: - %s.addr = alloca i8*, align 8 - store i8* %s, i8** %s.addr, align 8 - %0 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %1 = load i8*, i8** %s.addr, align 8 - %call = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %0, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0), i8* %1) - ret void -} - -declare dso_local i32 @fprintf(%struct._IO_FILE*, i8*, ...) #1 - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z11writeoutputPfiiPc(float* %vect, i32 %grid_rows, i32 %grid_cols, i8* %file) #0 { -entry: - %vect.addr = alloca float*, align 8 - %grid_rows.addr = alloca i32, align 4 - %grid_cols.addr = alloca i32, align 4 - %file.addr = alloca i8*, align 8 - %i = alloca i32, align 4 - %j = alloca i32, align 4 - %index = alloca i32, align 4 - %fp = alloca %struct._IO_FILE*, align 8 - %str = alloca [256 x i8], align 16 - store float* %vect, float** %vect.addr, align 8 - store i32 %grid_rows, i32* %grid_rows.addr, align 4 - store i32 %grid_cols, i32* %grid_cols.addr, align 4 - store i8* %file, i8** %file.addr, align 8 - store i32 0, i32* %index, align 4 - %0 = load i8*, i8** %file.addr, align 8 - %call = call %struct._IO_FILE* @fopen(i8* %0, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i64 0, i64 0)) - store %struct._IO_FILE* %call, %struct._IO_FILE** %fp, align 8 - %cmp = icmp eq %struct._IO_FILE* %call, null - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([25 x i8], [25 x i8]* @.str.2, i64 0, i64 0)) - br label %if.end - -if.end: ; preds = %if.then, %entry - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc10, %if.end - %1 = load i32, i32* %i, align 4 - %2 = load i32, i32* %grid_rows.addr, align 4 - %cmp2 = icmp slt i32 %1, %2 - br i1 %cmp2, label %for.body, label %for.end12 - -for.body: ; preds = %for.cond - store i32 0, i32* %j, align 4 - br label %for.cond3 - -for.cond3: ; preds = %for.inc, %for.body - %3 = load i32, i32* %j, align 4 - %4 = load i32, i32* %grid_cols.addr, align 4 - %cmp4 = icmp slt i32 %3, %4 - br i1 %cmp4, label %for.body5, label %for.end - -for.body5: ; preds = %for.cond3 - %arraydecay = getelementptr inbounds [256 x i8], [256 x i8]* %str, i64 0, i64 0 - %5 = load i32, i32* %index, align 4 - %6 = load float*, float** %vect.addr, align 8 - %7 = load i32, i32* %i, align 4 - %8 = load i32, i32* %grid_cols.addr, align 4 - %mul = mul nsw i32 %7, %8 - %9 = load i32, i32* %j, align 4 - %add = add nsw i32 %mul, %9 - %idxprom = sext i32 %add to i64 - %arrayidx = getelementptr inbounds float, float* %6, i64 %idxprom - %10 = load float, float* %arrayidx, align 4 - %conv = fpext float %10 to double - %call6 = call i32 (i8*, i8*, ...) @sprintf(i8* %arraydecay, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.3, i64 0, i64 0), i32 %5, double %conv) #8 - %arraydecay7 = getelementptr inbounds [256 x i8], [256 x i8]* %str, i64 0, i64 0 - %11 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8 - %call8 = call i32 @fputs(i8* %arraydecay7, %struct._IO_FILE* %11) - %12 = load i32, i32* %index, align 4 - %inc = add nsw i32 %12, 1 - store i32 %inc, i32* %index, align 4 - br label %for.inc - -for.inc: ; preds = %for.body5 - %13 = load i32, i32* %j, align 4 - %inc9 = add nsw i32 %13, 1 - store i32 %inc9, i32* %j, align 4 - br label %for.cond3 - -for.end: ; preds = %for.cond3 - br label %for.inc10 - -for.inc10: ; preds = %for.end - %14 = load i32, i32* %i, align 4 - %inc11 = add nsw i32 %14, 1 - store i32 %inc11, i32* %i, align 4 - br label %for.cond - -for.end12: ; preds = %for.cond - %15 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8 - %call13 = call i32 @fclose(%struct._IO_FILE* %15) - ret void -} - -declare dso_local %struct._IO_FILE* @fopen(i8*, i8*) #1 - -declare dso_local i32 @printf(i8*, ...) #1 - -; Function Attrs: nounwind -declare dso_local i32 @sprintf(i8*, i8*, ...) #2 - -declare dso_local i32 @fputs(i8*, %struct._IO_FILE*) #1 - -declare dso_local i32 @fclose(%struct._IO_FILE*) #1 - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z9readinputPfiiPc(float* %vect, i32 %grid_rows, i32 %grid_cols, i8* %file) #0 { -entry: - %vect.addr = alloca float*, align 8 - %grid_rows.addr = alloca i32, align 4 - %grid_cols.addr = alloca i32, align 4 - %file.addr = alloca i8*, align 8 - %i = alloca i32, align 4 - %j = alloca i32, align 4 - %fp = alloca %struct._IO_FILE*, align 8 - %str = alloca [256 x i8], align 16 - %val = alloca float, align 4 - store float* %vect, float** %vect.addr, align 8 - store i32 %grid_rows, i32* %grid_rows.addr, align 4 - store i32 %grid_cols, i32* %grid_cols.addr, align 4 - store i8* %file, i8** %file.addr, align 8 - %0 = load i8*, i8** %file.addr, align 8 - %call = call %struct._IO_FILE* @fopen(i8* %0, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.4, i64 0, i64 0)) - store %struct._IO_FILE* %call, %struct._IO_FILE** %fp, align 8 - %cmp = icmp eq %struct._IO_FILE* %call, null - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([25 x i8], [25 x i8]* @.str.2, i64 0, i64 0)) - br label %if.end - -if.end: ; preds = %if.then, %entry - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc16, %if.end - %1 = load i32, i32* %i, align 4 - %2 = load i32, i32* %grid_rows.addr, align 4 - %sub = sub nsw i32 %2, 1 - %cmp2 = icmp sle i32 %1, %sub - br i1 %cmp2, label %for.body, label %for.end18 - -for.body: ; preds = %for.cond - store i32 0, i32* %j, align 4 - br label %for.cond3 - -for.cond3: ; preds = %for.inc, %for.body - %3 = load i32, i32* %j, align 4 - %4 = load i32, i32* %grid_cols.addr, align 4 - %sub4 = sub nsw i32 %4, 1 - %cmp5 = icmp sle i32 %3, %sub4 - br i1 %cmp5, label %for.body6, label %for.end - -for.body6: ; preds = %for.cond3 - %arraydecay = getelementptr inbounds [256 x i8], [256 x i8]* %str, i64 0, i64 0 - %5 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8 - %call7 = call i8* @fgets(i8* %arraydecay, i32 256, %struct._IO_FILE* %5) - %6 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8 - %call8 = call i32 @feof(%struct._IO_FILE* %6) #8 - %tobool = icmp ne i32 %call8, 0 - br i1 %tobool, label %if.then9, label %if.end10 - -if.then9: ; preds = %for.body6 - call void @_Z5fatalPc(i8* getelementptr inbounds ([25 x i8], [25 x i8]* @.str.5, i64 0, i64 0)) - br label %if.end10 - -if.end10: ; preds = %if.then9, %for.body6 - %arraydecay11 = getelementptr inbounds [256 x i8], [256 x i8]* %str, i64 0, i64 0 - %call12 = call i32 (i8*, i8*, ...) @sscanf(i8* %arraydecay11, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.6, i64 0, i64 0), float* %val) #8 - %cmp13 = icmp ne i32 %call12, 1 - br i1 %cmp13, label %if.then14, label %if.end15 - -if.then14: ; preds = %if.end10 - call void @_Z5fatalPc(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str.7, i64 0, i64 0)) - br label %if.end15 - -if.end15: ; preds = %if.then14, %if.end10 - %7 = load float, float* %val, align 4 - %8 = load float*, float** %vect.addr, align 8 - %9 = load i32, i32* %i, align 4 - %10 = load i32, i32* %grid_cols.addr, align 4 - %mul = mul nsw i32 %9, %10 - %11 = load i32, i32* %j, align 4 - %add = add nsw i32 %mul, %11 - %idxprom = sext i32 %add to i64 - %arrayidx = getelementptr inbounds float, float* %8, i64 %idxprom - store float %7, float* %arrayidx, align 4 - br label %for.inc - -for.inc: ; preds = %if.end15 - %12 = load i32, i32* %j, align 4 - %inc = add nsw i32 %12, 1 - store i32 %inc, i32* %j, align 4 - br label %for.cond3 - -for.end: ; preds = %for.cond3 - br label %for.inc16 - -for.inc16: ; preds = %for.end - %13 = load i32, i32* %i, align 4 - %inc17 = add nsw i32 %13, 1 - store i32 %inc17, i32* %i, align 4 - br label %for.cond - -for.end18: ; preds = %for.cond - %14 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8 - %call19 = call i32 @fclose(%struct._IO_FILE* %14) - ret void -} - -declare dso_local i8* @fgets(i8*, i32, %struct._IO_FILE*) #1 - -; Function Attrs: nounwind -declare dso_local i32 @feof(%struct._IO_FILE*) #2 - -; Function Attrs: nounwind -declare dso_local i32 @sscanf(i8*, i8*, ...) #2 - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z14calculate_tempiPfS_S_iiiiffffff(i32 %iteration, float* %power, float* %temp_src, float* %temp_dst, i32 %grid_cols, i32 %grid_rows, i32 %border_cols, i32 %border_rows, float %Cap, float %Rx, float %Ry, float %Rz, float %step, float %time_elapsed) #0 { -entry: - %iteration.addr = alloca i32, align 4 - %power.addr = alloca float*, align 8 - %temp_src.addr = alloca float*, align 8 - %temp_dst.addr = alloca float*, align 8 - %grid_cols.addr = alloca i32, align 4 - %grid_rows.addr = alloca i32, align 4 - %border_cols.addr = alloca i32, align 4 - %border_rows.addr = alloca i32, align 4 - %Cap.addr = alloca float, align 4 - %Rx.addr = alloca float, align 4 - %Ry.addr = alloca float, align 4 - %Rz.addr = alloca float, align 4 - %step.addr = alloca float, align 4 - %time_elapsed.addr = alloca float, align 4 - %grid_dim = alloca %struct.dim3, align 8 - %block_dim = alloca %struct.dim3, align 8 - %shmem_size = alloca i64, align 8 - %stream = alloca i8*, align 8 - %grid_dim.coerce = alloca { i64, i32 }, align 8 - %block_dim.coerce = alloca { i64, i32 }, align 8 - store i32 %iteration, i32* %iteration.addr, align 4 - store float* %power, float** %power.addr, align 8 - store float* %temp_src, float** %temp_src.addr, align 8 - store float* %temp_dst, float** %temp_dst.addr, align 8 - store i32 %grid_cols, i32* %grid_cols.addr, align 4 - store i32 %grid_rows, i32* %grid_rows.addr, align 4 - store i32 %border_cols, i32* %border_cols.addr, align 4 - store i32 %border_rows, i32* %border_rows.addr, align 4 - store float %Cap, float* %Cap.addr, align 4 - store float %Rx, float* %Rx.addr, align 4 - store float %Ry, float* %Ry.addr, align 4 - store float %Rz, float* %Rz.addr, align 4 - store float %step, float* %step.addr, align 4 - store float %time_elapsed, float* %time_elapsed.addr, align 4 - %kernel_args = alloca i8*, i64 14, align 16 - %0 = bitcast i32* %iteration.addr to i8* - %1 = getelementptr i8*, i8** %kernel_args, i32 0 - store i8* %0, i8** %1 - %2 = bitcast float** %power.addr to i8* - %3 = getelementptr i8*, i8** %kernel_args, i32 1 - store i8* %2, i8** %3 - %4 = bitcast float** %temp_src.addr to i8* - %5 = getelementptr i8*, i8** %kernel_args, i32 2 - store i8* %4, i8** %5 - %6 = bitcast float** %temp_dst.addr to i8* - %7 = getelementptr i8*, i8** %kernel_args, i32 3 - store i8* %6, i8** %7 - %8 = bitcast i32* %grid_cols.addr to i8* - %9 = getelementptr i8*, i8** %kernel_args, i32 4 - store i8* %8, i8** %9 - %10 = bitcast i32* %grid_rows.addr to i8* - %11 = getelementptr i8*, i8** %kernel_args, i32 5 - store i8* %10, i8** %11 - %12 = bitcast i32* %border_cols.addr to i8* - %13 = getelementptr i8*, i8** %kernel_args, i32 6 - store i8* %12, i8** %13 - %14 = bitcast i32* %border_rows.addr to i8* - %15 = getelementptr i8*, i8** %kernel_args, i32 7 - store i8* %14, i8** %15 - %16 = bitcast float* %Cap.addr to i8* - %17 = getelementptr i8*, i8** %kernel_args, i32 8 - store i8* %16, i8** %17 - %18 = bitcast float* %Rx.addr to i8* - %19 = getelementptr i8*, i8** %kernel_args, i32 9 - store i8* %18, i8** %19 - %20 = bitcast float* %Ry.addr to i8* - %21 = getelementptr i8*, i8** %kernel_args, i32 10 - store i8* %20, i8** %21 - %22 = bitcast float* %Rz.addr to i8* - %23 = getelementptr i8*, i8** %kernel_args, i32 11 - store i8* %22, i8** %23 - %24 = bitcast float* %step.addr to i8* - %25 = getelementptr i8*, i8** %kernel_args, i32 12 - store i8* %24, i8** %25 - %26 = bitcast float* %time_elapsed.addr to i8* - %27 = getelementptr i8*, i8** %kernel_args, i32 13 - store i8* %26, i8** %27 - %28 = call i32 @__cudaPopCallConfiguration(%struct.dim3* %grid_dim, %struct.dim3* %block_dim, i64* %shmem_size, i8** %stream) - %29 = load i64, i64* %shmem_size, align 8 - %30 = load i8*, i8** %stream, align 8 - %31 = bitcast { i64, i32 }* %grid_dim.coerce to i8* - %32 = bitcast %struct.dim3* %grid_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %31, i8* align 8 %32, i64 12, i1 false) - %33 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 0 - %34 = load i64, i64* %33, align 8 - %35 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 1 - %36 = load i32, i32* %35, align 8 - %37 = bitcast { i64, i32 }* %block_dim.coerce to i8* - %38 = bitcast %struct.dim3* %block_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %37, i8* align 8 %38, i64 12, i1 false) - %39 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 0 - %40 = load i64, i64* %39, align 8 - %41 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 1 - %42 = load i32, i32* %41, align 8 - %43 = bitcast i8* %30 to %struct.CUstream_st* - %call = call i32 @cudaLaunchKernel(i8* bitcast (void (i32, float*, float*, float*, i32, i32, i32, i32, float, float, float, float, float, float)* @_Z14calculate_tempiPfS_S_iiiiffffff to i8*), i64 %34, i32 %36, i64 %40, i32 %42, i8** %kernel_args, i64 %29, %struct.CUstream_st* %43) - br label %setup.end - -setup.end: ; preds = %entry - ret void -} - -declare dso_local i32 @__cudaPopCallConfiguration(%struct.dim3*, %struct.dim3*, i64*, i8**) - -declare dso_local i32 @cudaLaunchKernel(i8*, i64, i32, i64, i32, i8**, i64, %struct.CUstream_st*) - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #3 - -; Function Attrs: noinline optnone uwtable -define dso_local i32 @_Z17compute_tran_tempPfPS_iiiiiiii(float* %MatrixPower, float** %MatrixTemp, i32 %col, i32 %row, i32 %total_iterations, i32 %num_iterations, i32 %blockCols, i32 %blockRows, i32 %borderCols, i32 %borderRows) #0 { -entry: - %MatrixPower.addr = alloca float*, align 8 - %MatrixTemp.addr = alloca float**, align 8 - %col.addr = alloca i32, align 4 - %row.addr = alloca i32, align 4 - %total_iterations.addr = alloca i32, align 4 - %num_iterations.addr = alloca i32, align 4 - %blockCols.addr = alloca i32, align 4 - %blockRows.addr = alloca i32, align 4 - %borderCols.addr = alloca i32, align 4 - %borderRows.addr = alloca i32, align 4 - %dimBlock = alloca %struct.dim3, align 4 - %dimGrid = alloca %struct.dim3, align 4 - %grid_height = alloca float, align 4 - %grid_width = alloca float, align 4 - %Cap = alloca float, align 4 - %Rx = alloca float, align 4 - %Ry = alloca float, align 4 - %Rz = alloca float, align 4 - %max_slope = alloca float, align 4 - %step = alloca float, align 4 - %t = alloca float, align 4 - %time_elapsed = alloca float, align 4 - %src = alloca i32, align 4 - %dst = alloca i32, align 4 - %temp = alloca i32, align 4 - %agg.tmp = alloca %struct.dim3, align 4 - %agg.tmp35 = alloca %struct.dim3, align 4 - %agg.tmp.coerce = alloca { i64, i32 }, align 4 - %agg.tmp35.coerce = alloca { i64, i32 }, align 4 - store float* %MatrixPower, float** %MatrixPower.addr, align 8 - store float** %MatrixTemp, float*** %MatrixTemp.addr, align 8 - store i32 %col, i32* %col.addr, align 4 - store i32 %row, i32* %row.addr, align 4 - store i32 %total_iterations, i32* %total_iterations.addr, align 4 - store i32 %num_iterations, i32* %num_iterations.addr, align 4 - store i32 %blockCols, i32* %blockCols.addr, align 4 - store i32 %blockRows, i32* %blockRows.addr, align 4 - store i32 %borderCols, i32* %borderCols.addr, align 4 - store i32 %borderRows, i32* %borderRows.addr, align 4 - call void @_ZN4dim3C2Ejjj(%struct.dim3* %dimBlock, i32 16, i32 16, i32 1) - %0 = load i32, i32* %blockCols.addr, align 4 - %1 = load i32, i32* %blockRows.addr, align 4 - call void @_ZN4dim3C2Ejjj(%struct.dim3* %dimGrid, i32 %0, i32 %1, i32 1) - %2 = load float, float* @chip_height, align 4 - %3 = load i32, i32* %row.addr, align 4 - %conv = sitofp i32 %3 to float - %div = fdiv float %2, %conv - store float %div, float* %grid_height, align 4 - %4 = load float, float* @chip_width, align 4 - %5 = load i32, i32* %col.addr, align 4 - %conv1 = sitofp i32 %5 to float - %div2 = fdiv float %4, %conv1 - store float %div2, float* %grid_width, align 4 - %6 = load float, float* @t_chip, align 4 - %conv3 = fpext float %6 to double - %mul = fmul contract double 8.750000e+05, %conv3 - %7 = load float, float* %grid_width, align 4 - %conv4 = fpext float %7 to double - %mul5 = fmul contract double %mul, %conv4 - %8 = load float, float* %grid_height, align 4 - %conv6 = fpext float %8 to double - %mul7 = fmul contract double %mul5, %conv6 - %conv8 = fptrunc double %mul7 to float - store float %conv8, float* %Cap, align 4 - %9 = load float, float* %grid_width, align 4 - %conv9 = fpext float %9 to double - %10 = load float, float* @t_chip, align 4 - %conv10 = fpext float %10 to double - %mul11 = fmul contract double 2.000000e+02, %conv10 - %11 = load float, float* %grid_height, align 4 - %conv12 = fpext float %11 to double - %mul13 = fmul contract double %mul11, %conv12 - %div14 = fdiv double %conv9, %mul13 - %conv15 = fptrunc double %div14 to float - store float %conv15, float* %Rx, align 4 - %12 = load float, float* %grid_height, align 4 - %conv16 = fpext float %12 to double - %13 = load float, float* @t_chip, align 4 - %conv17 = fpext float %13 to double - %mul18 = fmul contract double 2.000000e+02, %conv17 - %14 = load float, float* %grid_width, align 4 - %conv19 = fpext float %14 to double - %mul20 = fmul contract double %mul18, %conv19 - %div21 = fdiv double %conv16, %mul20 - %conv22 = fptrunc double %div21 to float - store float %conv22, float* %Ry, align 4 - %15 = load float, float* @t_chip, align 4 - %16 = load float, float* %grid_height, align 4 - %mul23 = fmul contract float 1.000000e+02, %16 - %17 = load float, float* %grid_width, align 4 - %mul24 = fmul contract float %mul23, %17 - %div25 = fdiv float %15, %mul24 - store float %div25, float* %Rz, align 4 - %18 = load float, float* @t_chip, align 4 - %conv26 = fpext float %18 to double - %mul27 = fmul contract double 5.000000e-01, %conv26 - %mul28 = fmul contract double %mul27, 1.750000e+06 - %div29 = fdiv double 3.000000e+06, %mul28 - %conv30 = fptrunc double %div29 to float - store float %conv30, float* %max_slope, align 4 - %19 = load float, float* %max_slope, align 4 - %conv31 = fpext float %19 to double - %div32 = fdiv double 1.000000e-03, %conv31 - %conv33 = fptrunc double %div32 to float - store float %conv33, float* %step, align 4 - store float 0x3F50624DE0000000, float* %time_elapsed, align 4 - store i32 1, i32* %src, align 4 - store i32 0, i32* %dst, align 4 - store float 0.000000e+00, float* %t, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %20 = load float, float* %t, align 4 - %21 = load i32, i32* %total_iterations.addr, align 4 - %conv34 = sitofp i32 %21 to float - %cmp = fcmp olt float %20, %conv34 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %22 = load i32, i32* %src, align 4 - store i32 %22, i32* %temp, align 4 - %23 = load i32, i32* %dst, align 4 - store i32 %23, i32* %src, align 4 - %24 = load i32, i32* %temp, align 4 - store i32 %24, i32* %dst, align 4 - %25 = bitcast %struct.dim3* %agg.tmp to i8* - %26 = bitcast %struct.dim3* %dimGrid to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %25, i8* align 4 %26, i64 12, i1 false) - %27 = bitcast %struct.dim3* %agg.tmp35 to i8* - %28 = bitcast %struct.dim3* %dimBlock to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %27, i8* align 4 %28, i64 12, i1 false) - %29 = bitcast { i64, i32 }* %agg.tmp.coerce to i8* - %30 = bitcast %struct.dim3* %agg.tmp to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %29, i8* align 4 %30, i64 12, i1 false) - %31 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 0 - %32 = load i64, i64* %31, align 4 - %33 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 1 - %34 = load i32, i32* %33, align 4 - %35 = bitcast { i64, i32 }* %agg.tmp35.coerce to i8* - %36 = bitcast %struct.dim3* %agg.tmp35 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %35, i8* align 4 %36, i64 12, i1 false) - %37 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp35.coerce, i32 0, i32 0 - %38 = load i64, i64* %37, align 4 - %39 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp35.coerce, i32 0, i32 1 - %40 = load i32, i32* %39, align 4 - %call = call i32 @__cudaPushCallConfiguration(i64 %32, i32 %34, i64 %38, i32 %40, i64 0, i8* null) - %tobool = icmp ne i32 %call, 0 - br i1 %tobool, label %kcall.end, label %kcall.configok - -kcall.configok: ; preds = %for.body - %41 = load i32, i32* %num_iterations.addr, align 4 - %conv36 = sitofp i32 %41 to float - %42 = load i32, i32* %total_iterations.addr, align 4 - %conv37 = sitofp i32 %42 to float - %43 = load float, float* %t, align 4 - %sub = fsub contract float %conv37, %43 - %cmp38 = fcmp ole float %conv36, %sub - br i1 %cmp38, label %cond.true, label %cond.false - -cond.true: ; preds = %kcall.configok - %44 = load i32, i32* %num_iterations.addr, align 4 - %conv39 = sitofp i32 %44 to float - br label %cond.end - -cond.false: ; preds = %kcall.configok - %45 = load i32, i32* %total_iterations.addr, align 4 - %conv40 = sitofp i32 %45 to float - %46 = load float, float* %t, align 4 - %sub41 = fsub contract float %conv40, %46 - br label %cond.end - -cond.end: ; preds = %cond.false, %cond.true - %cond = phi float [ %conv39, %cond.true ], [ %sub41, %cond.false ] - %conv42 = fptosi float %cond to i32 - %47 = load float*, float** %MatrixPower.addr, align 8 - %48 = load float**, float*** %MatrixTemp.addr, align 8 - %49 = load i32, i32* %src, align 4 - %idxprom = sext i32 %49 to i64 - %arrayidx = getelementptr inbounds float*, float** %48, i64 %idxprom - %50 = load float*, float** %arrayidx, align 8 - %51 = load float**, float*** %MatrixTemp.addr, align 8 - %52 = load i32, i32* %dst, align 4 - %idxprom43 = sext i32 %52 to i64 - %arrayidx44 = getelementptr inbounds float*, float** %51, i64 %idxprom43 - %53 = load float*, float** %arrayidx44, align 8 - %54 = load i32, i32* %col.addr, align 4 - %55 = load i32, i32* %row.addr, align 4 - %56 = load i32, i32* %borderCols.addr, align 4 - %57 = load i32, i32* %borderRows.addr, align 4 - %58 = load float, float* %Cap, align 4 - %59 = load float, float* %Rx, align 4 - %60 = load float, float* %Ry, align 4 - %61 = load float, float* %Rz, align 4 - %62 = load float, float* %step, align 4 - %63 = load float, float* %time_elapsed, align 4 - call void @_Z14calculate_tempiPfS_S_iiiiffffff(i32 %conv42, float* %47, float* %50, float* %53, i32 %54, i32 %55, i32 %56, i32 %57, float %58, float %59, float %60, float %61, float %62, float %63) - br label %kcall.end - -kcall.end: ; preds = %cond.end, %for.body - %call45 = call i32 @cudaDeviceSynchronize() - br label %for.inc - -for.inc: ; preds = %kcall.end - %64 = load i32, i32* %num_iterations.addr, align 4 - %conv46 = sitofp i32 %64 to float - %65 = load float, float* %t, align 4 - %add = fadd contract float %65, %conv46 - store float %add, float* %t, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %66 = load i32, i32* %dst, align 4 - ret i32 %66 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN4dim3C2Ejjj(%struct.dim3* %this, i32 %vx, i32 %vy, i32 %vz) unnamed_addr #4 comdat align 2 { -entry: - %this.addr = alloca %struct.dim3*, align 8 - %vx.addr = alloca i32, align 4 - %vy.addr = alloca i32, align 4 - %vz.addr = alloca i32, align 4 - store %struct.dim3* %this, %struct.dim3** %this.addr, align 8 - store i32 %vx, i32* %vx.addr, align 4 - store i32 %vy, i32* %vy.addr, align 4 - store i32 %vz, i32* %vz.addr, align 4 - %this1 = load %struct.dim3*, %struct.dim3** %this.addr, align 8 - %x = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 0 - %0 = load i32, i32* %vx.addr, align 4 - store i32 %0, i32* %x, align 4 - %y = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 1 - %1 = load i32, i32* %vy.addr, align 4 - store i32 %1, i32* %y, align 4 - %z = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 2 - %2 = load i32, i32* %vz.addr, align 4 - store i32 %2, i32* %z, align 4 - ret void -} - -declare dso_local i32 @__cudaPushCallConfiguration(i64, i32, i64, i32, i64, i8*) #1 - -declare dso_local i32 @cudaDeviceSynchronize() #1 - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z5usageiPPc(i32 %argc, i8** %argv) #0 { -entry: - %argc.addr = alloca i32, align 4 - %argv.addr = alloca i8**, align 8 - store i32 %argc, i32* %argc.addr, align 4 - store i8** %argv, i8*** %argv.addr, align 8 - %0 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %1 = load i8**, i8*** %argv.addr, align 8 - %arrayidx = getelementptr inbounds i8*, i8** %1, i64 0 - %2 = load i8*, i8** %arrayidx, align 8 - %call = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %0, i8* getelementptr inbounds ([100 x i8], [100 x i8]* @.str.8, i64 0, i64 0), i8* %2) - %3 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %call1 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %3, i8* getelementptr inbounds ([78 x i8], [78 x i8]* @.str.9, i64 0, i64 0)) - %4 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %call2 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %4, i8* getelementptr inbounds ([53 x i8], [53 x i8]* @.str.10, i64 0, i64 0)) - %5 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %call3 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %5, i8* getelementptr inbounds ([38 x i8], [38 x i8]* @.str.11, i64 0, i64 0)) - %6 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %call4 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %6, i8* getelementptr inbounds ([89 x i8], [89 x i8]* @.str.12, i64 0, i64 0)) - %7 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %call5 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %7, i8* getelementptr inbounds ([86 x i8], [86 x i8]* @.str.13, i64 0, i64 0)) - %8 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %call6 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %8, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str.14, i64 0, i64 0)) - call void @exit(i32 1) #9 - unreachable -} - -; Function Attrs: noreturn nounwind -declare dso_local void @exit(i32) #5 - -; Function Attrs: noinline norecurse optnone uwtable -define dso_local i32 @main(i32 %argc, i8** %argv) #6 { -entry: - %retval = alloca i32, align 4 - %argc.addr = alloca i32, align 4 - %argv.addr = alloca i8**, align 8 - store i32 0, i32* %retval, align 4 - store i32 %argc, i32* %argc.addr, align 4 - store i8** %argv, i8*** %argv.addr, align 8 - %call = call i32 @cudaSetDevice(i32 0) - %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([29 x i8], [29 x i8]* @.str.15, i64 0, i64 0), i32 16, i32 16) - %0 = load i32, i32* %argc.addr, align 4 - %1 = load i8**, i8*** %argv.addr, align 8 - call void @_Z3runiPPc(i32 %0, i8** %1) - ret i32 0 -} - -declare dso_local i32 @cudaSetDevice(i32) #1 - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z3runiPPc(i32 %argc, i8** %argv) #0 { -entry: - %argc.addr = alloca i32, align 4 - %argv.addr = alloca i8**, align 8 - %size = alloca i32, align 4 - %grid_rows = alloca i32, align 4 - %grid_cols = alloca i32, align 4 - %FilesavingTemp = alloca float*, align 8 - %FilesavingPower = alloca float*, align 8 - %MatrixOut = alloca float*, align 8 - %tfile = alloca i8*, align 8 - %pfile = alloca i8*, align 8 - %ofile = alloca i8*, align 8 - %total_iterations = alloca i32, align 4 - %pyramid_height = alloca i32, align 4 - %borderCols = alloca i32, align 4 - %borderRows = alloca i32, align 4 - %smallBlockCol = alloca i32, align 4 - %smallBlockRow = alloca i32, align 4 - %blockCols = alloca i32, align 4 - %blockRows = alloca i32, align 4 - %MatrixTemp = alloca [2 x float*], align 16 - %MatrixPower = alloca float*, align 8 - %ret = alloca i32, align 4 - store i32 %argc, i32* %argc.addr, align 4 - store i8** %argv, i8*** %argv.addr, align 8 - store i32 60, i32* %total_iterations, align 4 - store i32 1, i32* %pyramid_height, align 4 - %0 = load i32, i32* %argc.addr, align 4 - %cmp = icmp ne i32 %0, 7 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - %1 = load i32, i32* %argc.addr, align 4 - %2 = load i8**, i8*** %argv.addr, align 8 - call void @_Z5usageiPPc(i32 %1, i8** %2) - br label %if.end - -if.end: ; preds = %if.then, %entry - %3 = load i8**, i8*** %argv.addr, align 8 - %arrayidx = getelementptr inbounds i8*, i8** %3, i64 1 - %4 = load i8*, i8** %arrayidx, align 8 - %call = call i32 @atoi(i8* %4) #10 - store i32 %call, i32* %grid_rows, align 4 - %cmp1 = icmp sle i32 %call, 0 - br i1 %cmp1, label %if.then13, label %lor.lhs.false - -lor.lhs.false: ; preds = %if.end - %5 = load i8**, i8*** %argv.addr, align 8 - %arrayidx2 = getelementptr inbounds i8*, i8** %5, i64 1 - %6 = load i8*, i8** %arrayidx2, align 8 - %call3 = call i32 @atoi(i8* %6) #10 - store i32 %call3, i32* %grid_cols, align 4 - %cmp4 = icmp sle i32 %call3, 0 - br i1 %cmp4, label %if.then13, label %lor.lhs.false5 - -lor.lhs.false5: ; preds = %lor.lhs.false - %7 = load i8**, i8*** %argv.addr, align 8 - %arrayidx6 = getelementptr inbounds i8*, i8** %7, i64 2 - %8 = load i8*, i8** %arrayidx6, align 8 - %call7 = call i32 @atoi(i8* %8) #10 - store i32 %call7, i32* %pyramid_height, align 4 - %cmp8 = icmp sle i32 %call7, 0 - br i1 %cmp8, label %if.then13, label %lor.lhs.false9 - -lor.lhs.false9: ; preds = %lor.lhs.false5 - %9 = load i8**, i8*** %argv.addr, align 8 - %arrayidx10 = getelementptr inbounds i8*, i8** %9, i64 3 - %10 = load i8*, i8** %arrayidx10, align 8 - %call11 = call i32 @atoi(i8* %10) #10 - store i32 %call11, i32* %total_iterations, align 4 - %cmp12 = icmp sle i32 %call11, 0 - br i1 %cmp12, label %if.then13, label %if.end14 - -if.then13: ; preds = %lor.lhs.false9, %lor.lhs.false5, %lor.lhs.false, %if.end - %11 = load i32, i32* %argc.addr, align 4 - %12 = load i8**, i8*** %argv.addr, align 8 - call void @_Z5usageiPPc(i32 %11, i8** %12) - br label %if.end14 - -if.end14: ; preds = %if.then13, %lor.lhs.false9 - %13 = load i8**, i8*** %argv.addr, align 8 - %arrayidx15 = getelementptr inbounds i8*, i8** %13, i64 4 - %14 = load i8*, i8** %arrayidx15, align 8 - store i8* %14, i8** %tfile, align 8 - %15 = load i8**, i8*** %argv.addr, align 8 - %arrayidx16 = getelementptr inbounds i8*, i8** %15, i64 5 - %16 = load i8*, i8** %arrayidx16, align 8 - store i8* %16, i8** %pfile, align 8 - %17 = load i8**, i8*** %argv.addr, align 8 - %arrayidx17 = getelementptr inbounds i8*, i8** %17, i64 6 - %18 = load i8*, i8** %arrayidx17, align 8 - store i8* %18, i8** %ofile, align 8 - %19 = load i32, i32* %grid_rows, align 4 - %20 = load i32, i32* %grid_cols, align 4 - %mul = mul nsw i32 %19, %20 - store i32 %mul, i32* %size, align 4 - %21 = load i32, i32* %pyramid_height, align 4 - %mul18 = mul nsw i32 %21, 2 - %div = sdiv i32 %mul18, 2 - store i32 %div, i32* %borderCols, align 4 - %22 = load i32, i32* %pyramid_height, align 4 - %mul19 = mul nsw i32 %22, 2 - %div20 = sdiv i32 %mul19, 2 - store i32 %div20, i32* %borderRows, align 4 - %23 = load i32, i32* %pyramid_height, align 4 - %mul21 = mul nsw i32 %23, 2 - %sub = sub nsw i32 16, %mul21 - store i32 %sub, i32* %smallBlockCol, align 4 - %24 = load i32, i32* %pyramid_height, align 4 - %mul22 = mul nsw i32 %24, 2 - %sub23 = sub nsw i32 16, %mul22 - store i32 %sub23, i32* %smallBlockRow, align 4 - %25 = load i32, i32* %grid_cols, align 4 - %26 = load i32, i32* %smallBlockCol, align 4 - %div24 = sdiv i32 %25, %26 - %27 = load i32, i32* %grid_cols, align 4 - %28 = load i32, i32* %smallBlockCol, align 4 - %rem = srem i32 %27, %28 - %cmp25 = icmp eq i32 %rem, 0 - %29 = zext i1 %cmp25 to i64 - %cond = select i1 %cmp25, i32 0, i32 1 - %add = add nsw i32 %div24, %cond - store i32 %add, i32* %blockCols, align 4 - %30 = load i32, i32* %grid_rows, align 4 - %31 = load i32, i32* %smallBlockRow, align 4 - %div26 = sdiv i32 %30, %31 - %32 = load i32, i32* %grid_rows, align 4 - %33 = load i32, i32* %smallBlockRow, align 4 - %rem27 = srem i32 %32, %33 - %cmp28 = icmp eq i32 %rem27, 0 - %34 = zext i1 %cmp28 to i64 - %cond29 = select i1 %cmp28, i32 0, i32 1 - %add30 = add nsw i32 %div26, %cond29 - store i32 %add30, i32* %blockRows, align 4 - %35 = load i32, i32* %size, align 4 - %conv = sext i32 %35 to i64 - %mul31 = mul i64 %conv, 4 - %call32 = call noalias i8* @malloc(i64 %mul31) #8 - %36 = bitcast i8* %call32 to float* - store float* %36, float** %FilesavingTemp, align 8 - %37 = load i32, i32* %size, align 4 - %conv33 = sext i32 %37 to i64 - %mul34 = mul i64 %conv33, 4 - %call35 = call noalias i8* @malloc(i64 %mul34) #8 - %38 = bitcast i8* %call35 to float* - store float* %38, float** %FilesavingPower, align 8 - %39 = load i32, i32* %size, align 4 - %conv36 = sext i32 %39 to i64 - %call37 = call noalias i8* @calloc(i64 %conv36, i64 4) #8 - %40 = bitcast i8* %call37 to float* - store float* %40, float** %MatrixOut, align 8 - %41 = load float*, float** %FilesavingPower, align 8 - %tobool = icmp ne float* %41, null - br i1 %tobool, label %lor.lhs.false38, label %if.then42 - -lor.lhs.false38: ; preds = %if.end14 - %42 = load float*, float** %FilesavingTemp, align 8 - %tobool39 = icmp ne float* %42, null - br i1 %tobool39, label %lor.lhs.false40, label %if.then42 - -lor.lhs.false40: ; preds = %lor.lhs.false38 - %43 = load float*, float** %MatrixOut, align 8 - %tobool41 = icmp ne float* %43, null - br i1 %tobool41, label %if.end43, label %if.then42 - -if.then42: ; preds = %lor.lhs.false40, %lor.lhs.false38, %if.end14 - call void @_Z5fatalPc(i8* getelementptr inbounds ([26 x i8], [26 x i8]* @.str.16, i64 0, i64 0)) - br label %if.end43 - -if.end43: ; preds = %if.then42, %lor.lhs.false40 - %44 = load i32, i32* %pyramid_height, align 4 - %45 = load i32, i32* %grid_cols, align 4 - %46 = load i32, i32* %grid_rows, align 4 - %47 = load i32, i32* %borderCols, align 4 - %48 = load i32, i32* %borderRows, align 4 - %49 = load i32, i32* %blockCols, align 4 - %50 = load i32, i32* %blockRows, align 4 - %51 = load i32, i32* %smallBlockCol, align 4 - %52 = load i32, i32* %smallBlockRow, align 4 - %call44 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([94 x i8], [94 x i8]* @.str.17, i64 0, i64 0), i32 %44, i32 %45, i32 %46, i32 %47, i32 %48, i32 %49, i32 %50, i32 %51, i32 %52) - %53 = load float*, float** %FilesavingTemp, align 8 - %54 = load i32, i32* %grid_rows, align 4 - %55 = load i32, i32* %grid_cols, align 4 - %56 = load i8*, i8** %tfile, align 8 - call void @_Z9readinputPfiiPc(float* %53, i32 %54, i32 %55, i8* %56) - %57 = load float*, float** %FilesavingPower, align 8 - %58 = load i32, i32* %grid_rows, align 4 - %59 = load i32, i32* %grid_cols, align 4 - %60 = load i8*, i8** %pfile, align 8 - call void @_Z9readinputPfiiPc(float* %57, i32 %58, i32 %59, i8* %60) - %arrayidx45 = getelementptr inbounds [2 x float*], [2 x float*]* %MatrixTemp, i64 0, i64 0 - %61 = bitcast float** %arrayidx45 to i8** - %62 = load i32, i32* %size, align 4 - %conv46 = sext i32 %62 to i64 - %mul47 = mul i64 4, %conv46 - %call48 = call i32 @cudaMalloc(i8** %61, i64 %mul47) - %arrayidx49 = getelementptr inbounds [2 x float*], [2 x float*]* %MatrixTemp, i64 0, i64 1 - %63 = bitcast float** %arrayidx49 to i8** - %64 = load i32, i32* %size, align 4 - %conv50 = sext i32 %64 to i64 - %mul51 = mul i64 4, %conv50 - %call52 = call i32 @cudaMalloc(i8** %63, i64 %mul51) - %arrayidx53 = getelementptr inbounds [2 x float*], [2 x float*]* %MatrixTemp, i64 0, i64 0 - %65 = load float*, float** %arrayidx53, align 16 - %66 = bitcast float* %65 to i8* - %67 = load float*, float** %FilesavingTemp, align 8 - %68 = bitcast float* %67 to i8* - %69 = load i32, i32* %size, align 4 - %conv54 = sext i32 %69 to i64 - %mul55 = mul i64 4, %conv54 - %call56 = call i32 @cudaMemcpy(i8* %66, i8* %68, i64 %mul55, i32 1) - %70 = bitcast float** %MatrixPower to i8** - %71 = load i32, i32* %size, align 4 - %conv57 = sext i32 %71 to i64 - %mul58 = mul i64 4, %conv57 - %call59 = call i32 @cudaMalloc(i8** %70, i64 %mul58) - %72 = load float*, float** %MatrixPower, align 8 - %73 = bitcast float* %72 to i8* - %74 = load float*, float** %FilesavingPower, align 8 - %75 = bitcast float* %74 to i8* - %76 = load i32, i32* %size, align 4 - %conv60 = sext i32 %76 to i64 - %mul61 = mul i64 4, %conv60 - %call62 = call i32 @cudaMemcpy(i8* %73, i8* %75, i64 %mul61, i32 1) - %call63 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([43 x i8], [43 x i8]* @.str.18, i64 0, i64 0)) - %77 = load float*, float** %MatrixPower, align 8 - %arraydecay = getelementptr inbounds [2 x float*], [2 x float*]* %MatrixTemp, i64 0, i64 0 - %78 = load i32, i32* %grid_cols, align 4 - %79 = load i32, i32* %grid_rows, align 4 - %80 = load i32, i32* %total_iterations, align 4 - %81 = load i32, i32* %pyramid_height, align 4 - %82 = load i32, i32* %blockCols, align 4 - %83 = load i32, i32* %blockRows, align 4 - %84 = load i32, i32* %borderCols, align 4 - %85 = load i32, i32* %borderRows, align 4 - %call64 = call i32 @_Z17compute_tran_tempPfPS_iiiiiiii(float* %77, float** %arraydecay, i32 %78, i32 %79, i32 %80, i32 %81, i32 %82, i32 %83, i32 %84, i32 %85) - store i32 %call64, i32* %ret, align 4 - %call65 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([19 x i8], [19 x i8]* @.str.19, i64 0, i64 0)) - %86 = load float*, float** %MatrixOut, align 8 - %87 = bitcast float* %86 to i8* - %88 = load i32, i32* %ret, align 4 - %idxprom = sext i32 %88 to i64 - %arrayidx66 = getelementptr inbounds [2 x float*], [2 x float*]* %MatrixTemp, i64 0, i64 %idxprom - %89 = load float*, float** %arrayidx66, align 8 - %90 = bitcast float* %89 to i8* - %91 = load i32, i32* %size, align 4 - %conv67 = sext i32 %91 to i64 - %mul68 = mul i64 4, %conv67 - %call69 = call i32 @cudaMemcpy(i8* %87, i8* %90, i64 %mul68, i32 2) - %92 = load float*, float** %MatrixOut, align 8 - %93 = load i32, i32* %grid_rows, align 4 - %94 = load i32, i32* %grid_cols, align 4 - %95 = load i8*, i8** %ofile, align 8 - call void @_Z11writeoutputPfiiPc(float* %92, i32 %93, i32 %94, i8* %95) - %96 = load float*, float** %MatrixPower, align 8 - %97 = bitcast float* %96 to i8* - %call70 = call i32 @cudaFree(i8* %97) - %arrayidx71 = getelementptr inbounds [2 x float*], [2 x float*]* %MatrixTemp, i64 0, i64 0 - %98 = load float*, float** %arrayidx71, align 16 - %99 = bitcast float* %98 to i8* - %call72 = call i32 @cudaFree(i8* %99) - %arrayidx73 = getelementptr inbounds [2 x float*], [2 x float*]* %MatrixTemp, i64 0, i64 1 - %100 = load float*, float** %arrayidx73, align 8 - %101 = bitcast float* %100 to i8* - %call74 = call i32 @cudaFree(i8* %101) - %102 = load float*, float** %MatrixOut, align 8 - %103 = bitcast float* %102 to i8* - call void @free(i8* %103) #8 - ret void -} - -; Function Attrs: nounwind readonly -declare dso_local i32 @atoi(i8*) #7 - -; Function Attrs: nounwind -declare dso_local noalias i8* @malloc(i64) #2 - -; Function Attrs: nounwind -declare dso_local noalias i8* @calloc(i64, i64) #2 - -declare dso_local i32 @cudaMalloc(i8**, i64) #1 - -declare dso_local i32 @cudaMemcpy(i8*, i8*, i64, i32) #1 - -declare dso_local i32 @cudaFree(i8*) #1 - -; Function Attrs: nounwind -declare dso_local void @free(i8*) #2 - -define internal void @__cuda_register_globals(i8** %0) { -entry: - %1 = call i32 @__cudaRegisterFunction(i8** %0, i8* bitcast (void (i32, float*, float*, float*, i32, i32, i32, i32, float, float, float, float, float, float)* @_Z14calculate_tempiPfS_S_iiiiffffff to i8*), i8* getelementptr inbounds ([36 x i8], [36 x i8]* @0, i64 0, i64 0), i8* getelementptr inbounds ([36 x i8], [36 x i8]* @0, i64 0, i64 0), i32 -1, i8* null, i8* null, i8* null, i8* null, i32* null) - ret void -} - -declare dso_local i32 @__cudaRegisterFunction(i8**, i8*, i8*, i8*, i32, i8*, i8*, i8*, i8*, i32*) - -declare dso_local i32 @__cudaRegisterVar(i8**, i8*, i8*, i8*, i32, i32, i32, i32) - -declare dso_local i8** @__cudaRegisterFatBinary(i8*) - -define internal void @__cuda_module_ctor(i8* %0) { -entry: - %1 = call i8** @__cudaRegisterFatBinary(i8* bitcast ({ i32, i32, i8*, i8* }* @__cuda_fatbin_wrapper to i8*)) - store i8** %1, i8*** @__cuda_gpubin_handle, align 8 - call void @__cuda_register_globals(i8** %1) - call void @__cudaRegisterFatBinaryEnd(i8** %1) - %2 = call i32 @atexit(void (i8*)* @__cuda_module_dtor) - ret void -} - -declare dso_local void @__cudaRegisterFatBinaryEnd(i8**) - -declare dso_local void @__cudaUnregisterFatBinary(i8**) - -define internal void @__cuda_module_dtor(i8* %0) { -entry: - %1 = load i8**, i8*** @__cuda_gpubin_handle, align 8 - call void @__cudaUnregisterFatBinary(i8** %1) - ret void -} - -declare dso_local i32 @atexit(void (i8*)*) - -attributes #0 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #3 = { argmemonly nounwind willreturn } -attributes #4 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #5 = { noreturn nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #6 = { noinline norecurse optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #7 = { nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #8 = { nounwind } -attributes #9 = { noreturn nounwind } -attributes #10 = { nounwind readonly } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]} -!1 = !{i32 1, !"wchar_size", i32 4} -!2 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"} diff --git a/examples/hotspot/hotspot.cu b/examples/hotspot/hotspot.cu deleted file mode 100644 index 9788e82..0000000 --- a/examples/hotspot/hotspot.cu +++ /dev/null @@ -1,353 +0,0 @@ -#include -#include -#include -#include - -#ifdef RD_WG_SIZE_0_0 -#define BLOCK_SIZE RD_WG_SIZE_0_0 -#elif defined(RD_WG_SIZE_0) -#define BLOCK_SIZE RD_WG_SIZE_0 -#elif defined(RD_WG_SIZE) -#define BLOCK_SIZE RD_WG_SIZE -#else -#define BLOCK_SIZE 16 -#endif - -#define STR_SIZE 256 - -/* maximum power density possible (say 300W for a 10mm x 10mm chip) */ -#define MAX_PD (3.0e6) -/* required precision in degrees */ -#define PRECISION 0.001 -#define SPEC_HEAT_SI 1.75e6 -#define K_SI 100 -/* capacitance fitting factor */ -#define FACTOR_CHIP 0.5 - -/* chip parameters */ -float t_chip = 0.0005; -float chip_height = 0.016; -float chip_width = 0.016; -/* ambient temperature, assuming no package at all */ -float amb_temp = 80.0; - -void run(int argc, char **argv); - -/* define timer macros */ -#define pin_stats_reset() startCycle() -#define pin_stats_pause(cycles) stopCycle(cycles) -#define pin_stats_dump(cycles) printf("timer: %Lu\n", cycles) - -void fatal(char *s) { fprintf(stderr, "error: %s\n", s); } - -void writeoutput(float *vect, int grid_rows, int grid_cols, char *file) { - - int i, j, index = 0; - FILE *fp; - char str[STR_SIZE]; - - if ((fp = fopen(file, "w")) == 0) - printf("The file was not opened\n"); - - for (i = 0; i < grid_rows; i++) - for (j = 0; j < grid_cols; j++) { - - sprintf(str, "%d\t%g\n", index, vect[i * grid_cols + j]); - fputs(str, fp); - index++; - } - - fclose(fp); -} - -void readinput(float *vect, int grid_rows, int grid_cols, char *file) { - - int i, j; - FILE *fp; - char str[STR_SIZE]; - float val; - - if ((fp = fopen(file, "r")) == 0) - printf("The file was not opened\n"); - - for (i = 0; i <= grid_rows - 1; i++) - for (j = 0; j <= grid_cols - 1; j++) { - fgets(str, STR_SIZE, fp); - if (feof(fp)) - fatal("not enough lines in file"); - // if ((sscanf(str, "%d%f", &index, &val) != 2) || (index != - // ((i-1)*(grid_cols-2)+j-1))) - if ((sscanf(str, "%f", &val) != 1)) - fatal("invalid file format"); - vect[i * grid_cols + j] = val; - } - - fclose(fp); -} - -#define IN_RANGE(x, min, max) ((x) >= (min) && (x) <= (max)) -#define CLAMP_RANGE(x, min, max) x = (x < (min)) ? min : ((x > (max)) ? max : x) -#define MIN(a, b) ((a) <= (b) ? (a) : (b)) - -__global__ void calculate_temp(int iteration, // number of iteration - float *power, // power input - float *temp_src, // temperature input/output - float *temp_dst, // temperature input/output - int grid_cols, // Col of grid - int grid_rows, // Row of grid - int border_cols, // border offset - int border_rows, // border offset - float Cap, // Capacitance - float Rx, float Ry, float Rz, float step, - float time_elapsed) { - - __shared__ float temp_on_cuda[BLOCK_SIZE][BLOCK_SIZE]; - __shared__ float power_on_cuda[BLOCK_SIZE][BLOCK_SIZE]; - __shared__ float temp_t[BLOCK_SIZE] - [BLOCK_SIZE]; // saving temparary temperature result - - float amb_temp = 80.0; - float step_div_Cap; - float Rx_1, Ry_1, Rz_1; - - int bx = blockIdx.x; - int by = blockIdx.y; - - int tx = threadIdx.x; - int ty = threadIdx.y; - - step_div_Cap = step / Cap; - - Rx_1 = 1 / Rx; - Ry_1 = 1 / Ry; - Rz_1 = 1 / Rz; - - // each block finally computes result for a small block - // after N iterations. - // it is the non-overlapping small blocks that cover - // all the input data - - // calculate the small block size - int small_block_rows = BLOCK_SIZE - iteration * 2; // EXPAND_RATE - int small_block_cols = BLOCK_SIZE - iteration * 2; // EXPAND_RATE - - // calculate the boundary for the block according to - // the boundary of its small block - int blkY = small_block_rows * by - border_rows; - int blkX = small_block_cols * bx - border_cols; - int blkYmax = blkY + BLOCK_SIZE - 1; - int blkXmax = blkX + BLOCK_SIZE - 1; - - // calculate the global thread coordination - int yidx = blkY + ty; - int xidx = blkX + tx; - - // load data if it is within the valid input range - int loadYidx = yidx, loadXidx = xidx; - int index = grid_cols * loadYidx + loadXidx; - - if (IN_RANGE(loadYidx, 0, grid_rows - 1) && - IN_RANGE(loadXidx, 0, grid_cols - 1)) { - temp_on_cuda[ty][tx] = temp_src[index]; // Load the temperature data from - // global memory to shared memory - power_on_cuda[ty][tx] = - power[index]; // Load the power data from global memory to shared memory - } - __syncthreads(); - - // effective range within this block that falls within - // the valid range of the input data - // used to rule out computation outside the boundary. - int validYmin = (blkY < 0) ? -blkY : 0; - int validYmax = (blkYmax > grid_rows - 1) - ? BLOCK_SIZE - 1 - (blkYmax - grid_rows + 1) - : BLOCK_SIZE - 1; - int validXmin = (blkX < 0) ? -blkX : 0; - int validXmax = (blkXmax > grid_cols - 1) - ? BLOCK_SIZE - 1 - (blkXmax - grid_cols + 1) - : BLOCK_SIZE - 1; - - int N = ty - 1; - int S = ty + 1; - int W = tx - 1; - int E = tx + 1; - - N = (N < validYmin) ? validYmin : N; - S = (S > validYmax) ? validYmax : S; - W = (W < validXmin) ? validXmin : W; - E = (E > validXmax) ? validXmax : E; - - bool computed; - for (int i = 0; i < iteration; i++) { - computed = false; - if (IN_RANGE(tx, i + 1, BLOCK_SIZE - i - 2) && - IN_RANGE(ty, i + 1, BLOCK_SIZE - i - 2) && - IN_RANGE(tx, validXmin, validXmax) && - IN_RANGE(ty, validYmin, validYmax)) { - computed = true; - temp_t[ty][tx] = - temp_on_cuda[ty][tx] + - step_div_Cap * (power_on_cuda[ty][tx] + - (temp_on_cuda[S][tx] + temp_on_cuda[N][tx] - - 2.0 * temp_on_cuda[ty][tx]) * - Ry_1 + - (temp_on_cuda[ty][E] + temp_on_cuda[ty][W] - - 2.0 * temp_on_cuda[ty][tx]) * - Rx_1 + - (amb_temp - temp_on_cuda[ty][tx]) * Rz_1); - } - __syncthreads(); - if (i == iteration - 1) - break; - if (computed) // Assign the computation range - temp_on_cuda[ty][tx] = temp_t[ty][tx]; - __syncthreads(); - } - - // update the global memory - // after the last iteration, only threads coordinated within the - // small block perform the calculation and switch on ``computed'' - if (computed) { - temp_dst[index] = temp_t[ty][tx]; - } -} - -/* - compute N time steps -*/ - -int compute_tran_temp(float *MatrixPower, float *MatrixTemp[2], int col, - int row, int total_iterations, int num_iterations, - int blockCols, int blockRows, int borderCols, - int borderRows) { - dim3 dimBlock(BLOCK_SIZE, BLOCK_SIZE); - dim3 dimGrid(blockCols, blockRows); - - float grid_height = chip_height / row; - float grid_width = chip_width / col; - - float Cap = FACTOR_CHIP * SPEC_HEAT_SI * t_chip * grid_width * grid_height; - float Rx = grid_width / (2.0 * K_SI * t_chip * grid_height); - float Ry = grid_height / (2.0 * K_SI * t_chip * grid_width); - float Rz = t_chip / (K_SI * grid_height * grid_width); - - float max_slope = MAX_PD / (FACTOR_CHIP * t_chip * SPEC_HEAT_SI); - float step = PRECISION / max_slope; - float t; - float time_elapsed; - time_elapsed = 0.001; - - int src = 1, dst = 0; - - for (t = 0; t < total_iterations; t += num_iterations) { - int temp = src; - src = dst; - dst = temp; - calculate_temp<<>>( - MIN(num_iterations, total_iterations - t), MatrixPower, MatrixTemp[src], - MatrixTemp[dst], col, row, borderCols, borderRows, Cap, Rx, Ry, Rz, - step, time_elapsed); - cudaDeviceSynchronize(); - } - return dst; -} - -void usage(int argc, char **argv) { - fprintf(stderr, - "Usage: %s " - " \n", - argv[0]); - fprintf(stderr, "\t - number of rows/cols in the grid " - "(positive integer)\n"); - fprintf(stderr, "\t - pyramid heigh(positive integer)\n"); - fprintf(stderr, "\t - number of iterations\n"); - fprintf(stderr, "\t - name of the file containing the initial " - "temperature values of each cell\n"); - fprintf(stderr, "\t - name of the file containing the dissipated " - "power values of each cell\n"); - fprintf(stderr, "\t - name of the output file\n"); - exit(1); -} - -int main(int argc, char **argv) { - cudaSetDevice(0); - printf("WG size of kernel = %d X %d\n", BLOCK_SIZE, BLOCK_SIZE); - - run(argc, argv); - - return EXIT_SUCCESS; -} - -void run(int argc, char **argv) { - int size; - int grid_rows, grid_cols; - float *FilesavingTemp, *FilesavingPower, *MatrixOut; - char *tfile, *pfile, *ofile; - - int total_iterations = 60; - int pyramid_height = 1; // number of iterations - - if (argc != 7) - usage(argc, argv); - if ((grid_rows = atoi(argv[1])) <= 0 || (grid_cols = atoi(argv[1])) <= 0 || - (pyramid_height = atoi(argv[2])) <= 0 || - (total_iterations = atoi(argv[3])) <= 0) - usage(argc, argv); - - tfile = argv[4]; - pfile = argv[5]; - ofile = argv[6]; - - size = grid_rows * grid_cols; - -/* --------------- pyramid parameters --------------- */ -#define EXPAND_RATE \ - 2 // add one iteration will extend the pyramid base by 2 per each borderline - int borderCols = (pyramid_height)*EXPAND_RATE / 2; - int borderRows = (pyramid_height)*EXPAND_RATE / 2; - int smallBlockCol = BLOCK_SIZE - (pyramid_height)*EXPAND_RATE; - int smallBlockRow = BLOCK_SIZE - (pyramid_height)*EXPAND_RATE; - int blockCols = - grid_cols / smallBlockCol + ((grid_cols % smallBlockCol == 0) ? 0 : 1); - int blockRows = - grid_rows / smallBlockRow + ((grid_rows % smallBlockRow == 0) ? 0 : 1); - - FilesavingTemp = (float *)malloc(size * sizeof(float)); - FilesavingPower = (float *)malloc(size * sizeof(float)); - MatrixOut = (float *)calloc(size, sizeof(float)); - - if (!FilesavingPower || !FilesavingTemp || !MatrixOut) - fatal("unable to allocate memory"); - - printf("pyramidHeight: %d\ngridSize: [%d, %d]\nborder:[%d, " - "%d]\nblockGrid:[%d, %d]\ntargetBlock:[%d, %d]\n", - pyramid_height, grid_cols, grid_rows, borderCols, borderRows, - blockCols, blockRows, smallBlockCol, smallBlockRow); - - readinput(FilesavingTemp, grid_rows, grid_cols, tfile); - readinput(FilesavingPower, grid_rows, grid_cols, pfile); - - float *MatrixTemp[2], *MatrixPower; - cudaMalloc((void **)&MatrixTemp[0], sizeof(float) * size); - cudaMalloc((void **)&MatrixTemp[1], sizeof(float) * size); - cudaMemcpy(MatrixTemp[0], FilesavingTemp, sizeof(float) * size, - cudaMemcpyHostToDevice); - - cudaMalloc((void **)&MatrixPower, sizeof(float) * size); - cudaMemcpy(MatrixPower, FilesavingPower, sizeof(float) * size, - cudaMemcpyHostToDevice); - printf("Start computing the transient temperature\n"); - int ret = compute_tran_temp(MatrixPower, MatrixTemp, grid_cols, grid_rows, - total_iterations, pyramid_height, blockCols, - blockRows, borderCols, borderRows); - printf("Ending simulation\n"); - cudaMemcpy(MatrixOut, MatrixTemp[ret], sizeof(float) * size, - cudaMemcpyDeviceToHost); - - writeoutput(MatrixOut, grid_rows, grid_cols, ofile); - - cudaFree(MatrixPower); - cudaFree(MatrixTemp[0]); - cudaFree(MatrixTemp[1]); - free(MatrixOut); -} diff --git a/examples/hotspot/run.sh b/examples/hotspot/run.sh deleted file mode 100644 index 679325d..0000000 --- a/examples/hotspot/run.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -set -e -llvm-as hotspot-cuda-nvptx64-nvidia-cuda-sm_61.ll -llvm-as hotspot-host-x86_64-unknown-linux-gnu.ll -../../build/compilation/kernelTranslator hotspot-cuda-nvptx64-nvidia-cuda-sm_61.bc kernel.bc -../../build/compilation/hostTranslator hotspot-host-x86_64-unknown-linux-gnu.bc host.bc - -llc --relocation-model=pic --filetype=obj kernel.bc -llc --relocation-model=pic --filetype=obj host.bc - -g++ -Wall -L../../build/runtime -L../../build/runtime/threadPool \ - -o hotspot -fPIC -no-pie host.o kernel.o -lc -lx86Runtime -lthreadPool -lpthread - -export LD_LIBRARY_PATH=../../build/runtime:../../build/runtime/threadPool:$LD_LIBRARY_PATH -./hotspot 512 2 2 ../../rodinia-data/hotspot/temp_512 ../../rodinia-data/hotspot/power_512 output.out -if head output.out | grep -q "323.829"; then - echo "Pass" -else - echo "Error result" - exit 1 -fi diff --git a/examples/hotspot3D/3D-cuda-nvptx64-nvidia-cuda-sm_61.ll b/examples/hotspot3D/3D-cuda-nvptx64-nvidia-cuda-sm_61.ll deleted file mode 100644 index 46c4ec2..0000000 --- a/examples/hotspot3D/3D-cuda-nvptx64-nvidia-cuda-sm_61.ll +++ /dev/null @@ -1,587 +0,0 @@ -; ModuleID = '3D-cuda-nvptx64-nvidia-cuda-sm_61.bc' -source_filename = "3D.cu" -target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" -target triple = "nvptx64-nvidia-cuda" - -%struct.__cuda_builtin_blockDim_t = type { i8 } -%struct.__cuda_builtin_blockIdx_t = type { i8 } -%struct.__cuda_builtin_threadIdx_t = type { i8 } -%struct.cudaFuncAttributes = type { i64, i64, i64, i32, i32, i32, i32, i32, i32, i32 } - -$_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv = comdat any - -$_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv = comdat any - -$_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv = comdat any - -$_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_yEv = comdat any - -$_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_yEv = comdat any - -$_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_yEv = comdat any - -@blockDim = extern_weak dso_local addrspace(1) global %struct.__cuda_builtin_blockDim_t, align 1 -@blockIdx = extern_weak dso_local addrspace(1) global %struct.__cuda_builtin_blockIdx_t, align 1 -@threadIdx = extern_weak dso_local addrspace(1) global %struct.__cuda_builtin_threadIdx_t, align 1 - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaMalloc(i8** %p, i64 %s) #0 { -entry: - %p.addr = alloca i8**, align 8 - %s.addr = alloca i64, align 8 - store i8** %p, i8*** %p.addr, align 8 - store i64 %s, i64* %s.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaFuncGetAttributes(%struct.cudaFuncAttributes* %p, i8* %c) #0 { -entry: - %p.addr = alloca %struct.cudaFuncAttributes*, align 8 - %c.addr = alloca i8*, align 8 - store %struct.cudaFuncAttributes* %p, %struct.cudaFuncAttributes** %p.addr, align 8 - store i8* %c, i8** %c.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaDeviceGetAttribute(i32* %value, i32 %attr, i32 %device) #0 { -entry: - %value.addr = alloca i32*, align 8 - %attr.addr = alloca i32, align 4 - %device.addr = alloca i32, align 4 - store i32* %value, i32** %value.addr, align 8 - store i32 %attr, i32* %attr.addr, align 4 - store i32 %device, i32* %device.addr, align 4 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaGetDevice(i32* %device) #0 { -entry: - %device.addr = alloca i32*, align 8 - store i32* %device, i32** %device.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaOccupancyMaxActiveBlocksPerMultiprocessor(i32* %numBlocks, i8* %func, i32 %blockSize, i64 %dynamicSmemSize) #0 { -entry: - %numBlocks.addr = alloca i32*, align 8 - %func.addr = alloca i8*, align 8 - %blockSize.addr = alloca i32, align 4 - %dynamicSmemSize.addr = alloca i64, align 8 - store i32* %numBlocks, i32** %numBlocks.addr, align 8 - store i8* %func, i8** %func.addr, align 8 - store i32 %blockSize, i32* %blockSize.addr, align 4 - store i64 %dynamicSmemSize, i64* %dynamicSmemSize.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(i32* %numBlocks, i8* %func, i32 %blockSize, i64 %dynamicSmemSize, i32 %flags) #0 { -entry: - %numBlocks.addr = alloca i32*, align 8 - %func.addr = alloca i8*, align 8 - %blockSize.addr = alloca i32, align 4 - %dynamicSmemSize.addr = alloca i64, align 8 - %flags.addr = alloca i32, align 4 - store i32* %numBlocks, i32** %numBlocks.addr, align 8 - store i8* %func, i8** %func.addr, align 8 - store i32 %blockSize, i32* %blockSize.addr, align 4 - store i64 %dynamicSmemSize, i64* %dynamicSmemSize.addr, align 8 - store i32 %flags, i32* %flags.addr, align 4 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define dso_local void @_Z11hotspotOpt1PfS_S_fiiifffffff(float* %p, float* %tIn, float* %tOut, float %sdc, i32 %nx, i32 %ny, i32 %nz, float %ce, float %cw, float %cn, float %cs, float %ct, float %cb, float %cc) #0 { -entry: - %p.addr = alloca float*, align 8 - %tIn.addr = alloca float*, align 8 - %tOut.addr = alloca float*, align 8 - %sdc.addr = alloca float, align 4 - %nx.addr = alloca i32, align 4 - %ny.addr = alloca i32, align 4 - %nz.addr = alloca i32, align 4 - %ce.addr = alloca float, align 4 - %cw.addr = alloca float, align 4 - %cn.addr = alloca float, align 4 - %cs.addr = alloca float, align 4 - %ct.addr = alloca float, align 4 - %cb.addr = alloca float, align 4 - %cc.addr = alloca float, align 4 - %amb_temp = alloca float, align 4 - %i = alloca i32, align 4 - %j = alloca i32, align 4 - %c = alloca i32, align 4 - %xy = alloca i32, align 4 - %W = alloca i32, align 4 - %E = alloca i32, align 4 - %N = alloca i32, align 4 - %S = alloca i32, align 4 - %temp1 = alloca float, align 4 - %temp2 = alloca float, align 4 - %temp3 = alloca float, align 4 - %k = alloca i32, align 4 - store float* %p, float** %p.addr, align 8 - store float* %tIn, float** %tIn.addr, align 8 - store float* %tOut, float** %tOut.addr, align 8 - store float %sdc, float* %sdc.addr, align 4 - store i32 %nx, i32* %nx.addr, align 4 - store i32 %ny, i32* %ny.addr, align 4 - store i32 %nz, i32* %nz.addr, align 4 - store float %ce, float* %ce.addr, align 4 - store float %cw, float* %cw.addr, align 4 - store float %cn, float* %cn.addr, align 4 - store float %cs, float* %cs.addr, align 4 - store float %ct, float* %ct.addr, align 4 - store float %cb, float* %cb.addr, align 4 - store float %cc, float* %cc.addr, align 4 - store float 8.000000e+01, float* %amb_temp, align 4 - %call = call i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv() #3 - %call1 = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #3 - %mul = mul i32 %call, %call1 - %call2 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #3 - %add = add i32 %mul, %call2 - store i32 %add, i32* %i, align 4 - %call3 = call i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_yEv() #3 - %call4 = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_yEv() #3 - %mul5 = mul i32 %call3, %call4 - %call6 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_yEv() #3 - %add7 = add i32 %mul5, %call6 - store i32 %add7, i32* %j, align 4 - %0 = load i32, i32* %i, align 4 - %1 = load i32, i32* %j, align 4 - %2 = load i32, i32* %nx.addr, align 4 - %mul8 = mul nsw i32 %1, %2 - %add9 = add nsw i32 %0, %mul8 - store i32 %add9, i32* %c, align 4 - %3 = load i32, i32* %nx.addr, align 4 - %4 = load i32, i32* %ny.addr, align 4 - %mul10 = mul nsw i32 %3, %4 - store i32 %mul10, i32* %xy, align 4 - %5 = load i32, i32* %i, align 4 - %cmp = icmp eq i32 %5, 0 - br i1 %cmp, label %cond.true, label %cond.false - -cond.true: ; preds = %entry - %6 = load i32, i32* %c, align 4 - br label %cond.end - -cond.false: ; preds = %entry - %7 = load i32, i32* %c, align 4 - %sub = sub nsw i32 %7, 1 - br label %cond.end - -cond.end: ; preds = %cond.false, %cond.true - %cond = phi i32 [ %6, %cond.true ], [ %sub, %cond.false ] - store i32 %cond, i32* %W, align 4 - %8 = load i32, i32* %i, align 4 - %9 = load i32, i32* %nx.addr, align 4 - %sub11 = sub nsw i32 %9, 1 - %cmp12 = icmp eq i32 %8, %sub11 - br i1 %cmp12, label %cond.true13, label %cond.false14 - -cond.true13: ; preds = %cond.end - %10 = load i32, i32* %c, align 4 - br label %cond.end16 - -cond.false14: ; preds = %cond.end - %11 = load i32, i32* %c, align 4 - %add15 = add nsw i32 %11, 1 - br label %cond.end16 - -cond.end16: ; preds = %cond.false14, %cond.true13 - %cond17 = phi i32 [ %10, %cond.true13 ], [ %add15, %cond.false14 ] - store i32 %cond17, i32* %E, align 4 - %12 = load i32, i32* %j, align 4 - %cmp18 = icmp eq i32 %12, 0 - br i1 %cmp18, label %cond.true19, label %cond.false20 - -cond.true19: ; preds = %cond.end16 - %13 = load i32, i32* %c, align 4 - br label %cond.end22 - -cond.false20: ; preds = %cond.end16 - %14 = load i32, i32* %c, align 4 - %15 = load i32, i32* %nx.addr, align 4 - %sub21 = sub nsw i32 %14, %15 - br label %cond.end22 - -cond.end22: ; preds = %cond.false20, %cond.true19 - %cond23 = phi i32 [ %13, %cond.true19 ], [ %sub21, %cond.false20 ] - store i32 %cond23, i32* %N, align 4 - %16 = load i32, i32* %j, align 4 - %17 = load i32, i32* %ny.addr, align 4 - %sub24 = sub nsw i32 %17, 1 - %cmp25 = icmp eq i32 %16, %sub24 - br i1 %cmp25, label %cond.true26, label %cond.false27 - -cond.true26: ; preds = %cond.end22 - %18 = load i32, i32* %c, align 4 - br label %cond.end29 - -cond.false27: ; preds = %cond.end22 - %19 = load i32, i32* %c, align 4 - %20 = load i32, i32* %nx.addr, align 4 - %add28 = add nsw i32 %19, %20 - br label %cond.end29 - -cond.end29: ; preds = %cond.false27, %cond.true26 - %cond30 = phi i32 [ %18, %cond.true26 ], [ %add28, %cond.false27 ] - store i32 %cond30, i32* %S, align 4 - %21 = load float*, float** %tIn.addr, align 8 - %22 = load i32, i32* %c, align 4 - %idxprom = sext i32 %22 to i64 - %arrayidx = getelementptr inbounds float, float* %21, i64 %idxprom - %23 = load float, float* %arrayidx, align 4 - store float %23, float* %temp2, align 4 - store float %23, float* %temp1, align 4 - %24 = load float*, float** %tIn.addr, align 8 - %25 = load i32, i32* %c, align 4 - %26 = load i32, i32* %xy, align 4 - %add31 = add nsw i32 %25, %26 - %idxprom32 = sext i32 %add31 to i64 - %arrayidx33 = getelementptr inbounds float, float* %24, i64 %idxprom32 - %27 = load float, float* %arrayidx33, align 4 - store float %27, float* %temp3, align 4 - %28 = load float, float* %cc.addr, align 4 - %29 = load float, float* %temp2, align 4 - %mul34 = fmul contract float %28, %29 - %30 = load float, float* %cw.addr, align 4 - %31 = load float*, float** %tIn.addr, align 8 - %32 = load i32, i32* %W, align 4 - %idxprom35 = sext i32 %32 to i64 - %arrayidx36 = getelementptr inbounds float, float* %31, i64 %idxprom35 - %33 = load float, float* %arrayidx36, align 4 - %mul37 = fmul contract float %30, %33 - %add38 = fadd contract float %mul34, %mul37 - %34 = load float, float* %ce.addr, align 4 - %35 = load float*, float** %tIn.addr, align 8 - %36 = load i32, i32* %E, align 4 - %idxprom39 = sext i32 %36 to i64 - %arrayidx40 = getelementptr inbounds float, float* %35, i64 %idxprom39 - %37 = load float, float* %arrayidx40, align 4 - %mul41 = fmul contract float %34, %37 - %add42 = fadd contract float %add38, %mul41 - %38 = load float, float* %cs.addr, align 4 - %39 = load float*, float** %tIn.addr, align 8 - %40 = load i32, i32* %S, align 4 - %idxprom43 = sext i32 %40 to i64 - %arrayidx44 = getelementptr inbounds float, float* %39, i64 %idxprom43 - %41 = load float, float* %arrayidx44, align 4 - %mul45 = fmul contract float %38, %41 - %add46 = fadd contract float %add42, %mul45 - %42 = load float, float* %cn.addr, align 4 - %43 = load float*, float** %tIn.addr, align 8 - %44 = load i32, i32* %N, align 4 - %idxprom47 = sext i32 %44 to i64 - %arrayidx48 = getelementptr inbounds float, float* %43, i64 %idxprom47 - %45 = load float, float* %arrayidx48, align 4 - %mul49 = fmul contract float %42, %45 - %add50 = fadd contract float %add46, %mul49 - %46 = load float, float* %cb.addr, align 4 - %47 = load float, float* %temp1, align 4 - %mul51 = fmul contract float %46, %47 - %add52 = fadd contract float %add50, %mul51 - %48 = load float, float* %ct.addr, align 4 - %49 = load float, float* %temp3, align 4 - %mul53 = fmul contract float %48, %49 - %add54 = fadd contract float %add52, %mul53 - %50 = load float, float* %sdc.addr, align 4 - %51 = load float*, float** %p.addr, align 8 - %52 = load i32, i32* %c, align 4 - %idxprom55 = sext i32 %52 to i64 - %arrayidx56 = getelementptr inbounds float, float* %51, i64 %idxprom55 - %53 = load float, float* %arrayidx56, align 4 - %mul57 = fmul contract float %50, %53 - %add58 = fadd contract float %add54, %mul57 - %54 = load float, float* %ct.addr, align 4 - %55 = load float, float* %amb_temp, align 4 - %mul59 = fmul contract float %54, %55 - %add60 = fadd contract float %add58, %mul59 - %56 = load float*, float** %tOut.addr, align 8 - %57 = load i32, i32* %c, align 4 - %idxprom61 = sext i32 %57 to i64 - %arrayidx62 = getelementptr inbounds float, float* %56, i64 %idxprom61 - store float %add60, float* %arrayidx62, align 4 - %58 = load i32, i32* %xy, align 4 - %59 = load i32, i32* %c, align 4 - %add63 = add nsw i32 %59, %58 - store i32 %add63, i32* %c, align 4 - %60 = load i32, i32* %xy, align 4 - %61 = load i32, i32* %W, align 4 - %add64 = add nsw i32 %61, %60 - store i32 %add64, i32* %W, align 4 - %62 = load i32, i32* %xy, align 4 - %63 = load i32, i32* %E, align 4 - %add65 = add nsw i32 %63, %62 - store i32 %add65, i32* %E, align 4 - %64 = load i32, i32* %xy, align 4 - %65 = load i32, i32* %N, align 4 - %add66 = add nsw i32 %65, %64 - store i32 %add66, i32* %N, align 4 - %66 = load i32, i32* %xy, align 4 - %67 = load i32, i32* %S, align 4 - %add67 = add nsw i32 %67, %66 - store i32 %add67, i32* %S, align 4 - store i32 1, i32* %k, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %cond.end29 - %68 = load i32, i32* %k, align 4 - %69 = load i32, i32* %nz.addr, align 4 - %sub68 = sub nsw i32 %69, 1 - %cmp69 = icmp slt i32 %68, %sub68 - br i1 %cmp69, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %70 = load float, float* %temp2, align 4 - store float %70, float* %temp1, align 4 - %71 = load float, float* %temp3, align 4 - store float %71, float* %temp2, align 4 - %72 = load float*, float** %tIn.addr, align 8 - %73 = load i32, i32* %c, align 4 - %74 = load i32, i32* %xy, align 4 - %add70 = add nsw i32 %73, %74 - %idxprom71 = sext i32 %add70 to i64 - %arrayidx72 = getelementptr inbounds float, float* %72, i64 %idxprom71 - %75 = load float, float* %arrayidx72, align 4 - store float %75, float* %temp3, align 4 - %76 = load float, float* %cc.addr, align 4 - %77 = load float, float* %temp2, align 4 - %mul73 = fmul contract float %76, %77 - %78 = load float, float* %cw.addr, align 4 - %79 = load float*, float** %tIn.addr, align 8 - %80 = load i32, i32* %W, align 4 - %idxprom74 = sext i32 %80 to i64 - %arrayidx75 = getelementptr inbounds float, float* %79, i64 %idxprom74 - %81 = load float, float* %arrayidx75, align 4 - %mul76 = fmul contract float %78, %81 - %add77 = fadd contract float %mul73, %mul76 - %82 = load float, float* %ce.addr, align 4 - %83 = load float*, float** %tIn.addr, align 8 - %84 = load i32, i32* %E, align 4 - %idxprom78 = sext i32 %84 to i64 - %arrayidx79 = getelementptr inbounds float, float* %83, i64 %idxprom78 - %85 = load float, float* %arrayidx79, align 4 - %mul80 = fmul contract float %82, %85 - %add81 = fadd contract float %add77, %mul80 - %86 = load float, float* %cs.addr, align 4 - %87 = load float*, float** %tIn.addr, align 8 - %88 = load i32, i32* %S, align 4 - %idxprom82 = sext i32 %88 to i64 - %arrayidx83 = getelementptr inbounds float, float* %87, i64 %idxprom82 - %89 = load float, float* %arrayidx83, align 4 - %mul84 = fmul contract float %86, %89 - %add85 = fadd contract float %add81, %mul84 - %90 = load float, float* %cn.addr, align 4 - %91 = load float*, float** %tIn.addr, align 8 - %92 = load i32, i32* %N, align 4 - %idxprom86 = sext i32 %92 to i64 - %arrayidx87 = getelementptr inbounds float, float* %91, i64 %idxprom86 - %93 = load float, float* %arrayidx87, align 4 - %mul88 = fmul contract float %90, %93 - %add89 = fadd contract float %add85, %mul88 - %94 = load float, float* %cb.addr, align 4 - %95 = load float, float* %temp1, align 4 - %mul90 = fmul contract float %94, %95 - %add91 = fadd contract float %add89, %mul90 - %96 = load float, float* %ct.addr, align 4 - %97 = load float, float* %temp3, align 4 - %mul92 = fmul contract float %96, %97 - %add93 = fadd contract float %add91, %mul92 - %98 = load float, float* %sdc.addr, align 4 - %99 = load float*, float** %p.addr, align 8 - %100 = load i32, i32* %c, align 4 - %idxprom94 = sext i32 %100 to i64 - %arrayidx95 = getelementptr inbounds float, float* %99, i64 %idxprom94 - %101 = load float, float* %arrayidx95, align 4 - %mul96 = fmul contract float %98, %101 - %add97 = fadd contract float %add93, %mul96 - %102 = load float, float* %ct.addr, align 4 - %103 = load float, float* %amb_temp, align 4 - %mul98 = fmul contract float %102, %103 - %add99 = fadd contract float %add97, %mul98 - %104 = load float*, float** %tOut.addr, align 8 - %105 = load i32, i32* %c, align 4 - %idxprom100 = sext i32 %105 to i64 - %arrayidx101 = getelementptr inbounds float, float* %104, i64 %idxprom100 - store float %add99, float* %arrayidx101, align 4 - %106 = load i32, i32* %xy, align 4 - %107 = load i32, i32* %c, align 4 - %add102 = add nsw i32 %107, %106 - store i32 %add102, i32* %c, align 4 - %108 = load i32, i32* %xy, align 4 - %109 = load i32, i32* %W, align 4 - %add103 = add nsw i32 %109, %108 - store i32 %add103, i32* %W, align 4 - %110 = load i32, i32* %xy, align 4 - %111 = load i32, i32* %E, align 4 - %add104 = add nsw i32 %111, %110 - store i32 %add104, i32* %E, align 4 - %112 = load i32, i32* %xy, align 4 - %113 = load i32, i32* %N, align 4 - %add105 = add nsw i32 %113, %112 - store i32 %add105, i32* %N, align 4 - %114 = load i32, i32* %xy, align 4 - %115 = load i32, i32* %S, align 4 - %add106 = add nsw i32 %115, %114 - store i32 %add106, i32* %S, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %116 = load i32, i32* %k, align 4 - %inc = add nsw i32 %116, 1 - store i32 %inc, i32* %k, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %117 = load float, float* %temp2, align 4 - store float %117, float* %temp1, align 4 - %118 = load float, float* %temp3, align 4 - store float %118, float* %temp2, align 4 - %119 = load float, float* %cc.addr, align 4 - %120 = load float, float* %temp2, align 4 - %mul107 = fmul contract float %119, %120 - %121 = load float, float* %cw.addr, align 4 - %122 = load float*, float** %tIn.addr, align 8 - %123 = load i32, i32* %W, align 4 - %idxprom108 = sext i32 %123 to i64 - %arrayidx109 = getelementptr inbounds float, float* %122, i64 %idxprom108 - %124 = load float, float* %arrayidx109, align 4 - %mul110 = fmul contract float %121, %124 - %add111 = fadd contract float %mul107, %mul110 - %125 = load float, float* %ce.addr, align 4 - %126 = load float*, float** %tIn.addr, align 8 - %127 = load i32, i32* %E, align 4 - %idxprom112 = sext i32 %127 to i64 - %arrayidx113 = getelementptr inbounds float, float* %126, i64 %idxprom112 - %128 = load float, float* %arrayidx113, align 4 - %mul114 = fmul contract float %125, %128 - %add115 = fadd contract float %add111, %mul114 - %129 = load float, float* %cs.addr, align 4 - %130 = load float*, float** %tIn.addr, align 8 - %131 = load i32, i32* %S, align 4 - %idxprom116 = sext i32 %131 to i64 - %arrayidx117 = getelementptr inbounds float, float* %130, i64 %idxprom116 - %132 = load float, float* %arrayidx117, align 4 - %mul118 = fmul contract float %129, %132 - %add119 = fadd contract float %add115, %mul118 - %133 = load float, float* %cn.addr, align 4 - %134 = load float*, float** %tIn.addr, align 8 - %135 = load i32, i32* %N, align 4 - %idxprom120 = sext i32 %135 to i64 - %arrayidx121 = getelementptr inbounds float, float* %134, i64 %idxprom120 - %136 = load float, float* %arrayidx121, align 4 - %mul122 = fmul contract float %133, %136 - %add123 = fadd contract float %add119, %mul122 - %137 = load float, float* %cb.addr, align 4 - %138 = load float, float* %temp1, align 4 - %mul124 = fmul contract float %137, %138 - %add125 = fadd contract float %add123, %mul124 - %139 = load float, float* %ct.addr, align 4 - %140 = load float, float* %temp3, align 4 - %mul126 = fmul contract float %139, %140 - %add127 = fadd contract float %add125, %mul126 - %141 = load float, float* %sdc.addr, align 4 - %142 = load float*, float** %p.addr, align 8 - %143 = load i32, i32* %c, align 4 - %idxprom128 = sext i32 %143 to i64 - %arrayidx129 = getelementptr inbounds float, float* %142, i64 %idxprom128 - %144 = load float, float* %arrayidx129, align 4 - %mul130 = fmul contract float %141, %144 - %add131 = fadd contract float %add127, %mul130 - %145 = load float, float* %ct.addr, align 4 - %146 = load float, float* %amb_temp, align 4 - %mul132 = fmul contract float %145, %146 - %add133 = fadd contract float %add131, %mul132 - %147 = load float*, float** %tOut.addr, align 8 - %148 = load i32, i32* %c, align 4 - %idxprom134 = sext i32 %148 to i64 - %arrayidx135 = getelementptr inbounds float, float* %147, i64 %idxprom134 - store float %add133, float* %arrayidx135, align 4 - ret void -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() - ret i32 %0 -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() - ret i32 %0 -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() - ret i32 %0 -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_yEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.y() - ret i32 %0 -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_yEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y() - ret i32 %0 -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_yEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.y() - ret i32 %0 -} - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x() #2 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() #2 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() #2 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.ntid.y() #2 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.y() #2 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.tid.y() #2 - -attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_61" "target-features"="+ptx64,+sm_61" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { alwaysinline convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_61" "target-features"="+ptx64,+sm_61" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind readnone } -attributes #3 = { convergent nounwind } - -!llvm.module.flags = !{!0, !1, !2} -!nvvm.annotations = !{!3, !4, !5, !4, !6, !6, !6, !6, !7, !7, !6} -!llvm.ident = !{!8} -!nvvmir.version = !{!9} - -!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]} -!1 = !{i32 1, !"wchar_size", i32 4} -!2 = !{i32 4, !"nvvm-reflect-ftz", i32 0} -!3 = !{void (float*, float*, float*, float, i32, i32, i32, float, float, float, float, float, float, float)* @_Z11hotspotOpt1PfS_S_fiiifffffff, !"kernel", i32 1} -!4 = !{null, !"align", i32 8} -!5 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080} -!6 = !{null, !"align", i32 16} -!7 = !{null, !"align", i32 16, !"align", i32 65552, !"align", i32 131088} -!8 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"} -!9 = !{i32 1, i32 4} diff --git a/examples/hotspot3D/3D-host-x86_64-unknown-linux-gnu.ll b/examples/hotspot3D/3D-host-x86_64-unknown-linux-gnu.ll deleted file mode 100644 index dba0e85..0000000 --- a/examples/hotspot3D/3D-host-x86_64-unknown-linux-gnu.ll +++ /dev/null @@ -1,1507 +0,0 @@ -; ModuleID = '3D-host-x86_64-unknown-linux-gnu.bc' -source_filename = "3D.cu" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } -%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } -%struct.timeval = type { i64, i64 } -%struct.timezone = type { i32, i32 } -%struct.dim3 = type { i32, i32, i32 } -%struct.CUstream_st = type opaque - -$_ZN4dim3C2Ejjj = comdat any - -$_ZSt4sqrtf = comdat any - -@.str = private unnamed_addr constant [16 x i8] c"Time: %.3f (s)\0A\00", align 1 -@t_chip = dso_local global float 0x3F40624DE0000000, align 4 -@chip_height = dso_local global float 0x3F90624DE0000000, align 4 -@chip_width = dso_local global float 0x3F90624DE0000000, align 4 -@amb_temp = dso_local global float 8.000000e+01, align 4 -@stderr = external dso_local global %struct._IO_FILE*, align 8 -@.str.1 = private unnamed_addr constant [11 x i8] c"Error: %s\0A\00", align 1 -@.str.2 = private unnamed_addr constant [2 x i8] c"r\00", align 1 -@.str.3 = private unnamed_addr constant [24 x i8] c"The file was not opened\00", align 1 -@.str.4 = private unnamed_addr constant [20 x i8] c"Error reading file\0A\00", align 1 -@.str.5 = private unnamed_addr constant [25 x i8] c"not enough lines in file\00", align 1 -@.str.6 = private unnamed_addr constant [3 x i8] c"%f\00", align 1 -@.str.7 = private unnamed_addr constant [20 x i8] c"invalid file format\00", align 1 -@.str.8 = private unnamed_addr constant [2 x i8] c"w\00", align 1 -@.str.9 = private unnamed_addr constant [25 x i8] c"The file was not opened\0A\00", align 1 -@.str.10 = private unnamed_addr constant [7 x i8] c"%d\09%g\0A\00", align 1 -@.str.11 = private unnamed_addr constant [81 x i8] c"Usage: %s \0A\00", align 1 -@.str.12 = private unnamed_addr constant [68 x i8] c"\09 - number of rows/cols in the grid (positive integer)\0A\00", align 1 -@.str.13 = private unnamed_addr constant [62 x i8] c"\09 - number of layers in the grid (positive integer)\0A\00", align 1 -@.str.14 = private unnamed_addr constant [37 x i8] c"\09 - number of iterations\0A\00", align 1 -@.str.15 = private unnamed_addr constant [83 x i8] c"\09 - name of the file containing the initial power values of each cell\0A\00", align 1 -@.str.16 = private unnamed_addr constant [88 x i8] c"\09 - name of the file containing the initial temperature values of each cell\0A\00", align 1 -@.str.17 = private unnamed_addr constant [28 x i8] c"\09!\00\D3rd<3>;\0A\0Amov.u2\00\1B,e\00b;\0Acvta\8D\00\04%\00\13,\\\00\22ld\C8\00\02\18\00nrd2, [\CE\00\1E])\00\1F1)\00\01a0];\0Ast#\00\81[%SP+0],,\00\0A\16\00\128\16\00\222;\B6\00\01\D8\00a1, 999(\00\02g\00\00\EF\00\18[\9E\01\03M\00\AF1;\0Aret;\0A\0A}\D6\01\1A\FE\02FuncGetAttributes\E1\01\0D#\00\0E\EC\01\0F+\00\06\0F\F7\01\1B\1F1\F7\01Q\1F1\F7\01!\0E\D9\00\0F\02\02\0F\0E8\01\0F\0D\02\8DhDevice\B4\00\0E\0E\02\0E$\00\0F\0F\02\00/32,\00\0B\1F1,\00\18\1F2<\02\13\1F2<\02\1F\1D4<\02\1F2<\02\0C\1F2<\02\13\01_\00\04;\02\0F\D9\00\07\1D]4\00\1F14\00\06\0Fp\02\10\0E\9A\01\0Fq\02\12(32q\02\0B\15\00!12\16\00\09\86\02\1F3\86\02\15\1F3\86\02#2Get\CB\00\0E}\02\05\1B\00\04\DA\00\0F\1C\02\13?3[8W\04.\0F\1B\02\0D\1F3W\04\19\04\B3\01\0D\D0\00\0F\AA\01\06\0F\05\04W\F0\04OccupancyMaxActiveBV\08\FE\03sPerMultiprocessor\9F\01\0F;\00\16\0EB\06\0FC\00%\0EJ\04\0FC\00\1E\1F2\86\00/\1F3\88\02\13O4[32\89\02\1C\1D3\89\02\1F4\89\02\0C\1F4\89\02\19\133\89\02\0F\F1\00\1E\0F\BC\04\00\0FK\00$/2]w\07\00\0FL\00$\0F\1F\05\01\0F\98\00%\0F\A7\07\1D\097\05\186M\05\04,\00\2224-\00\183\CF\03\1F2\CF\03\15\1F2\CF\03L\9FWithFlags\D8\03(\05D\00\0E\E1\03\0FL\00'\0F\EA\030\0CL\00\1F2\98\008\1F3\98\008\1F4H\04\13O5[40\EC\08.\0FH\04\0D\1F5\EC\08\1C\0F\F9\00+\1F]\9C\040\0D\9A\01\0F\A5\040\0D:\02\0F\AE\041\0D\DB\02\0F\B7\041\0D|\03\0F\C0\04I\08-\00\1F3$\0A6\F2\1Avisible .entry _Z11hotspotOpt1PfS_S_fiiif\01\00\06\A9\04\00\A3\00\0F.\00\0D\0E\93\04\0F6\00\18\1F16\00\22\07g\04?f326\00\15\07Q\04\1Fu6\00\17\1F46\00\22\1F56\00\22\1F6\D8\00\22\1F76\00\22\1F86\00\22\1F96\00\22/107\00#\1F17\00#\0F\1F\02#\1F1m\0A\14O6[12&\06\16\95pred %p<6'\06\00\92\00k%f<94>J\06-95K\06/79L\06\0C\1F6L\06\12\02s\00O8, [\0B\01\16\1D]?\00\1F7?\00\18\1E2?\00\1F6?\00\18\1E1?\00\1F5?\00\18\1E0?\00\1F4?\00\17\1E9>\00\1F3>\00\17\1E8>\00\1F2>\00\17\1F7Y\07\00\0F\FA\00\18\1F6?\00\00\0F\FA\00\18\1F5?\00\00\0F\FB\00\18\1E4\FB\00\1F1>\00\17\1F3\A9\08\00\0F}\00\18\0F?\08\01\0Fy\01\18\0F)\08\01\0F\BD\00\18#0]\88\03#to\1D\18\04E\00\144\D9\07\01\1F\00\0A\1C\00\115\1C\00\1F4;\00\05\146A\08\0F;\00\00\117\1C\00\1F6;\00\05\148\92\08\0F;\00\00\119\1C\00\1F8\C4\08\02\1F9\C4\08\02\1A7\16\00\03\C4\08!d5\17\00\00\E9\01\07\C5\08\1Bf\E8\12\132\FE\12\0F\DB\08\03+14\17\00\03\1F\09\0B[\00\114\9F\00+f2\16\00\02q\00\1B3\16\00\01q\00*f4\16\00\115p\00\1BfX\00\115o\00*f6\16\00\126n\00\1B7\16\00\02n\00\1982\12\DA6, 1117782016\CB\00\136\F9\00\1961\00\00\B8\01intid.x\17\00\00\B0\013cta\18\00rul.lo.s\1B\00$9,8\00)r1}\00S20, %K\003add0\00521,6\00*20\93\00\137P\0A\09\F6\12$22\93\00\19y\17\00\153\93\00\1By\93\00424,8\00*r2s\0F\135\93\00\19y\93\00&6,6\00\1B5\93\00\02\F1\01!26\7F\03\03F\00%7,\B0\00\08\17\00%8,4\00\08\17\00%9,j\02\0B\A9\00330,8\00\00'\00\08\93\00331,i\00\00&\00\0B\AF\15\03@\0B\171e\00(32e\00\06\17\00%3,\CF\02\0C|\00#4,8\00\00'\00\0Bb\00\02\EF\0B(34K\00\185\F5\00rsetp.neL\003p1,!\00\F2\0C0;\0A@%p1 bra LBB6_2;\0Abra.uni\10\0021;\0A\08\00\16:Z\00%1,\D8\00\08\AC\01\129)\02\0BA\00\133A\00\172A\00(36B\00\07\D8\01\13,\1D\00.-1Y\00\1F2Y\00\04*3:\CD\0C\000\00\0B\1E\01\148\C0\0C\06\1D\01\0F\12\02\03(38\96\01\07\DE\01#9,\1E\00,-1L\01#2,P\00\00'\00\01O\01\162O\01\1B5\B5\00\134\B5\00\174\0E\01\1F4O\01\04\01Y\00\1B4A\00\136A\00\185A\00\1F0O\01\03#5,\1D\00\1E1X\00\1F5X\00\04\186N\01\01g\03\1B9q\0E\129\F4\03\08`\03(41I\03\0Ak\02#3,!\00\02k\02\163\1C\01\1B8\83\00\137\83\00\177\DB\00\1F7\1C\01\04\01V\00\1B7A\00\139A\00\188\1C\01\182B\00\06\17\00\183\E9\013sub3\01#8,4\00\00#\00\0Er\00\1F8r\00\04\1896\01\01,\02\1B9\A2\03\129\97\04\189\84\00\1F4\7F\04\02(45\03\04\06\CE\01346,\1E\00\0E\84\02#4,P\00\00'\00\01h\01\164h\01\1C1\93\03$10\D5\03\180\D6\03\1F0l\01\04\01W\00,10E\00\142E\00\08\1B\04/47o\01\03\0FX\03\04311,5\00\00$\00\0Fv\00\00\1F1v\00\06\182s\01\111\B2\01\1B9\FC\07#10^\19\122\8B\00\03\85\09\05\E8\00\02\A2\00\02\8A\00)d1\D6\044shl3\0D\02\\\00\01 \00\132\BC\00\03\19\00$3,P\00\01'\00\01e\00\02\A6\0A\01B\06\00\22\00\1A]\F3\07\2210L\08\1C9\17\00\144\17\00\07\A8\00\1A4\A8\00\03J\01\1F9a\01\02\155\D5\00\1A4\03\04#1,5\00\00$\00\01\BE\09\00\BF\00\05\F5\00\01#\04*51\F1\00$6,\1C\00\0B\F1\00$7,\99\00\01'\00\07\F1\00#10\F2\00\1C7\F2\00\02\97\1C8f10.\00%1,\03\09\08\17\00&2,'\01\08\18\00%3,\A0\09\09\22\01\1F8\CA\01\03\05#\01\1A8\CA\01\01\E0\08\02 \00\0A\D9\00\02\E3\08\22d1\E5\0A\192\AB\00\124\D9\00#21\90\07$rn\1A\00#5,\9C\00\00&\00W;\0Afma\1D\00#6,\E8\00\02\D7\00\00/\00\08V\00%7,\88\0A\08\BB\00\00\03\09\04e\05\0A\BB\00$3, \00\0B\BB\00\194\BB\00\183e\00\138\BB\00,4]\9E\00#9,\85\00\02&\00)%f\CD\01\00(\01\04\E4\0A\09\9E\00&5,Y\03\0A\9F\00\03Z\0C\1D5\9F\00\197\9F\00\09f\00\131\9F\00\1C7\9F\00\01\09\0A\01\86\00\02&\00\00\B1\00\089\00\05\0F\02\0A\F8\01\05\CF\09+96\9E\00$9, \00\0A\9E\00)30\9E\00\09e\00\03\F8\01-30\9E\00#5,\85\00\02&\00\00\B0\00\089\00%6,\F5\0B\08\17\00&7,\EC\03\0CR\00#8,9\00\02(\00\00d\00\08;\00%9,]\0C\07\17\00\00\BD\00\05c\03\0BR\00\01\8E\0A\019\00\02(\00\00d\00\08;\00\05}\0A\0AQ\03\143\98\03\0A\96\02/32\1B\05\04\00\AE\0A\03 \00\0BY\01$4,P\00\01'\00\08|\00\133Y\01\0D\07\01\015\00\01\9C\00\02&\00\00\C7\00\089\00%5,\08\0D\0C:\00\176\EF\00\01\9F\01)34\D8\00\05\B9\02*6]\A8\00$6, \00\03\A8\00\05\F9\0D\22rd;\0E(f3\EA\08(52h\05\06\7F\05\1F3\82\09\04#4,\1E\00\00;\00\0F\16\0C\02\185\B4\0B/55_\00\03\186\D2\04\07_\00#7,\1E\00\00;\00\0F\F5\0A\02(57H\00\1F8_\00\03\189v\04\06r\08360,\1E\00\00;\00\0F\06\0A\03\170H\00/61_\00\02(62\98\03\07_\00#3,\1E\00\00;\00\0F/\09\02\186\B4\0B/64_\00\03\0A\96\04\07`\00#6,\1F\00\00<\00\0F\1B\08\03\196M\0FL67, V#\03\9C\10\1C6m\0A$13\8A\08\173\00\09668,8\00\08\A5\00\04c\01\1A3\04\01370,\1E\00\05\D5\09\14g\D5\09#5,Q\00\00'\00\01\D5\09\175\D5\09\1C6\90\00\04[\0C614:5\03)65S\07\0Fu\08\01\186l\04)66U\04\0F\BC\08\01(66[\03/56\A6\08\01/71\A6\08\02/72\A6\08\03373,5\00\00$\00\0D\A6\08\02\E7\02)73|\04\00\CC\02\03\1C\00\0A|\04\02\B0\02\12d\16\038d58\DE\00\127|\04/59\A6\08\04(67.\00\1F8\A6\08\02/69\A6\08\03/70\A6\08\03\1F6p\0A\03/61\A6\08\04\02\11\03-d6p\0A\020\03\12d\96\03(d6u\06\137L\07,63\A6\08\00s\01\02\9C\00\00&\00\0B8\05\01x\01\01\E8\00\02\D7\00\00/\00\08V\00\1F4\A6\08\03/64\A6\08\04\02l\03\01 \00\0B\BB\00\196\BB\00\08r\02#75\BB\00,6]\9E\00#6,\85\00\02&\00\00\B0\00\089\00\1F7\A6\08\03/67\A6\08\05\02L\03-d63\02)69\9F\00\083\02\137E\09-69\9F\00#9,\86\00\02&\00\00\B1\00\079\00/80\A6\08\03/70\A6\08\04\02\FF\02\01 \00\0A\9E\00(72\9E\00(71e\00\03\F8\01\1D7\02\08382,\85\00\02&\00\00\B0\00\089\00\1F3\A6\08\02/84\A6\08\07385,9\00\02(\00\00d\00\08;\00\1F6\A6\08\02/87\A6\08\07388,9\00\02(\00\00d\00\08;\00\1F9\A6\08\03/73\A6\08\02/74\A6\08\04\01<\02\02 \00\0BY\01$6,P\00\01'\00\08\C1\0D\03\D0\0C\1D7\96\02\01\8D\12\01\9C\00\02&\00\00\C7\00\089\00\1F2\A6\08\06&93\EF\00\01\85\119f91\D8\00\1F7\A6\08\04\01^\02\02 \00\03\A8\00\07\A6\08\137R\0E\08*\07\1F7*\07\03/75\A6\08\03\02\08\01\02'\01/74\A6\08\02(76H\00\1F7_\00\03\1F8\A6\08\03\01)\03\12r\C5\00/77\A6\08\02\187\18\11/80_\00\02/81\A6\08\03\01\EA\02\01\1E\00\00;\00\0F\A6\08\02(82H\00\1F3_\00\03\1F4\A6\08\03\01\F7\02\01\1E\00\00;\00\0F\A6\08\02(85H\00\1F6_\00\03\1F7\A6\08\04\01\05\03\01\1F\00\00<\00\0F\A6\08\03,88\ED\07\145\ED\07\08\08\14)89}\08\06\C4\00\01\A8\02\01\1F\00\0F\DE\08\04/90\DE\08\06\176N\08/37N\08\0E\183B\07/38N\08\0E\193\98\0C\1F9r\07\02/40r\07\03/41r\07\03/37r\07\02/38r\07\04\02\DE\15-d3 \0E\00y\00\03P\00\01'\00\08|\00\122!\04,40r\07\00[\14\02\9C\00\00&\00\0B\04\04\01\A4\13\01\E8\00\02\D7\00\00/\00\08V\00\1F5r\07\03/41r\07\04\02\AB\14-d4-\08(43\BB\00(42e\00\136\BB\00,3]\9E\00#7,\85\00\02&\00\00\B0\00\089\00\1F8r\07\03/44r\07\05\01K\00\02!\00\0B\9F\00\196\9F\00\08{\05#49\9F\00\0D{\05\00\E5\12\02\86\00\02&\00\00\B1\00\079\00/51r\07\03/47r\07\04\01J\00\02 \00\0B\9E\00\199\9E\00\08\A3\02\145\F8\01\0D\10\08\01\E6\0D\01\85\00\02&\00\00\B0\00\089\00\1F4r\07\02/55r\07\07\01\C3\0A\019\00\02(\00\00d\00\08;\00\1F7r\07\02/58r\07\07\01\1C\0B\019\00\02(\00\00d\00\08\DB\0A\1F0r\07\03/50r\07\02\1F53\15\05\01\FF\00\02 \00\0B\9C\0B$3,P\00\01'\00\08|\00\03\CB\08\1D5\96\02\01\11\0B\01\9C\00\02&\00\00\C7\00\089\00\1F3r\07\06&64\EF\00\126\EC\0A\09\0B\16/54r\07\04\01n\01\02 \00\03\A8\00\07r\07\2255,\0C\B04;\0Aret;\0A\0A}\0A\00\00\00\00\00\00\00", section ".nv_fatbin", align 8 -@__cuda_fatbin_wrapper = internal constant { i32, i32, i8*, i8* } { i32 1180844977, i32 1, i8* getelementptr inbounds ([27433 x i8], [27433 x i8]* @1, i64 0, i64 0), i8* null }, section ".nvFatBinSegment", align 8 -@__cuda_gpubin_handle = internal global i8** null, align 8 -@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* bitcast (void (i8*)* @__cuda_module_ctor to void ()*), i8* null }] - -; Function Attrs: noinline nounwind optnone uwtable -define dso_local i64 @_Z8get_timev() #0 { -entry: - %tv = alloca %struct.timeval, align 8 - %call = call i32 @gettimeofday(%struct.timeval* %tv, %struct.timezone* null) #8 - %tv_sec = getelementptr inbounds %struct.timeval, %struct.timeval* %tv, i32 0, i32 0 - %0 = load i64, i64* %tv_sec, align 8 - %mul = mul nsw i64 %0, 1000000 - %tv_usec = getelementptr inbounds %struct.timeval, %struct.timeval* %tv, i32 0, i32 1 - %1 = load i64, i64* %tv_usec, align 8 - %add = add nsw i64 %mul, %1 - ret i64 %add -} - -; Function Attrs: nounwind -declare dso_local i32 @gettimeofday(%struct.timeval*, %struct.timezone*) #1 - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z11hotspotOpt1PfS_S_fiiifffffff(float* %p, float* %tIn, float* %tOut, float %sdc, i32 %nx, i32 %ny, i32 %nz, float %ce, float %cw, float %cn, float %cs, float %ct, float %cb, float %cc) #2 { -entry: - %p.addr = alloca float*, align 8 - %tIn.addr = alloca float*, align 8 - %tOut.addr = alloca float*, align 8 - %sdc.addr = alloca float, align 4 - %nx.addr = alloca i32, align 4 - %ny.addr = alloca i32, align 4 - %nz.addr = alloca i32, align 4 - %ce.addr = alloca float, align 4 - %cw.addr = alloca float, align 4 - %cn.addr = alloca float, align 4 - %cs.addr = alloca float, align 4 - %ct.addr = alloca float, align 4 - %cb.addr = alloca float, align 4 - %cc.addr = alloca float, align 4 - %grid_dim = alloca %struct.dim3, align 8 - %block_dim = alloca %struct.dim3, align 8 - %shmem_size = alloca i64, align 8 - %stream = alloca i8*, align 8 - %grid_dim.coerce = alloca { i64, i32 }, align 8 - %block_dim.coerce = alloca { i64, i32 }, align 8 - store float* %p, float** %p.addr, align 8 - store float* %tIn, float** %tIn.addr, align 8 - store float* %tOut, float** %tOut.addr, align 8 - store float %sdc, float* %sdc.addr, align 4 - store i32 %nx, i32* %nx.addr, align 4 - store i32 %ny, i32* %ny.addr, align 4 - store i32 %nz, i32* %nz.addr, align 4 - store float %ce, float* %ce.addr, align 4 - store float %cw, float* %cw.addr, align 4 - store float %cn, float* %cn.addr, align 4 - store float %cs, float* %cs.addr, align 4 - store float %ct, float* %ct.addr, align 4 - store float %cb, float* %cb.addr, align 4 - store float %cc, float* %cc.addr, align 4 - %kernel_args = alloca i8*, i64 14, align 16 - %0 = bitcast float** %p.addr to i8* - %1 = getelementptr i8*, i8** %kernel_args, i32 0 - store i8* %0, i8** %1 - %2 = bitcast float** %tIn.addr to i8* - %3 = getelementptr i8*, i8** %kernel_args, i32 1 - store i8* %2, i8** %3 - %4 = bitcast float** %tOut.addr to i8* - %5 = getelementptr i8*, i8** %kernel_args, i32 2 - store i8* %4, i8** %5 - %6 = bitcast float* %sdc.addr to i8* - %7 = getelementptr i8*, i8** %kernel_args, i32 3 - store i8* %6, i8** %7 - %8 = bitcast i32* %nx.addr to i8* - %9 = getelementptr i8*, i8** %kernel_args, i32 4 - store i8* %8, i8** %9 - %10 = bitcast i32* %ny.addr to i8* - %11 = getelementptr i8*, i8** %kernel_args, i32 5 - store i8* %10, i8** %11 - %12 = bitcast i32* %nz.addr to i8* - %13 = getelementptr i8*, i8** %kernel_args, i32 6 - store i8* %12, i8** %13 - %14 = bitcast float* %ce.addr to i8* - %15 = getelementptr i8*, i8** %kernel_args, i32 7 - store i8* %14, i8** %15 - %16 = bitcast float* %cw.addr to i8* - %17 = getelementptr i8*, i8** %kernel_args, i32 8 - store i8* %16, i8** %17 - %18 = bitcast float* %cn.addr to i8* - %19 = getelementptr i8*, i8** %kernel_args, i32 9 - store i8* %18, i8** %19 - %20 = bitcast float* %cs.addr to i8* - %21 = getelementptr i8*, i8** %kernel_args, i32 10 - store i8* %20, i8** %21 - %22 = bitcast float* %ct.addr to i8* - %23 = getelementptr i8*, i8** %kernel_args, i32 11 - store i8* %22, i8** %23 - %24 = bitcast float* %cb.addr to i8* - %25 = getelementptr i8*, i8** %kernel_args, i32 12 - store i8* %24, i8** %25 - %26 = bitcast float* %cc.addr to i8* - %27 = getelementptr i8*, i8** %kernel_args, i32 13 - store i8* %26, i8** %27 - %28 = call i32 @__cudaPopCallConfiguration(%struct.dim3* %grid_dim, %struct.dim3* %block_dim, i64* %shmem_size, i8** %stream) - %29 = load i64, i64* %shmem_size, align 8 - %30 = load i8*, i8** %stream, align 8 - %31 = bitcast { i64, i32 }* %grid_dim.coerce to i8* - %32 = bitcast %struct.dim3* %grid_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %31, i8* align 8 %32, i64 12, i1 false) - %33 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 0 - %34 = load i64, i64* %33, align 8 - %35 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 1 - %36 = load i32, i32* %35, align 8 - %37 = bitcast { i64, i32 }* %block_dim.coerce to i8* - %38 = bitcast %struct.dim3* %block_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %37, i8* align 8 %38, i64 12, i1 false) - %39 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 0 - %40 = load i64, i64* %39, align 8 - %41 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 1 - %42 = load i32, i32* %41, align 8 - %43 = bitcast i8* %30 to %struct.CUstream_st* - %call = call i32 @cudaLaunchKernel(i8* bitcast (void (float*, float*, float*, float, i32, i32, i32, float, float, float, float, float, float, float)* @_Z11hotspotOpt1PfS_S_fiiifffffff to i8*), i64 %34, i32 %36, i64 %40, i32 %42, i8** %kernel_args, i64 %29, %struct.CUstream_st* %43) - br label %setup.end - -setup.end: ; preds = %entry - ret void -} - -declare dso_local i32 @__cudaPopCallConfiguration(%struct.dim3*, %struct.dim3*, i64*, i8**) - -declare dso_local i32 @cudaLaunchKernel(i8*, i64, i32, i64, i32, i8**, i64, %struct.CUstream_st*) - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #3 - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z12hotspot_opt1PfS_S_iiifffffi(float* %p, float* %tIn, float* %tOut, i32 %nx, i32 %ny, i32 %nz, float %Cap, float %Rx, float %Ry, float %Rz, float %dt, i32 %numiter) #2 { -entry: - %p.addr = alloca float*, align 8 - %tIn.addr = alloca float*, align 8 - %tOut.addr = alloca float*, align 8 - %nx.addr = alloca i32, align 4 - %ny.addr = alloca i32, align 4 - %nz.addr = alloca i32, align 4 - %Cap.addr = alloca float, align 4 - %Rx.addr = alloca float, align 4 - %Ry.addr = alloca float, align 4 - %Rz.addr = alloca float, align 4 - %dt.addr = alloca float, align 4 - %numiter.addr = alloca i32, align 4 - %ce = alloca float, align 4 - %cw = alloca float, align 4 - %cn = alloca float, align 4 - %cs = alloca float, align 4 - %ct = alloca float, align 4 - %cb = alloca float, align 4 - %cc = alloca float, align 4 - %stepDivCap = alloca float, align 4 - %s = alloca i64, align 8 - %tIn_d = alloca float*, align 8 - %tOut_d = alloca float*, align 8 - %p_d = alloca float*, align 8 - %block_dim = alloca %struct.dim3, align 4 - %grid_dim = alloca %struct.dim3, align 4 - %start = alloca i64, align 8 - %i = alloca i32, align 4 - %agg.tmp = alloca %struct.dim3, align 4 - %agg.tmp23 = alloca %struct.dim3, align 4 - %agg.tmp.coerce = alloca { i64, i32 }, align 4 - %agg.tmp23.coerce = alloca { i64, i32 }, align 4 - %t = alloca float*, align 8 - %stop = alloca i64, align 8 - %time = alloca float, align 4 - store float* %p, float** %p.addr, align 8 - store float* %tIn, float** %tIn.addr, align 8 - store float* %tOut, float** %tOut.addr, align 8 - store i32 %nx, i32* %nx.addr, align 4 - store i32 %ny, i32* %ny.addr, align 4 - store i32 %nz, i32* %nz.addr, align 4 - store float %Cap, float* %Cap.addr, align 4 - store float %Rx, float* %Rx.addr, align 4 - store float %Ry, float* %Ry.addr, align 4 - store float %Rz, float* %Rz.addr, align 4 - store float %dt, float* %dt.addr, align 4 - store i32 %numiter, i32* %numiter.addr, align 4 - %0 = load float, float* %dt.addr, align 4 - %1 = load float, float* %Cap.addr, align 4 - %div = fdiv float %0, %1 - store float %div, float* %stepDivCap, align 4 - %2 = load float, float* %stepDivCap, align 4 - %3 = load float, float* %Rx.addr, align 4 - %div1 = fdiv float %2, %3 - store float %div1, float* %cw, align 4 - store float %div1, float* %ce, align 4 - %4 = load float, float* %stepDivCap, align 4 - %5 = load float, float* %Ry.addr, align 4 - %div2 = fdiv float %4, %5 - store float %div2, float* %cs, align 4 - store float %div2, float* %cn, align 4 - %6 = load float, float* %stepDivCap, align 4 - %7 = load float, float* %Rz.addr, align 4 - %div3 = fdiv float %6, %7 - store float %div3, float* %cb, align 4 - store float %div3, float* %ct, align 4 - %8 = load float, float* %ce, align 4 - %conv = fpext float %8 to double - %mul = fmul contract double 2.000000e+00, %conv - %9 = load float, float* %cn, align 4 - %conv4 = fpext float %9 to double - %mul5 = fmul contract double 2.000000e+00, %conv4 - %add = fadd contract double %mul, %mul5 - %10 = load float, float* %ct, align 4 - %conv6 = fpext float %10 to double - %mul7 = fmul contract double 3.000000e+00, %conv6 - %add8 = fadd contract double %add, %mul7 - %sub = fsub contract double 1.000000e+00, %add8 - %conv9 = fptrunc double %sub to float - store float %conv9, float* %cc, align 4 - %11 = load i32, i32* %nx.addr, align 4 - %conv10 = sext i32 %11 to i64 - %mul11 = mul i64 4, %conv10 - %12 = load i32, i32* %ny.addr, align 4 - %conv12 = sext i32 %12 to i64 - %mul13 = mul i64 %mul11, %conv12 - %13 = load i32, i32* %nz.addr, align 4 - %conv14 = sext i32 %13 to i64 - %mul15 = mul i64 %mul13, %conv14 - store i64 %mul15, i64* %s, align 8 - %14 = bitcast float** %p_d to i8** - %15 = load i64, i64* %s, align 8 - %call = call i32 @cudaMalloc(i8** %14, i64 %15) - %16 = bitcast float** %tIn_d to i8** - %17 = load i64, i64* %s, align 8 - %call16 = call i32 @cudaMalloc(i8** %16, i64 %17) - %18 = bitcast float** %tOut_d to i8** - %19 = load i64, i64* %s, align 8 - %call17 = call i32 @cudaMalloc(i8** %18, i64 %19) - %20 = load float*, float** %tIn_d, align 8 - %21 = bitcast float* %20 to i8* - %22 = load float*, float** %tIn.addr, align 8 - %23 = bitcast float* %22 to i8* - %24 = load i64, i64* %s, align 8 - %call18 = call i32 @cudaMemcpy(i8* %21, i8* %23, i64 %24, i32 1) - %25 = load float*, float** %p_d, align 8 - %26 = bitcast float* %25 to i8* - %27 = load float*, float** %p.addr, align 8 - %28 = bitcast float* %27 to i8* - %29 = load i64, i64* %s, align 8 - %call19 = call i32 @cudaMemcpy(i8* %26, i8* %28, i64 %29, i32 1) - call void @_ZN4dim3C2Ejjj(%struct.dim3* %block_dim, i32 64, i32 4, i32 1) - %30 = load i32, i32* %nx.addr, align 4 - %div20 = sdiv i32 %30, 64 - %31 = load i32, i32* %ny.addr, align 4 - %div21 = sdiv i32 %31, 4 - call void @_ZN4dim3C2Ejjj(%struct.dim3* %grid_dim, i32 %div20, i32 %div21, i32 1) - %call22 = call i64 @_Z8get_timev() - store i64 %call22, i64* %start, align 8 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %32 = load i32, i32* %i, align 4 - %33 = load i32, i32* %numiter.addr, align 4 - %cmp = icmp slt i32 %32, %33 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %34 = bitcast %struct.dim3* %agg.tmp to i8* - %35 = bitcast %struct.dim3* %grid_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %34, i8* align 4 %35, i64 12, i1 false) - %36 = bitcast %struct.dim3* %agg.tmp23 to i8* - %37 = bitcast %struct.dim3* %block_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %36, i8* align 4 %37, i64 12, i1 false) - %38 = bitcast { i64, i32 }* %agg.tmp.coerce to i8* - %39 = bitcast %struct.dim3* %agg.tmp to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %38, i8* align 4 %39, i64 12, i1 false) - %40 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 0 - %41 = load i64, i64* %40, align 4 - %42 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 1 - %43 = load i32, i32* %42, align 4 - %44 = bitcast { i64, i32 }* %agg.tmp23.coerce to i8* - %45 = bitcast %struct.dim3* %agg.tmp23 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %44, i8* align 4 %45, i64 12, i1 false) - %46 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp23.coerce, i32 0, i32 0 - %47 = load i64, i64* %46, align 4 - %48 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp23.coerce, i32 0, i32 1 - %49 = load i32, i32* %48, align 4 - %call24 = call i32 @__cudaPushCallConfiguration(i64 %41, i32 %43, i64 %47, i32 %49, i64 0, i8* null) - %tobool = icmp ne i32 %call24, 0 - br i1 %tobool, label %kcall.end, label %kcall.configok - -kcall.configok: ; preds = %for.body - %50 = load float*, float** %p_d, align 8 - %51 = load float*, float** %tIn_d, align 8 - %52 = load float*, float** %tOut_d, align 8 - %53 = load float, float* %stepDivCap, align 4 - %54 = load i32, i32* %nx.addr, align 4 - %55 = load i32, i32* %ny.addr, align 4 - %56 = load i32, i32* %nz.addr, align 4 - %57 = load float, float* %ce, align 4 - %58 = load float, float* %cw, align 4 - %59 = load float, float* %cn, align 4 - %60 = load float, float* %cs, align 4 - %61 = load float, float* %ct, align 4 - %62 = load float, float* %cb, align 4 - %63 = load float, float* %cc, align 4 - call void @_Z11hotspotOpt1PfS_S_fiiifffffff(float* %50, float* %51, float* %52, float %53, i32 %54, i32 %55, i32 %56, float %57, float %58, float %59, float %60, float %61, float %62, float %63) - br label %kcall.end - -kcall.end: ; preds = %kcall.configok, %for.body - %64 = load float*, float** %tIn_d, align 8 - store float* %64, float** %t, align 8 - %65 = load float*, float** %tOut_d, align 8 - store float* %65, float** %tIn_d, align 8 - %66 = load float*, float** %t, align 8 - store float* %66, float** %tOut_d, align 8 - br label %for.inc - -for.inc: ; preds = %kcall.end - %67 = load i32, i32* %i, align 4 - %inc = add nsw i32 %67, 1 - store i32 %inc, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %call25 = call i32 @cudaDeviceSynchronize() - %call26 = call i64 @_Z8get_timev() - store i64 %call26, i64* %stop, align 8 - %68 = load i64, i64* %stop, align 8 - %69 = load i64, i64* %start, align 8 - %sub27 = sub nsw i64 %68, %69 - %conv28 = sitofp i64 %sub27 to double - %div29 = fdiv double %conv28, 1.000000e+06 - %conv30 = fptrunc double %div29 to float - store float %conv30, float* %time, align 4 - %70 = load float, float* %time, align 4 - %conv31 = fpext float %70 to double - %call32 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str, i64 0, i64 0), double %conv31) - %71 = load float*, float** %tOut.addr, align 8 - %72 = bitcast float* %71 to i8* - %73 = load float*, float** %tOut_d, align 8 - %74 = bitcast float* %73 to i8* - %75 = load i64, i64* %s, align 8 - %call33 = call i32 @cudaMemcpy(i8* %72, i8* %74, i64 %75, i32 2) - %76 = load float*, float** %p_d, align 8 - %77 = bitcast float* %76 to i8* - %call34 = call i32 @cudaFree(i8* %77) - %78 = load float*, float** %tIn_d, align 8 - %79 = bitcast float* %78 to i8* - %call35 = call i32 @cudaFree(i8* %79) - %80 = load float*, float** %tOut_d, align 8 - %81 = bitcast float* %80 to i8* - %call36 = call i32 @cudaFree(i8* %81) - ret void -} - -declare dso_local i32 @cudaMalloc(i8**, i64) #4 - -declare dso_local i32 @cudaMemcpy(i8*, i8*, i64, i32) #4 - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN4dim3C2Ejjj(%struct.dim3* %this, i32 %vx, i32 %vy, i32 %vz) unnamed_addr #0 comdat align 2 { -entry: - %this.addr = alloca %struct.dim3*, align 8 - %vx.addr = alloca i32, align 4 - %vy.addr = alloca i32, align 4 - %vz.addr = alloca i32, align 4 - store %struct.dim3* %this, %struct.dim3** %this.addr, align 8 - store i32 %vx, i32* %vx.addr, align 4 - store i32 %vy, i32* %vy.addr, align 4 - store i32 %vz, i32* %vz.addr, align 4 - %this1 = load %struct.dim3*, %struct.dim3** %this.addr, align 8 - %x = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 0 - %0 = load i32, i32* %vx.addr, align 4 - store i32 %0, i32* %x, align 4 - %y = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 1 - %1 = load i32, i32* %vy.addr, align 4 - store i32 %1, i32* %y, align 4 - %z = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 2 - %2 = load i32, i32* %vz.addr, align 4 - store i32 %2, i32* %z, align 4 - ret void -} - -declare dso_local i32 @__cudaPushCallConfiguration(i64, i32, i64, i32, i64, i8*) #4 - -declare dso_local i32 @cudaDeviceSynchronize() #4 - -declare dso_local i32 @printf(i8*, ...) #4 - -declare dso_local i32 @cudaFree(i8*) #4 - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z5fatalPKc(i8* %s) #2 { -entry: - %s.addr = alloca i8*, align 8 - store i8* %s, i8** %s.addr, align 8 - %0 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %1 = load i8*, i8** %s.addr, align 8 - %call = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %0, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.1, i64 0, i64 0), i8* %1) - ret void -} - -declare dso_local i32 @fprintf(%struct._IO_FILE*, i8*, ...) #4 - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z9readinputPfiiiPc(float* %vect, i32 %grid_rows, i32 %grid_cols, i32 %layers, i8* %file) #2 { -entry: - %vect.addr = alloca float*, align 8 - %grid_rows.addr = alloca i32, align 4 - %grid_cols.addr = alloca i32, align 4 - %layers.addr = alloca i32, align 4 - %file.addr = alloca i8*, align 8 - %i = alloca i32, align 4 - %j = alloca i32, align 4 - %k = alloca i32, align 4 - %fp = alloca %struct._IO_FILE*, align 8 - %str = alloca [256 x i8], align 16 - %val = alloca float, align 4 - store float* %vect, float** %vect.addr, align 8 - store i32 %grid_rows, i32* %grid_rows.addr, align 4 - store i32 %grid_cols, i32* %grid_cols.addr, align 4 - store i32 %layers, i32* %layers.addr, align 4 - store i8* %file, i8** %file.addr, align 8 - %0 = load i8*, i8** %file.addr, align 8 - %call = call %struct._IO_FILE* @fopen(i8* %0, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.2, i64 0, i64 0)) - store %struct._IO_FILE* %call, %struct._IO_FILE** %fp, align 8 - %cmp = icmp eq %struct._IO_FILE* %call, null - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - call void @_Z5fatalPKc(i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.3, i64 0, i64 0)) - br label %if.end - -if.end: ; preds = %if.then, %entry - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc28, %if.end - %1 = load i32, i32* %i, align 4 - %2 = load i32, i32* %grid_rows.addr, align 4 - %sub = sub nsw i32 %2, 1 - %cmp1 = icmp sle i32 %1, %sub - br i1 %cmp1, label %for.body, label %for.end30 - -for.body: ; preds = %for.cond - store i32 0, i32* %j, align 4 - br label %for.cond2 - -for.cond2: ; preds = %for.inc25, %for.body - %3 = load i32, i32* %j, align 4 - %4 = load i32, i32* %grid_cols.addr, align 4 - %sub3 = sub nsw i32 %4, 1 - %cmp4 = icmp sle i32 %3, %sub3 - br i1 %cmp4, label %for.body5, label %for.end27 - -for.body5: ; preds = %for.cond2 - store i32 0, i32* %k, align 4 - br label %for.cond6 - -for.cond6: ; preds = %for.inc, %for.body5 - %5 = load i32, i32* %k, align 4 - %6 = load i32, i32* %layers.addr, align 4 - %sub7 = sub nsw i32 %6, 1 - %cmp8 = icmp sle i32 %5, %sub7 - br i1 %cmp8, label %for.body9, label %for.end - -for.body9: ; preds = %for.cond6 - %arraydecay = getelementptr inbounds [256 x i8], [256 x i8]* %str, i64 0, i64 0 - %7 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8 - %call10 = call i8* @fgets(i8* %arraydecay, i32 256, %struct._IO_FILE* %7) - %cmp11 = icmp eq i8* %call10, null - br i1 %cmp11, label %if.then12, label %if.end13 - -if.then12: ; preds = %for.body9 - call void @_Z5fatalPKc(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str.4, i64 0, i64 0)) - br label %if.end13 - -if.end13: ; preds = %if.then12, %for.body9 - %8 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8 - %call14 = call i32 @feof(%struct._IO_FILE* %8) #8 - %tobool = icmp ne i32 %call14, 0 - br i1 %tobool, label %if.then15, label %if.end16 - -if.then15: ; preds = %if.end13 - call void @_Z5fatalPKc(i8* getelementptr inbounds ([25 x i8], [25 x i8]* @.str.5, i64 0, i64 0)) - br label %if.end16 - -if.end16: ; preds = %if.then15, %if.end13 - %arraydecay17 = getelementptr inbounds [256 x i8], [256 x i8]* %str, i64 0, i64 0 - %call18 = call i32 (i8*, i8*, ...) @sscanf(i8* %arraydecay17, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.6, i64 0, i64 0), float* %val) #8 - %cmp19 = icmp ne i32 %call18, 1 - br i1 %cmp19, label %if.then20, label %if.end21 - -if.then20: ; preds = %if.end16 - call void @_Z5fatalPKc(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str.7, i64 0, i64 0)) - br label %if.end21 - -if.end21: ; preds = %if.then20, %if.end16 - %9 = load float, float* %val, align 4 - %10 = load float*, float** %vect.addr, align 8 - %11 = load i32, i32* %i, align 4 - %12 = load i32, i32* %grid_cols.addr, align 4 - %mul = mul nsw i32 %11, %12 - %13 = load i32, i32* %j, align 4 - %add = add nsw i32 %mul, %13 - %14 = load i32, i32* %k, align 4 - %15 = load i32, i32* %grid_rows.addr, align 4 - %mul22 = mul nsw i32 %14, %15 - %16 = load i32, i32* %grid_cols.addr, align 4 - %mul23 = mul nsw i32 %mul22, %16 - %add24 = add nsw i32 %add, %mul23 - %idxprom = sext i32 %add24 to i64 - %arrayidx = getelementptr inbounds float, float* %10, i64 %idxprom - store float %9, float* %arrayidx, align 4 - br label %for.inc - -for.inc: ; preds = %if.end21 - %17 = load i32, i32* %k, align 4 - %inc = add nsw i32 %17, 1 - store i32 %inc, i32* %k, align 4 - br label %for.cond6 - -for.end: ; preds = %for.cond6 - br label %for.inc25 - -for.inc25: ; preds = %for.end - %18 = load i32, i32* %j, align 4 - %inc26 = add nsw i32 %18, 1 - store i32 %inc26, i32* %j, align 4 - br label %for.cond2 - -for.end27: ; preds = %for.cond2 - br label %for.inc28 - -for.inc28: ; preds = %for.end27 - %19 = load i32, i32* %i, align 4 - %inc29 = add nsw i32 %19, 1 - store i32 %inc29, i32* %i, align 4 - br label %for.cond - -for.end30: ; preds = %for.cond - %20 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8 - %call31 = call i32 @fclose(%struct._IO_FILE* %20) - ret void -} - -declare dso_local %struct._IO_FILE* @fopen(i8*, i8*) #4 - -declare dso_local i8* @fgets(i8*, i32, %struct._IO_FILE*) #4 - -; Function Attrs: nounwind -declare dso_local i32 @feof(%struct._IO_FILE*) #1 - -; Function Attrs: nounwind -declare dso_local i32 @sscanf(i8*, i8*, ...) #1 - -declare dso_local i32 @fclose(%struct._IO_FILE*) #4 - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z11writeoutputPfiiiPc(float* %vect, i32 %grid_rows, i32 %grid_cols, i32 %layers, i8* %file) #2 { -entry: - %vect.addr = alloca float*, align 8 - %grid_rows.addr = alloca i32, align 4 - %grid_cols.addr = alloca i32, align 4 - %layers.addr = alloca i32, align 4 - %file.addr = alloca i8*, align 8 - %i = alloca i32, align 4 - %j = alloca i32, align 4 - %k = alloca i32, align 4 - %index = alloca i32, align 4 - %fp = alloca %struct._IO_FILE*, align 8 - %str = alloca [256 x i8], align 16 - store float* %vect, float** %vect.addr, align 8 - store i32 %grid_rows, i32* %grid_rows.addr, align 4 - store i32 %grid_cols, i32* %grid_cols.addr, align 4 - store i32 %layers, i32* %layers.addr, align 4 - store i8* %file, i8** %file.addr, align 8 - store i32 0, i32* %index, align 4 - %0 = load i8*, i8** %file.addr, align 8 - %call = call %struct._IO_FILE* @fopen(i8* %0, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.8, i64 0, i64 0)) - store %struct._IO_FILE* %call, %struct._IO_FILE** %fp, align 8 - %cmp = icmp eq %struct._IO_FILE* %call, null - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([25 x i8], [25 x i8]* @.str.9, i64 0, i64 0)) - br label %if.end - -if.end: ; preds = %if.then, %entry - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc19, %if.end - %1 = load i32, i32* %i, align 4 - %2 = load i32, i32* %grid_rows.addr, align 4 - %cmp2 = icmp slt i32 %1, %2 - br i1 %cmp2, label %for.body, label %for.end21 - -for.body: ; preds = %for.cond - store i32 0, i32* %j, align 4 - br label %for.cond3 - -for.cond3: ; preds = %for.inc16, %for.body - %3 = load i32, i32* %j, align 4 - %4 = load i32, i32* %grid_cols.addr, align 4 - %cmp4 = icmp slt i32 %3, %4 - br i1 %cmp4, label %for.body5, label %for.end18 - -for.body5: ; preds = %for.cond3 - store i32 0, i32* %k, align 4 - br label %for.cond6 - -for.cond6: ; preds = %for.inc, %for.body5 - %5 = load i32, i32* %k, align 4 - %6 = load i32, i32* %layers.addr, align 4 - %cmp7 = icmp slt i32 %5, %6 - br i1 %cmp7, label %for.body8, label %for.end - -for.body8: ; preds = %for.cond6 - %arraydecay = getelementptr inbounds [256 x i8], [256 x i8]* %str, i64 0, i64 0 - %7 = load i32, i32* %index, align 4 - %8 = load float*, float** %vect.addr, align 8 - %9 = load i32, i32* %i, align 4 - %10 = load i32, i32* %grid_cols.addr, align 4 - %mul = mul nsw i32 %9, %10 - %11 = load i32, i32* %j, align 4 - %add = add nsw i32 %mul, %11 - %12 = load i32, i32* %k, align 4 - %13 = load i32, i32* %grid_rows.addr, align 4 - %mul9 = mul nsw i32 %12, %13 - %14 = load i32, i32* %grid_cols.addr, align 4 - %mul10 = mul nsw i32 %mul9, %14 - %add11 = add nsw i32 %add, %mul10 - %idxprom = sext i32 %add11 to i64 - %arrayidx = getelementptr inbounds float, float* %8, i64 %idxprom - %15 = load float, float* %arrayidx, align 4 - %conv = fpext float %15 to double - %call12 = call i32 (i8*, i8*, ...) @sprintf(i8* %arraydecay, i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.10, i64 0, i64 0), i32 %7, double %conv) #8 - %arraydecay13 = getelementptr inbounds [256 x i8], [256 x i8]* %str, i64 0, i64 0 - %16 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8 - %call14 = call i32 @fputs(i8* %arraydecay13, %struct._IO_FILE* %16) - %17 = load i32, i32* %index, align 4 - %inc = add nsw i32 %17, 1 - store i32 %inc, i32* %index, align 4 - br label %for.inc - -for.inc: ; preds = %for.body8 - %18 = load i32, i32* %k, align 4 - %inc15 = add nsw i32 %18, 1 - store i32 %inc15, i32* %k, align 4 - br label %for.cond6 - -for.end: ; preds = %for.cond6 - br label %for.inc16 - -for.inc16: ; preds = %for.end - %19 = load i32, i32* %j, align 4 - %inc17 = add nsw i32 %19, 1 - store i32 %inc17, i32* %j, align 4 - br label %for.cond3 - -for.end18: ; preds = %for.cond3 - br label %for.inc19 - -for.inc19: ; preds = %for.end18 - %20 = load i32, i32* %i, align 4 - %inc20 = add nsw i32 %20, 1 - store i32 %inc20, i32* %i, align 4 - br label %for.cond - -for.end21: ; preds = %for.cond - %21 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8 - %call22 = call i32 @fclose(%struct._IO_FILE* %21) - ret void -} - -; Function Attrs: nounwind -declare dso_local i32 @sprintf(i8*, i8*, ...) #1 - -declare dso_local i32 @fputs(i8*, %struct._IO_FILE*) #4 - -; Function Attrs: noinline nounwind optnone uwtable -define dso_local void @_Z14computeTempCPUPfS_S_iiifffffi(float* %pIn, float* %tIn, float* %tOut, i32 %nx, i32 %ny, i32 %nz, float %Cap, float %Rx, float %Ry, float %Rz, float %dt, i32 %numiter) #0 { -entry: - %pIn.addr = alloca float*, align 8 - %tIn.addr = alloca float*, align 8 - %tOut.addr = alloca float*, align 8 - %nx.addr = alloca i32, align 4 - %ny.addr = alloca i32, align 4 - %nz.addr = alloca i32, align 4 - %Cap.addr = alloca float, align 4 - %Rx.addr = alloca float, align 4 - %Ry.addr = alloca float, align 4 - %Rz.addr = alloca float, align 4 - %dt.addr = alloca float, align 4 - %numiter.addr = alloca i32, align 4 - %ce = alloca float, align 4 - %cw = alloca float, align 4 - %cn = alloca float, align 4 - %cs = alloca float, align 4 - %ct = alloca float, align 4 - %cb = alloca float, align 4 - %cc = alloca float, align 4 - %stepDivCap = alloca float, align 4 - %c = alloca i32, align 4 - %w = alloca i32, align 4 - %e = alloca i32, align 4 - %n = alloca i32, align 4 - %s = alloca i32, align 4 - %b = alloca i32, align 4 - %t = alloca i32, align 4 - %x = alloca i32, align 4 - %y = alloca i32, align 4 - %z = alloca i32, align 4 - %i = alloca i32, align 4 - %temp = alloca float*, align 8 - store float* %pIn, float** %pIn.addr, align 8 - store float* %tIn, float** %tIn.addr, align 8 - store float* %tOut, float** %tOut.addr, align 8 - store i32 %nx, i32* %nx.addr, align 4 - store i32 %ny, i32* %ny.addr, align 4 - store i32 %nz, i32* %nz.addr, align 4 - store float %Cap, float* %Cap.addr, align 4 - store float %Rx, float* %Rx.addr, align 4 - store float %Ry, float* %Ry.addr, align 4 - store float %Rz, float* %Rz.addr, align 4 - store float %dt, float* %dt.addr, align 4 - store i32 %numiter, i32* %numiter.addr, align 4 - %0 = load float, float* %dt.addr, align 4 - %1 = load float, float* %Cap.addr, align 4 - %div = fdiv float %0, %1 - store float %div, float* %stepDivCap, align 4 - %2 = load float, float* %stepDivCap, align 4 - %3 = load float, float* %Rx.addr, align 4 - %div1 = fdiv float %2, %3 - store float %div1, float* %cw, align 4 - store float %div1, float* %ce, align 4 - %4 = load float, float* %stepDivCap, align 4 - %5 = load float, float* %Ry.addr, align 4 - %div2 = fdiv float %4, %5 - store float %div2, float* %cs, align 4 - store float %div2, float* %cn, align 4 - %6 = load float, float* %stepDivCap, align 4 - %7 = load float, float* %Rz.addr, align 4 - %div3 = fdiv float %6, %7 - store float %div3, float* %cb, align 4 - store float %div3, float* %ct, align 4 - %8 = load float, float* %ce, align 4 - %conv = fpext float %8 to double - %mul = fmul contract double 2.000000e+00, %conv - %9 = load float, float* %cn, align 4 - %conv4 = fpext float %9 to double - %mul5 = fmul contract double 2.000000e+00, %conv4 - %add = fadd contract double %mul, %mul5 - %10 = load float, float* %ct, align 4 - %conv6 = fpext float %10 to double - %mul7 = fmul contract double 3.000000e+00, %conv6 - %add8 = fadd contract double %add, %mul7 - %sub = fsub contract double 1.000000e+00, %add8 - %conv9 = fptrunc double %sub to float - store float %conv9, float* %cc, align 4 - store i32 0, i32* %i, align 4 - br label %do.body - -do.body: ; preds = %do.cond, %entry - store i32 0, i32* %z, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc95, %do.body - %11 = load i32, i32* %z, align 4 - %12 = load i32, i32* %nz.addr, align 4 - %cmp = icmp slt i32 %11, %12 - br i1 %cmp, label %for.body, label %for.end97 - -for.body: ; preds = %for.cond - store i32 0, i32* %y, align 4 - br label %for.cond10 - -for.cond10: ; preds = %for.inc92, %for.body - %13 = load i32, i32* %y, align 4 - %14 = load i32, i32* %ny.addr, align 4 - %cmp11 = icmp slt i32 %13, %14 - br i1 %cmp11, label %for.body12, label %for.end94 - -for.body12: ; preds = %for.cond10 - store i32 0, i32* %x, align 4 - br label %for.cond13 - -for.cond13: ; preds = %for.inc, %for.body12 - %15 = load i32, i32* %x, align 4 - %16 = load i32, i32* %nx.addr, align 4 - %cmp14 = icmp slt i32 %15, %16 - br i1 %cmp14, label %for.body15, label %for.end - -for.body15: ; preds = %for.cond13 - %17 = load i32, i32* %x, align 4 - %18 = load i32, i32* %y, align 4 - %19 = load i32, i32* %nx.addr, align 4 - %mul16 = mul nsw i32 %18, %19 - %add17 = add nsw i32 %17, %mul16 - %20 = load i32, i32* %z, align 4 - %21 = load i32, i32* %nx.addr, align 4 - %mul18 = mul nsw i32 %20, %21 - %22 = load i32, i32* %ny.addr, align 4 - %mul19 = mul nsw i32 %mul18, %22 - %add20 = add nsw i32 %add17, %mul19 - store i32 %add20, i32* %c, align 4 - %23 = load i32, i32* %x, align 4 - %cmp21 = icmp eq i32 %23, 0 - br i1 %cmp21, label %cond.true, label %cond.false - -cond.true: ; preds = %for.body15 - %24 = load i32, i32* %c, align 4 - br label %cond.end - -cond.false: ; preds = %for.body15 - %25 = load i32, i32* %c, align 4 - %sub22 = sub nsw i32 %25, 1 - br label %cond.end - -cond.end: ; preds = %cond.false, %cond.true - %cond = phi i32 [ %24, %cond.true ], [ %sub22, %cond.false ] - store i32 %cond, i32* %w, align 4 - %26 = load i32, i32* %x, align 4 - %27 = load i32, i32* %nx.addr, align 4 - %sub23 = sub nsw i32 %27, 1 - %cmp24 = icmp eq i32 %26, %sub23 - br i1 %cmp24, label %cond.true25, label %cond.false26 - -cond.true25: ; preds = %cond.end - %28 = load i32, i32* %c, align 4 - br label %cond.end28 - -cond.false26: ; preds = %cond.end - %29 = load i32, i32* %c, align 4 - %add27 = add nsw i32 %29, 1 - br label %cond.end28 - -cond.end28: ; preds = %cond.false26, %cond.true25 - %cond29 = phi i32 [ %28, %cond.true25 ], [ %add27, %cond.false26 ] - store i32 %cond29, i32* %e, align 4 - %30 = load i32, i32* %y, align 4 - %cmp30 = icmp eq i32 %30, 0 - br i1 %cmp30, label %cond.true31, label %cond.false32 - -cond.true31: ; preds = %cond.end28 - %31 = load i32, i32* %c, align 4 - br label %cond.end34 - -cond.false32: ; preds = %cond.end28 - %32 = load i32, i32* %c, align 4 - %33 = load i32, i32* %nx.addr, align 4 - %sub33 = sub nsw i32 %32, %33 - br label %cond.end34 - -cond.end34: ; preds = %cond.false32, %cond.true31 - %cond35 = phi i32 [ %31, %cond.true31 ], [ %sub33, %cond.false32 ] - store i32 %cond35, i32* %n, align 4 - %34 = load i32, i32* %y, align 4 - %35 = load i32, i32* %ny.addr, align 4 - %sub36 = sub nsw i32 %35, 1 - %cmp37 = icmp eq i32 %34, %sub36 - br i1 %cmp37, label %cond.true38, label %cond.false39 - -cond.true38: ; preds = %cond.end34 - %36 = load i32, i32* %c, align 4 - br label %cond.end41 - -cond.false39: ; preds = %cond.end34 - %37 = load i32, i32* %c, align 4 - %38 = load i32, i32* %nx.addr, align 4 - %add40 = add nsw i32 %37, %38 - br label %cond.end41 - -cond.end41: ; preds = %cond.false39, %cond.true38 - %cond42 = phi i32 [ %36, %cond.true38 ], [ %add40, %cond.false39 ] - store i32 %cond42, i32* %s, align 4 - %39 = load i32, i32* %z, align 4 - %cmp43 = icmp eq i32 %39, 0 - br i1 %cmp43, label %cond.true44, label %cond.false45 - -cond.true44: ; preds = %cond.end41 - %40 = load i32, i32* %c, align 4 - br label %cond.end48 - -cond.false45: ; preds = %cond.end41 - %41 = load i32, i32* %c, align 4 - %42 = load i32, i32* %nx.addr, align 4 - %43 = load i32, i32* %ny.addr, align 4 - %mul46 = mul nsw i32 %42, %43 - %sub47 = sub nsw i32 %41, %mul46 - br label %cond.end48 - -cond.end48: ; preds = %cond.false45, %cond.true44 - %cond49 = phi i32 [ %40, %cond.true44 ], [ %sub47, %cond.false45 ] - store i32 %cond49, i32* %b, align 4 - %44 = load i32, i32* %z, align 4 - %45 = load i32, i32* %nz.addr, align 4 - %sub50 = sub nsw i32 %45, 1 - %cmp51 = icmp eq i32 %44, %sub50 - br i1 %cmp51, label %cond.true52, label %cond.false53 - -cond.true52: ; preds = %cond.end48 - %46 = load i32, i32* %c, align 4 - br label %cond.end56 - -cond.false53: ; preds = %cond.end48 - %47 = load i32, i32* %c, align 4 - %48 = load i32, i32* %nx.addr, align 4 - %49 = load i32, i32* %ny.addr, align 4 - %mul54 = mul nsw i32 %48, %49 - %add55 = add nsw i32 %47, %mul54 - br label %cond.end56 - -cond.end56: ; preds = %cond.false53, %cond.true52 - %cond57 = phi i32 [ %46, %cond.true52 ], [ %add55, %cond.false53 ] - store i32 %cond57, i32* %t, align 4 - %50 = load float*, float** %tIn.addr, align 8 - %51 = load i32, i32* %c, align 4 - %idxprom = sext i32 %51 to i64 - %arrayidx = getelementptr inbounds float, float* %50, i64 %idxprom - %52 = load float, float* %arrayidx, align 4 - %53 = load float, float* %cc, align 4 - %mul58 = fmul contract float %52, %53 - %54 = load float*, float** %tIn.addr, align 8 - %55 = load i32, i32* %n, align 4 - %idxprom59 = sext i32 %55 to i64 - %arrayidx60 = getelementptr inbounds float, float* %54, i64 %idxprom59 - %56 = load float, float* %arrayidx60, align 4 - %57 = load float, float* %cn, align 4 - %mul61 = fmul contract float %56, %57 - %add62 = fadd contract float %mul58, %mul61 - %58 = load float*, float** %tIn.addr, align 8 - %59 = load i32, i32* %s, align 4 - %idxprom63 = sext i32 %59 to i64 - %arrayidx64 = getelementptr inbounds float, float* %58, i64 %idxprom63 - %60 = load float, float* %arrayidx64, align 4 - %61 = load float, float* %cs, align 4 - %mul65 = fmul contract float %60, %61 - %add66 = fadd contract float %add62, %mul65 - %62 = load float*, float** %tIn.addr, align 8 - %63 = load i32, i32* %e, align 4 - %idxprom67 = sext i32 %63 to i64 - %arrayidx68 = getelementptr inbounds float, float* %62, i64 %idxprom67 - %64 = load float, float* %arrayidx68, align 4 - %65 = load float, float* %ce, align 4 - %mul69 = fmul contract float %64, %65 - %add70 = fadd contract float %add66, %mul69 - %66 = load float*, float** %tIn.addr, align 8 - %67 = load i32, i32* %w, align 4 - %idxprom71 = sext i32 %67 to i64 - %arrayidx72 = getelementptr inbounds float, float* %66, i64 %idxprom71 - %68 = load float, float* %arrayidx72, align 4 - %69 = load float, float* %cw, align 4 - %mul73 = fmul contract float %68, %69 - %add74 = fadd contract float %add70, %mul73 - %70 = load float*, float** %tIn.addr, align 8 - %71 = load i32, i32* %t, align 4 - %idxprom75 = sext i32 %71 to i64 - %arrayidx76 = getelementptr inbounds float, float* %70, i64 %idxprom75 - %72 = load float, float* %arrayidx76, align 4 - %73 = load float, float* %ct, align 4 - %mul77 = fmul contract float %72, %73 - %add78 = fadd contract float %add74, %mul77 - %74 = load float*, float** %tIn.addr, align 8 - %75 = load i32, i32* %b, align 4 - %idxprom79 = sext i32 %75 to i64 - %arrayidx80 = getelementptr inbounds float, float* %74, i64 %idxprom79 - %76 = load float, float* %arrayidx80, align 4 - %77 = load float, float* %cb, align 4 - %mul81 = fmul contract float %76, %77 - %add82 = fadd contract float %add78, %mul81 - %78 = load float, float* %dt.addr, align 4 - %79 = load float, float* %Cap.addr, align 4 - %div83 = fdiv float %78, %79 - %80 = load float*, float** %pIn.addr, align 8 - %81 = load i32, i32* %c, align 4 - %idxprom84 = sext i32 %81 to i64 - %arrayidx85 = getelementptr inbounds float, float* %80, i64 %idxprom84 - %82 = load float, float* %arrayidx85, align 4 - %mul86 = fmul contract float %div83, %82 - %add87 = fadd contract float %add82, %mul86 - %83 = load float, float* %ct, align 4 - %84 = load float, float* @amb_temp, align 4 - %mul88 = fmul contract float %83, %84 - %add89 = fadd contract float %add87, %mul88 - %85 = load float*, float** %tOut.addr, align 8 - %86 = load i32, i32* %c, align 4 - %idxprom90 = sext i32 %86 to i64 - %arrayidx91 = getelementptr inbounds float, float* %85, i64 %idxprom90 - store float %add89, float* %arrayidx91, align 4 - br label %for.inc - -for.inc: ; preds = %cond.end56 - %87 = load i32, i32* %x, align 4 - %inc = add nsw i32 %87, 1 - store i32 %inc, i32* %x, align 4 - br label %for.cond13 - -for.end: ; preds = %for.cond13 - br label %for.inc92 - -for.inc92: ; preds = %for.end - %88 = load i32, i32* %y, align 4 - %inc93 = add nsw i32 %88, 1 - store i32 %inc93, i32* %y, align 4 - br label %for.cond10 - -for.end94: ; preds = %for.cond10 - br label %for.inc95 - -for.inc95: ; preds = %for.end94 - %89 = load i32, i32* %z, align 4 - %inc96 = add nsw i32 %89, 1 - store i32 %inc96, i32* %z, align 4 - br label %for.cond - -for.end97: ; preds = %for.cond - %90 = load float*, float** %tIn.addr, align 8 - store float* %90, float** %temp, align 8 - %91 = load float*, float** %tOut.addr, align 8 - store float* %91, float** %tIn.addr, align 8 - %92 = load float*, float** %temp, align 8 - store float* %92, float** %tOut.addr, align 8 - %93 = load i32, i32* %i, align 4 - %inc98 = add nsw i32 %93, 1 - store i32 %inc98, i32* %i, align 4 - br label %do.cond - -do.cond: ; preds = %for.end97 - %94 = load i32, i32* %i, align 4 - %95 = load i32, i32* %numiter.addr, align 4 - %cmp99 = icmp slt i32 %94, %95 - br i1 %cmp99, label %do.body, label %do.end - -do.end: ; preds = %do.cond - ret void -} - -; Function Attrs: noinline optnone uwtable -define dso_local float @_Z8accuracyPfS_i(float* %arr1, float* %arr2, i32 %len) #2 { -entry: - %arr1.addr = alloca float*, align 8 - %arr2.addr = alloca float*, align 8 - %len.addr = alloca i32, align 4 - %err = alloca float, align 4 - %i = alloca i32, align 4 - store float* %arr1, float** %arr1.addr, align 8 - store float* %arr2, float** %arr2.addr, align 8 - store i32 %len, i32* %len.addr, align 4 - store float 0.000000e+00, float* %err, align 4 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = load i32, i32* %i, align 4 - %1 = load i32, i32* %len.addr, align 4 - %cmp = icmp slt i32 %0, %1 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %2 = load float*, float** %arr1.addr, align 8 - %3 = load i32, i32* %i, align 4 - %idxprom = sext i32 %3 to i64 - %arrayidx = getelementptr inbounds float, float* %2, i64 %idxprom - %4 = load float, float* %arrayidx, align 4 - %5 = load float*, float** %arr2.addr, align 8 - %6 = load i32, i32* %i, align 4 - %idxprom1 = sext i32 %6 to i64 - %arrayidx2 = getelementptr inbounds float, float* %5, i64 %idxprom1 - %7 = load float, float* %arrayidx2, align 4 - %sub = fsub contract float %4, %7 - %8 = load float*, float** %arr1.addr, align 8 - %9 = load i32, i32* %i, align 4 - %idxprom3 = sext i32 %9 to i64 - %arrayidx4 = getelementptr inbounds float, float* %8, i64 %idxprom3 - %10 = load float, float* %arrayidx4, align 4 - %11 = load float*, float** %arr2.addr, align 8 - %12 = load i32, i32* %i, align 4 - %idxprom5 = sext i32 %12 to i64 - %arrayidx6 = getelementptr inbounds float, float* %11, i64 %idxprom5 - %13 = load float, float* %arrayidx6, align 4 - %sub7 = fsub contract float %10, %13 - %mul = fmul contract float %sub, %sub7 - %14 = load float, float* %err, align 4 - %add = fadd contract float %14, %mul - store float %add, float* %err, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %15 = load i32, i32* %i, align 4 - %inc = add nsw i32 %15, 1 - store i32 %inc, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %16 = load float, float* %err, align 4 - %17 = load i32, i32* %len.addr, align 4 - %conv = sitofp i32 %17 to float - %div = fdiv float %16, %conv - %call = call float @_ZSt4sqrtf(float %div) - ret float %call -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local float @_ZSt4sqrtf(float %__x) #0 comdat { -entry: - %__x.addr = alloca float, align 4 - store float %__x, float* %__x.addr, align 4 - %0 = load float, float* %__x.addr, align 4 - %call = call float @sqrtf(float %0) #8 - ret float %call -} - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z5usageiPPc(i32 %argc, i8** %argv) #2 { -entry: - %argc.addr = alloca i32, align 4 - %argv.addr = alloca i8**, align 8 - store i32 %argc, i32* %argc.addr, align 4 - store i8** %argv, i8*** %argv.addr, align 8 - %0 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %1 = load i8**, i8*** %argv.addr, align 8 - %arrayidx = getelementptr inbounds i8*, i8** %1, i64 0 - %2 = load i8*, i8** %arrayidx, align 8 - %call = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %0, i8* getelementptr inbounds ([81 x i8], [81 x i8]* @.str.11, i64 0, i64 0), i8* %2) - %3 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %call1 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %3, i8* getelementptr inbounds ([68 x i8], [68 x i8]* @.str.12, i64 0, i64 0)) - %4 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %call2 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %4, i8* getelementptr inbounds ([62 x i8], [62 x i8]* @.str.13, i64 0, i64 0)) - %5 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %call3 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %5, i8* getelementptr inbounds ([37 x i8], [37 x i8]* @.str.14, i64 0, i64 0)) - %6 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %call4 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %6, i8* getelementptr inbounds ([83 x i8], [83 x i8]* @.str.15, i64 0, i64 0)) - %7 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %call5 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %7, i8* getelementptr inbounds ([88 x i8], [88 x i8]* @.str.16, i64 0, i64 0)) - %8 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %call6 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %8, i8* getelementptr inbounds ([28 x i8], [28 x i8]* @.str.17, i64 0, i64 0)) - call void @exit(i32 1) #9 - unreachable -} - -; Function Attrs: noreturn nounwind -declare dso_local void @exit(i32) #5 - -; Function Attrs: noinline norecurse optnone uwtable -define dso_local i32 @main(i32 %argc, i8** %argv) #6 { -entry: - %retval = alloca i32, align 4 - %argc.addr = alloca i32, align 4 - %argv.addr = alloca i8**, align 8 - %pfile = alloca i8*, align 8 - %tfile = alloca i8*, align 8 - %ofile = alloca i8*, align 8 - %iterations = alloca i32, align 4 - %numCols = alloca i32, align 4 - %numRows = alloca i32, align 4 - %layers = alloca i32, align 4 - %dx = alloca float, align 4 - %dy = alloca float, align 4 - %dz = alloca float, align 4 - %Cap = alloca float, align 4 - %Rx = alloca float, align 4 - %Ry = alloca float, align 4 - %Rz = alloca float, align 4 - %max_slope = alloca float, align 4 - %dt = alloca float, align 4 - %powerIn = alloca float*, align 8 - %tempOut = alloca float*, align 8 - %tempIn = alloca float*, align 8 - %tempCopy = alloca float*, align 8 - %size = alloca i32, align 4 - %answer = alloca float*, align 8 - %acc = alloca float, align 4 - store i32 0, i32* %retval, align 4 - store i32 %argc, i32* %argc.addr, align 4 - store i8** %argv, i8*** %argv.addr, align 8 - %call = call i32 @cudaSetDevice(i32 0) - %0 = load i32, i32* %argc.addr, align 4 - %cmp = icmp ne i32 %0, 7 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - %1 = load i32, i32* %argc.addr, align 4 - %2 = load i8**, i8*** %argv.addr, align 8 - call void @_Z5usageiPPc(i32 %1, i8** %2) - br label %if.end - -if.end: ; preds = %if.then, %entry - %3 = load i8**, i8*** %argv.addr, align 8 - %arrayidx = getelementptr inbounds i8*, i8** %3, i64 3 - %4 = load i8*, i8** %arrayidx, align 8 - %call1 = call i32 @atoi(i8* %4) #10 - store i32 %call1, i32* %iterations, align 4 - %5 = load i8**, i8*** %argv.addr, align 8 - %arrayidx2 = getelementptr inbounds i8*, i8** %5, i64 4 - %6 = load i8*, i8** %arrayidx2, align 8 - store i8* %6, i8** %pfile, align 8 - %7 = load i8**, i8*** %argv.addr, align 8 - %arrayidx3 = getelementptr inbounds i8*, i8** %7, i64 5 - %8 = load i8*, i8** %arrayidx3, align 8 - store i8* %8, i8** %tfile, align 8 - %9 = load i8**, i8*** %argv.addr, align 8 - %arrayidx4 = getelementptr inbounds i8*, i8** %9, i64 6 - %10 = load i8*, i8** %arrayidx4, align 8 - store i8* %10, i8** %ofile, align 8 - %11 = load i8**, i8*** %argv.addr, align 8 - %arrayidx5 = getelementptr inbounds i8*, i8** %11, i64 1 - %12 = load i8*, i8** %arrayidx5, align 8 - %call6 = call i32 @atoi(i8* %12) #10 - store i32 %call6, i32* %numCols, align 4 - %13 = load i8**, i8*** %argv.addr, align 8 - %arrayidx7 = getelementptr inbounds i8*, i8** %13, i64 1 - %14 = load i8*, i8** %arrayidx7, align 8 - %call8 = call i32 @atoi(i8* %14) #10 - store i32 %call8, i32* %numRows, align 4 - %15 = load i8**, i8*** %argv.addr, align 8 - %arrayidx9 = getelementptr inbounds i8*, i8** %15, i64 2 - %16 = load i8*, i8** %arrayidx9, align 8 - %call10 = call i32 @atoi(i8* %16) #10 - store i32 %call10, i32* %layers, align 4 - %17 = load float, float* @chip_height, align 4 - %18 = load i32, i32* %numRows, align 4 - %conv = sitofp i32 %18 to float - %div = fdiv float %17, %conv - store float %div, float* %dx, align 4 - %19 = load float, float* @chip_width, align 4 - %20 = load i32, i32* %numCols, align 4 - %conv11 = sitofp i32 %20 to float - %div12 = fdiv float %19, %conv11 - store float %div12, float* %dy, align 4 - %21 = load float, float* @t_chip, align 4 - %22 = load i32, i32* %layers, align 4 - %conv13 = sitofp i32 %22 to float - %div14 = fdiv float %21, %conv13 - store float %div14, float* %dz, align 4 - %23 = load float, float* @t_chip, align 4 - %conv15 = fpext float %23 to double - %mul = fmul contract double 8.750000e+05, %conv15 - %24 = load float, float* %dx, align 4 - %conv16 = fpext float %24 to double - %mul17 = fmul contract double %mul, %conv16 - %25 = load float, float* %dy, align 4 - %conv18 = fpext float %25 to double - %mul19 = fmul contract double %mul17, %conv18 - %conv20 = fptrunc double %mul19 to float - store float %conv20, float* %Cap, align 4 - %26 = load float, float* %dy, align 4 - %conv21 = fpext float %26 to double - %27 = load float, float* @t_chip, align 4 - %conv22 = fpext float %27 to double - %mul23 = fmul contract double 2.000000e+02, %conv22 - %28 = load float, float* %dx, align 4 - %conv24 = fpext float %28 to double - %mul25 = fmul contract double %mul23, %conv24 - %div26 = fdiv double %conv21, %mul25 - %conv27 = fptrunc double %div26 to float - store float %conv27, float* %Rx, align 4 - %29 = load float, float* %dx, align 4 - %conv28 = fpext float %29 to double - %30 = load float, float* @t_chip, align 4 - %conv29 = fpext float %30 to double - %mul30 = fmul contract double 2.000000e+02, %conv29 - %31 = load float, float* %dy, align 4 - %conv31 = fpext float %31 to double - %mul32 = fmul contract double %mul30, %conv31 - %div33 = fdiv double %conv28, %mul32 - %conv34 = fptrunc double %div33 to float - store float %conv34, float* %Ry, align 4 - %32 = load float, float* %dz, align 4 - %33 = load float, float* %dx, align 4 - %mul35 = fmul contract float 1.000000e+02, %33 - %34 = load float, float* %dy, align 4 - %mul36 = fmul contract float %mul35, %34 - %div37 = fdiv float %32, %mul36 - store float %div37, float* %Rz, align 4 - %35 = load float, float* @t_chip, align 4 - %conv38 = fpext float %35 to double - %mul39 = fmul contract double 5.000000e-01, %conv38 - %mul40 = fmul contract double %mul39, 1.750000e+06 - %div41 = fdiv double 3.000000e+06, %mul40 - %conv42 = fptrunc double %div41 to float - store float %conv42, float* %max_slope, align 4 - %36 = load float, float* %max_slope, align 4 - %conv43 = fpext float %36 to double - %div44 = fdiv double 1.000000e-03, %conv43 - %conv45 = fptrunc double %div44 to float - store float %conv45, float* %dt, align 4 - %37 = load i32, i32* %numCols, align 4 - %38 = load i32, i32* %numRows, align 4 - %mul46 = mul nsw i32 %37, %38 - %39 = load i32, i32* %layers, align 4 - %mul47 = mul nsw i32 %mul46, %39 - store i32 %mul47, i32* %size, align 4 - %40 = load i32, i32* %size, align 4 - %conv48 = sext i32 %40 to i64 - %call49 = call noalias i8* @calloc(i64 %conv48, i64 4) #8 - %41 = bitcast i8* %call49 to float* - store float* %41, float** %powerIn, align 8 - %42 = load i32, i32* %size, align 4 - %conv50 = sext i32 %42 to i64 - %mul51 = mul i64 %conv50, 4 - %call52 = call noalias i8* @malloc(i64 %mul51) #8 - %43 = bitcast i8* %call52 to float* - store float* %43, float** %tempCopy, align 8 - %44 = load i32, i32* %size, align 4 - %conv53 = sext i32 %44 to i64 - %call54 = call noalias i8* @calloc(i64 %conv53, i64 4) #8 - %45 = bitcast i8* %call54 to float* - store float* %45, float** %tempIn, align 8 - %46 = load i32, i32* %size, align 4 - %conv55 = sext i32 %46 to i64 - %call56 = call noalias i8* @calloc(i64 %conv55, i64 4) #8 - %47 = bitcast i8* %call56 to float* - store float* %47, float** %tempOut, align 8 - %48 = load i32, i32* %size, align 4 - %conv57 = sext i32 %48 to i64 - %call58 = call noalias i8* @calloc(i64 %conv57, i64 4) #8 - %49 = bitcast i8* %call58 to float* - store float* %49, float** %answer, align 8 - %50 = load float*, float** %powerIn, align 8 - %51 = load i32, i32* %numRows, align 4 - %52 = load i32, i32* %numCols, align 4 - %53 = load i32, i32* %layers, align 4 - %54 = load i8*, i8** %pfile, align 8 - call void @_Z9readinputPfiiiPc(float* %50, i32 %51, i32 %52, i32 %53, i8* %54) - %55 = load float*, float** %tempIn, align 8 - %56 = load i32, i32* %numRows, align 4 - %57 = load i32, i32* %numCols, align 4 - %58 = load i32, i32* %layers, align 4 - %59 = load i8*, i8** %tfile, align 8 - call void @_Z9readinputPfiiiPc(float* %55, i32 %56, i32 %57, i32 %58, i8* %59) - %60 = load float*, float** %tempCopy, align 8 - %61 = bitcast float* %60 to i8* - %62 = load float*, float** %tempIn, align 8 - %63 = bitcast float* %62 to i8* - %64 = load i32, i32* %size, align 4 - %conv59 = sext i32 %64 to i64 - %mul60 = mul i64 %conv59, 4 - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %61, i8* align 4 %63, i64 %mul60, i1 false) - %65 = load float*, float** %powerIn, align 8 - %66 = load float*, float** %tempIn, align 8 - %67 = load float*, float** %tempOut, align 8 - %68 = load i32, i32* %numCols, align 4 - %69 = load i32, i32* %numRows, align 4 - %70 = load i32, i32* %layers, align 4 - %71 = load float, float* %Cap, align 4 - %72 = load float, float* %Rx, align 4 - %73 = load float, float* %Ry, align 4 - %74 = load float, float* %Rz, align 4 - %75 = load float, float* %dt, align 4 - %76 = load i32, i32* %iterations, align 4 - call void @_Z12hotspot_opt1PfS_S_iiifffffi(float* %65, float* %66, float* %67, i32 %68, i32 %69, i32 %70, float %71, float %72, float %73, float %74, float %75, i32 %76) - %77 = load float*, float** %powerIn, align 8 - %78 = load float*, float** %tempCopy, align 8 - %79 = load float*, float** %answer, align 8 - %80 = load i32, i32* %numCols, align 4 - %81 = load i32, i32* %numRows, align 4 - %82 = load i32, i32* %layers, align 4 - %83 = load float, float* %Cap, align 4 - %84 = load float, float* %Rx, align 4 - %85 = load float, float* %Ry, align 4 - %86 = load float, float* %Rz, align 4 - %87 = load float, float* %dt, align 4 - %88 = load i32, i32* %iterations, align 4 - call void @_Z14computeTempCPUPfS_S_iiifffffi(float* %77, float* %78, float* %79, i32 %80, i32 %81, i32 %82, float %83, float %84, float %85, float %86, float %87, i32 %88) - %89 = load float*, float** %tempOut, align 8 - %90 = load float*, float** %answer, align 8 - %91 = load i32, i32* %numRows, align 4 - %92 = load i32, i32* %numCols, align 4 - %mul61 = mul nsw i32 %91, %92 - %93 = load i32, i32* %layers, align 4 - %mul62 = mul nsw i32 %mul61, %93 - %call63 = call float @_Z8accuracyPfS_i(float* %89, float* %90, i32 %mul62) - store float %call63, float* %acc, align 4 - %94 = load float, float* %acc, align 4 - %conv64 = fpext float %94 to double - %call65 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.18, i64 0, i64 0), double %conv64) - %95 = load float*, float** %tempOut, align 8 - %96 = load i32, i32* %numRows, align 4 - %97 = load i32, i32* %numCols, align 4 - %98 = load i32, i32* %layers, align 4 - %99 = load i8*, i8** %ofile, align 8 - call void @_Z11writeoutputPfiiiPc(float* %95, i32 %96, i32 %97, i32 %98, i8* %99) - %100 = load float*, float** %tempIn, align 8 - %101 = bitcast float* %100 to i8* - call void @free(i8* %101) #8 - %102 = load float*, float** %tempOut, align 8 - %103 = bitcast float* %102 to i8* - call void @free(i8* %103) #8 - %104 = load float*, float** %powerIn, align 8 - %105 = bitcast float* %104 to i8* - call void @free(i8* %105) #8 - ret i32 0 -} - -declare dso_local i32 @cudaSetDevice(i32) #4 - -; Function Attrs: nounwind readonly -declare dso_local i32 @atoi(i8*) #7 - -; Function Attrs: nounwind -declare dso_local noalias i8* @calloc(i64, i64) #1 - -; Function Attrs: nounwind -declare dso_local noalias i8* @malloc(i64) #1 - -; Function Attrs: nounwind -declare dso_local void @free(i8*) #1 - -; Function Attrs: nounwind -declare dso_local float @sqrtf(float) #1 - -define internal void @__cuda_register_globals(i8** %0) { -entry: - %1 = call i32 @__cudaRegisterFunction(i8** %0, i8* bitcast (void (float*, float*, float*, float, i32, i32, i32, float, float, float, float, float, float, float)* @_Z11hotspotOpt1PfS_S_fiiifffffff to i8*), i8* getelementptr inbounds ([33 x i8], [33 x i8]* @0, i64 0, i64 0), i8* getelementptr inbounds ([33 x i8], [33 x i8]* @0, i64 0, i64 0), i32 -1, i8* null, i8* null, i8* null, i8* null, i32* null) - ret void -} - -declare dso_local i32 @__cudaRegisterFunction(i8**, i8*, i8*, i8*, i32, i8*, i8*, i8*, i8*, i32*) - -declare dso_local i32 @__cudaRegisterVar(i8**, i8*, i8*, i8*, i32, i32, i32, i32) - -declare dso_local i8** @__cudaRegisterFatBinary(i8*) - -define internal void @__cuda_module_ctor(i8* %0) { -entry: - %1 = call i8** @__cudaRegisterFatBinary(i8* bitcast ({ i32, i32, i8*, i8* }* @__cuda_fatbin_wrapper to i8*)) - store i8** %1, i8*** @__cuda_gpubin_handle, align 8 - call void @__cuda_register_globals(i8** %1) - call void @__cudaRegisterFatBinaryEnd(i8** %1) - %2 = call i32 @atexit(void (i8*)* @__cuda_module_dtor) - ret void -} - -declare dso_local void @__cudaRegisterFatBinaryEnd(i8**) - -declare dso_local void @__cudaUnregisterFatBinary(i8**) - -define internal void @__cuda_module_dtor(i8* %0) { -entry: - %1 = load i8**, i8*** @__cuda_gpubin_handle, align 8 - call void @__cudaUnregisterFatBinary(i8** %1) - ret void -} - -declare dso_local i32 @atexit(void (i8*)*) - -attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #3 = { argmemonly nounwind willreturn } -attributes #4 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #5 = { noreturn nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #6 = { noinline norecurse optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #7 = { nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #8 = { nounwind } -attributes #9 = { noreturn nounwind } -attributes #10 = { nounwind readonly } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]} -!1 = !{i32 1, !"wchar_size", i32 4} -!2 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"} diff --git a/examples/hotspot3D/3D.cu b/examples/hotspot3D/3D.cu deleted file mode 100644 index 51faa17..0000000 --- a/examples/hotspot3D/3D.cu +++ /dev/null @@ -1,205 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#define BLOCK_SIZE 16 -#define STR_SIZE 256 - -#define block_x_ 128 -#define block_y_ 2 -#define block_z_ 1 -#define MAX_PD (3.0e6) -/* required precision in degrees */ -#define PRECISION 0.001 -#define SPEC_HEAT_SI 1.75e6 -#define K_SI 100 -/* capacitance fitting factor */ -#define FACTOR_CHIP 0.5 - -#include "opt1.cu" - -/* chip parameters */ -float t_chip = 0.0005; -float chip_height = 0.016; -float chip_width = 0.016; /* ambient temperature, assuming no package at all - */ -float amb_temp = 80.0; - -void fatal(const char *s) { fprintf(stderr, "Error: %s\n", s); } - -void readinput(float *vect, int grid_rows, int grid_cols, int layers, - char *file) { - int i, j, k; - FILE *fp; - char str[STR_SIZE]; - float val; - - if ((fp = fopen(file, "r")) == 0) - fatal("The file was not opened"); - - for (i = 0; i <= grid_rows - 1; i++) - for (j = 0; j <= grid_cols - 1; j++) - for (k = 0; k <= layers - 1; k++) { - if (fgets(str, STR_SIZE, fp) == NULL) - fatal("Error reading file\n"); - if (feof(fp)) - fatal("not enough lines in file"); - if ((sscanf(str, "%f", &val) != 1)) - fatal("invalid file format"); - vect[i * grid_cols + j + k * grid_rows * grid_cols] = val; - } - - fclose(fp); -} - -void writeoutput(float *vect, int grid_rows, int grid_cols, int layers, - char *file) { - - int i, j, k, index = 0; - FILE *fp; - char str[STR_SIZE]; - - if ((fp = fopen(file, "w")) == 0) - printf("The file was not opened\n"); - - for (i = 0; i < grid_rows; i++) - for (j = 0; j < grid_cols; j++) - for (k = 0; k < layers; k++) { - sprintf(str, "%d\t%g\n", index, - vect[i * grid_cols + j + k * grid_rows * grid_cols]); - fputs(str, fp); - index++; - } - - fclose(fp); -} - -void computeTempCPU(float *pIn, float *tIn, float *tOut, int nx, int ny, int nz, - float Cap, float Rx, float Ry, float Rz, float dt, - int numiter) { - float ce, cw, cn, cs, ct, cb, cc; - float stepDivCap = dt / Cap; - ce = cw = stepDivCap / Rx; - cn = cs = stepDivCap / Ry; - ct = cb = stepDivCap / Rz; - - cc = 1.0 - (2.0 * ce + 2.0 * cn + 3.0 * ct); - - int c, w, e, n, s, b, t; - int x, y, z; - int i = 0; - do { - for (z = 0; z < nz; z++) - for (y = 0; y < ny; y++) - for (x = 0; x < nx; x++) { - c = x + y * nx + z * nx * ny; - - w = (x == 0) ? c : c - 1; - e = (x == nx - 1) ? c : c + 1; - n = (y == 0) ? c : c - nx; - s = (y == ny - 1) ? c : c + nx; - b = (z == 0) ? c : c - nx * ny; - t = (z == nz - 1) ? c : c + nx * ny; - - tOut[c] = tIn[c] * cc + tIn[n] * cn + tIn[s] * cs + tIn[e] * ce + - tIn[w] * cw + tIn[t] * ct + tIn[b] * cb + - (dt / Cap) * pIn[c] + ct * amb_temp; - } - float *temp = tIn; - tIn = tOut; - tOut = temp; - i++; - } while (i < numiter); -} - -float accuracy(float *arr1, float *arr2, int len) { - float err = 0.0; - int i; - for (i = 0; i < len; i++) { - err += (arr1[i] - arr2[i]) * (arr1[i] - arr2[i]); - } - - return (float)sqrt(err / len); -} - -void usage(int argc, char **argv) { - fprintf(stderr, - "Usage: %s " - "\n", - argv[0]); - fprintf( - stderr, - "\t - number of rows/cols in the grid (positive integer)\n"); - fprintf(stderr, - "\t - number of layers in the grid (positive integer)\n"); - - fprintf(stderr, "\t - number of iterations\n"); - fprintf(stderr, "\t - name of the file containing the initial " - "power values of each cell\n"); - fprintf(stderr, "\t - name of the file containing the initial " - "temperature values of each cell\n"); - fprintf(stderr, "\t -template -__inline int compare_vectors(T *data1, T *data2, unsigned int size) { - printf("Comparing vectors: \n"); - bool match = true; - for (unsigned int i = 0; i < size; i++) - if (data1[i] != data2[i]) { - match = false; - printf("Diff: data1[%d]=%d, data1[%d]=%d.\n", i, data1[i], i, data2[i]); - } - - if (match) { - printf("PASS! vectors are matching!\n"); - return 0; - } else { - printf("FAIL! vectors are NOT matching!\n"); - exit(1); - return -1; - } -} - -#endif diff --git a/examples/huffman/cpuencode.cpp b/examples/huffman/cpuencode.cpp deleted file mode 100644 index cb0ff4b..0000000 --- a/examples/huffman/cpuencode.cpp +++ /dev/null @@ -1,116 +0,0 @@ -#include "stdafx.h" - -#include "cpuencode.h" -#include "print_helpers.h" - -using namespace std; - -#if 1 - -// The max. codeword length for each byte symbol is 32-bits - -extern "C" void cpu_vlc_encode(unsigned int *indata, unsigned int num_elements, - unsigned int *outdata, unsigned int *outsize, - unsigned int *codewords, - unsigned int *codewordlens) { - unsigned int *bitstreamPt = - (unsigned int *)outdata; /* Pointer to current byte */ - *bitstreamPt = 0x00000000U; - unsigned int startbit = 0; - unsigned int totalBytes = 0; - - for (unsigned int k = 0; k < num_elements; k++) { - unsigned int cw32 = 0; - unsigned int val32 = indata[k]; - unsigned int numbits = 0; - unsigned int mask32; - - for (unsigned int i = 0; i < 4; i++) { - unsigned char symbol = (unsigned char)(val32 >> (8 * (3 - i))); - cw32 = codewords[symbol]; - numbits = codewordlens[symbol]; - - while (numbits > 0) { - int writebits = min(32 - startbit, numbits); - if (numbits == writebits) - mask32 = (cw32 & ((1 << numbits) - 1)) - << (32 - startbit - - numbits); // first make sure that the start of the word - // is clean, then shift to the left as many - // places as you need - else - mask32 = cw32 >> - (numbits - writebits); // shift out the bits that can not fit - *bitstreamPt = (*bitstreamPt) | mask32; - numbits = numbits - writebits; - startbit = (startbit + writebits) % 32; - if (startbit == 0) { - bitstreamPt++; - *bitstreamPt = 0x00000000; - totalBytes += 4; - } - } - } - } - totalBytes += (startbit / 8) + - ((startbit % 8 == 0) ? 0 : 1); // return aligned to 8-bits - *outsize = totalBytes; -} - -////////////////////////////////////////////////////////////////////// -/// ALTERNATIVE CODER -/// ASSUMPTION: The max. length of 4 combined codewords can be 2x original data, -/// i.e. g 64 bits -/////////////////////////////////////////////////////////////////////// - -#else - -extern "C" void cpu_vlc_encode(unsigned int *indata, unsigned int num_elements, - unsigned int *outdata, unsigned int *outsize, - unsigned int *codewords, - unsigned int *codewordlens) { - unsigned int *bitstreamPt = - (unsigned int *)outdata; /* Pointer to current byte */ - // assume memset is done. - *bitstreamPt = 0x00000000U; - unsigned int startbit = 0; - unsigned int totalBytes = 0; - - for (unsigned int k = 0; k < num_elements; k++) { - unsigned long long cw64 = 0, mask64 = 0; - unsigned int val32 = indata[k]; - unsigned int numbits = 0; - unsigned int mask32, temp32; - - for (unsigned int i = 0; i < 4; i++) { - unsigned char symbol = (unsigned char)(val32 >> (8 * (3 - i))); - cw64 = (cw64 << codewordlens[symbol]) | codewords[symbol]; - numbits += codewordlens[symbol]; - // if (numbits>32) printf("WARRNING! Element %d is combined into numbits = - // %d!!!!!!!\n", k, numbits); - } - - while (numbits > 0) { - int writebits = min(32 - startbit, numbits); - if (numbits == writebits) { - temp32 = (unsigned int)cw64; //(cw64 & 0xFFFFFFFF); - mask32 = temp32 << (32 - startbit - numbits); - } else { - mask32 = (unsigned int)(cw64 >> (numbits - writebits)); - cw64 = cw64 & ((1 << (numbits - writebits)) - 1); - } - *bitstreamPt = (*bitstreamPt) | mask32; - numbits = numbits - writebits; - startbit = (startbit + writebits) % 32; - if (startbit == 0) { - bitstreamPt++; - *bitstreamPt = 0x00000000; - totalBytes += 4; - } - } - } - totalBytes += (startbit / 8) + - ((startbit % 8 == 0) ? 0 : 1); // return aligned to 8-bits - *outsize = totalBytes; -} -#endif diff --git a/examples/huffman/cpuencode.h b/examples/huffman/cpuencode.h deleted file mode 100644 index 6c331fa..0000000 --- a/examples/huffman/cpuencode.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef _CE_H_ -#define _CE_H_ - -extern "C" void cpu_vlc_encode(unsigned int *indata, unsigned int num_elements, - unsigned int *outdata, unsigned int *outsize, - unsigned int *codewords, - unsigned int *codewordlens); -#endif diff --git a/examples/huffman/cuda_helpers.h b/examples/huffman/cuda_helpers.h deleted file mode 100644 index 3cf4524..0000000 --- a/examples/huffman/cuda_helpers.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef __CUDA_HELPERS__ -#define __CUDA_HELPERS__ -#include -/************************************************************************/ -/* Init CUDA */ -/************************************************************************/ -#if __DEVICE_EMULATION__ - -bool InitCUDA(void) { return true; } - -#else -bool InitCUDA(void) { - - cudaSetDevice(0); - - printf("CUDA initialized.\n"); - return true; -} -#endif -#endif diff --git a/examples/huffman/cutil.h b/examples/huffman/cutil.h deleted file mode 100644 index 8757a22..0000000 --- a/examples/huffman/cutil.h +++ /dev/null @@ -1,931 +0,0 @@ -/* - * Copyright 1993-2010 NVIDIA Corporation. All rights reserved. - * - * Please refer to the NVIDIA end user license agreement (EULA) associated - * with this source code for terms and conditions that govern your use of - * this software. Any use, reproduction, disclosure, or distribution of - * this software and related documentation outside the terms of the EULA - * is strictly prohibited. - * - */ - -/* - * Copyright 1993-2010 NVIDIA Corporation. All rights reserved. - * - * Please refer to the NVIDIA end user license agreement (EULA) associated - * with this source code for terms and conditions that govern your use of - * this software. Any use, reproduction, disclosure, or distribution of - * this software and related documentation outside the terms of the EULA - * is strictly prohibited. - * - */ - -/* CUda UTility Library */ - -#ifndef _CUTIL_H_ -#define _CUTIL_H_ - -#ifdef _WIN32 -#pragma warning(disable : 4996) // disable deprecated warning -#endif - -#include -#include - -// helper typedefs for building DLL -#ifdef _WIN32 -#ifdef BUILD_DLL -#define DLL_MAPPING __declspec(dllexport) -#else -#define DLL_MAPPING __declspec(dllimport) -#endif -#else -#define DLL_MAPPING -#endif - -#ifdef _WIN32 -#define CUTIL_API __stdcall -#else -#define CUTIL_API -#endif - -//////////////////////////////////////////////////////////////////////////// -//! CUT bool type -//////////////////////////////////////////////////////////////////////////// -enum CUTBoolean { CUTFalse = 0, CUTTrue = 1 }; - -//////////////////////////////////////////////////////////////////////////// -//! Deallocate memory allocated within Cutil -//! @param pointer to memory -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -void CUTIL_API cutFree(void *ptr); - -//////////////////////////////////////////////////////////////////////////// -//! Helper for bank conflict checking (should only be used with the -//! CUT_BANK_CHECKER macro) -//! @param tidx thread id in x dimension of block -//! @param tidy thread id in y dimension of block -//! @param tidz thread id in z dimension of block -//! @param bdimx block size in x dimension -//! @param bdimy block size in y dimension -//! @param bdimz block size in z dimension -//! @param file name of the source file where the access takes place -//! @param line line in the source file where the access takes place -//! @param aname name of the array which is accessed -//! @param index index into the array -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -void CUTIL_API cutCheckBankAccess(unsigned int tidx, unsigned int tidy, - unsigned int tidz, unsigned int bdimx, - unsigned int bdimy, unsigned int bdimz, - const char *file, const int line, - const char *aname, const int index); - -//////////////////////////////////////////////////////////////////////////// -//! Find the path for a filename -//! @return the path if succeeded, otherwise 0 -//! @param filename name of the file -//! @param executablePath optional absolute path of the executable -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -char *CUTIL_API cutFindFilePath(const char *filename, - const char *executablePath); - -//////////////////////////////////////////////////////////////////////////// -//! Read file \filename containing single precision floating point data -//! @return CUTTrue if reading the file succeeded, otherwise false -//! @param filename name of the source file -//! @param data uninitialized pointer, returned initialized and pointing to -//! the data read -//! @param len number of data elements in data, -1 on error -//! @note If a NULL pointer is passed to this function and it is -//! initialized within Cutil then cutFree() has to be used to -//! deallocate the memory -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutReadFilef(const char *filename, float **data, - unsigned int *len, bool verbose = false); - -//////////////////////////////////////////////////////////////////////////// -//! Read file \filename containing double precision floating point data -//! @return CUTTrue if reading the file succeeded, otherwise false -//! @param filename name of the source file -//! @param data uninitialized pointer, returned initialized and pointing to -//! the data read -//! @param len number of data elements in data, -1 on error -//! @note If a NULL pointer is passed to this function and it is -//! initialized within Cutil then cutFree() has to be used to -//! deallocate the memory -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutReadFiled(const char *filename, double **data, - unsigned int *len, bool verbose = false); - -//////////////////////////////////////////////////////////////////////////// -//! Read file \filename containing integer data -//! @return CUTTrue if reading the file succeeded, otherwise false -//! @param filename name of the source file -//! @param data uninitialized pointer, returned initialized and pointing to -//! the data read -//! @param len number of data elements in data, -1 on error -//! @note If a NULL pointer is passed to this function and it is -//! initialized within Cutil then cutFree() has to be used to -//! deallocate the memory -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutReadFilei(const char *filename, int **data, - unsigned int *len, bool verbose = false); - -//////////////////////////////////////////////////////////////////////////// -//! Read file \filename containing unsigned integer data -//! @return CUTTrue if reading the file succeeded, otherwise false -//! @param filename name of the source file -//! @param data uninitialized pointer, returned initialized and pointing to -//! the data read -//! @param len number of data elements in data, -1 on error -//! @note If a NULL pointer is passed to this function and it is -//! initialized within Cutil then cutFree() has to be used to -//! deallocate the memory -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutReadFileui(const char *filename, unsigned int **data, - unsigned int *len, bool verbose = false); - -//////////////////////////////////////////////////////////////////////////// -//! Read file \filename containing char / byte data -//! @return CUTTrue if reading the file succeeded, otherwise false -//! @param filename name of the source file -//! @param data uninitialized pointer, returned initialized and pointing to -//! the data read -//! @param len number of data elements in data, -1 on error -//! @note If a NULL pointer is passed to this function and it is -//! initialized within Cutil then cutFree() has to be used to -//! deallocate the memory -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutReadFileb(const char *filename, char **data, - unsigned int *len, bool verbose = false); - -//////////////////////////////////////////////////////////////////////////// -//! Read file \filename containing unsigned char / byte data -//! @return CUTTrue if reading the file succeeded, otherwise false -//! @param filename name of the source file -//! @param data uninitialized pointer, returned initialized and pointing to -//! the data read -//! @param len number of data elements in data, -1 on error -//! @note If a NULL pointer is passed to this function and it is -//! initialized within Cutil then cutFree() has to be used to -//! deallocate the memory -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutReadFileub(const char *filename, unsigned char **data, - unsigned int *len, bool verbose = false); - -//////////////////////////////////////////////////////////////////////////// -//! Write a data file \filename containing single precision floating point -//! data -//! @return CUTTrue if writing the file succeeded, otherwise false -//! @param filename name of the file to write -//! @param data pointer to data to write -//! @param len number of data elements in data, -1 on error -//! @param epsilon epsilon for comparison -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutWriteFilef(const char *filename, const float *data, - unsigned int len, const float epsilon, - bool verbose = false); - -//////////////////////////////////////////////////////////////////////////// -//! Write a data file \filename containing double precision floating point -//! data -//! @return CUTTrue if writing the file succeeded, otherwise false -//! @param filename name of the file to write -//! @param data pointer to data to write -//! @param len number of data elements in data, -1 on error -//! @param epsilon epsilon for comparison -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutWriteFiled(const char *filename, const float *data, - unsigned int len, const double epsilon, - bool verbose = false); - -//////////////////////////////////////////////////////////////////////////// -//! Write a data file \filename containing integer data -//! @return CUTTrue if writing the file succeeded, otherwise false -//! @param filename name of the file to write -//! @param data pointer to data to write -//! @param len number of data elements in data, -1 on error -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutWriteFilei(const char *filename, const int *data, - unsigned int len, bool verbose = false); - -//////////////////////////////////////////////////////////////////////////// -//! Write a data file \filename containing unsigned integer data -//! @return CUTTrue if writing the file succeeded, otherwise false -//! @param filename name of the file to write -//! @param data pointer to data to write -//! @param len number of data elements in data, -1 on error -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutWriteFileui(const char *filename, - const unsigned int *data, unsigned int len, - bool verbose = false); - -//////////////////////////////////////////////////////////////////////////// -//! Write a data file \filename containing char / byte data -//! @return CUTTrue if writing the file succeeded, otherwise false -//! @param filename name of the file to write -//! @param data pointer to data to write -//! @param len number of data elements in data, -1 on error -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutWriteFileb(const char *filename, const char *data, - unsigned int len, bool verbose = false); - -//////////////////////////////////////////////////////////////////////////// -//! Write a data file \filename containing unsigned char / byte data -//! @return CUTTrue if writing the file succeeded, otherwise false -//! @param filename name of the file to write -//! @param data pointer to data to write -//! @param len number of data elements in data, -1 on error -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutWriteFileub(const char *filename, - const unsigned char *data, unsigned int len, - bool verbose = false); - -//////////////////////////////////////////////////////////////////////////// -//! Load PGM image file (with unsigned char as data element type) -//! @return CUTTrue if reading the file succeeded, otherwise false -//! @param file name of the image file -//! @param data handle to the data read -//! @param w width of the image -//! @param h height of the image -//! @note If a NULL pointer is passed to this function and it is -//! initialized within Cutil then cutFree() has to be used to -//! deallocate the memory -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutLoadPGMub(const char *file, unsigned char **data, - unsigned int *w, unsigned int *h); - -//////////////////////////////////////////////////////////////////////////// -//! Load PPM image file (with unsigned char as data element type) -//! @return CUTTrue if reading the file succeeded, otherwise false -//! @param file name of the image file -//! @param data handle to the data read -//! @param w width of the image -//! @param h height of the image -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutLoadPPMub(const char *file, unsigned char **data, - unsigned int *w, unsigned int *h); - -//////////////////////////////////////////////////////////////////////////// -//! Load PPM image file (with unsigned char as data element type), padding -//! 4th component -//! @return CUTTrue if reading the file succeeded, otherwise false -//! @param file name of the image file -//! @param data handle to the data read -//! @param w width of the image -//! @param h height of the image -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutLoadPPM4ub(const char *file, unsigned char **data, - unsigned int *w, unsigned int *h); - -//////////////////////////////////////////////////////////////////////////// -//! Load PGM image file (with unsigned int as data element type) -//! @return CUTTrue if reading the file succeeded, otherwise false -//! @param file name of the image file -//! @param data handle to the data read -//! @param w width of the image -//! @param h height of the image -//! @note If a NULL pointer is passed to this function and it is -//! initialized within Cutil then cutFree() has to be used to -//! deallocate the memory -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutLoadPGMi(const char *file, unsigned int **data, - unsigned int *w, unsigned int *h); - -//////////////////////////////////////////////////////////////////////////// -//! Load PGM image file (with unsigned short as data element type) -//! @return CUTTrue if reading the file succeeded, otherwise false -//! @param file name of the image file -//! @param data handle to the data read -//! @param w width of the image -//! @param h height of the image -//! @note If a NULL pointer is passed to this function and it is -//! initialized withing Cutil then cutFree() has to be used to -//! deallocate the memory -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutLoadPGMs(const char *file, unsigned short **data, - unsigned int *w, unsigned int *h); - -//////////////////////////////////////////////////////////////////////////// -//! Load PGM image file (with float as data element type) -//! @param file name of the image file -//! @param data handle to the data read -//! @param w width of the image -//! @param h height of the image -//! @note If a NULL pointer is passed to this function and it is -//! initialized withing Cutil then cutFree() has to be used to -//! deallocate the memory -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutLoadPGMf(const char *file, float **data, - unsigned int *w, unsigned int *h); - -//////////////////////////////////////////////////////////////////////////// -//! Save PGM image file (with unsigned char as data element type) -//! @param file name of the image file -//! @param data handle to the data read -//! @param w width of the image -//! @param h height of the image -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutSavePGMub(const char *file, unsigned char *data, - unsigned int w, unsigned int h); - -//////////////////////////////////////////////////////////////////////////// -//! Save PPM image file (with unsigned char as data element type) -//! @param file name of the image file -//! @param data handle to the data read -//! @param w width of the image -//! @param h height of the image -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutSavePPMub(const char *file, unsigned char *data, - unsigned int w, unsigned int h); - -//////////////////////////////////////////////////////////////////////////// -//! Save PPM image file (with unsigned char as data element type, padded to -//! 4 bytes) -//! @param file name of the image file -//! @param data handle to the data read -//! @param w width of the image -//! @param h height of the image -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutSavePPM4ub(const char *file, unsigned char *data, - unsigned int w, unsigned int h); - -//////////////////////////////////////////////////////////////////////////// -//! Save PGM image file (with unsigned int as data element type) -//! @param file name of the image file -//! @param data handle to the data read -//! @param w width of the image -//! @param h height of the image -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutSavePGMi(const char *file, unsigned int *data, - unsigned int w, unsigned int h); - -//////////////////////////////////////////////////////////////////////////// -//! Save PGM image file (with unsigned short as data element type) -//! @param file name of the image file -//! @param data handle to the data read -//! @param w width of the image -//! @param h height of the image -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutSavePGMs(const char *file, unsigned short *data, - unsigned int w, unsigned int h); - -//////////////////////////////////////////////////////////////////////////// -//! Save PGM image file (with float as data element type) -//! @param file name of the image file -//! @param data handle to the data read -//! @param w width of the image -//! @param h height of the image -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutSavePGMf(const char *file, float *data, unsigned int w, - unsigned int h); - -//////////////////////////////////////////////////////////////////////////// -// Command line arguments: General notes -// * All command line arguments begin with '--' followed by the token; -// token and value are seperated by '='; example --samples=50 -// * Arrays have the form --model=[one.obj,two.obj,three.obj] -// (without whitespaces) -//////////////////////////////////////////////////////////////////////////// - -//////////////////////////////////////////////////////////////////////////// -//! Check if command line argument \a flag-name is given -//! @return CUTTrue if command line argument \a flag_name has been given, -//! otherwise 0 -//! @param argc argc as passed to main() -//! @param argv argv as passed to main() -//! @param flag_name name of command line flag -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutCheckCmdLineFlag(const int argc, const char **argv, - const char *flag_name); - -//////////////////////////////////////////////////////////////////////////// -//! Get the value of a command line argument of type int -//! @return CUTTrue if command line argument \a arg_name has been given and -//! is of the requested type, otherwise CUTFalse -//! @param argc argc as passed to main() -//! @param argv argv as passed to main() -//! @param arg_name name of the command line argument -//! @param val value of the command line argument -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutGetCmdLineArgumenti(const int argc, const char **argv, - const char *arg_name, int *val); - -//////////////////////////////////////////////////////////////////////////// -//! Get the value of a command line argument of type float -//! @return CUTTrue if command line argument \a arg_name has been given and -//! is of the requested type, otherwise CUTFalse -//! @param argc argc as passed to main() -//! @param argv argv as passed to main() -//! @param arg_name name of the command line argument -//! @param val value of the command line argument -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutGetCmdLineArgumentf(const int argc, const char **argv, - const char *arg_name, float *val); - -//////////////////////////////////////////////////////////////////////////// -//! Get the value of a command line argument of type string -//! @return CUTTrue if command line argument \a arg_name has been given and -//! is of the requested type, otherwise CUTFalse -//! @param argc argc as passed to main() -//! @param argv argv as passed to main() -//! @param arg_name name of the command line argument -//! @param val value of the command line argument -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutGetCmdLineArgumentstr(const int argc, const char **argv, - const char *arg_name, char **val); - -//////////////////////////////////////////////////////////////////////////// -//! Get the value of a command line argument list those element are strings -//! @return CUTTrue if command line argument \a arg_name has been given and -//! is of the requested type, otherwise CUTFalse -//! @param argc argc as passed to main() -//! @param argv argv as passed to main() -//! @param arg_name name of the command line argument -//! @param val command line argument list -//! @param len length of the list / number of elements -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutGetCmdLineArgumentListstr(const int argc, - const char **argv, - const char *arg_name, - char **val, - unsigned int *len); - -//////////////////////////////////////////////////////////////////////////// -//! Extended assert -//! @return CUTTrue if the condition \a val holds, otherwise CUTFalse -//! @param val condition to test -//! @param file __FILE__ macro -//! @param line __LINE__ macro -//! @note This function should be used via the CONDITION(val) macro -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutCheckCondition(int val, const char *file, - const int line); - -//////////////////////////////////////////////////////////////////////////// -//! Compare two float arrays -//! @return CUTTrue if \a reference and \a data are identical, -//! otherwise CUTFalse -//! @param reference handle to the reference data / gold image -//! @param data handle to the computed data -//! @param len number of elements in reference and data -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutComparef(const float *reference, const float *data, - const unsigned int len); - -//////////////////////////////////////////////////////////////////////////// -//! Compare two integer arrays -//! @return CUTTrue if \a reference and \a data are identical, -//! otherwise CUTFalse -//! @param reference handle to the reference data / gold image -//! @param data handle to the computed data -//! @param len number of elements in reference and data -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutComparei(const int *reference, const int *data, - const unsigned int len); - -//////////////////////////////////////////////////////////////////////////////// -//! Compare two unsigned integer arrays, with epsilon and threshold -//! @return CUTTrue if \a reference and \a data are identical, -//! otherwise CUTFalse -//! @param reference handle to the reference data / gold image -//! @param data handle to the computed data -//! @param len number of elements in reference and data -//! @param threshold tolerance % # of comparison errors (0.15f = 15%) -//////////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutCompareuit(const unsigned int *reference, - const unsigned int *data, - const unsigned int len, const float epsilon, - const float threshold); - -//////////////////////////////////////////////////////////////////////////// -//! Compare two unsigned char arrays -//! @return CUTTrue if \a reference and \a data are identical, -//! otherwise CUTFalse -//! @param reference handle to the reference data / gold image -//! @param data handle to the computed data -//! @param len number of elements in reference and data -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutCompareub(const unsigned char *reference, - const unsigned char *data, - const unsigned int len); - -//////////////////////////////////////////////////////////////////////////////// -//! Compare two integers with a tolernance for # of byte errors -//! @return CUTTrue if \a reference and \a data are identical, -//! otherwise CUTFalse -//! @param reference handle to the reference data / gold image -//! @param data handle to the computed data -//! @param len number of elements in reference and data -//! @param epsilon epsilon to use for the comparison -//! @param threshold tolerance % # of comparison errors (0.15f = 15%) -//////////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutCompareubt(const unsigned char *reference, - const unsigned char *data, - const unsigned int len, const float epsilon, - const float threshold); - -//////////////////////////////////////////////////////////////////////////////// -//! Compare two integer arrays witha n epsilon tolerance for equality -//! @return CUTTrue if \a reference and \a data are identical, -//! otherwise CUTFalse -//! @param reference handle to the reference data / gold image -//! @param data handle to the computed data -//! @param len number of elements in reference and data -//! @param epsilon epsilon to use for the comparison -//////////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutCompareube(const unsigned char *reference, - const unsigned char *data, - const unsigned int len, const float epsilon); - -//////////////////////////////////////////////////////////////////////////// -//! Compare two float arrays with an epsilon tolerance for equality -//! @return CUTTrue if \a reference and \a data are identical, -//! otherwise CUTFalse -//! @param reference handle to the reference data / gold image -//! @param data handle to the computed data -//! @param len number of elements in reference and data -//! @param epsilon epsilon to use for the comparison -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutComparefe(const float *reference, const float *data, - const unsigned int len, const float epsilon); - -//////////////////////////////////////////////////////////////////////////////// -//! Compare two float arrays with an epsilon tolerance for equality and a -//! threshold for # pixel errors -//! @return CUTTrue if \a reference and \a data are identical, -//! otherwise CUTFalse -//! @param reference handle to the reference data / gold image -//! @param data handle to the computed data -//! @param len number of elements in reference and data -//! @param epsilon epsilon to use for the comparison -//////////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutComparefet(const float *reference, const float *data, - const unsigned int len, const float epsilon, - const float threshold); - -//////////////////////////////////////////////////////////////////////////// -//! Compare two float arrays using L2-norm with an epsilon tolerance for -//! equality -//! @return CUTTrue if \a reference and \a data are identical, -//! otherwise CUTFalse -//! @param reference handle to the reference data / gold image -//! @param data handle to the computed data -//! @param len number of elements in reference and data -//! @param epsilon epsilon to use for the comparison -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutCompareL2fe(const float *reference, const float *data, - const unsigned int len, - const float epsilon); - -//////////////////////////////////////////////////////////////////////////////// -//! Compare two PPM image files with an epsilon tolerance for equality -//! @return CUTTrue if \a reference and \a data are identical, -//! otherwise CUTFalse -//! @param src_file filename for the image to be compared -//! @param data filename for the reference data / gold image -//! @param epsilon epsilon to use for the comparison -//! @param threshold threshold of pixels that can still mismatch to pass (i.e. -//! 0.15f = 15% must pass) $param verboseErrors output details of image mismatch -//! to std::err -//////////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutComparePPM(const char *src_file, const char *ref_file, - const float epsilon, const float threshold, - bool verboseErrors = false); - -//////////////////////////////////////////////////////////////////////////// -//! Timer functionality - -//////////////////////////////////////////////////////////////////////////// -//! Create a new timer -//! @return CUTTrue if a time has been created, otherwise false -//! @param name of the new timer, 0 if the creation failed -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutCreateTimer(unsigned int *name); - -//////////////////////////////////////////////////////////////////////////// -//! Delete a timer -//! @return CUTTrue if a time has been deleted, otherwise false -//! @param name of the timer to delete -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutDeleteTimer(unsigned int name); - -//////////////////////////////////////////////////////////////////////////// -//! Start the time with name \a name -//! @param name name of the timer to start -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutStartTimer(const unsigned int name); - -//////////////////////////////////////////////////////////////////////////// -//! Stop the time with name \a name. Does not reset. -//! @param name name of the timer to stop -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutStopTimer(const unsigned int name); - -//////////////////////////////////////////////////////////////////////////// -//! Resets the timer's counter. -//! @param name name of the timer to reset. -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -CUTBoolean CUTIL_API cutResetTimer(const unsigned int name); - -//////////////////////////////////////////////////////////////////////////// -//! Returns total execution time in milliseconds for the timer over all -//! runs since the last reset or timer creation. -//! @param name name of the timer to return the time of -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -float CUTIL_API cutGetTimerValue(const unsigned int name); - -//////////////////////////////////////////////////////////////////////////// -//! Return the average time in milliseconds for timer execution as the -//! total time for the timer dividied by the number of completed (stopped) -//! runs the timer has made. -//! Excludes the current running time if the timer is currently running. -//! @param name name of the timer to return the time of -//////////////////////////////////////////////////////////////////////////// -DLL_MAPPING -float CUTIL_API cutGetAverageTimerValue(const unsigned int name); - -//////////////////////////////////////////////////////////////////////////// -//! Macros - -#if CUDART_VERSION >= 4000 -#define CUT_DEVICE_SYNCHRONIZE() cudaDeviceSynchronize(); -#else -#define CUT_DEVICE_SYNCHRONIZE() cudaThreadSynchronize(); -#endif - -#if CUDART_VERSION >= 4000 -#define CUT_DEVICE_RESET() cudaDeviceReset(); -#else -#define CUT_DEVICE_RESET() cudaThreadExit(); -#endif - -// This is for the CUTIL bank checker -#ifdef _DEBUG -#if __DEVICE_EMULATION__ -// Interface for bank conflict checker -#define CUT_BANK_CHECKER(array, index) \ - (cutCheckBankAccess(threadIdx.x, threadIdx.y, threadIdx.z, blockDim.x, \ - blockDim.y, blockDim.z, __FILE__, __LINE__, #array, \ - index), \ - array[index]) -#else -#define CUT_BANK_CHECKER(array, index) array[index] -#endif -#else -#define CUT_BANK_CHECKER(array, index) array[index] -#endif - -#define CU_SAFE_CALL_NO_SYNC(call) \ - { \ - CUresult err = call; \ - if (CUDA_SUCCESS != err) { \ - fprintf(stderr, "Cuda driver error %x in file '%s' in line %i.\n", err, \ - __FILE__, __LINE__); \ - exit(EXIT_FAILURE); \ - } \ - } - -#define CU_SAFE_CALL(call) CU_SAFE_CALL_NO_SYNC(call); - -#define CU_SAFE_CTX_SYNC() \ - { \ - CUresult err = cuCtxSynchronize(); \ - if (CUDA_SUCCESS != err) { \ - fprintf(stderr, "Cuda driver error %x in file '%s' in line %i.\n", err, \ - __FILE__, __LINE__); \ - exit(EXIT_FAILURE); \ - } \ - } - -#define CUDA_SAFE_CALL_NO_SYNC(call) \ - { \ - cudaError err = call; \ - if (cudaSuccess != err) { \ - fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", __FILE__, \ - __LINE__, cudaGetErrorString(err)); \ - exit(EXIT_FAILURE); \ - } \ - } - -#define CUDA_SAFE_CALL(call) CUDA_SAFE_CALL_NO_SYNC(call); - -#define CUDA_SAFE_THREAD_SYNC() \ - { \ - cudaError err = CUT_DEVICE_SYNCHRONIZE(); \ - if (cudaSuccess != err) { \ - fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", __FILE__, \ - __LINE__, cudaGetErrorString(err)); \ - } \ - } - -#define CUFFT_SAFE_CALL(call) \ - { \ - cufftResult err = call; \ - if (CUFFT_SUCCESS != err) { \ - fprintf(stderr, "CUFFT error in file '%s' in line %i.\n", __FILE__, \ - __LINE__); \ - exit(EXIT_FAILURE); \ - } \ - } - -#define CUT_SAFE_CALL(call) \ - if (CUTTrue != call) { \ - fprintf(stderr, "Cut error in file '%s' in line %i.\n", __FILE__, \ - __LINE__); \ - exit(EXIT_FAILURE); \ - } - -//! Check for CUDA error -#ifdef _DEBUG -#define CUT_CHECK_ERROR(errorMessage) \ - { \ - cudaError_t err = cudaGetLastError(); \ - if (cudaSuccess != err) { \ - fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \ - errorMessage, __FILE__, __LINE__, cudaGetErrorString(err)); \ - exit(EXIT_FAILURE); \ - } \ - err = CUT_DEVICE_SYNCHRONIZE(); \ - if (cudaSuccess != err) { \ - fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \ - errorMessage, __FILE__, __LINE__, cudaGetErrorString(err)); \ - exit(EXIT_FAILURE); \ - } \ - } -#else -#define CUT_CHECK_ERROR(errorMessage) \ - { \ - cudaError_t err = cudaGetLastError(); \ - if (cudaSuccess != err) { \ - fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \ - errorMessage, __FILE__, __LINE__, cudaGetErrorString(err)); \ - exit(EXIT_FAILURE); \ - } \ - } -#endif - -//! Check for malloc error -#define CUT_SAFE_MALLOC(mallocCall) \ - { \ - if (!(mallocCall)) { \ - fprintf(stderr, "Host malloc failure in file '%s' in line %i\n", \ - __FILE__, __LINE__); \ - exit(EXIT_FAILURE); \ - } \ - } \ - while (0) \ - ; - -//! Check if conditon is true (flexible assert) -#define CUT_CONDITION(val) \ - if (CUTFalse == cutCheckCondition(val, __FILE__, __LINE__)) { \ - exit(EXIT_FAILURE); \ - } - -#if __DEVICE_EMULATION__ - -#define CUT_DEVICE_INIT(ARGC, ARGV) - -#else - -#define CUT_DEVICE_INIT(ARGC, ARGV) \ - { \ - int deviceCount; \ - CUDA_SAFE_CALL_NO_SYNC(cudaGetDeviceCount(&deviceCount)); \ - if (deviceCount == 0) { \ - fprintf(stderr, "cutil error: no devices supporting CUDA.\n"); \ - exit(EXIT_FAILURE); \ - } \ - int dev = 0; \ - cutGetCmdLineArgumenti(ARGC, (const char **)ARGV, "device", &dev); \ - if (dev < 0) \ - dev = 0; \ - if (dev > deviceCount - 1) \ - dev = deviceCount - 1; \ - cudaDeviceProp deviceProp; \ - CUDA_SAFE_CALL_NO_SYNC(cudaGetDeviceProperties(&deviceProp, dev)); \ - if (cutCheckCmdLineFlag(ARGC, (const char **)ARGV, "quiet") == CUTFalse) \ - fprintf(stderr, "Using device %d: %s\n", dev, deviceProp.name); \ - CUDA_SAFE_CALL(cudaSetDevice(dev)); \ - } - -//! Check for CUDA context lost -#define CUDA_CHECK_CTX_LOST(errorMessage) \ - { \ - cudaError_t err = cudaGetLastError(); \ - if (cudaSuccess != err) { \ - fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \ - errorMessage, __FILE__, __LINE__, cudaGetErrorString(err)); \ - exit(EXIT_FAILURE); \ - } \ - err = CUT_DEVICE_SYNCHRONIZE(); \ - if (cudaSuccess != err) { \ - fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \ - errorMessage, __FILE__, __LINE__, cudaGetErrorString(err)); \ - exit(EXIT_FAILURE); \ - } \ - } - -//! Check for CUDA context lost -#define CU_CHECK_CTX_LOST(errorMessage) \ - { \ - cudaError_t err = cudaGetLastError(); \ - if (CUDA_ERROR_INVALID_CONTEXT != err) { \ - fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \ - errorMessage, __FILE__, __LINE__, cudaGetErrorString(err)); \ - exit(EXIT_FAILURE); \ - } \ - err = CUT_DEVICE_SYNCHRONIZE(); \ - if (cudaSuccess != err) { \ - fprintf(stderr, "Cuda error: %s in file '%s' in line %i : %s.\n", \ - errorMessage, __FILE__, __LINE__, cudaGetErrorString(err)); \ - exit(EXIT_FAILURE); \ - } \ - } - -#endif - -#define CUT_DEVICE_INIT_DRV(cuDevice, ARGC, ARGV) \ - { \ - cuDevice = 0; \ - int deviceCount = 0; \ - CUresult err = cuInit(0); \ - if (CUDA_SUCCESS == err) \ - CU_SAFE_CALL_NO_SYNC(cuDeviceGetCount(&deviceCount)); \ - if (deviceCount == 0) { \ - fprintf(stderr, "cutil error: no devices supporting CUDA\n"); \ - exit(EXIT_FAILURE); \ - } \ - int dev = 0; \ - cutGetCmdLineArgumenti(ARGC, (const char **)ARGV, "device", &dev); \ - if (dev < 0) \ - dev = 0; \ - if (dev > deviceCount - 1) \ - dev = deviceCount - 1; \ - CU_SAFE_CALL_NO_SYNC(cuDeviceGet(&cuDevice, dev)); \ - char name[100]; \ - cuDeviceGetName(name, 100, cuDevice); \ - if (cutCheckCmdLineFlag(ARGC, (const char **)ARGV, "quiet") == CUTFalse) \ - fprintf(stderr, "Using device %d: %s\n", dev, name); \ - } - -#define CUT_EXIT(argc, argv) \ - if (!cutCheckCmdLineFlag(argc, (const char **)argv, "noprompt")) { \ - printf("\nPress ENTER to exit...\n"); \ - fflush(stdout); \ - fflush(stderr); \ - getchar(); \ - } \ - exit(EXIT_SUCCESS); - -#endif // #ifndef _CUTIL_H_ diff --git a/examples/huffman/hist.cu b/examples/huffman/hist.cu deleted file mode 100644 index 0ff3f31..0000000 --- a/examples/huffman/hist.cu +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Copyright 1993-2010 NVIDIA Corporation. All rights reserved. - * - * NVIDIA Corporation and its licensors retain all intellectual property and * - * proprietary rights in and to this software and related documentation. Any - * use, reproduction, disclosure, or distribution of this software and related - * documentation without an express license agreement from NVIDIA Corporation is - * strictly prohibited. - * - * Please refer to the applicable NVIDIA end user license agreement (EULA) - * associated with this source code for terms and conditions that govern - * your use of this NVIDIA software. - * - */ - -#include -#include - -#define CHECK(ans) \ - { gpuAssert((ans), __FILE__, __LINE__); } -inline void gpuAssert(cudaError_t code, const char *file, int line, - bool abort = true) { - if (code != cudaSuccess) { - fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, - line); - if (abort) - exit(code); - } -} - -using namespace std; - -#define SIZE (100 * 1024 * 1024) - -__global__ void histo_kernel(unsigned char *buffer, long size, - unsigned int *histo) { - - __shared__ unsigned int temp[256]; - - temp[threadIdx.x] = 0; - __syncthreads(); - - int i = threadIdx.x + blockIdx.x * blockDim.x; - int offset = blockDim.x * gridDim.x; - while (i < size) { - atomicAdd(&temp[buffer[i]], 1); - i += offset; - } - - __syncthreads(); - atomicAdd(&(histo[threadIdx.x]), temp[threadIdx.x]); -} - -int runHisto(char *file, unsigned int *freq, unsigned int memSize, - unsigned int *source) { - - FILE *f = fopen(file, "rb"); - if (!f) { - perror(file); - exit(1); - } - fseek(f, 0, SEEK_SET); - size_t result = fread(source, 1, memSize, f); - if (result != memSize) - fputs("Cannot read input file", stderr); - - fclose(f); - - unsigned char *buffer = (unsigned char *)source; - - int blocks = 2; - - // allocate memory on the GPU for the file's data - int partSize = memSize / 32; - int totalNum = memSize / sizeof(unsigned int); - int partialNum = partSize / sizeof(unsigned int); - - unsigned char *dev_buffer0; - unsigned char *dev_buffer1; - unsigned int *dev_histo; - cudaMalloc((void **)&dev_buffer0, partSize); - cudaMalloc((void **)&dev_buffer1, partSize); - cudaMalloc((void **)&dev_histo, 256 * sizeof(int)); - cudaMemset(dev_histo, 0, 256 * sizeof(int)); - - for (int i = 0; i < totalNum; i += partialNum * 2) { - CHECK( - cudaMemcpy(dev_buffer0, buffer + i, partSize, cudaMemcpyHostToDevice)); - CHECK(cudaMemcpy(dev_buffer1, buffer + i + partialNum, partSize, - cudaMemcpyHostToDevice)); - - // kernel launch - 2x the number of mps gave best timing - histo_kernel<<>>(dev_buffer0, partSize, dev_histo); - cudaDeviceSynchronize(); - histo_kernel<<>>(dev_buffer1, partSize, dev_histo); - cudaDeviceSynchronize(); - } - cudaMemcpy(freq, dev_histo, 256 * sizeof(int), cudaMemcpyDeviceToHost); - - cudaFree(dev_histo); - cudaFree(dev_buffer0); - cudaFree(dev_buffer1); - return 0; -} diff --git a/examples/huffman/huffTree.h b/examples/huffman/huffTree.h deleted file mode 100644 index 8a37568..0000000 --- a/examples/huffman/huffTree.h +++ /dev/null @@ -1,90 +0,0 @@ -#include "stdio.h" -#include -#include // for CHAR_BIT -#include -#include -#include -#include -#include - -using namespace std; - -const int UniqueSymbols = 1 << CHAR_BIT; -void printBits(unsigned int val, int numbits) { - for (int i = numbits - 1; i >= 0; i--) - putchar('0' + ((val >> i) & 1)); -} - -typedef vector HuffCode; -typedef map HuffCodeMap; - -class INode { -public: - const int f; - virtual ~INode() {} - -protected: - INode(int f) : f(f) {} -}; - -class InternalNode : public INode { -public: - INode *const left; - INode *const right; - - InternalNode(INode *c0, INode *c1) - : INode(c0->f + c1->f), left(c0), right(c1) {} - ~InternalNode() { - delete left; - delete right; - } -}; - -class LeafNode : public INode { -public: - const char c; - - LeafNode(int f, char c) : INode(f), c(c) {} -}; - -struct NodeCmp { - bool operator()(const INode *lhs, const INode *rhs) const { - return lhs->f > rhs->f; - } -}; - -INode *BuildTree(unsigned int (&frequencies)[UniqueSymbols]) { - std::priority_queue, NodeCmp> trees; - - for (int i = 0; i < UniqueSymbols; ++i) { - if (frequencies[i] != 0) - trees.push(new LeafNode(frequencies[i], (char)i)); - } - while (trees.size() > 1) { - INode *childR = trees.top(); - trees.pop(); - - INode *childL = trees.top(); - trees.pop(); - - INode *parent = new InternalNode(childR, childL); - trees.push(parent); - } - return trees.top(); -} - -void GenerateCodes(const INode *node, const HuffCode &prefix, - HuffCodeMap &outCodes) { - if (const LeafNode *lf = dynamic_cast(node)) { - outCodes[lf->c] = prefix; - } else if (const InternalNode *in = - dynamic_cast(node)) { - HuffCode leftPrefix = prefix; - leftPrefix.push_back(false); - GenerateCodes(in->left, leftPrefix, outCodes); - - HuffCode rightPrefix = prefix; - rightPrefix.push_back(true); - GenerateCodes(in->right, rightPrefix, outCodes); - } -} diff --git a/examples/huffman/load_data.h b/examples/huffman/load_data.h deleted file mode 100644 index b0149ab..0000000 --- a/examples/huffman/load_data.h +++ /dev/null @@ -1,65 +0,0 @@ -#ifndef _LOADTESTDATA_H_ -#define _LOADTESTDATA_H_ - -//#include "testdatagen.h" -#include "hist.cu" -#include "huffTree.h" - -inline void initParams(char *file_name, uint num_block_threads, - uint &num_blocks, uint &num_elements, uint &mem_size, - uint symbol_type_size) { - if (file_name == NULL) { - num_elements = num_blocks * num_block_threads; - mem_size = num_elements * symbol_type_size; - } else { - FILE *f = fopen(file_name, "rb"); - if (!f) { - perror(file_name); - exit(1); - } - fseek(f, 0, SEEK_END); - mem_size = ftell(f); - fclose(f); - num_elements = mem_size / symbol_type_size; - // todo add check if we need 1 more block! - num_blocks = num_elements / num_block_threads; - } -} - -inline void loadData(char *file_name, uint *sourceData, uint *codewords, - uint *codewordlens, uint num_elements, uint mem_size, - double &H) { - if (file_name == NULL) { - printf("No input file\n"); - exit(-1); - } else { - unsigned int freqs[UniqueSymbols] = {0}; - runHisto(file_name, freqs, mem_size, sourceData); - INode *root = BuildTree(freqs); - - HuffCodeMap codes; - GenerateCodes(root, HuffCode(), codes); - delete root; - - for (HuffCodeMap::const_iterator it = codes.begin(); it != codes.end(); - ++it) { - unsigned int count = distance(it->second.begin(), it->second.end()); - for (int i = 0; i < count; i++) - if (it->second[i]) - codewords[(unsigned int)(it->first)] += - (uint)pow(2.0f, (int)count - i - 1); - codewordlens[(unsigned int)(it->first)] = count; - } - - H = 0.0; - for (unsigned int i = 0; i < 256; i++) - if (freqs[i] > 0) { - double p = (double)freqs[i] / (double)mem_size; - H += p * log(p) / log(2.0); - } - H = -H; - printf("\n%s, %u bytes, entropy %f\n\n", file_name, mem_size, H); - } -} - -#endif diff --git a/examples/huffman/main_test_cu-cuda-nvptx64-nvidia-cuda-sm_61.ll b/examples/huffman/main_test_cu-cuda-nvptx64-nvidia-cuda-sm_61.ll deleted file mode 100644 index 47206d1..0000000 --- a/examples/huffman/main_test_cu-cuda-nvptx64-nvidia-cuda-sm_61.ll +++ /dev/null @@ -1,1933 +0,0 @@ -; ModuleID = 'main_test_cu-cuda-nvptx64-nvidia-cuda-sm_61.bc' -source_filename = "main_test_cu.cu" -target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" -target triple = "nvptx64-nvidia-cuda" - -%struct.__cuda_builtin_threadIdx_t = type { i8 } -%struct.__cuda_builtin_blockIdx_t = type { i8 } -%struct.__cuda_builtin_blockDim_t = type { i8 } -%struct.__cuda_builtin_gridDim_t = type { i8 } -%struct.cudaFuncAttributes = type { i64, i64, i64, i32, i32, i32, i32, i32, i32, i32 } - -$_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv = comdat any - -$_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv = comdat any - -$_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv = comdat any - -$_ZN24__cuda_builtin_gridDim_t17__fetch_builtin_xEv = comdat any - -@_ZZ12histo_kernelPhlPjE4temp = internal addrspace(3) global [256 x i32] undef, align 4 -@threadIdx = extern_weak dso_local addrspace(1) global %struct.__cuda_builtin_threadIdx_t, align 1 -@blockIdx = extern_weak dso_local addrspace(1) global %struct.__cuda_builtin_blockIdx_t, align 1 -@blockDim = extern_weak dso_local addrspace(1) global %struct.__cuda_builtin_blockDim_t, align 1 -@gridDim = extern_weak dso_local addrspace(1) global %struct.__cuda_builtin_gridDim_t, align 1 -@_ZZL26vlc_encode_kernel_sm64huffPjPKjS1_S_S_S_S_S_E2sm = internal addrspace(3) global [3072 x i32] undef, align 4 -@_ZZL26vlc_encode_kernel_sm64huffPjPKjS1_S_S_S_S_S_E5kcmax = internal addrspace(3) global i32 undef, align 4 -@_ZZL10uniformAddPjS_iiiE3uni = internal addrspace(3) global i32 undef, align 4 -@_ZZL7prescanILb1ELb0EEvPjPKjS0_iiiE6s_data = internal addrspace(3) global [3072 x i32] undef, align 4 -@_ZZL7prescanILb1ELb1EEvPjPKjS0_iiiE6s_data = internal addrspace(3) global [3072 x i32] undef, align 4 -@_ZZL7prescanILb0ELb0EEvPjPKjS0_iiiE6s_data = internal addrspace(3) global [3072 x i32] undef, align 4 -@_ZZL7prescanILb0ELb1EEvPjPKjS0_iiiE6s_data = internal addrspace(3) global [3072 x i32] undef, align 4 - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaMalloc(i8** %p, i64 %s) #0 { -entry: - %p.addr = alloca i8**, align 8 - %s.addr = alloca i64, align 8 - store i8** %p, i8*** %p.addr, align 8 - store i64 %s, i64* %s.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaFuncGetAttributes(%struct.cudaFuncAttributes* %p, i8* %c) #0 { -entry: - %p.addr = alloca %struct.cudaFuncAttributes*, align 8 - %c.addr = alloca i8*, align 8 - store %struct.cudaFuncAttributes* %p, %struct.cudaFuncAttributes** %p.addr, align 8 - store i8* %c, i8** %c.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaDeviceGetAttribute(i32* %value, i32 %attr, i32 %device) #0 { -entry: - %value.addr = alloca i32*, align 8 - %attr.addr = alloca i32, align 4 - %device.addr = alloca i32, align 4 - store i32* %value, i32** %value.addr, align 8 - store i32 %attr, i32* %attr.addr, align 4 - store i32 %device, i32* %device.addr, align 4 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaGetDevice(i32* %device) #0 { -entry: - %device.addr = alloca i32*, align 8 - store i32* %device, i32** %device.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaOccupancyMaxActiveBlocksPerMultiprocessor(i32* %numBlocks, i8* %func, i32 %blockSize, i64 %dynamicSmemSize) #0 { -entry: - %numBlocks.addr = alloca i32*, align 8 - %func.addr = alloca i8*, align 8 - %blockSize.addr = alloca i32, align 4 - %dynamicSmemSize.addr = alloca i64, align 8 - store i32* %numBlocks, i32** %numBlocks.addr, align 8 - store i8* %func, i8** %func.addr, align 8 - store i32 %blockSize, i32* %blockSize.addr, align 4 - store i64 %dynamicSmemSize, i64* %dynamicSmemSize.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(i32* %numBlocks, i8* %func, i32 %blockSize, i64 %dynamicSmemSize, i32 %flags) #0 { -entry: - %numBlocks.addr = alloca i32*, align 8 - %func.addr = alloca i8*, align 8 - %blockSize.addr = alloca i32, align 4 - %dynamicSmemSize.addr = alloca i64, align 8 - %flags.addr = alloca i32, align 4 - store i32* %numBlocks, i32** %numBlocks.addr, align 8 - store i8* %func, i8** %func.addr, align 8 - store i32 %blockSize, i32* %blockSize.addr, align 4 - store i64 %dynamicSmemSize, i64* %dynamicSmemSize.addr, align 8 - store i32 %flags, i32* %flags.addr, align 4 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define dso_local void @_Z12histo_kernelPhlPj(i8* %buffer, i64 %size, i32* %histo) #0 { -entry: - %buffer.addr = alloca i8*, align 8 - %size.addr = alloca i64, align 8 - %histo.addr = alloca i32*, align 8 - %i = alloca i32, align 4 - %offset = alloca i32, align 4 - store i8* %buffer, i8** %buffer.addr, align 8 - store i64 %size, i64* %size.addr, align 8 - store i32* %histo, i32** %histo.addr, align 8 - %call = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - %idxprom = zext i32 %call to i64 - %arrayidx = getelementptr inbounds [256 x i32], [256 x i32]* addrspacecast ([256 x i32] addrspace(3)* @_ZZ12histo_kernelPhlPjE4temp to [256 x i32]*), i64 0, i64 %idxprom - store i32 0, i32* %arrayidx, align 4 - call void @llvm.nvvm.barrier0() - %call1 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - %call2 = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #2 - %call3 = call i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv() #2 - %mul = mul i32 %call2, %call3 - %add = add i32 %call1, %mul - store i32 %add, i32* %i, align 4 - %call4 = call i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv() #2 - %call5 = call i32 @_ZN24__cuda_builtin_gridDim_t17__fetch_builtin_xEv() #2 - %mul6 = mul i32 %call4, %call5 - store i32 %mul6, i32* %offset, align 4 - br label %while.cond - -while.cond: ; preds = %while.body, %entry - %0 = load i32, i32* %i, align 4 - %conv = sext i32 %0 to i64 - %1 = load i64, i64* %size.addr, align 8 - %cmp = icmp slt i64 %conv, %1 - br i1 %cmp, label %while.body, label %while.end - -while.body: ; preds = %while.cond - %2 = load i8*, i8** %buffer.addr, align 8 - %3 = load i32, i32* %i, align 4 - %idxprom7 = sext i32 %3 to i64 - %arrayidx8 = getelementptr inbounds i8, i8* %2, i64 %idxprom7 - %4 = load i8, i8* %arrayidx8, align 1 - %idxprom9 = zext i8 %4 to i64 - %arrayidx10 = getelementptr inbounds [256 x i32], [256 x i32]* addrspacecast ([256 x i32] addrspace(3)* @_ZZ12histo_kernelPhlPjE4temp to [256 x i32]*), i64 0, i64 %idxprom9 - %call11 = call i32 @_ZL9atomicAddPjj(i32* %arrayidx10, i32 1) #2 - %5 = load i32, i32* %offset, align 4 - %6 = load i32, i32* %i, align 4 - %add12 = add nsw i32 %6, %5 - store i32 %add12, i32* %i, align 4 - br label %while.cond - -while.end: ; preds = %while.cond - call void @llvm.nvvm.barrier0() - %7 = load i32*, i32** %histo.addr, align 8 - %call13 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - %idxprom14 = zext i32 %call13 to i64 - %arrayidx15 = getelementptr inbounds i32, i32* %7, i64 %idxprom14 - %call16 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - %idxprom17 = zext i32 %call16 to i64 - %arrayidx18 = getelementptr inbounds [256 x i32], [256 x i32]* addrspacecast ([256 x i32] addrspace(3)* @_ZZ12histo_kernelPhlPjE4temp to [256 x i32]*), i64 0, i64 %idxprom17 - %8 = load i32, i32* %arrayidx18, align 4 - %call19 = call i32 @_ZL9atomicAddPjj(i32* %arrayidx15, i32 %8) #2 - ret void -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() - ret i32 %0 -} - -; Function Attrs: convergent nounwind -declare void @llvm.nvvm.barrier0() #2 - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() - ret i32 %0 -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() - ret i32 %0 -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN24__cuda_builtin_gridDim_t17__fetch_builtin_xEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x() - ret i32 %0 -} - -; Function Attrs: convergent noinline nounwind optnone -define internal i32 @_ZL9atomicAddPjj(i32* %address, i32 %val) #0 { -entry: - %address.addr = alloca i32*, align 8 - %val.addr = alloca i32, align 4 - store i32* %address, i32** %address.addr, align 8 - store i32 %val, i32* %val.addr, align 4 - %0 = load i32*, i32** %address.addr, align 8 - %1 = load i32, i32* %val.addr, align 4 - %call = call i32 @_ZL12__uAtomicAddPjj(i32* %0, i32 %1) #2 - ret i32 %call -} - -; Function Attrs: convergent noinline nounwind optnone -define dso_local void @_ZL26vlc_encode_kernel_sm64huffPjPKjS1_S_S_S_S_S_(i32* %data, i32* %gm_codewords, i32* %gm_codewordlens, i32* %cw32, i32* %cw32len, i32* %cw32idx, i32* %out, i32* %outidx) #0 { -entry: - %data.addr = alloca i32*, align 8 - %gm_codewords.addr = alloca i32*, align 8 - %gm_codewordlens.addr = alloca i32*, align 8 - %cw32.addr = alloca i32*, align 8 - %cw32len.addr = alloca i32*, align 8 - %cw32idx.addr = alloca i32*, align 8 - %out.addr = alloca i32*, align 8 - %outidx.addr = alloca i32*, align 8 - %kn = alloca i32, align 4 - %k = alloca i32, align 4 - %kc = alloca i32, align 4 - %startbit = alloca i32, align 4 - %wrbits = alloca i32, align 4 - %cw64 = alloca i64, align 8 - %val32 = alloca i32, align 4 - %codewordlen = alloca i32, align 4 - %tmpbyte = alloca i8, align 1 - %tmpcwlen = alloca i8, align 1 - %tmpcw32 = alloca i32, align 4 - %codewords = alloca i32*, align 8 - %codewordlens = alloca i32*, align 8 - %as = alloca i32*, align 8 - %i = alloca i32, align 4 - %offset = alloca i32, align 4 - %d = alloca i32, align 4 - %ai = alloca i8, align 1 - %bi = alloca i8, align 1 - %d56 = alloca i32, align 4 - %ai64 = alloca i8, align 1 - %bi70 = alloca i8, align 1 - %t = alloca i32, align 4 - store i32* %data, i32** %data.addr, align 8 - store i32* %gm_codewords, i32** %gm_codewords.addr, align 8 - store i32* %gm_codewordlens, i32** %gm_codewordlens.addr, align 8 - store i32* %cw32, i32** %cw32.addr, align 8 - store i32* %cw32len, i32** %cw32len.addr, align 8 - store i32* %cw32idx, i32** %cw32idx.addr, align 8 - store i32* %out, i32** %out.addr, align 8 - store i32* %outidx, i32** %outidx.addr, align 8 - %call = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #2 - %call1 = call i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv() #2 - %mul = mul i32 %call, %call1 - %call2 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - %add = add i32 %mul, %call2 - store i32 %add, i32* %kn, align 4 - %call3 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - store i32 %call3, i32* %k, align 4 - store i64 0, i64* %cw64, align 8 - store i32 0, i32* %codewordlen, align 4 - store i32* getelementptr inbounds ([3072 x i32], [3072 x i32]* addrspacecast ([3072 x i32] addrspace(3)* @_ZZL26vlc_encode_kernel_sm64huffPjPKjS1_S_S_S_S_S_E2sm to [3072 x i32]*), i64 0, i64 0), i32** %codewords, align 8 - store i32* getelementptr inbounds ([3072 x i32], [3072 x i32]* addrspacecast ([3072 x i32] addrspace(3)* @_ZZL26vlc_encode_kernel_sm64huffPjPKjS1_S_S_S_S_S_E2sm to [3072 x i32]*), i64 0, i64 256), i32** %codewordlens, align 8 - store i32* getelementptr inbounds ([3072 x i32], [3072 x i32]* addrspacecast ([3072 x i32] addrspace(3)* @_ZZL26vlc_encode_kernel_sm64huffPjPKjS1_S_S_S_S_S_E2sm to [3072 x i32]*), i64 0, i64 512), i32** %as, align 8 - %0 = load i32*, i32** %gm_codewords.addr, align 8 - %1 = load i32, i32* %k, align 4 - %idxprom = zext i32 %1 to i64 - %arrayidx = getelementptr inbounds i32, i32* %0, i64 %idxprom - %2 = load i32, i32* %arrayidx, align 4 - %3 = load i32*, i32** %codewords, align 8 - %4 = load i32, i32* %k, align 4 - %idxprom4 = zext i32 %4 to i64 - %arrayidx5 = getelementptr inbounds i32, i32* %3, i64 %idxprom4 - store i32 %2, i32* %arrayidx5, align 4 - %5 = load i32*, i32** %gm_codewordlens.addr, align 8 - %6 = load i32, i32* %k, align 4 - %idxprom6 = zext i32 %6 to i64 - %arrayidx7 = getelementptr inbounds i32, i32* %5, i64 %idxprom6 - %7 = load i32, i32* %arrayidx7, align 4 - %8 = load i32*, i32** %codewordlens, align 8 - %9 = load i32, i32* %k, align 4 - %idxprom8 = zext i32 %9 to i64 - %arrayidx9 = getelementptr inbounds i32, i32* %8, i64 %idxprom8 - store i32 %7, i32* %arrayidx9, align 4 - %10 = load i32*, i32** %data.addr, align 8 - %11 = load i32, i32* %kn, align 4 - %idxprom10 = zext i32 %11 to i64 - %arrayidx11 = getelementptr inbounds i32, i32* %10, i64 %idxprom10 - %12 = load i32, i32* %arrayidx11, align 4 - store i32 %12, i32* %val32, align 4 - call void @llvm.nvvm.barrier0() - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %13 = load i32, i32* %i, align 4 - %cmp = icmp ult i32 %13, 4 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %14 = load i32, i32* %val32, align 4 - %15 = load i32, i32* %i, align 4 - %sub = sub i32 3, %15 - %mul12 = mul i32 %sub, 8 - %shr = lshr i32 %14, %mul12 - %conv = trunc i32 %shr to i8 - store i8 %conv, i8* %tmpbyte, align 1 - %16 = load i32*, i32** %codewords, align 8 - %17 = load i8, i8* %tmpbyte, align 1 - %idxprom13 = zext i8 %17 to i64 - %arrayidx14 = getelementptr inbounds i32, i32* %16, i64 %idxprom13 - %18 = load i32, i32* %arrayidx14, align 4 - store i32 %18, i32* %tmpcw32, align 4 - %19 = load i32*, i32** %codewordlens, align 8 - %20 = load i8, i8* %tmpbyte, align 1 - %idxprom15 = zext i8 %20 to i64 - %arrayidx16 = getelementptr inbounds i32, i32* %19, i64 %idxprom15 - %21 = load i32, i32* %arrayidx16, align 4 - %conv17 = trunc i32 %21 to i8 - store i8 %conv17, i8* %tmpcwlen, align 1 - %22 = load i64, i64* %cw64, align 8 - %23 = load i8, i8* %tmpcwlen, align 1 - %conv18 = zext i8 %23 to i32 - %sh_prom = zext i32 %conv18 to i64 - %shl = shl i64 %22, %sh_prom - %24 = load i32, i32* %tmpcw32, align 4 - %conv19 = zext i32 %24 to i64 - %or = or i64 %shl, %conv19 - store i64 %or, i64* %cw64, align 8 - %25 = load i8, i8* %tmpcwlen, align 1 - %conv20 = zext i8 %25 to i32 - %26 = load i32, i32* %codewordlen, align 4 - %add21 = add i32 %26, %conv20 - store i32 %add21, i32* %codewordlen, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %27 = load i32, i32* %i, align 4 - %inc = add i32 %27, 1 - store i32 %inc, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %28 = load i32, i32* %codewordlen, align 4 - %29 = load i32*, i32** %as, align 8 - %30 = load i32, i32* %k, align 4 - %idxprom22 = zext i32 %30 to i64 - %arrayidx23 = getelementptr inbounds i32, i32* %29, i64 %idxprom22 - store i32 %28, i32* %arrayidx23, align 4 - call void @llvm.nvvm.barrier0() - store i32 1, i32* %offset, align 4 - %call24 = call i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv() #2 - %shr25 = lshr i32 %call24, 1 - store i32 %shr25, i32* %d, align 4 - br label %for.cond26 - -for.cond26: ; preds = %for.inc46, %for.end - %31 = load i32, i32* %d, align 4 - %cmp27 = icmp ugt i32 %31, 0 - br i1 %cmp27, label %for.body28, label %for.end48 - -for.body28: ; preds = %for.cond26 - call void @llvm.nvvm.barrier0() - %32 = load i32, i32* %k, align 4 - %33 = load i32, i32* %d, align 4 - %cmp29 = icmp ult i32 %32, %33 - br i1 %cmp29, label %if.then, label %if.end - -if.then: ; preds = %for.body28 - %34 = load i32, i32* %offset, align 4 - %35 = load i32, i32* %k, align 4 - %mul30 = mul i32 2, %35 - %add31 = add i32 %mul30, 1 - %mul32 = mul i32 %34, %add31 - %sub33 = sub i32 %mul32, 1 - %conv34 = trunc i32 %sub33 to i8 - store i8 %conv34, i8* %ai, align 1 - %36 = load i32, i32* %offset, align 4 - %37 = load i32, i32* %k, align 4 - %mul35 = mul i32 2, %37 - %add36 = add i32 %mul35, 2 - %mul37 = mul i32 %36, %add36 - %sub38 = sub i32 %mul37, 1 - %conv39 = trunc i32 %sub38 to i8 - store i8 %conv39, i8* %bi, align 1 - %38 = load i32*, i32** %as, align 8 - %39 = load i8, i8* %ai, align 1 - %idxprom40 = zext i8 %39 to i64 - %arrayidx41 = getelementptr inbounds i32, i32* %38, i64 %idxprom40 - %40 = load i32, i32* %arrayidx41, align 4 - %41 = load i32*, i32** %as, align 8 - %42 = load i8, i8* %bi, align 1 - %idxprom42 = zext i8 %42 to i64 - %arrayidx43 = getelementptr inbounds i32, i32* %41, i64 %idxprom42 - %43 = load i32, i32* %arrayidx43, align 4 - %add44 = add i32 %43, %40 - store i32 %add44, i32* %arrayidx43, align 4 - br label %if.end - -if.end: ; preds = %if.then, %for.body28 - %44 = load i32, i32* %offset, align 4 - %mul45 = mul i32 %44, 2 - store i32 %mul45, i32* %offset, align 4 - br label %for.inc46 - -for.inc46: ; preds = %if.end - %45 = load i32, i32* %d, align 4 - %shr47 = lshr i32 %45, 1 - store i32 %shr47, i32* %d, align 4 - br label %for.cond26 - -for.end48: ; preds = %for.cond26 - %46 = load i32, i32* %k, align 4 - %cmp49 = icmp eq i32 %46, 0 - br i1 %cmp49, label %if.then50, label %if.end55 - -if.then50: ; preds = %for.end48 - %47 = load i32*, i32** %as, align 8 - %call51 = call i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv() #2 - %sub52 = sub i32 %call51, 1 - %idxprom53 = zext i32 %sub52 to i64 - %arrayidx54 = getelementptr inbounds i32, i32* %47, i64 %idxprom53 - store i32 0, i32* %arrayidx54, align 4 - br label %if.end55 - -if.end55: ; preds = %if.then50, %for.end48 - store i32 1, i32* %d56, align 4 - br label %for.cond57 - -for.cond57: ; preds = %for.inc86, %if.end55 - %48 = load i32, i32* %d56, align 4 - %call58 = call i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv() #2 - %cmp59 = icmp ult i32 %48, %call58 - br i1 %cmp59, label %for.body60, label %for.end88 - -for.body60: ; preds = %for.cond57 - %49 = load i32, i32* %offset, align 4 - %shr61 = lshr i32 %49, 1 - store i32 %shr61, i32* %offset, align 4 - call void @llvm.nvvm.barrier0() - %50 = load i32, i32* %k, align 4 - %51 = load i32, i32* %d56, align 4 - %cmp62 = icmp ult i32 %50, %51 - br i1 %cmp62, label %if.then63, label %if.end85 - -if.then63: ; preds = %for.body60 - %52 = load i32, i32* %offset, align 4 - %53 = load i32, i32* %k, align 4 - %mul65 = mul i32 2, %53 - %add66 = add i32 %mul65, 1 - %mul67 = mul i32 %52, %add66 - %sub68 = sub i32 %mul67, 1 - %conv69 = trunc i32 %sub68 to i8 - store i8 %conv69, i8* %ai64, align 1 - %54 = load i32, i32* %offset, align 4 - %55 = load i32, i32* %k, align 4 - %mul71 = mul i32 2, %55 - %add72 = add i32 %mul71, 2 - %mul73 = mul i32 %54, %add72 - %sub74 = sub i32 %mul73, 1 - %conv75 = trunc i32 %sub74 to i8 - store i8 %conv75, i8* %bi70, align 1 - %56 = load i32*, i32** %as, align 8 - %57 = load i8, i8* %ai64, align 1 - %idxprom76 = zext i8 %57 to i64 - %arrayidx77 = getelementptr inbounds i32, i32* %56, i64 %idxprom76 - %58 = load i32, i32* %arrayidx77, align 4 - store i32 %58, i32* %t, align 4 - %59 = load i32*, i32** %as, align 8 - %60 = load i8, i8* %bi70, align 1 - %idxprom78 = zext i8 %60 to i64 - %arrayidx79 = getelementptr inbounds i32, i32* %59, i64 %idxprom78 - %61 = load i32, i32* %arrayidx79, align 4 - %62 = load i32*, i32** %as, align 8 - %63 = load i8, i8* %ai64, align 1 - %idxprom80 = zext i8 %63 to i64 - %arrayidx81 = getelementptr inbounds i32, i32* %62, i64 %idxprom80 - store i32 %61, i32* %arrayidx81, align 4 - %64 = load i32, i32* %t, align 4 - %65 = load i32*, i32** %as, align 8 - %66 = load i8, i8* %bi70, align 1 - %idxprom82 = zext i8 %66 to i64 - %arrayidx83 = getelementptr inbounds i32, i32* %65, i64 %idxprom82 - %67 = load i32, i32* %arrayidx83, align 4 - %add84 = add i32 %67, %64 - store i32 %add84, i32* %arrayidx83, align 4 - br label %if.end85 - -if.end85: ; preds = %if.then63, %for.body60 - br label %for.inc86 - -for.inc86: ; preds = %if.end85 - %68 = load i32, i32* %d56, align 4 - %mul87 = mul i32 %68, 2 - store i32 %mul87, i32* %d56, align 4 - br label %for.cond57 - -for.end88: ; preds = %for.cond57 - call void @llvm.nvvm.barrier0() - %69 = load i32, i32* %k, align 4 - %call89 = call i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv() #2 - %sub90 = sub i32 %call89, 1 - %cmp91 = icmp eq i32 %69, %sub90 - br i1 %cmp91, label %if.then92, label %if.end102 - -if.then92: ; preds = %for.end88 - %70 = load i32*, i32** %as, align 8 - %71 = load i32, i32* %k, align 4 - %idxprom93 = zext i32 %71 to i64 - %arrayidx94 = getelementptr inbounds i32, i32* %70, i64 %idxprom93 - %72 = load i32, i32* %arrayidx94, align 4 - %73 = load i32, i32* %codewordlen, align 4 - %add95 = add i32 %72, %73 - %74 = load i32*, i32** %outidx.addr, align 8 - %call96 = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #2 - %idxprom97 = zext i32 %call96 to i64 - %arrayidx98 = getelementptr inbounds i32, i32* %74, i64 %idxprom97 - store i32 %add95, i32* %arrayidx98, align 4 - %75 = load i32*, i32** %as, align 8 - %76 = load i32, i32* %k, align 4 - %idxprom99 = zext i32 %76 to i64 - %arrayidx100 = getelementptr inbounds i32, i32* %75, i64 %idxprom99 - %77 = load i32, i32* %arrayidx100, align 4 - %78 = load i32, i32* %codewordlen, align 4 - %add101 = add i32 %77, %78 - %div = udiv i32 %add101, 32 - store i32 %div, i32* addrspacecast (i32 addrspace(3)* @_ZZL26vlc_encode_kernel_sm64huffPjPKjS1_S_S_S_S_S_E5kcmax to i32*), align 4 - br label %if.end102 - -if.end102: ; preds = %if.then92, %for.end88 - %79 = load i32*, i32** %as, align 8 - %80 = load i32, i32* %k, align 4 - %idxprom103 = zext i32 %80 to i64 - %arrayidx104 = getelementptr inbounds i32, i32* %79, i64 %idxprom103 - %81 = load i32, i32* %arrayidx104, align 4 - %div105 = udiv i32 %81, 32 - store i32 %div105, i32* %kc, align 4 - %82 = load i32*, i32** %as, align 8 - %83 = load i32, i32* %k, align 4 - %idxprom106 = zext i32 %83 to i64 - %arrayidx107 = getelementptr inbounds i32, i32* %82, i64 %idxprom106 - %84 = load i32, i32* %arrayidx107, align 4 - %rem = urem i32 %84, 32 - store i32 %rem, i32* %startbit, align 4 - %85 = load i32*, i32** %as, align 8 - %86 = load i32, i32* %k, align 4 - %idxprom108 = zext i32 %86 to i64 - %arrayidx109 = getelementptr inbounds i32, i32* %85, i64 %idxprom108 - store i32 0, i32* %arrayidx109, align 4 - call void @llvm.nvvm.barrier0() - %87 = load i32, i32* %codewordlen, align 4 - %88 = load i32, i32* %startbit, align 4 - %sub110 = sub i32 32, %88 - %cmp111 = icmp ugt i32 %87, %sub110 - br i1 %cmp111, label %cond.true, label %cond.false - -cond.true: ; preds = %if.end102 - %89 = load i32, i32* %startbit, align 4 - %sub112 = sub i32 32, %89 - br label %cond.end - -cond.false: ; preds = %if.end102 - %90 = load i32, i32* %codewordlen, align 4 - br label %cond.end - -cond.end: ; preds = %cond.false, %cond.true - %cond = phi i32 [ %sub112, %cond.true ], [ %90, %cond.false ] - store i32 %cond, i32* %wrbits, align 4 - %91 = load i64, i64* %cw64, align 8 - %92 = load i32, i32* %codewordlen, align 4 - %93 = load i32, i32* %wrbits, align 4 - %sub113 = sub i32 %92, %93 - %sh_prom114 = zext i32 %sub113 to i64 - %shr115 = lshr i64 %91, %sh_prom114 - %conv116 = trunc i64 %shr115 to i32 - store i32 %conv116, i32* %tmpcw32, align 4 - %94 = load i32*, i32** %as, align 8 - %95 = load i32, i32* %kc, align 4 - %idxprom117 = zext i32 %95 to i64 - %arrayidx118 = getelementptr inbounds i32, i32* %94, i64 %idxprom117 - %96 = load i32, i32* %tmpcw32, align 4 - %97 = load i32, i32* %startbit, align 4 - %sub119 = sub i32 32, %97 - %98 = load i32, i32* %wrbits, align 4 - %sub120 = sub i32 %sub119, %98 - %shl121 = shl i32 %96, %sub120 - %call122 = call i32 @_ZL8atomicOrPjj(i32* %arrayidx118, i32 %shl121) #2 - %99 = load i32, i32* %wrbits, align 4 - %100 = load i32, i32* %codewordlen, align 4 - %sub123 = sub i32 %100, %99 - store i32 %sub123, i32* %codewordlen, align 4 - %101 = load i32, i32* %codewordlen, align 4 - %tobool = icmp ne i32 %101, 0 - br i1 %tobool, label %if.then124, label %if.end143 - -if.then124: ; preds = %cond.end - %102 = load i32, i32* %codewordlen, align 4 - %cmp125 = icmp ugt i32 %102, 32 - br i1 %cmp125, label %cond.true126, label %cond.false127 - -cond.true126: ; preds = %if.then124 - br label %cond.end128 - -cond.false127: ; preds = %if.then124 - %103 = load i32, i32* %codewordlen, align 4 - br label %cond.end128 - -cond.end128: ; preds = %cond.false127, %cond.true126 - %cond129 = phi i32 [ 32, %cond.true126 ], [ %103, %cond.false127 ] - store i32 %cond129, i32* %wrbits, align 4 - %104 = load i64, i64* %cw64, align 8 - %105 = load i32, i32* %codewordlen, align 4 - %106 = load i32, i32* %wrbits, align 4 - %sub130 = sub i32 %105, %106 - %sh_prom131 = zext i32 %sub130 to i64 - %shr132 = lshr i64 %104, %sh_prom131 - %conv133 = trunc i64 %shr132 to i32 - %107 = load i32, i32* %wrbits, align 4 - %shl134 = shl i32 1, %107 - %sub135 = sub nsw i32 %shl134, 1 - %and = and i32 %conv133, %sub135 - store i32 %and, i32* %tmpcw32, align 4 - %108 = load i32*, i32** %as, align 8 - %109 = load i32, i32* %kc, align 4 - %add136 = add i32 %109, 1 - %idxprom137 = zext i32 %add136 to i64 - %arrayidx138 = getelementptr inbounds i32, i32* %108, i64 %idxprom137 - %110 = load i32, i32* %tmpcw32, align 4 - %111 = load i32, i32* %wrbits, align 4 - %sub139 = sub i32 32, %111 - %shl140 = shl i32 %110, %sub139 - %call141 = call i32 @_ZL8atomicOrPjj(i32* %arrayidx138, i32 %shl140) #2 - %112 = load i32, i32* %wrbits, align 4 - %113 = load i32, i32* %codewordlen, align 4 - %sub142 = sub i32 %113, %112 - store i32 %sub142, i32* %codewordlen, align 4 - br label %if.end143 - -if.end143: ; preds = %cond.end128, %cond.end - %114 = load i32, i32* %codewordlen, align 4 - %tobool144 = icmp ne i32 %114, 0 - br i1 %tobool144, label %if.then145, label %if.end157 - -if.then145: ; preds = %if.end143 - %115 = load i64, i64* %cw64, align 8 - %116 = load i32, i32* %codewordlen, align 4 - %shl146 = shl i32 1, %116 - %sub147 = sub nsw i32 %shl146, 1 - %conv148 = sext i32 %sub147 to i64 - %and149 = and i64 %115, %conv148 - %conv150 = trunc i64 %and149 to i32 - store i32 %conv150, i32* %tmpcw32, align 4 - %117 = load i32*, i32** %as, align 8 - %118 = load i32, i32* %kc, align 4 - %add151 = add i32 %118, 2 - %idxprom152 = zext i32 %add151 to i64 - %arrayidx153 = getelementptr inbounds i32, i32* %117, i64 %idxprom152 - %119 = load i32, i32* %tmpcw32, align 4 - %120 = load i32, i32* %codewordlen, align 4 - %sub154 = sub i32 32, %120 - %shl155 = shl i32 %119, %sub154 - %call156 = call i32 @_ZL8atomicOrPjj(i32* %arrayidx153, i32 %shl155) #2 - br label %if.end157 - -if.end157: ; preds = %if.then145, %if.end143 - call void @llvm.nvvm.barrier0() - %121 = load i32, i32* %k, align 4 - %122 = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZL26vlc_encode_kernel_sm64huffPjPKjS1_S_S_S_S_S_E5kcmax to i32*), align 4 - %cmp158 = icmp ule i32 %121, %122 - br i1 %cmp158, label %if.then159, label %if.end164 - -if.then159: ; preds = %if.end157 - %123 = load i32*, i32** %as, align 8 - %124 = load i32, i32* %k, align 4 - %idxprom160 = zext i32 %124 to i64 - %arrayidx161 = getelementptr inbounds i32, i32* %123, i64 %idxprom160 - %125 = load i32, i32* %arrayidx161, align 4 - %126 = load i32*, i32** %out.addr, align 8 - %127 = load i32, i32* %kn, align 4 - %idxprom162 = zext i32 %127 to i64 - %arrayidx163 = getelementptr inbounds i32, i32* %126, i64 %idxprom162 - store i32 %125, i32* %arrayidx163, align 4 - br label %if.end164 - -if.end164: ; preds = %if.then159, %if.end157 - ret void -} - -; Function Attrs: convergent noinline nounwind optnone -define internal i32 @_ZL8atomicOrPjj(i32* %address, i32 %val) #0 { -entry: - %address.addr = alloca i32*, align 8 - %val.addr = alloca i32, align 4 - store i32* %address, i32** %address.addr, align 8 - store i32 %val, i32* %val.addr, align 4 - %0 = load i32*, i32** %address.addr, align 8 - %1 = load i32, i32* %val.addr, align 4 - %call = call i32 @_ZL11__uAtomicOrPjj(i32* %0, i32 %1) #2 - ret i32 %call -} - -; Function Attrs: convergent noinline nounwind optnone -define dso_local void @_ZL5pack2PjS_S_S_j(i32* %srcData, i32* %cindex, i32* %cindex2, i32* %dstData, i32 %original_num_block_elements) #0 { -entry: - %srcData.addr = alloca i32*, align 8 - %cindex.addr = alloca i32*, align 8 - %cindex2.addr = alloca i32*, align 8 - %dstData.addr = alloca i32*, align 8 - %original_num_block_elements.addr = alloca i32, align 4 - %tid = alloca i32, align 4 - %offset = alloca i32, align 4 - %bitsize = alloca i32, align 4 - %pos = alloca i32, align 4 - %dword = alloca i32, align 4 - %bit = alloca i32, align 4 - %i = alloca i32, align 4 - %dw = alloca i32, align 4 - %tmp = alloca i32, align 4 - store i32* %srcData, i32** %srcData.addr, align 8 - store i32* %cindex, i32** %cindex.addr, align 8 - store i32* %cindex2, i32** %cindex2.addr, align 8 - store i32* %dstData, i32** %dstData.addr, align 8 - store i32 %original_num_block_elements, i32* %original_num_block_elements.addr, align 4 - %call = call i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv() #2 - %call1 = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #2 - %mul = mul i32 %call, %call1 - %call2 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - %add = add i32 %mul, %call2 - store i32 %add, i32* %tid, align 4 - %0 = load i32, i32* %tid, align 4 - %1 = load i32, i32* %original_num_block_elements.addr, align 4 - %mul3 = mul i32 %0, %1 - store i32 %mul3, i32* %offset, align 4 - %2 = load i32*, i32** %cindex.addr, align 8 - %3 = load i32, i32* %tid, align 4 - %idxprom = zext i32 %3 to i64 - %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom - %4 = load i32, i32* %arrayidx, align 4 - store i32 %4, i32* %bitsize, align 4 - %5 = load i32*, i32** %cindex2.addr, align 8 - %6 = load i32, i32* %tid, align 4 - %idxprom4 = zext i32 %6 to i64 - %arrayidx5 = getelementptr inbounds i32, i32* %5, i64 %idxprom4 - %7 = load i32, i32* %arrayidx5, align 4 - store i32 %7, i32* %pos, align 4 - %8 = load i32, i32* %pos, align 4 - %div = udiv i32 %8, 32 - store i32 %div, i32* %dword, align 4 - %9 = load i32, i32* %pos, align 4 - %rem = urem i32 %9, 32 - store i32 %rem, i32* %bit, align 4 - %10 = load i32*, i32** %srcData.addr, align 8 - %11 = load i32, i32* %offset, align 4 - %idxprom6 = zext i32 %11 to i64 - %arrayidx7 = getelementptr inbounds i32, i32* %10, i64 %idxprom6 - %12 = load i32, i32* %arrayidx7, align 4 - store i32 %12, i32* %dw, align 4 - %13 = load i32, i32* %dw, align 4 - %14 = load i32, i32* %bit, align 4 - %shr = lshr i32 %13, %14 - store i32 %shr, i32* %tmp, align 4 - %15 = load i32*, i32** %dstData.addr, align 8 - %16 = load i32, i32* %dword, align 4 - %idxprom8 = zext i32 %16 to i64 - %arrayidx9 = getelementptr inbounds i32, i32* %15, i64 %idxprom8 - %17 = load i32, i32* %tmp, align 4 - %call10 = call i32 @_ZL8atomicOrPjj(i32* %arrayidx9, i32 %17) #2 - %18 = load i32, i32* %bit, align 4 - %cmp = icmp eq i32 %18, 0 - br i1 %cmp, label %cond.true, label %cond.false - -cond.true: ; preds = %entry - br label %cond.end - -cond.false: ; preds = %entry - %19 = load i32, i32* %dw, align 4 - %20 = load i32, i32* %bit, align 4 - %sub = sub i32 32, %20 - %shl = shl i32 %19, %sub - br label %cond.end - -cond.end: ; preds = %cond.false, %cond.true - %cond = phi i32 [ 0, %cond.true ], [ %shl, %cond.false ] - store i32 %cond, i32* %tmp, align 4 - store i32 1, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %cond.end - %21 = load i32, i32* %i, align 4 - %22 = load i32, i32* %bitsize, align 4 - %div11 = udiv i32 %22, 32 - %cmp12 = icmp ult i32 %21, %div11 - br i1 %cmp12, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %23 = load i32*, i32** %srcData.addr, align 8 - %24 = load i32, i32* %offset, align 4 - %25 = load i32, i32* %i, align 4 - %add13 = add i32 %24, %25 - %idxprom14 = zext i32 %add13 to i64 - %arrayidx15 = getelementptr inbounds i32, i32* %23, i64 %idxprom14 - %26 = load i32, i32* %arrayidx15, align 4 - store i32 %26, i32* %dw, align 4 - %27 = load i32, i32* %dw, align 4 - %28 = load i32, i32* %bit, align 4 - %shr16 = lshr i32 %27, %28 - %29 = load i32, i32* %tmp, align 4 - %or = or i32 %29, %shr16 - store i32 %or, i32* %tmp, align 4 - %30 = load i32, i32* %tmp, align 4 - %31 = load i32*, i32** %dstData.addr, align 8 - %32 = load i32, i32* %dword, align 4 - %33 = load i32, i32* %i, align 4 - %add17 = add i32 %32, %33 - %idxprom18 = zext i32 %add17 to i64 - %arrayidx19 = getelementptr inbounds i32, i32* %31, i64 %idxprom18 - store i32 %30, i32* %arrayidx19, align 4 - %34 = load i32, i32* %bit, align 4 - %cmp20 = icmp eq i32 %34, 0 - br i1 %cmp20, label %cond.true21, label %cond.false22 - -cond.true21: ; preds = %for.body - br label %cond.end25 - -cond.false22: ; preds = %for.body - %35 = load i32, i32* %dw, align 4 - %36 = load i32, i32* %bit, align 4 - %sub23 = sub i32 32, %36 - %shl24 = shl i32 %35, %sub23 - br label %cond.end25 - -cond.end25: ; preds = %cond.false22, %cond.true21 - %cond26 = phi i32 [ 0, %cond.true21 ], [ %shl24, %cond.false22 ] - store i32 %cond26, i32* %tmp, align 4 - br label %for.inc - -for.inc: ; preds = %cond.end25 - %37 = load i32, i32* %i, align 4 - %inc = add i32 %37, 1 - store i32 %inc, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %38 = load i32, i32* %bit, align 4 - %cmp27 = icmp ne i32 %38, 0 - br i1 %cmp27, label %if.then, label %lor.lhs.false - -lor.lhs.false: ; preds = %for.end - %39 = load i32, i32* %bitsize, align 4 - %rem28 = urem i32 %39, 32 - %cmp29 = icmp ne i32 %rem28, 0 - br i1 %cmp29, label %if.then, label %if.end - -if.then: ; preds = %lor.lhs.false, %for.end - %40 = load i32*, i32** %dstData.addr, align 8 - %41 = load i32, i32* %dword, align 4 - %42 = load i32, i32* %i, align 4 - %add30 = add i32 %41, %42 - %idxprom31 = zext i32 %add30 to i64 - %arrayidx32 = getelementptr inbounds i32, i32* %40, i64 %idxprom31 - %43 = load i32, i32* %tmp, align 4 - %call33 = call i32 @_ZL8atomicOrPjj(i32* %arrayidx32, i32 %43) #2 - br label %if.end - -if.end: ; preds = %if.then, %lor.lhs.false - %44 = load i32, i32* %bitsize, align 4 - %rem34 = urem i32 %44, 32 - %cmp35 = icmp ne i32 %rem34, 0 - br i1 %cmp35, label %if.then36, label %if.end57 - -if.then36: ; preds = %if.end - %45 = load i32*, i32** %srcData.addr, align 8 - %46 = load i32, i32* %offset, align 4 - %47 = load i32, i32* %i, align 4 - %add37 = add i32 %46, %47 - %idxprom38 = zext i32 %add37 to i64 - %arrayidx39 = getelementptr inbounds i32, i32* %45, i64 %idxprom38 - %48 = load i32, i32* %arrayidx39, align 4 - store i32 %48, i32* %dw, align 4 - %49 = load i32*, i32** %dstData.addr, align 8 - %50 = load i32, i32* %dword, align 4 - %51 = load i32, i32* %i, align 4 - %add40 = add i32 %50, %51 - %idxprom41 = zext i32 %add40 to i64 - %arrayidx42 = getelementptr inbounds i32, i32* %49, i64 %idxprom41 - %52 = load i32, i32* %dw, align 4 - %53 = load i32, i32* %bit, align 4 - %shr43 = lshr i32 %52, %53 - %call44 = call i32 @_ZL8atomicOrPjj(i32* %arrayidx42, i32 %shr43) #2 - %54 = load i32*, i32** %dstData.addr, align 8 - %55 = load i32, i32* %dword, align 4 - %56 = load i32, i32* %i, align 4 - %add45 = add i32 %55, %56 - %add46 = add i32 %add45, 1 - %idxprom47 = zext i32 %add46 to i64 - %arrayidx48 = getelementptr inbounds i32, i32* %54, i64 %idxprom47 - %57 = load i32, i32* %bit, align 4 - %cmp49 = icmp eq i32 %57, 0 - br i1 %cmp49, label %cond.true50, label %cond.false51 - -cond.true50: ; preds = %if.then36 - br label %cond.end54 - -cond.false51: ; preds = %if.then36 - %58 = load i32, i32* %dw, align 4 - %59 = load i32, i32* %bit, align 4 - %sub52 = sub i32 32, %59 - %shl53 = shl i32 %58, %sub52 - br label %cond.end54 - -cond.end54: ; preds = %cond.false51, %cond.true50 - %cond55 = phi i32 [ 0, %cond.true50 ], [ %shl53, %cond.false51 ] - %call56 = call i32 @_ZL8atomicOrPjj(i32* %arrayidx48, i32 %cond55) #2 - br label %if.end57 - -if.end57: ; preds = %cond.end54, %if.end - ret void -} - -; Function Attrs: convergent noinline nounwind optnone -define dso_local void @_ZL10uniformAddPjS_iii(i32* %g_data, i32* %uniforms, i32 %n, i32 %blockOffset, i32 %baseIndex) #0 { -entry: - %g_data.addr = alloca i32*, align 8 - %uniforms.addr = alloca i32*, align 8 - %n.addr = alloca i32, align 4 - %blockOffset.addr = alloca i32, align 4 - %baseIndex.addr = alloca i32, align 4 - %address = alloca i32, align 4 - store i32* %g_data, i32** %g_data.addr, align 8 - store i32* %uniforms, i32** %uniforms.addr, align 8 - store i32 %n, i32* %n.addr, align 4 - store i32 %blockOffset, i32* %blockOffset.addr, align 4 - store i32 %baseIndex, i32* %baseIndex.addr, align 4 - %call = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - %cmp = icmp eq i32 %call, 0 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - %0 = load i32*, i32** %uniforms.addr, align 8 - %call1 = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #2 - %1 = load i32, i32* %blockOffset.addr, align 4 - %add = add i32 %call1, %1 - %idxprom = zext i32 %add to i64 - %arrayidx = getelementptr inbounds i32, i32* %0, i64 %idxprom - %2 = load i32, i32* %arrayidx, align 4 - store i32 %2, i32* addrspacecast (i32 addrspace(3)* @_ZZL10uniformAddPjS_iiiE3uni to i32*), align 4 - br label %if.end - -if.end: ; preds = %if.then, %entry - %call2 = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #2 - %call3 = call i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv() #2 - %shl = shl i32 %call3, 1 - %call4 = call i32 @_ZL7__mul24ii(i32 %call2, i32 %shl) #2 - %3 = load i32, i32* %baseIndex.addr, align 4 - %add5 = add nsw i32 %call4, %3 - %call6 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - %add7 = add i32 %add5, %call6 - store i32 %add7, i32* %address, align 4 - call void @llvm.nvvm.barrier0() - %4 = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZL10uniformAddPjS_iiiE3uni to i32*), align 4 - %5 = load i32*, i32** %g_data.addr, align 8 - %6 = load i32, i32* %address, align 4 - %idxprom8 = zext i32 %6 to i64 - %arrayidx9 = getelementptr inbounds i32, i32* %5, i64 %idxprom8 - %7 = load i32, i32* %arrayidx9, align 4 - %add10 = add i32 %7, %4 - store i32 %add10, i32* %arrayidx9, align 4 - %call11 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - %call12 = call i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv() #2 - %add13 = add i32 %call11, %call12 - %8 = load i32, i32* %n.addr, align 4 - %cmp14 = icmp ult i32 %add13, %8 - %conv = zext i1 %cmp14 to i32 - %9 = load i32, i32* addrspacecast (i32 addrspace(3)* @_ZZL10uniformAddPjS_iiiE3uni to i32*), align 4 - %mul = mul i32 %conv, %9 - %10 = load i32*, i32** %g_data.addr, align 8 - %11 = load i32, i32* %address, align 4 - %call15 = call i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv() #2 - %add16 = add i32 %11, %call15 - %idxprom17 = zext i32 %add16 to i64 - %arrayidx18 = getelementptr inbounds i32, i32* %10, i64 %idxprom17 - %12 = load i32, i32* %arrayidx18, align 4 - %add19 = add i32 %12, %mul - store i32 %add19, i32* %arrayidx18, align 4 - ret void -} - -; Function Attrs: alwaysinline convergent nounwind -define internal i32 @_ZL7__mul24ii(i32 %__a, i32 %__b) #1 { -entry: - %__a.addr = alloca i32, align 4 - %__b.addr = alloca i32, align 4 - store i32 %__a, i32* %__a.addr, align 4 - store i32 %__b, i32* %__b.addr, align 4 - %0 = load i32, i32* %__a.addr, align 4 - %1 = load i32, i32* %__b.addr, align 4 - %call = call i32 @__nv_mul24(i32 %0, i32 %1) #2 - ret i32 %call -} - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() #3 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() #3 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x() #3 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.x() #3 - -; Function Attrs: alwaysinline convergent nounwind -define internal i32 @_ZL12__uAtomicAddPjj(i32* %__p, i32 %__v) #1 { -entry: - %__p.addr = alloca i32*, align 8 - %__v.addr = alloca i32, align 4 - store i32* %__p, i32** %__p.addr, align 8 - store i32 %__v, i32* %__v.addr, align 4 - %0 = load i32*, i32** %__p.addr, align 8 - %1 = load i32, i32* %__v.addr, align 4 - %2 = atomicrmw add i32* %0, i32 %1 seq_cst - ret i32 %2 -} - -; Function Attrs: alwaysinline convergent nounwind -define internal i32 @_ZL11__uAtomicOrPjj(i32* %__p, i32 %__v) #1 { -entry: - %__p.addr = alloca i32*, align 8 - %__v.addr = alloca i32, align 4 - store i32* %__p, i32** %__p.addr, align 8 - store i32 %__v, i32* %__v.addr, align 4 - %0 = load i32*, i32** %__p.addr, align 8 - %1 = load i32, i32* %__v.addr, align 4 - %2 = atomicrmw or i32* %0, i32 %1 seq_cst - ret i32 %2 -} - -; Function Attrs: convergent noinline nounwind optnone -define dso_local void @_ZL7prescanILb1ELb0EEvPjPKjS0_iii(i32* %g_odata, i32* %g_idata, i32* %g_blockSums, i32 %n, i32 %blockIndex, i32 %baseIndex) #0 { -entry: - %g_odata.addr = alloca i32*, align 8 - %g_idata.addr = alloca i32*, align 8 - %g_blockSums.addr = alloca i32*, align 8 - %n.addr = alloca i32, align 4 - %blockIndex.addr = alloca i32, align 4 - %baseIndex.addr = alloca i32, align 4 - %ai = alloca i32, align 4 - %bi = alloca i32, align 4 - %mem_ai = alloca i32, align 4 - %mem_bi = alloca i32, align 4 - %bankOffsetA = alloca i32, align 4 - %bankOffsetB = alloca i32, align 4 - store i32* %g_odata, i32** %g_odata.addr, align 8 - store i32* %g_idata, i32** %g_idata.addr, align 8 - store i32* %g_blockSums, i32** %g_blockSums.addr, align 8 - store i32 %n, i32* %n.addr, align 4 - store i32 %blockIndex, i32* %blockIndex.addr, align 4 - store i32 %baseIndex, i32* %baseIndex.addr, align 4 - %0 = load i32*, i32** %g_idata.addr, align 8 - %1 = load i32, i32* %n.addr, align 4 - %2 = load i32, i32* %baseIndex.addr, align 4 - %cmp = icmp eq i32 %2, 0 - br i1 %cmp, label %cond.true, label %cond.false - -cond.true: ; preds = %entry - %call = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #2 - %call1 = call i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv() #2 - %shl = shl i32 %call1, 1 - %call2 = call i32 @_ZL7__mul24ii(i32 %call, i32 %shl) #2 - br label %cond.end - -cond.false: ; preds = %entry - %3 = load i32, i32* %baseIndex.addr, align 4 - br label %cond.end - -cond.end: ; preds = %cond.false, %cond.true - %cond = phi i32 [ %call2, %cond.true ], [ %3, %cond.false ] - call void @_ZL22loadSharedChunkFromMemILb0EEvPjPKjiiRiS3_S3_S3_S3_S3_(i32* getelementptr inbounds ([3072 x i32], [3072 x i32]* addrspacecast ([3072 x i32] addrspace(3)* @_ZZL7prescanILb1ELb0EEvPjPKjS0_iiiE6s_data to [3072 x i32]*), i64 0, i64 0), i32* %0, i32 %1, i32 %cond, i32* dereferenceable(4) %ai, i32* dereferenceable(4) %bi, i32* dereferenceable(4) %mem_ai, i32* dereferenceable(4) %mem_bi, i32* dereferenceable(4) %bankOffsetA, i32* dereferenceable(4) %bankOffsetB) #2 - %4 = load i32, i32* %blockIndex.addr, align 4 - %5 = load i32*, i32** %g_blockSums.addr, align 8 - call void @_ZL12prescanBlockILb1EEvPjiS0_(i32* getelementptr inbounds ([3072 x i32], [3072 x i32]* addrspacecast ([3072 x i32] addrspace(3)* @_ZZL7prescanILb1ELb0EEvPjPKjS0_iiiE6s_data to [3072 x i32]*), i64 0, i64 0), i32 %4, i32* %5) #2 - %6 = load i32*, i32** %g_odata.addr, align 8 - %7 = load i32, i32* %n.addr, align 4 - %8 = load i32, i32* %ai, align 4 - %9 = load i32, i32* %bi, align 4 - %10 = load i32, i32* %mem_ai, align 4 - %11 = load i32, i32* %mem_bi, align 4 - %12 = load i32, i32* %bankOffsetA, align 4 - %13 = load i32, i32* %bankOffsetB, align 4 - call void @_ZL21storeSharedChunkToMemILb0EEvPjPKjiiiiiii(i32* %6, i32* getelementptr inbounds ([3072 x i32], [3072 x i32]* addrspacecast ([3072 x i32] addrspace(3)* @_ZZL7prescanILb1ELb0EEvPjPKjS0_iiiE6s_data to [3072 x i32]*), i64 0, i64 0), i32 %7, i32 %8, i32 %9, i32 %10, i32 %11, i32 %12, i32 %13) #2 - ret void -} - -; Function Attrs: convergent noinline nounwind optnone -define internal void @_ZL22loadSharedChunkFromMemILb0EEvPjPKjiiRiS3_S3_S3_S3_S3_(i32* %s_data, i32* %g_idata, i32 %n, i32 %baseIndex, i32* dereferenceable(4) %ai, i32* dereferenceable(4) %bi, i32* dereferenceable(4) %mem_ai, i32* dereferenceable(4) %mem_bi, i32* dereferenceable(4) %bankOffsetA, i32* dereferenceable(4) %bankOffsetB) #0 { -entry: - %s_data.addr = alloca i32*, align 8 - %g_idata.addr = alloca i32*, align 8 - %n.addr = alloca i32, align 4 - %baseIndex.addr = alloca i32, align 4 - %ai.addr = alloca i32*, align 8 - %bi.addr = alloca i32*, align 8 - %mem_ai.addr = alloca i32*, align 8 - %mem_bi.addr = alloca i32*, align 8 - %bankOffsetA.addr = alloca i32*, align 8 - %bankOffsetB.addr = alloca i32*, align 8 - %thid = alloca i32, align 4 - store i32* %s_data, i32** %s_data.addr, align 8 - store i32* %g_idata, i32** %g_idata.addr, align 8 - store i32 %n, i32* %n.addr, align 4 - store i32 %baseIndex, i32* %baseIndex.addr, align 4 - store i32* %ai, i32** %ai.addr, align 8 - store i32* %bi, i32** %bi.addr, align 8 - store i32* %mem_ai, i32** %mem_ai.addr, align 8 - store i32* %mem_bi, i32** %mem_bi.addr, align 8 - store i32* %bankOffsetA, i32** %bankOffsetA.addr, align 8 - store i32* %bankOffsetB, i32** %bankOffsetB.addr, align 8 - %call = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - store i32 %call, i32* %thid, align 4 - %0 = load i32, i32* %baseIndex.addr, align 4 - %call1 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - %add = add i32 %0, %call1 - %1 = load i32*, i32** %mem_ai.addr, align 8 - store i32 %add, i32* %1, align 4 - %2 = load i32*, i32** %mem_ai.addr, align 8 - %3 = load i32, i32* %2, align 4 - %call2 = call i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv() #2 - %add3 = add i32 %3, %call2 - %4 = load i32*, i32** %mem_bi.addr, align 8 - store i32 %add3, i32* %4, align 4 - %5 = load i32, i32* %thid, align 4 - %6 = load i32*, i32** %ai.addr, align 8 - store i32 %5, i32* %6, align 4 - %7 = load i32, i32* %thid, align 4 - %call4 = call i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv() #2 - %add5 = add i32 %7, %call4 - %8 = load i32*, i32** %bi.addr, align 8 - store i32 %add5, i32* %8, align 4 - %9 = load i32*, i32** %ai.addr, align 8 - %10 = load i32, i32* %9, align 4 - %shr = ashr i32 %10, 4 - %11 = load i32*, i32** %bankOffsetA.addr, align 8 - store i32 %shr, i32* %11, align 4 - %12 = load i32*, i32** %bi.addr, align 8 - %13 = load i32, i32* %12, align 4 - %shr6 = ashr i32 %13, 4 - %14 = load i32*, i32** %bankOffsetB.addr, align 8 - store i32 %shr6, i32* %14, align 4 - %15 = load i32*, i32** %g_idata.addr, align 8 - %16 = load i32*, i32** %mem_ai.addr, align 8 - %17 = load i32, i32* %16, align 4 - %idxprom = sext i32 %17 to i64 - %arrayidx = getelementptr inbounds i32, i32* %15, i64 %idxprom - %18 = load i32, i32* %arrayidx, align 4 - %19 = load i32*, i32** %s_data.addr, align 8 - %20 = load i32*, i32** %ai.addr, align 8 - %21 = load i32, i32* %20, align 4 - %22 = load i32*, i32** %bankOffsetA.addr, align 8 - %23 = load i32, i32* %22, align 4 - %add7 = add nsw i32 %21, %23 - %idxprom8 = sext i32 %add7 to i64 - %arrayidx9 = getelementptr inbounds i32, i32* %19, i64 %idxprom8 - store i32 %18, i32* %arrayidx9, align 4 - %24 = load i32*, i32** %g_idata.addr, align 8 - %25 = load i32*, i32** %mem_bi.addr, align 8 - %26 = load i32, i32* %25, align 4 - %idxprom10 = sext i32 %26 to i64 - %arrayidx11 = getelementptr inbounds i32, i32* %24, i64 %idxprom10 - %27 = load i32, i32* %arrayidx11, align 4 - %28 = load i32*, i32** %s_data.addr, align 8 - %29 = load i32*, i32** %bi.addr, align 8 - %30 = load i32, i32* %29, align 4 - %31 = load i32*, i32** %bankOffsetB.addr, align 8 - %32 = load i32, i32* %31, align 4 - %add12 = add nsw i32 %30, %32 - %idxprom13 = sext i32 %add12 to i64 - %arrayidx14 = getelementptr inbounds i32, i32* %28, i64 %idxprom13 - store i32 %27, i32* %arrayidx14, align 4 - ret void -} - -; Function Attrs: convergent noinline nounwind optnone -define internal void @_ZL12prescanBlockILb1EEvPjiS0_(i32* %data, i32 %blockIndex, i32* %blockSums) #0 { -entry: - %data.addr = alloca i32*, align 8 - %blockIndex.addr = alloca i32, align 4 - %blockSums.addr = alloca i32*, align 8 - %stride = alloca i32, align 4 - store i32* %data, i32** %data.addr, align 8 - store i32 %blockIndex, i32* %blockIndex.addr, align 4 - store i32* %blockSums, i32** %blockSums.addr, align 8 - %0 = load i32*, i32** %data.addr, align 8 - %call = call i32 @_ZL8buildSumPj(i32* %0) #2 - store i32 %call, i32* %stride, align 4 - %1 = load i32*, i32** %data.addr, align 8 - %2 = load i32*, i32** %blockSums.addr, align 8 - %3 = load i32, i32* %blockIndex.addr, align 4 - %cmp = icmp eq i32 %3, 0 - br i1 %cmp, label %cond.true, label %cond.false - -cond.true: ; preds = %entry - %call1 = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #2 - br label %cond.end - -cond.false: ; preds = %entry - %4 = load i32, i32* %blockIndex.addr, align 4 - br label %cond.end - -cond.end: ; preds = %cond.false, %cond.true - %cond = phi i32 [ %call1, %cond.true ], [ %4, %cond.false ] - call void @_ZL16clearLastElementILb1EEvPjS0_i(i32* %1, i32* %2, i32 %cond) #2 - %5 = load i32*, i32** %data.addr, align 8 - %6 = load i32, i32* %stride, align 4 - call void @_ZL16scanRootToLeavesPjj(i32* %5, i32 %6) #2 - ret void -} - -; Function Attrs: convergent noinline nounwind optnone -define internal void @_ZL21storeSharedChunkToMemILb0EEvPjPKjiiiiiii(i32* %g_odata, i32* %s_data, i32 %n, i32 %ai, i32 %bi, i32 %mem_ai, i32 %mem_bi, i32 %bankOffsetA, i32 %bankOffsetB) #0 { -entry: - %g_odata.addr = alloca i32*, align 8 - %s_data.addr = alloca i32*, align 8 - %n.addr = alloca i32, align 4 - %ai.addr = alloca i32, align 4 - %bi.addr = alloca i32, align 4 - %mem_ai.addr = alloca i32, align 4 - %mem_bi.addr = alloca i32, align 4 - %bankOffsetA.addr = alloca i32, align 4 - %bankOffsetB.addr = alloca i32, align 4 - store i32* %g_odata, i32** %g_odata.addr, align 8 - store i32* %s_data, i32** %s_data.addr, align 8 - store i32 %n, i32* %n.addr, align 4 - store i32 %ai, i32* %ai.addr, align 4 - store i32 %bi, i32* %bi.addr, align 4 - store i32 %mem_ai, i32* %mem_ai.addr, align 4 - store i32 %mem_bi, i32* %mem_bi.addr, align 4 - store i32 %bankOffsetA, i32* %bankOffsetA.addr, align 4 - store i32 %bankOffsetB, i32* %bankOffsetB.addr, align 4 - call void @llvm.nvvm.barrier0() - %0 = load i32*, i32** %s_data.addr, align 8 - %1 = load i32, i32* %ai.addr, align 4 - %2 = load i32, i32* %bankOffsetA.addr, align 4 - %add = add nsw i32 %1, %2 - %idxprom = sext i32 %add to i64 - %arrayidx = getelementptr inbounds i32, i32* %0, i64 %idxprom - %3 = load i32, i32* %arrayidx, align 4 - %4 = load i32*, i32** %g_odata.addr, align 8 - %5 = load i32, i32* %mem_ai.addr, align 4 - %idxprom1 = sext i32 %5 to i64 - %arrayidx2 = getelementptr inbounds i32, i32* %4, i64 %idxprom1 - store i32 %3, i32* %arrayidx2, align 4 - %6 = load i32*, i32** %s_data.addr, align 8 - %7 = load i32, i32* %bi.addr, align 4 - %8 = load i32, i32* %bankOffsetB.addr, align 4 - %add3 = add nsw i32 %7, %8 - %idxprom4 = sext i32 %add3 to i64 - %arrayidx5 = getelementptr inbounds i32, i32* %6, i64 %idxprom4 - %9 = load i32, i32* %arrayidx5, align 4 - %10 = load i32*, i32** %g_odata.addr, align 8 - %11 = load i32, i32* %mem_bi.addr, align 4 - %idxprom6 = sext i32 %11 to i64 - %arrayidx7 = getelementptr inbounds i32, i32* %10, i64 %idxprom6 - store i32 %9, i32* %arrayidx7, align 4 - ret void -} - -; Function Attrs: convergent noinline nounwind optnone -define internal i32 @_ZL8buildSumPj(i32* %s_data) #0 { -entry: - %s_data.addr = alloca i32*, align 8 - %thid = alloca i32, align 4 - %stride = alloca i32, align 4 - %d = alloca i32, align 4 - %i = alloca i32, align 4 - %ai = alloca i32, align 4 - %bi = alloca i32, align 4 - store i32* %s_data, i32** %s_data.addr, align 8 - %call = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - store i32 %call, i32* %thid, align 4 - store i32 1, i32* %stride, align 4 - %call1 = call i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv() #2 - store i32 %call1, i32* %d, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = load i32, i32* %d, align 4 - %cmp = icmp sgt i32 %0, 0 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - call void @llvm.nvvm.barrier0() - %1 = load i32, i32* %thid, align 4 - %2 = load i32, i32* %d, align 4 - %cmp2 = icmp ult i32 %1, %2 - br i1 %cmp2, label %if.then, label %if.end - -if.then: ; preds = %for.body - %3 = load i32, i32* %stride, align 4 - %call3 = call i32 @_ZL7__mul24ii(i32 2, i32 %3) #2 - %4 = load i32, i32* %thid, align 4 - %call4 = call i32 @_ZL7__mul24ii(i32 %call3, i32 %4) #2 - store i32 %call4, i32* %i, align 4 - %5 = load i32, i32* %i, align 4 - %6 = load i32, i32* %stride, align 4 - %add = add i32 %5, %6 - %sub = sub i32 %add, 1 - store i32 %sub, i32* %ai, align 4 - %7 = load i32, i32* %ai, align 4 - %8 = load i32, i32* %stride, align 4 - %add5 = add i32 %7, %8 - store i32 %add5, i32* %bi, align 4 - %9 = load i32, i32* %ai, align 4 - %shr = ashr i32 %9, 4 - %10 = load i32, i32* %ai, align 4 - %add6 = add nsw i32 %10, %shr - store i32 %add6, i32* %ai, align 4 - %11 = load i32, i32* %bi, align 4 - %shr7 = ashr i32 %11, 4 - %12 = load i32, i32* %bi, align 4 - %add8 = add nsw i32 %12, %shr7 - store i32 %add8, i32* %bi, align 4 - %13 = load i32*, i32** %s_data.addr, align 8 - %14 = load i32, i32* %ai, align 4 - %idxprom = sext i32 %14 to i64 - %arrayidx = getelementptr inbounds i32, i32* %13, i64 %idxprom - %15 = load i32, i32* %arrayidx, align 4 - %16 = load i32*, i32** %s_data.addr, align 8 - %17 = load i32, i32* %bi, align 4 - %idxprom9 = sext i32 %17 to i64 - %arrayidx10 = getelementptr inbounds i32, i32* %16, i64 %idxprom9 - %18 = load i32, i32* %arrayidx10, align 4 - %add11 = add i32 %18, %15 - store i32 %add11, i32* %arrayidx10, align 4 - br label %if.end - -if.end: ; preds = %if.then, %for.body - %19 = load i32, i32* %stride, align 4 - %mul = mul i32 %19, 2 - store i32 %mul, i32* %stride, align 4 - br label %for.inc - -for.inc: ; preds = %if.end - %20 = load i32, i32* %d, align 4 - %shr12 = ashr i32 %20, 1 - store i32 %shr12, i32* %d, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %21 = load i32, i32* %stride, align 4 - ret i32 %21 -} - -; Function Attrs: convergent noinline nounwind optnone -define internal void @_ZL16clearLastElementILb1EEvPjS0_i(i32* %s_data, i32* %g_blockSums, i32 %blockIndex) #0 { -entry: - %s_data.addr = alloca i32*, align 8 - %g_blockSums.addr = alloca i32*, align 8 - %blockIndex.addr = alloca i32, align 4 - %index = alloca i32, align 4 - store i32* %s_data, i32** %s_data.addr, align 8 - store i32* %g_blockSums, i32** %g_blockSums.addr, align 8 - store i32 %blockIndex, i32* %blockIndex.addr, align 4 - %call = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - %cmp = icmp eq i32 %call, 0 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - %call1 = call i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv() #2 - %shl = shl i32 %call1, 1 - %sub = sub i32 %shl, 1 - store i32 %sub, i32* %index, align 4 - %0 = load i32, i32* %index, align 4 - %shr = ashr i32 %0, 4 - %1 = load i32, i32* %index, align 4 - %add = add nsw i32 %1, %shr - store i32 %add, i32* %index, align 4 - %2 = load i32*, i32** %s_data.addr, align 8 - %3 = load i32, i32* %index, align 4 - %idxprom = sext i32 %3 to i64 - %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom - %4 = load i32, i32* %arrayidx, align 4 - %5 = load i32*, i32** %g_blockSums.addr, align 8 - %6 = load i32, i32* %blockIndex.addr, align 4 - %idxprom2 = sext i32 %6 to i64 - %arrayidx3 = getelementptr inbounds i32, i32* %5, i64 %idxprom2 - store i32 %4, i32* %arrayidx3, align 4 - %7 = load i32*, i32** %s_data.addr, align 8 - %8 = load i32, i32* %index, align 4 - %idxprom4 = sext i32 %8 to i64 - %arrayidx5 = getelementptr inbounds i32, i32* %7, i64 %idxprom4 - store i32 0, i32* %arrayidx5, align 4 - br label %if.end - -if.end: ; preds = %if.then, %entry - ret void -} - -; Function Attrs: convergent noinline nounwind optnone -define internal void @_ZL16scanRootToLeavesPjj(i32* %s_data, i32 %stride) #0 { -entry: - %s_data.addr = alloca i32*, align 8 - %stride.addr = alloca i32, align 4 - %thid = alloca i32, align 4 - %d = alloca i32, align 4 - %i = alloca i32, align 4 - %ai = alloca i32, align 4 - %bi = alloca i32, align 4 - %t = alloca i32, align 4 - store i32* %s_data, i32** %s_data.addr, align 8 - store i32 %stride, i32* %stride.addr, align 4 - %call = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - store i32 %call, i32* %thid, align 4 - store i32 1, i32* %d, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = load i32, i32* %d, align 4 - %call1 = call i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv() #2 - %cmp = icmp ule i32 %0, %call1 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %1 = load i32, i32* %stride.addr, align 4 - %shr = lshr i32 %1, 1 - store i32 %shr, i32* %stride.addr, align 4 - call void @llvm.nvvm.barrier0() - %2 = load i32, i32* %thid, align 4 - %3 = load i32, i32* %d, align 4 - %cmp2 = icmp ult i32 %2, %3 - br i1 %cmp2, label %if.then, label %if.end - -if.then: ; preds = %for.body - %4 = load i32, i32* %stride.addr, align 4 - %call3 = call i32 @_ZL7__mul24ii(i32 2, i32 %4) #2 - %5 = load i32, i32* %thid, align 4 - %call4 = call i32 @_ZL7__mul24ii(i32 %call3, i32 %5) #2 - store i32 %call4, i32* %i, align 4 - %6 = load i32, i32* %i, align 4 - %7 = load i32, i32* %stride.addr, align 4 - %add = add i32 %6, %7 - %sub = sub i32 %add, 1 - store i32 %sub, i32* %ai, align 4 - %8 = load i32, i32* %ai, align 4 - %9 = load i32, i32* %stride.addr, align 4 - %add5 = add i32 %8, %9 - store i32 %add5, i32* %bi, align 4 - %10 = load i32, i32* %ai, align 4 - %shr6 = ashr i32 %10, 4 - %11 = load i32, i32* %ai, align 4 - %add7 = add nsw i32 %11, %shr6 - store i32 %add7, i32* %ai, align 4 - %12 = load i32, i32* %bi, align 4 - %shr8 = ashr i32 %12, 4 - %13 = load i32, i32* %bi, align 4 - %add9 = add nsw i32 %13, %shr8 - store i32 %add9, i32* %bi, align 4 - %14 = load i32*, i32** %s_data.addr, align 8 - %15 = load i32, i32* %ai, align 4 - %idxprom = sext i32 %15 to i64 - %arrayidx = getelementptr inbounds i32, i32* %14, i64 %idxprom - %16 = load i32, i32* %arrayidx, align 4 - store i32 %16, i32* %t, align 4 - %17 = load i32*, i32** %s_data.addr, align 8 - %18 = load i32, i32* %bi, align 4 - %idxprom10 = sext i32 %18 to i64 - %arrayidx11 = getelementptr inbounds i32, i32* %17, i64 %idxprom10 - %19 = load i32, i32* %arrayidx11, align 4 - %20 = load i32*, i32** %s_data.addr, align 8 - %21 = load i32, i32* %ai, align 4 - %idxprom12 = sext i32 %21 to i64 - %arrayidx13 = getelementptr inbounds i32, i32* %20, i64 %idxprom12 - store i32 %19, i32* %arrayidx13, align 4 - %22 = load i32, i32* %t, align 4 - %23 = load i32*, i32** %s_data.addr, align 8 - %24 = load i32, i32* %bi, align 4 - %idxprom14 = sext i32 %24 to i64 - %arrayidx15 = getelementptr inbounds i32, i32* %23, i64 %idxprom14 - %25 = load i32, i32* %arrayidx15, align 4 - %add16 = add i32 %25, %22 - store i32 %add16, i32* %arrayidx15, align 4 - br label %if.end - -if.end: ; preds = %if.then, %for.body - br label %for.inc - -for.inc: ; preds = %if.end - %26 = load i32, i32* %d, align 4 - %mul = mul nsw i32 %26, 2 - store i32 %mul, i32* %d, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - ret void -} - -; Function Attrs: convergent noinline nounwind optnone -define dso_local void @_ZL7prescanILb1ELb1EEvPjPKjS0_iii(i32* %g_odata, i32* %g_idata, i32* %g_blockSums, i32 %n, i32 %blockIndex, i32 %baseIndex) #0 { -entry: - %g_odata.addr = alloca i32*, align 8 - %g_idata.addr = alloca i32*, align 8 - %g_blockSums.addr = alloca i32*, align 8 - %n.addr = alloca i32, align 4 - %blockIndex.addr = alloca i32, align 4 - %baseIndex.addr = alloca i32, align 4 - %ai = alloca i32, align 4 - %bi = alloca i32, align 4 - %mem_ai = alloca i32, align 4 - %mem_bi = alloca i32, align 4 - %bankOffsetA = alloca i32, align 4 - %bankOffsetB = alloca i32, align 4 - store i32* %g_odata, i32** %g_odata.addr, align 8 - store i32* %g_idata, i32** %g_idata.addr, align 8 - store i32* %g_blockSums, i32** %g_blockSums.addr, align 8 - store i32 %n, i32* %n.addr, align 4 - store i32 %blockIndex, i32* %blockIndex.addr, align 4 - store i32 %baseIndex, i32* %baseIndex.addr, align 4 - %0 = load i32*, i32** %g_idata.addr, align 8 - %1 = load i32, i32* %n.addr, align 4 - %2 = load i32, i32* %baseIndex.addr, align 4 - %cmp = icmp eq i32 %2, 0 - br i1 %cmp, label %cond.true, label %cond.false - -cond.true: ; preds = %entry - %call = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #2 - %call1 = call i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv() #2 - %shl = shl i32 %call1, 1 - %call2 = call i32 @_ZL7__mul24ii(i32 %call, i32 %shl) #2 - br label %cond.end - -cond.false: ; preds = %entry - %3 = load i32, i32* %baseIndex.addr, align 4 - br label %cond.end - -cond.end: ; preds = %cond.false, %cond.true - %cond = phi i32 [ %call2, %cond.true ], [ %3, %cond.false ] - call void @_ZL22loadSharedChunkFromMemILb1EEvPjPKjiiRiS3_S3_S3_S3_S3_(i32* getelementptr inbounds ([3072 x i32], [3072 x i32]* addrspacecast ([3072 x i32] addrspace(3)* @_ZZL7prescanILb1ELb1EEvPjPKjS0_iiiE6s_data to [3072 x i32]*), i64 0, i64 0), i32* %0, i32 %1, i32 %cond, i32* dereferenceable(4) %ai, i32* dereferenceable(4) %bi, i32* dereferenceable(4) %mem_ai, i32* dereferenceable(4) %mem_bi, i32* dereferenceable(4) %bankOffsetA, i32* dereferenceable(4) %bankOffsetB) #2 - %4 = load i32, i32* %blockIndex.addr, align 4 - %5 = load i32*, i32** %g_blockSums.addr, align 8 - call void @_ZL12prescanBlockILb1EEvPjiS0_(i32* getelementptr inbounds ([3072 x i32], [3072 x i32]* addrspacecast ([3072 x i32] addrspace(3)* @_ZZL7prescanILb1ELb1EEvPjPKjS0_iiiE6s_data to [3072 x i32]*), i64 0, i64 0), i32 %4, i32* %5) #2 - %6 = load i32*, i32** %g_odata.addr, align 8 - %7 = load i32, i32* %n.addr, align 4 - %8 = load i32, i32* %ai, align 4 - %9 = load i32, i32* %bi, align 4 - %10 = load i32, i32* %mem_ai, align 4 - %11 = load i32, i32* %mem_bi, align 4 - %12 = load i32, i32* %bankOffsetA, align 4 - %13 = load i32, i32* %bankOffsetB, align 4 - call void @_ZL21storeSharedChunkToMemILb1EEvPjPKjiiiiiii(i32* %6, i32* getelementptr inbounds ([3072 x i32], [3072 x i32]* addrspacecast ([3072 x i32] addrspace(3)* @_ZZL7prescanILb1ELb1EEvPjPKjS0_iiiE6s_data to [3072 x i32]*), i64 0, i64 0), i32 %7, i32 %8, i32 %9, i32 %10, i32 %11, i32 %12, i32 %13) #2 - ret void -} - -; Function Attrs: convergent noinline nounwind optnone -define internal void @_ZL22loadSharedChunkFromMemILb1EEvPjPKjiiRiS3_S3_S3_S3_S3_(i32* %s_data, i32* %g_idata, i32 %n, i32 %baseIndex, i32* dereferenceable(4) %ai, i32* dereferenceable(4) %bi, i32* dereferenceable(4) %mem_ai, i32* dereferenceable(4) %mem_bi, i32* dereferenceable(4) %bankOffsetA, i32* dereferenceable(4) %bankOffsetB) #0 { -entry: - %s_data.addr = alloca i32*, align 8 - %g_idata.addr = alloca i32*, align 8 - %n.addr = alloca i32, align 4 - %baseIndex.addr = alloca i32, align 4 - %ai.addr = alloca i32*, align 8 - %bi.addr = alloca i32*, align 8 - %mem_ai.addr = alloca i32*, align 8 - %mem_bi.addr = alloca i32*, align 8 - %bankOffsetA.addr = alloca i32*, align 8 - %bankOffsetB.addr = alloca i32*, align 8 - %thid = alloca i32, align 4 - store i32* %s_data, i32** %s_data.addr, align 8 - store i32* %g_idata, i32** %g_idata.addr, align 8 - store i32 %n, i32* %n.addr, align 4 - store i32 %baseIndex, i32* %baseIndex.addr, align 4 - store i32* %ai, i32** %ai.addr, align 8 - store i32* %bi, i32** %bi.addr, align 8 - store i32* %mem_ai, i32** %mem_ai.addr, align 8 - store i32* %mem_bi, i32** %mem_bi.addr, align 8 - store i32* %bankOffsetA, i32** %bankOffsetA.addr, align 8 - store i32* %bankOffsetB, i32** %bankOffsetB.addr, align 8 - %call = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - store i32 %call, i32* %thid, align 4 - %0 = load i32, i32* %baseIndex.addr, align 4 - %call1 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - %add = add i32 %0, %call1 - %1 = load i32*, i32** %mem_ai.addr, align 8 - store i32 %add, i32* %1, align 4 - %2 = load i32*, i32** %mem_ai.addr, align 8 - %3 = load i32, i32* %2, align 4 - %call2 = call i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv() #2 - %add3 = add i32 %3, %call2 - %4 = load i32*, i32** %mem_bi.addr, align 8 - store i32 %add3, i32* %4, align 4 - %5 = load i32, i32* %thid, align 4 - %6 = load i32*, i32** %ai.addr, align 8 - store i32 %5, i32* %6, align 4 - %7 = load i32, i32* %thid, align 4 - %call4 = call i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv() #2 - %add5 = add i32 %7, %call4 - %8 = load i32*, i32** %bi.addr, align 8 - store i32 %add5, i32* %8, align 4 - %9 = load i32*, i32** %ai.addr, align 8 - %10 = load i32, i32* %9, align 4 - %shr = ashr i32 %10, 4 - %11 = load i32*, i32** %bankOffsetA.addr, align 8 - store i32 %shr, i32* %11, align 4 - %12 = load i32*, i32** %bi.addr, align 8 - %13 = load i32, i32* %12, align 4 - %shr6 = ashr i32 %13, 4 - %14 = load i32*, i32** %bankOffsetB.addr, align 8 - store i32 %shr6, i32* %14, align 4 - %15 = load i32*, i32** %g_idata.addr, align 8 - %16 = load i32*, i32** %mem_ai.addr, align 8 - %17 = load i32, i32* %16, align 4 - %idxprom = sext i32 %17 to i64 - %arrayidx = getelementptr inbounds i32, i32* %15, i64 %idxprom - %18 = load i32, i32* %arrayidx, align 4 - %19 = load i32*, i32** %s_data.addr, align 8 - %20 = load i32*, i32** %ai.addr, align 8 - %21 = load i32, i32* %20, align 4 - %22 = load i32*, i32** %bankOffsetA.addr, align 8 - %23 = load i32, i32* %22, align 4 - %add7 = add nsw i32 %21, %23 - %idxprom8 = sext i32 %add7 to i64 - %arrayidx9 = getelementptr inbounds i32, i32* %19, i64 %idxprom8 - store i32 %18, i32* %arrayidx9, align 4 - %24 = load i32*, i32** %bi.addr, align 8 - %25 = load i32, i32* %24, align 4 - %26 = load i32, i32* %n.addr, align 4 - %cmp = icmp slt i32 %25, %26 - br i1 %cmp, label %cond.true, label %cond.false - -cond.true: ; preds = %entry - %27 = load i32*, i32** %g_idata.addr, align 8 - %28 = load i32*, i32** %mem_bi.addr, align 8 - %29 = load i32, i32* %28, align 4 - %idxprom10 = sext i32 %29 to i64 - %arrayidx11 = getelementptr inbounds i32, i32* %27, i64 %idxprom10 - %30 = load i32, i32* %arrayidx11, align 4 - br label %cond.end - -cond.false: ; preds = %entry - br label %cond.end - -cond.end: ; preds = %cond.false, %cond.true - %cond = phi i32 [ %30, %cond.true ], [ 0, %cond.false ] - %31 = load i32*, i32** %s_data.addr, align 8 - %32 = load i32*, i32** %bi.addr, align 8 - %33 = load i32, i32* %32, align 4 - %34 = load i32*, i32** %bankOffsetB.addr, align 8 - %35 = load i32, i32* %34, align 4 - %add12 = add nsw i32 %33, %35 - %idxprom13 = sext i32 %add12 to i64 - %arrayidx14 = getelementptr inbounds i32, i32* %31, i64 %idxprom13 - store i32 %cond, i32* %arrayidx14, align 4 - ret void -} - -; Function Attrs: convergent noinline nounwind optnone -define internal void @_ZL21storeSharedChunkToMemILb1EEvPjPKjiiiiiii(i32* %g_odata, i32* %s_data, i32 %n, i32 %ai, i32 %bi, i32 %mem_ai, i32 %mem_bi, i32 %bankOffsetA, i32 %bankOffsetB) #0 { -entry: - %g_odata.addr = alloca i32*, align 8 - %s_data.addr = alloca i32*, align 8 - %n.addr = alloca i32, align 4 - %ai.addr = alloca i32, align 4 - %bi.addr = alloca i32, align 4 - %mem_ai.addr = alloca i32, align 4 - %mem_bi.addr = alloca i32, align 4 - %bankOffsetA.addr = alloca i32, align 4 - %bankOffsetB.addr = alloca i32, align 4 - store i32* %g_odata, i32** %g_odata.addr, align 8 - store i32* %s_data, i32** %s_data.addr, align 8 - store i32 %n, i32* %n.addr, align 4 - store i32 %ai, i32* %ai.addr, align 4 - store i32 %bi, i32* %bi.addr, align 4 - store i32 %mem_ai, i32* %mem_ai.addr, align 4 - store i32 %mem_bi, i32* %mem_bi.addr, align 4 - store i32 %bankOffsetA, i32* %bankOffsetA.addr, align 4 - store i32 %bankOffsetB, i32* %bankOffsetB.addr, align 4 - call void @llvm.nvvm.barrier0() - %0 = load i32*, i32** %s_data.addr, align 8 - %1 = load i32, i32* %ai.addr, align 4 - %2 = load i32, i32* %bankOffsetA.addr, align 4 - %add = add nsw i32 %1, %2 - %idxprom = sext i32 %add to i64 - %arrayidx = getelementptr inbounds i32, i32* %0, i64 %idxprom - %3 = load i32, i32* %arrayidx, align 4 - %4 = load i32*, i32** %g_odata.addr, align 8 - %5 = load i32, i32* %mem_ai.addr, align 4 - %idxprom1 = sext i32 %5 to i64 - %arrayidx2 = getelementptr inbounds i32, i32* %4, i64 %idxprom1 - store i32 %3, i32* %arrayidx2, align 4 - %6 = load i32, i32* %bi.addr, align 4 - %7 = load i32, i32* %n.addr, align 4 - %cmp = icmp slt i32 %6, %7 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - %8 = load i32*, i32** %s_data.addr, align 8 - %9 = load i32, i32* %bi.addr, align 4 - %10 = load i32, i32* %bankOffsetB.addr, align 4 - %add3 = add nsw i32 %9, %10 - %idxprom4 = sext i32 %add3 to i64 - %arrayidx5 = getelementptr inbounds i32, i32* %8, i64 %idxprom4 - %11 = load i32, i32* %arrayidx5, align 4 - %12 = load i32*, i32** %g_odata.addr, align 8 - %13 = load i32, i32* %mem_bi.addr, align 4 - %idxprom6 = sext i32 %13 to i64 - %arrayidx7 = getelementptr inbounds i32, i32* %12, i64 %idxprom6 - store i32 %11, i32* %arrayidx7, align 4 - br label %if.end - -if.end: ; preds = %if.then, %entry - ret void -} - -; Function Attrs: convergent noinline nounwind optnone -define dso_local void @_ZL7prescanILb0ELb0EEvPjPKjS0_iii(i32* %g_odata, i32* %g_idata, i32* %g_blockSums, i32 %n, i32 %blockIndex, i32 %baseIndex) #0 { -entry: - %g_odata.addr = alloca i32*, align 8 - %g_idata.addr = alloca i32*, align 8 - %g_blockSums.addr = alloca i32*, align 8 - %n.addr = alloca i32, align 4 - %blockIndex.addr = alloca i32, align 4 - %baseIndex.addr = alloca i32, align 4 - %ai = alloca i32, align 4 - %bi = alloca i32, align 4 - %mem_ai = alloca i32, align 4 - %mem_bi = alloca i32, align 4 - %bankOffsetA = alloca i32, align 4 - %bankOffsetB = alloca i32, align 4 - store i32* %g_odata, i32** %g_odata.addr, align 8 - store i32* %g_idata, i32** %g_idata.addr, align 8 - store i32* %g_blockSums, i32** %g_blockSums.addr, align 8 - store i32 %n, i32* %n.addr, align 4 - store i32 %blockIndex, i32* %blockIndex.addr, align 4 - store i32 %baseIndex, i32* %baseIndex.addr, align 4 - %0 = load i32*, i32** %g_idata.addr, align 8 - %1 = load i32, i32* %n.addr, align 4 - %2 = load i32, i32* %baseIndex.addr, align 4 - %cmp = icmp eq i32 %2, 0 - br i1 %cmp, label %cond.true, label %cond.false - -cond.true: ; preds = %entry - %call = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #2 - %call1 = call i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv() #2 - %shl = shl i32 %call1, 1 - %call2 = call i32 @_ZL7__mul24ii(i32 %call, i32 %shl) #2 - br label %cond.end - -cond.false: ; preds = %entry - %3 = load i32, i32* %baseIndex.addr, align 4 - br label %cond.end - -cond.end: ; preds = %cond.false, %cond.true - %cond = phi i32 [ %call2, %cond.true ], [ %3, %cond.false ] - call void @_ZL22loadSharedChunkFromMemILb0EEvPjPKjiiRiS3_S3_S3_S3_S3_(i32* getelementptr inbounds ([3072 x i32], [3072 x i32]* addrspacecast ([3072 x i32] addrspace(3)* @_ZZL7prescanILb0ELb0EEvPjPKjS0_iiiE6s_data to [3072 x i32]*), i64 0, i64 0), i32* %0, i32 %1, i32 %cond, i32* dereferenceable(4) %ai, i32* dereferenceable(4) %bi, i32* dereferenceable(4) %mem_ai, i32* dereferenceable(4) %mem_bi, i32* dereferenceable(4) %bankOffsetA, i32* dereferenceable(4) %bankOffsetB) #2 - %4 = load i32, i32* %blockIndex.addr, align 4 - %5 = load i32*, i32** %g_blockSums.addr, align 8 - call void @_ZL12prescanBlockILb0EEvPjiS0_(i32* getelementptr inbounds ([3072 x i32], [3072 x i32]* addrspacecast ([3072 x i32] addrspace(3)* @_ZZL7prescanILb0ELb0EEvPjPKjS0_iiiE6s_data to [3072 x i32]*), i64 0, i64 0), i32 %4, i32* %5) #2 - %6 = load i32*, i32** %g_odata.addr, align 8 - %7 = load i32, i32* %n.addr, align 4 - %8 = load i32, i32* %ai, align 4 - %9 = load i32, i32* %bi, align 4 - %10 = load i32, i32* %mem_ai, align 4 - %11 = load i32, i32* %mem_bi, align 4 - %12 = load i32, i32* %bankOffsetA, align 4 - %13 = load i32, i32* %bankOffsetB, align 4 - call void @_ZL21storeSharedChunkToMemILb0EEvPjPKjiiiiiii(i32* %6, i32* getelementptr inbounds ([3072 x i32], [3072 x i32]* addrspacecast ([3072 x i32] addrspace(3)* @_ZZL7prescanILb0ELb0EEvPjPKjS0_iiiE6s_data to [3072 x i32]*), i64 0, i64 0), i32 %7, i32 %8, i32 %9, i32 %10, i32 %11, i32 %12, i32 %13) #2 - ret void -} - -; Function Attrs: convergent noinline nounwind optnone -define internal void @_ZL12prescanBlockILb0EEvPjiS0_(i32* %data, i32 %blockIndex, i32* %blockSums) #0 { -entry: - %data.addr = alloca i32*, align 8 - %blockIndex.addr = alloca i32, align 4 - %blockSums.addr = alloca i32*, align 8 - %stride = alloca i32, align 4 - store i32* %data, i32** %data.addr, align 8 - store i32 %blockIndex, i32* %blockIndex.addr, align 4 - store i32* %blockSums, i32** %blockSums.addr, align 8 - %0 = load i32*, i32** %data.addr, align 8 - %call = call i32 @_ZL8buildSumPj(i32* %0) #2 - store i32 %call, i32* %stride, align 4 - %1 = load i32*, i32** %data.addr, align 8 - %2 = load i32*, i32** %blockSums.addr, align 8 - %3 = load i32, i32* %blockIndex.addr, align 4 - %cmp = icmp eq i32 %3, 0 - br i1 %cmp, label %cond.true, label %cond.false - -cond.true: ; preds = %entry - %call1 = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #2 - br label %cond.end - -cond.false: ; preds = %entry - %4 = load i32, i32* %blockIndex.addr, align 4 - br label %cond.end - -cond.end: ; preds = %cond.false, %cond.true - %cond = phi i32 [ %call1, %cond.true ], [ %4, %cond.false ] - call void @_ZL16clearLastElementILb0EEvPjS0_i(i32* %1, i32* %2, i32 %cond) #2 - %5 = load i32*, i32** %data.addr, align 8 - %6 = load i32, i32* %stride, align 4 - call void @_ZL16scanRootToLeavesPjj(i32* %5, i32 %6) #2 - ret void -} - -; Function Attrs: convergent noinline nounwind optnone -define internal void @_ZL16clearLastElementILb0EEvPjS0_i(i32* %s_data, i32* %g_blockSums, i32 %blockIndex) #0 { -entry: - %s_data.addr = alloca i32*, align 8 - %g_blockSums.addr = alloca i32*, align 8 - %blockIndex.addr = alloca i32, align 4 - %index = alloca i32, align 4 - store i32* %s_data, i32** %s_data.addr, align 8 - store i32* %g_blockSums, i32** %g_blockSums.addr, align 8 - store i32 %blockIndex, i32* %blockIndex.addr, align 4 - %call = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - %cmp = icmp eq i32 %call, 0 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - %call1 = call i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv() #2 - %shl = shl i32 %call1, 1 - %sub = sub i32 %shl, 1 - store i32 %sub, i32* %index, align 4 - %0 = load i32, i32* %index, align 4 - %shr = ashr i32 %0, 4 - %1 = load i32, i32* %index, align 4 - %add = add nsw i32 %1, %shr - store i32 %add, i32* %index, align 4 - %2 = load i32*, i32** %s_data.addr, align 8 - %3 = load i32, i32* %index, align 4 - %idxprom = sext i32 %3 to i64 - %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom - store i32 0, i32* %arrayidx, align 4 - br label %if.end - -if.end: ; preds = %if.then, %entry - ret void -} - -; Function Attrs: convergent noinline nounwind optnone -define dso_local void @_ZL7prescanILb0ELb1EEvPjPKjS0_iii(i32* %g_odata, i32* %g_idata, i32* %g_blockSums, i32 %n, i32 %blockIndex, i32 %baseIndex) #0 { -entry: - %g_odata.addr = alloca i32*, align 8 - %g_idata.addr = alloca i32*, align 8 - %g_blockSums.addr = alloca i32*, align 8 - %n.addr = alloca i32, align 4 - %blockIndex.addr = alloca i32, align 4 - %baseIndex.addr = alloca i32, align 4 - %ai = alloca i32, align 4 - %bi = alloca i32, align 4 - %mem_ai = alloca i32, align 4 - %mem_bi = alloca i32, align 4 - %bankOffsetA = alloca i32, align 4 - %bankOffsetB = alloca i32, align 4 - store i32* %g_odata, i32** %g_odata.addr, align 8 - store i32* %g_idata, i32** %g_idata.addr, align 8 - store i32* %g_blockSums, i32** %g_blockSums.addr, align 8 - store i32 %n, i32* %n.addr, align 4 - store i32 %blockIndex, i32* %blockIndex.addr, align 4 - store i32 %baseIndex, i32* %baseIndex.addr, align 4 - %0 = load i32*, i32** %g_idata.addr, align 8 - %1 = load i32, i32* %n.addr, align 4 - %2 = load i32, i32* %baseIndex.addr, align 4 - %cmp = icmp eq i32 %2, 0 - br i1 %cmp, label %cond.true, label %cond.false - -cond.true: ; preds = %entry - %call = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #2 - %call1 = call i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv() #2 - %shl = shl i32 %call1, 1 - %call2 = call i32 @_ZL7__mul24ii(i32 %call, i32 %shl) #2 - br label %cond.end - -cond.false: ; preds = %entry - %3 = load i32, i32* %baseIndex.addr, align 4 - br label %cond.end - -cond.end: ; preds = %cond.false, %cond.true - %cond = phi i32 [ %call2, %cond.true ], [ %3, %cond.false ] - call void @_ZL22loadSharedChunkFromMemILb1EEvPjPKjiiRiS3_S3_S3_S3_S3_(i32* getelementptr inbounds ([3072 x i32], [3072 x i32]* addrspacecast ([3072 x i32] addrspace(3)* @_ZZL7prescanILb0ELb1EEvPjPKjS0_iiiE6s_data to [3072 x i32]*), i64 0, i64 0), i32* %0, i32 %1, i32 %cond, i32* dereferenceable(4) %ai, i32* dereferenceable(4) %bi, i32* dereferenceable(4) %mem_ai, i32* dereferenceable(4) %mem_bi, i32* dereferenceable(4) %bankOffsetA, i32* dereferenceable(4) %bankOffsetB) #2 - %4 = load i32, i32* %blockIndex.addr, align 4 - %5 = load i32*, i32** %g_blockSums.addr, align 8 - call void @_ZL12prescanBlockILb0EEvPjiS0_(i32* getelementptr inbounds ([3072 x i32], [3072 x i32]* addrspacecast ([3072 x i32] addrspace(3)* @_ZZL7prescanILb0ELb1EEvPjPKjS0_iiiE6s_data to [3072 x i32]*), i64 0, i64 0), i32 %4, i32* %5) #2 - %6 = load i32*, i32** %g_odata.addr, align 8 - %7 = load i32, i32* %n.addr, align 4 - %8 = load i32, i32* %ai, align 4 - %9 = load i32, i32* %bi, align 4 - %10 = load i32, i32* %mem_ai, align 4 - %11 = load i32, i32* %mem_bi, align 4 - %12 = load i32, i32* %bankOffsetA, align 4 - %13 = load i32, i32* %bankOffsetB, align 4 - call void @_ZL21storeSharedChunkToMemILb1EEvPjPKjiiiiiii(i32* %6, i32* getelementptr inbounds ([3072 x i32], [3072 x i32]* addrspacecast ([3072 x i32] addrspace(3)* @_ZZL7prescanILb0ELb1EEvPjPKjS0_iiiE6s_data to [3072 x i32]*), i64 0, i64 0), i32 %7, i32 %8, i32 %9, i32 %10, i32 %11, i32 %12, i32 %13) #2 - ret void -} - -; Function Attrs: alwaysinline convergent inlinehint nounwind -define internal i32 @__nv_mul24(i32 %x, i32 %y) #4 { - %1 = call i32 @llvm.nvvm.mul24.i(i32 %x, i32 %y) - ret i32 %1 -} - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.mul24.i(i32, i32) #3 - -attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_61" "target-features"="+ptx64,+sm_61" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { alwaysinline convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_61" "target-features"="+ptx64,+sm_61" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { convergent nounwind } -attributes #3 = { nounwind readnone } -attributes #4 = { alwaysinline convergent inlinehint nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } - -!llvm.module.flags = !{!0, !1, !2} -!nvvm.annotations = !{!3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !11, !13, !13, !13, !13, !14, !14, !13} -!llvm.ident = !{!15} -!nvvmir.version = !{!16} - -!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]} -!1 = !{i32 1, !"wchar_size", i32 4} -!2 = !{i32 4, !"nvvm-reflect-ftz", i32 0} -!3 = !{void (i8*, i64, i32*)* @_Z12histo_kernelPhlPj, !"kernel", i32 1} -!4 = !{void (i32*, i32*, i32*, i32*, i32*, i32*, i32*, i32*)* @_ZL26vlc_encode_kernel_sm64huffPjPKjS1_S_S_S_S_S_, !"kernel", i32 1} -!5 = !{void (i32*, i32*, i32*, i32*, i32)* @_ZL5pack2PjS_S_S_j, !"kernel", i32 1} -!6 = !{void (i32*, i32*, i32, i32, i32)* @_ZL10uniformAddPjS_iii, !"kernel", i32 1} -!7 = !{void (i32*, i32*, i32*, i32, i32, i32)* @_ZL7prescanILb1ELb0EEvPjPKjS0_iii, !"kernel", i32 1} -!8 = !{void (i32*, i32*, i32*, i32, i32, i32)* @_ZL7prescanILb1ELb1EEvPjPKjS0_iii, !"kernel", i32 1} -!9 = !{void (i32*, i32*, i32*, i32, i32, i32)* @_ZL7prescanILb0ELb0EEvPjPKjS0_iii, !"kernel", i32 1} -!10 = !{void (i32*, i32*, i32*, i32, i32, i32)* @_ZL7prescanILb0ELb1EEvPjPKjS0_iii, !"kernel", i32 1} -!11 = !{null, !"align", i32 8} -!12 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080} -!13 = !{null, !"align", i32 16} -!14 = !{null, !"align", i32 16, !"align", i32 65552, !"align", i32 131088} -!15 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"} -!16 = !{i32 1, i32 4} diff --git a/examples/huffman/main_test_cu-host-x86_64-unknown-linux-gnu.ll b/examples/huffman/main_test_cu-host-x86_64-unknown-linux-gnu.ll deleted file mode 100644 index 27e66c9..0000000 --- a/examples/huffman/main_test_cu-host-x86_64-unknown-linux-gnu.ll +++ /dev/null @@ -1,12230 +0,0 @@ -; ModuleID = 'main_test_cu-host-x86_64-unknown-linux-gnu.bc' -source_filename = "main_test_cu.cu" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -%"class.std::ios_base::Init" = type { i8 } -%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } -%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } -%struct.dim3 = type { i32, i32, i32 } -%struct.CUstream_st = type opaque -%class.INode = type <{ i32 (...)**, i32, [4 x i8] }> -%"class.std::priority_queue" = type <{ %"class.std::vector", %struct.NodeCmp, [7 x i8] }> -%"class.std::vector" = type { %"struct.std::_Vector_base" } -%"struct.std::_Vector_base" = type { %"struct.std::_Vector_base >::_Vector_impl" } -%"struct.std::_Vector_base >::_Vector_impl" = type { %class.INode**, %class.INode**, %class.INode** } -%struct.NodeCmp = type { i8 } -%class.LeafNode = type { %class.INode.base, i8, [3 x i8] } -%class.INode.base = type <{ i32 (...)**, i32 }> -%class.InternalNode = type { %class.INode.base, %class.INode*, %class.INode* } -%"class.__gnu_cxx::__normal_iterator" = type { %class.INode** } -%"class.std::allocator" = type { i8 } -%"class.std::vector.0" = type { %"struct.std::_Bvector_base" } -%"struct.std::_Bvector_base" = type { %"struct.std::_Bvector_base >::_Bvector_impl" } -%"struct.std::_Bvector_base >::_Bvector_impl" = type { %"struct.std::_Bit_iterator", %"struct.std::_Bit_iterator", i64* } -%"struct.std::_Bit_iterator" = type { %"struct.std::_Bit_iterator_base.base", [4 x i8] } -%"struct.std::_Bit_iterator_base.base" = type <{ i64*, i32 }> -%"class.std::map" = type { %"class.std::_Rb_tree" } -%"class.std::_Rb_tree" = type { %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl" } -%"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl" = type { %"struct.std::_Rb_tree_key_compare", %"struct.std::_Rb_tree_header" } -%"struct.std::_Rb_tree_key_compare" = type { %"struct.std::less" } -%"struct.std::less" = type { i8 } -%"struct.std::_Rb_tree_header" = type { %"struct.std::_Rb_tree_node_base", i64 } -%"struct.std::_Rb_tree_node_base" = type { i32, %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* } -%"struct.std::_Rb_tree_iterator" = type { %"struct.std::_Rb_tree_node_base"* } -%"struct.std::pair" = type { i8, %"class.std::vector.0" } -%"struct.std::_Bit_const_iterator" = type { %"struct.std::_Bit_iterator_base.base", [4 x i8] } -%"class.std::allocator.13" = type { i8 } -%"class.std::allocator.1" = type { i8 } -%"struct.std::_Bit_reference" = type { i64*, i64 } -%"struct.std::_Bit_iterator_base" = type <{ i64*, i32, [4 x i8] }> -%struct.timeval = type { i64, i64 } -%struct.timezone = type { i32, i32 } -%"struct.std::_Rb_tree_const_iterator" = type { %"struct.std::_Rb_tree_node_base"* } -%"struct.std::random_access_iterator_tag" = type { i8 } -%"struct.std::_Rb_tree_node" = type { %"struct.std::_Rb_tree_node_base", %"struct.std::pair" } -%"class.std::allocator.4" = type { i8 } -%"class.__gnu_cxx::new_allocator.5" = type { i8 } -%"class.__gnu_cxx::new_allocator.2" = type { i8 } -%"struct.std::iterator" = type { i8 } -%"class.std::allocator.7" = type { i8 } -%"class.__gnu_cxx::new_allocator.8" = type { i8 } -%"class.__gnu_cxx::new_allocator" = type { i8 } -%"class.__gnu_cxx::__normal_iterator.10" = type { %class.INode** } -%"struct.__gnu_cxx::__ops::_Iter_comp_iter" = type { %struct.NodeCmp } -%"struct.__gnu_cxx::__ops::_Iter_comp_val" = type { %struct.NodeCmp } -%"class.std::__pair_base" = type { i8 } -%"struct.std::_Select1st" = type { i8 } -%"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Alloc_node" = type { %"class.std::_Rb_tree"* } -%"struct.std::pair.11" = type { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* } -%"class.std::__pair_base.12" = type { i8 } -%"class.__gnu_cxx::new_allocator.14" = type { i8 } - -$_Z9gpuAssert9cudaErrorPKcib = comdat any - -$_ZN4dim3C2Ejjj = comdat any - -$_ZNSt6vectorIP5INodeSaIS1_EEC2Ev = comdat any - -$_ZNSt14priority_queueIP5INodeSt6vectorIS1_SaIS1_EE7NodeCmpEC2ERKS5_RKS4_ = comdat any - -$_ZNSt6vectorIP5INodeSaIS1_EED2Ev = comdat any - -$__clang_call_terminate = comdat any - -$_ZNSt14priority_queueIP5INodeSt6vectorIS1_SaIS1_EE7NodeCmpE4pushERKS1_ = comdat any - -$_ZN8LeafNodeC2Eic = comdat any - -$_ZNKSt14priority_queueIP5INodeSt6vectorIS1_SaIS1_EE7NodeCmpE4sizeEv = comdat any - -$_ZNKSt14priority_queueIP5INodeSt6vectorIS1_SaIS1_EE7NodeCmpE3topEv = comdat any - -$_ZNSt14priority_queueIP5INodeSt6vectorIS1_SaIS1_EE7NodeCmpE3popEv = comdat any - -$_ZN12InternalNodeC2EP5INodeS1_ = comdat any - -$_ZNSt14priority_queueIP5INodeSt6vectorIS1_SaIS1_EE7NodeCmpED2Ev = comdat any - -$_ZNSt3mapIhSt6vectorIbSaIbEESt4lessIhESaISt4pairIKhS2_EEEixERS6_ = comdat any - -$_ZNSt6vectorIbSaIbEEaSERKS1_ = comdat any - -$_ZNSt6vectorIbSaIbEEC2ERKS1_ = comdat any - -$_ZNSt6vectorIbSaIbEE9push_backEb = comdat any - -$_ZNSt6vectorIbSaIbEED2Ev = comdat any - -$_Z10initParamsPcjRjS0_S0_j = comdat any - -$_Z8loadDataPcPjS0_S0_jjRd = comdat any - -$_Z15compare_vectorsIjEiPT_S1_j = comdat any - -$_ZN5INodeC2Ei = comdat any - -$_ZN8LeafNodeD2Ev = comdat any - -$_ZN8LeafNodeD0Ev = comdat any - -$_ZN5INodeD2Ev = comdat any - -$_ZN5INodeD0Ev = comdat any - -$_ZN12InternalNodeD2Ev = comdat any - -$_ZN12InternalNodeD0Ev = comdat any - -$_ZNSt3mapIhSt6vectorIbSaIbEESt4lessIhESaISt4pairIKhS2_EEEC2Ev = comdat any - -$_ZNSt6vectorIbSaIbEEC2Ev = comdat any - -$_ZNSt3mapIhSt6vectorIbSaIbEESt4lessIhESaISt4pairIKhS2_EEE5beginEv = comdat any - -$_ZNSt23_Rb_tree_const_iteratorISt4pairIKhSt6vectorIbSaIbEEEEC2ERKSt17_Rb_tree_iteratorIS5_E = comdat any - -$_ZNKSt23_Rb_tree_const_iteratorISt4pairIKhSt6vectorIbSaIbEEEEneERKS6_ = comdat any - -$_ZNSt3mapIhSt6vectorIbSaIbEESt4lessIhESaISt4pairIKhS2_EEE3endEv = comdat any - -$_ZSt8distanceISt19_Bit_const_iteratorENSt15iterator_traitsIT_E15difference_typeES2_S2_ = comdat any - -$_ZNKSt23_Rb_tree_const_iteratorISt4pairIKhSt6vectorIbSaIbEEEEptEv = comdat any - -$_ZNKSt6vectorIbSaIbEE5beginEv = comdat any - -$_ZNKSt6vectorIbSaIbEE3endEv = comdat any - -$_ZNKSt6vectorIbSaIbEEixEm = comdat any - -$_ZSt3powfi = comdat any - -$_ZNSt23_Rb_tree_const_iteratorISt4pairIKhSt6vectorIbSaIbEEEEppEv = comdat any - -$_ZNSt3mapIhSt6vectorIbSaIbEESt4lessIhESaISt4pairIKhS2_EEED2Ev = comdat any - -$_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EEC2Ev = comdat any - -$_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE13_Rb_tree_implIS9_Lb0EEC2Ev = comdat any - -$_ZNSaISt13_Rb_tree_nodeISt4pairIKhSt6vectorIbSaIbEEEEEC2Ev = comdat any - -$_ZNSt20_Rb_tree_key_compareISt4lessIhEEC2Ev = comdat any - -$_ZNSt15_Rb_tree_headerC2Ev = comdat any - -$_ZNSaISt13_Rb_tree_nodeISt4pairIKhSt6vectorIbSaIbEEEEED2Ev = comdat any - -$_ZN9__gnu_cxx13new_allocatorISt13_Rb_tree_nodeISt4pairIKhSt6vectorIbSaIbEEEEEC2Ev = comdat any - -$_ZNSt15_Rb_tree_header8_M_resetEv = comdat any - -$_ZN9__gnu_cxx13new_allocatorISt13_Rb_tree_nodeISt4pairIKhSt6vectorIbSaIbEEEEED2Ev = comdat any - -$_ZNSt13_Bvector_baseISaIbEEC2Ev = comdat any - -$_ZNSt13_Bvector_baseISaIbEE13_Bvector_implC2Ev = comdat any - -$_ZNSaImEC2Ev = comdat any - -$_ZNSt13_Bit_iteratorC2Ev = comdat any - -$_ZN9__gnu_cxx13new_allocatorImEC2Ev = comdat any - -$_ZNSt18_Bit_iterator_baseC2EPmj = comdat any - -$_ZN9__gnu_cxx13new_allocatorImED2Ev = comdat any - -$_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE5beginEv = comdat any - -$_ZNSt17_Rb_tree_iteratorISt4pairIKhSt6vectorIbSaIbEEEEC2EPSt18_Rb_tree_node_base = comdat any - -$_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE3endEv = comdat any - -$_ZSt10__distanceISt19_Bit_const_iteratorENSt15iterator_traitsIT_E15difference_typeES2_S2_St26random_access_iterator_tag = comdat any - -$_ZSt19__iterator_categoryISt19_Bit_const_iteratorENSt15iterator_traitsIT_E17iterator_categoryERKS2_ = comdat any - -$_ZStmiRKSt18_Bit_iterator_baseS1_ = comdat any - -$_ZNKSt13_Rb_tree_nodeISt4pairIKhSt6vectorIbSaIbEEEE9_M_valptrEv = comdat any - -$_ZSt11__addressofIKSt4pairIKhSt6vectorIbSaIbEEEEPT_RS7_ = comdat any - -$_ZNSt19_Bit_const_iteratorC2ERKSt13_Bit_iterator = comdat any - -$_ZNSt19_Bit_const_iteratorC2EPmj = comdat any - -$_ZNKSt19_Bit_const_iteratordeEv = comdat any - -$_ZNSt14_Bit_referenceC2EPmm = comdat any - -$_ZNKSt14_Bit_referencecvbEv = comdat any - -$_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EED2Ev = comdat any - -$_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE8_M_eraseEPSt13_Rb_tree_nodeIS5_E = comdat any - -$_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE8_M_beginEv = comdat any - -$_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE13_Rb_tree_implIS9_Lb0EED2Ev = comdat any - -$_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE8_S_rightEPSt18_Rb_tree_node_base = comdat any - -$_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE7_S_leftEPSt18_Rb_tree_node_base = comdat any - -$_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE12_M_drop_nodeEPSt13_Rb_tree_nodeIS5_E = comdat any - -$_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE15_M_destroy_nodeEPSt13_Rb_tree_nodeIS5_E = comdat any - -$_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE11_M_put_nodeEPSt13_Rb_tree_nodeIS5_E = comdat any - -$_ZNKSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE13get_allocatorEv = comdat any - -$_ZN9__gnu_cxx13new_allocatorISt4pairIKhSt6vectorIbSaIbEEEE7destroyEPS6_ = comdat any - -$_ZNSt13_Rb_tree_nodeISt4pairIKhSt6vectorIbSaIbEEEE9_M_valptrEv = comdat any - -$_ZNSaISt4pairIKhSt6vectorIbSaIbEEEED2Ev = comdat any - -$_ZNKSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE21_M_get_Node_allocatorEv = comdat any - -$_ZNSaISt4pairIKhSt6vectorIbSaIbEEEEC2ISt13_Rb_tree_nodeIS4_EEERKSaIT_E = comdat any - -$_ZN9__gnu_cxx13new_allocatorISt4pairIKhSt6vectorIbSaIbEEEEC2Ev = comdat any - -$_ZNSt4pairIKhSt6vectorIbSaIbEEED2Ev = comdat any - -$_ZSt11__addressofISt4pairIKhSt6vectorIbSaIbEEEEPT_RS6_ = comdat any - -$_ZN9__gnu_cxx13new_allocatorISt4pairIKhSt6vectorIbSaIbEEEED2Ev = comdat any - -$_ZN9__gnu_cxx14__alloc_traitsISaISt13_Rb_tree_nodeISt4pairIKhSt6vectorIbSaIbEEEEEE10deallocateERS9_PS8_m = comdat any - -$_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE21_M_get_Node_allocatorEv = comdat any - -$_ZN9__gnu_cxx13new_allocatorISt13_Rb_tree_nodeISt4pairIKhSt6vectorIbSaIbEEEEE10deallocateEPS8_m = comdat any - -$_ZSt4ceilf = comdat any - -$_Z12isPowerOfTwoi = comdat any - -$_Z9floorPow2i = comdat any - -$_ZSt5frexpfPi = comdat any - -$_ZNSt12_Vector_baseIP5INodeSaIS1_EEC2Ev = comdat any - -$_ZNSt12_Vector_baseIP5INodeSaIS1_EE12_Vector_implC2Ev = comdat any - -$_ZNSaIP5INodeEC2Ev = comdat any - -$_ZN9__gnu_cxx13new_allocatorIP5INodeEC2Ev = comdat any - -$_ZSt8_DestroyIPP5INodeS1_EvT_S3_RSaIT0_E = comdat any - -$_ZNSt12_Vector_baseIP5INodeSaIS1_EE19_M_get_Tp_allocatorEv = comdat any - -$_ZNSt12_Vector_baseIP5INodeSaIS1_EED2Ev = comdat any - -$_ZSt8_DestroyIPP5INodeEvT_S3_ = comdat any - -$_ZNSt12_Destroy_auxILb1EE9__destroyIPP5INodeEEvT_S5_ = comdat any - -$_ZNSt12_Vector_baseIP5INodeSaIS1_EE13_M_deallocateEPS1_m = comdat any - -$_ZNSt12_Vector_baseIP5INodeSaIS1_EE12_Vector_implD2Ev = comdat any - -$_ZN9__gnu_cxx14__alloc_traitsISaIP5INodeEE10deallocateERS3_PS2_m = comdat any - -$_ZN9__gnu_cxx13new_allocatorIP5INodeE10deallocateEPS2_m = comdat any - -$_ZNSaIP5INodeED2Ev = comdat any - -$_ZN9__gnu_cxx13new_allocatorIP5INodeED2Ev = comdat any - -$_ZNSt6vectorIP5INodeSaIS1_EEC2ERKS3_ = comdat any - -$_ZSt9make_heapIN9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS3_SaIS3_EEEE7NodeCmpEvT_SA_T0_ = comdat any - -$_ZNSt6vectorIP5INodeSaIS1_EE5beginEv = comdat any - -$_ZNSt6vectorIP5INodeSaIS1_EE3endEv = comdat any - -$_ZNKSt6vectorIP5INodeSaIS1_EE4sizeEv = comdat any - -$_ZN9__gnu_cxx14__alloc_traitsISaIP5INodeEE17_S_select_on_copyERKS3_ = comdat any - -$_ZNKSt12_Vector_baseIP5INodeSaIS1_EE19_M_get_Tp_allocatorEv = comdat any - -$_ZNSt12_Vector_baseIP5INodeSaIS1_EEC2EmRKS2_ = comdat any - -$_ZSt22__uninitialized_copy_aIN9__gnu_cxx17__normal_iteratorIPKP5INodeSt6vectorIS3_SaIS3_EEEEPS3_S3_ET0_T_SC_SB_RSaIT1_E = comdat any - -$_ZNKSt6vectorIP5INodeSaIS1_EE5beginEv = comdat any - -$_ZNKSt6vectorIP5INodeSaIS1_EE3endEv = comdat any - -$_ZNSt12_Vector_baseIP5INodeSaIS1_EE12_Vector_implC2ERKS2_ = comdat any - -$_ZNSt12_Vector_baseIP5INodeSaIS1_EE17_M_create_storageEm = comdat any - -$_ZNSaIP5INodeEC2ERKS1_ = comdat any - -$_ZN9__gnu_cxx13new_allocatorIP5INodeEC2ERKS3_ = comdat any - -$_ZNSt12_Vector_baseIP5INodeSaIS1_EE11_M_allocateEm = comdat any - -$_ZN9__gnu_cxx14__alloc_traitsISaIP5INodeEE8allocateERS3_m = comdat any - -$_ZN9__gnu_cxx13new_allocatorIP5INodeE8allocateEmPKv = comdat any - -$_ZNK9__gnu_cxx13new_allocatorIP5INodeE8max_sizeEv = comdat any - -$_ZSt18uninitialized_copyIN9__gnu_cxx17__normal_iteratorIPKP5INodeSt6vectorIS3_SaIS3_EEEEPS3_ET0_T_SC_SB_ = comdat any - -$_ZNSt20__uninitialized_copyILb1EE13__uninit_copyIN9__gnu_cxx17__normal_iteratorIPKP5INodeSt6vectorIS5_SaIS5_EEEEPS5_EET0_T_SE_SD_ = comdat any - -$_ZSt4copyIN9__gnu_cxx17__normal_iteratorIPKP5INodeSt6vectorIS3_SaIS3_EEEEPS3_ET0_T_SC_SB_ = comdat any - -$_ZSt14__copy_move_a2ILb0EN9__gnu_cxx17__normal_iteratorIPKP5INodeSt6vectorIS3_SaIS3_EEEEPS3_ET1_T0_SC_SB_ = comdat any - -$_ZSt12__miter_baseIN9__gnu_cxx17__normal_iteratorIPKP5INodeSt6vectorIS3_SaIS3_EEEEET_SA_ = comdat any - -$_ZSt13__copy_move_aILb0EPKP5INodePS1_ET1_T0_S6_S5_ = comdat any - -$_ZSt12__niter_baseIPKP5INodeSt6vectorIS1_SaIS1_EEET_N9__gnu_cxx17__normal_iteratorIS7_T0_EE = comdat any - -$_ZSt12__niter_baseIPP5INodeET_S3_ = comdat any - -$_ZNSt11__copy_moveILb0ELb1ESt26random_access_iterator_tagE8__copy_mIP5INodeEEPT_PKS5_S8_S6_ = comdat any - -$_ZNK9__gnu_cxx17__normal_iteratorIPKP5INodeSt6vectorIS2_SaIS2_EEE4baseEv = comdat any - -$_ZN9__gnu_cxx17__normal_iteratorIPKP5INodeSt6vectorIS2_SaIS2_EEEC2ERKS4_ = comdat any - -$_ZN9__gnu_cxx5__ops15_Iter_comp_iterI7NodeCmpEC2ES2_ = comdat any - -$_ZSt11__make_heapIN9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS3_SaIS3_EEEENS0_5__ops15_Iter_comp_iterI7NodeCmpEEEvT_SD_RT0_ = comdat any - -$_ZN9__gnu_cxxmiIPP5INodeSt6vectorIS2_SaIS2_EEEENS_17__normal_iteratorIT_T0_E15difference_typeERKSA_SD_ = comdat any - -$_ZNK9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEplEl = comdat any - -$_ZNK9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEdeEv = comdat any - -$_ZSt13__adjust_heapIN9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS3_SaIS3_EEEElS3_NS0_5__ops15_Iter_comp_iterI7NodeCmpEEEvT_T0_SE_T1_T2_ = comdat any - -$_ZNK9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEE4baseEv = comdat any - -$_ZN9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEC2ERKS3_ = comdat any - -$_ZN9__gnu_cxx5__ops15_Iter_comp_iterI7NodeCmpEclINS_17__normal_iteratorIPP5INodeSt6vectorIS7_SaIS7_EEEESC_EEbT_T0_ = comdat any - -$_ZN9__gnu_cxx5__ops14_Iter_comp_valI7NodeCmpEC2ERKNS0_15_Iter_comp_iterIS2_EE = comdat any - -$_ZSt11__push_heapIN9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS3_SaIS3_EEEElS3_NS0_5__ops14_Iter_comp_valI7NodeCmpEEEvT_T0_SE_T1_RT2_ = comdat any - -$_ZNK7NodeCmpclEPK5INodeS2_ = comdat any - -$_ZN9__gnu_cxx5__ops14_Iter_comp_valI7NodeCmpEclINS_17__normal_iteratorIPP5INodeSt6vectorIS7_SaIS7_EEEES7_EEbT_RT0_ = comdat any - -$_ZNSt6vectorIP5INodeSaIS1_EE9push_backERKS1_ = comdat any - -$_ZSt9push_heapIN9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS3_SaIS3_EEEE7NodeCmpEvT_SA_T0_ = comdat any - -$_ZN9__gnu_cxx14__alloc_traitsISaIP5INodeEE9constructIS2_EEvRS3_PS2_RKT_ = comdat any - -$_ZNSt6vectorIP5INodeSaIS1_EE17_M_realloc_insertEN9__gnu_cxx17__normal_iteratorIPS1_S3_EERKS1_ = comdat any - -$_ZN9__gnu_cxx13new_allocatorIP5INodeE9constructEPS2_RKS2_ = comdat any - -$_ZNKSt6vectorIP5INodeSaIS1_EE12_M_check_lenEmPKc = comdat any - -$_ZSt34__uninitialized_move_if_noexcept_aIPP5INodeS2_SaIS1_EET0_T_S5_S4_RT1_ = comdat any - -$_ZN9__gnu_cxx14__alloc_traitsISaIP5INodeEE7destroyERS3_PS2_ = comdat any - -$_ZNKSt6vectorIP5INodeSaIS1_EE8max_sizeEv = comdat any - -$_ZSt3maxImERKT_S2_S2_ = comdat any - -$_ZN9__gnu_cxx14__alloc_traitsISaIP5INodeEE8max_sizeERKS3_ = comdat any - -$_ZSt22__uninitialized_copy_aIPP5INodeS2_S1_ET0_T_S4_S3_RSaIT1_E = comdat any - -$_ZSt18uninitialized_copyIPP5INodeS2_ET0_T_S4_S3_ = comdat any - -$_ZNSt20__uninitialized_copyILb1EE13__uninit_copyIPP5INodeS4_EET0_T_S6_S5_ = comdat any - -$_ZSt4copyIPP5INodeS2_ET0_T_S4_S3_ = comdat any - -$_ZSt14__copy_move_a2ILb0EPP5INodeS2_ET1_T0_S4_S3_ = comdat any - -$_ZSt12__miter_baseIPP5INodeET_S3_ = comdat any - -$_ZSt13__copy_move_aILb0EPP5INodeS2_ET1_T0_S4_S3_ = comdat any - -$_ZN9__gnu_cxx13new_allocatorIP5INodeE7destroyEPS2_ = comdat any - -$_ZN9__gnu_cxx5__ops14_Iter_comp_valI7NodeCmpEC2ES2_ = comdat any - -$_ZNK9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEmiEl = comdat any - -$_ZNKSt6vectorIP5INodeSaIS1_EE5frontEv = comdat any - -$_ZNK9__gnu_cxx17__normal_iteratorIPKP5INodeSt6vectorIS2_SaIS2_EEEdeEv = comdat any - -$_ZSt8pop_heapIN9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS3_SaIS3_EEEE7NodeCmpEvT_SA_T0_ = comdat any - -$_ZNSt6vectorIP5INodeSaIS1_EE8pop_backEv = comdat any - -$_ZN9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEmmEv = comdat any - -$_ZSt10__pop_heapIN9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS3_SaIS3_EEEENS0_5__ops15_Iter_comp_iterI7NodeCmpEEEvT_SD_SD_RT0_ = comdat any - -$_ZNSt3mapIhSt6vectorIbSaIbEESt4lessIhESaISt4pairIKhS2_EEE11lower_boundERS6_ = comdat any - -$_ZNKSt17_Rb_tree_iteratorISt4pairIKhSt6vectorIbSaIbEEEEeqERKS6_ = comdat any - -$_ZNKSt3mapIhSt6vectorIbSaIbEESt4lessIhESaISt4pairIKhS2_EEE8key_compEv = comdat any - -$_ZNKSt4lessIhEclERKhS2_ = comdat any - -$_ZNKSt17_Rb_tree_iteratorISt4pairIKhSt6vectorIbSaIbEEEEdeEv = comdat any - -$_ZNSt3mapIhSt6vectorIbSaIbEESt4lessIhESaISt4pairIKhS2_EEE6insertESt17_Rb_tree_iteratorIS7_ERKS7_ = comdat any - -$_ZNSt4pairIKhSt6vectorIbSaIbEEEC2ERS0_RKS3_ = comdat any - -$_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE11lower_boundERS1_ = comdat any - -$_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE14_M_lower_boundEPSt13_Rb_tree_nodeIS5_EPSt18_Rb_tree_node_baseRS1_ = comdat any - -$_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE6_M_endEv = comdat any - -$_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE6_S_keyEPKSt13_Rb_tree_nodeIS5_E = comdat any - -$_ZNKSt10_Select1stISt4pairIKhSt6vectorIbSaIbEEEEclERKS5_ = comdat any - -$_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE8_S_valueEPKSt13_Rb_tree_nodeIS5_E = comdat any - -$_ZNKSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE8key_compEv = comdat any - -$_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE17_M_insert_unique_ESt23_Rb_tree_const_iteratorIS5_ERKS5_ = comdat any - -$_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE11_Alloc_nodeC2ERSB_ = comdat any - -$_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE17_M_insert_unique_INSB_11_Alloc_nodeEEESt17_Rb_tree_iteratorIS5_ESt23_Rb_tree_const_iteratorIS5_ERKS5_RT_ = comdat any - -$_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE29_M_get_insert_hint_unique_posESt23_Rb_tree_const_iteratorIS5_ERS1_ = comdat any - -$_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE10_M_insert_INSB_11_Alloc_nodeEEESt17_Rb_tree_iteratorIS5_EPSt18_Rb_tree_node_baseSH_RKS5_RT_ = comdat any - -$_ZNKSt23_Rb_tree_const_iteratorISt4pairIKhSt6vectorIbSaIbEEEE13_M_const_castEv = comdat any - -$_ZNKSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE4sizeEv = comdat any - -$_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE6_S_keyEPKSt18_Rb_tree_node_base = comdat any - -$_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE12_M_rightmostEv = comdat any - -$_ZNSt4pairIPSt18_Rb_tree_node_baseS1_EC2ERKS1_S4_ = comdat any - -$_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE24_M_get_insert_unique_posERS1_ = comdat any - -$_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE11_M_leftmostEv = comdat any - -$_ZNSt17_Rb_tree_iteratorISt4pairIKhSt6vectorIbSaIbEEEEmmEv = comdat any - -$_ZNSt17_Rb_tree_iteratorISt4pairIKhSt6vectorIbSaIbEEEEppEv = comdat any - -$_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE8_S_valueEPKSt18_Rb_tree_node_base = comdat any - -$_ZNKSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE11_Alloc_nodeclIS5_EEPSt13_Rb_tree_nodeIS5_ERKT_ = comdat any - -$_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE14_M_create_nodeERKS5_ = comdat any - -$_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE11_M_get_nodeEv = comdat any - -$_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE17_M_construct_nodeEPSt13_Rb_tree_nodeIS5_ERKS5_ = comdat any - -$_ZN9__gnu_cxx14__alloc_traitsISaISt13_Rb_tree_nodeISt4pairIKhSt6vectorIbSaIbEEEEEE8allocateERS9_m = comdat any - -$_ZN9__gnu_cxx13new_allocatorISt13_Rb_tree_nodeISt4pairIKhSt6vectorIbSaIbEEEEE8allocateEmPKv = comdat any - -$_ZNK9__gnu_cxx13new_allocatorISt13_Rb_tree_nodeISt4pairIKhSt6vectorIbSaIbEEEEE8max_sizeEv = comdat any - -$_ZN9__gnu_cxx13new_allocatorISt4pairIKhSt6vectorIbSaIbEEEE9constructEPS6_RKS6_ = comdat any - -$_ZNSt4pairIKhSt6vectorIbSaIbEEEC2ERKS4_ = comdat any - -$_ZNKSt6vectorIbSaIbEE4sizeEv = comdat any - -$_ZNKSt6vectorIbSaIbEE8capacityEv = comdat any - -$_ZNSt13_Bvector_baseISaIbEE13_M_deallocateEv = comdat any - -$_ZNSt6vectorIbSaIbEE13_M_initializeEm = comdat any - -$_ZNSt6vectorIbSaIbEE15_M_copy_alignedESt19_Bit_const_iteratorS2_St13_Bit_iterator = comdat any - -$_ZNSt6vectorIbSaIbEE5beginEv = comdat any - -$_ZNKSt13_Bvector_baseISaIbEE13_Bvector_impl11_M_end_addrEv = comdat any - -$_ZSt11__addressofImEPT_RS0_ = comdat any - -$_ZN9__gnu_cxx14__alloc_traitsISaImEE10deallocateERS1_Pmm = comdat any - -$_ZN9__gnu_cxx13new_allocatorImE10deallocateEPmm = comdat any - -$_ZNSt13_Bvector_baseISaIbEE11_M_allocateEm = comdat any - -$_ZNSt13_Bvector_baseISaIbEE8_S_nwordEm = comdat any - -$_ZNSt13_Bit_iteratorC2EPmj = comdat any - -$_ZNKSt13_Bit_iteratorplEl = comdat any - -$_ZN9__gnu_cxx14__alloc_traitsISaImEE8allocateERS1_m = comdat any - -$_ZN9__gnu_cxx13new_allocatorImE8allocateEmPKv = comdat any - -$_ZNK9__gnu_cxx13new_allocatorImE8max_sizeEv = comdat any - -$_ZNSt13_Bit_iteratorpLEl = comdat any - -$_ZNSt18_Bit_iterator_base7_M_incrEl = comdat any - -$_ZSt4copyIPmS0_ET0_T_S2_S1_ = comdat any - -$_ZSt4copyISt19_Bit_const_iteratorSt13_Bit_iteratorET0_T_S3_S2_ = comdat any - -$_ZSt14__copy_move_a2ILb0EPmS0_ET1_T0_S2_S1_ = comdat any - -$_ZSt12__miter_baseIPmET_S1_ = comdat any - -$_ZSt13__copy_move_aILb0EPmS0_ET1_T0_S2_S1_ = comdat any - -$_ZSt12__niter_baseIPmET_S1_ = comdat any - -$_ZNSt11__copy_moveILb0ELb1ESt26random_access_iterator_tagE8__copy_mImEEPT_PKS3_S6_S4_ = comdat any - -$_ZSt14__copy_move_a2ILb0ESt19_Bit_const_iteratorSt13_Bit_iteratorET1_T0_S3_S2_ = comdat any - -$_ZSt12__miter_baseISt19_Bit_const_iteratorET_S1_ = comdat any - -$_ZSt13__copy_move_aILb0ESt19_Bit_const_iteratorSt13_Bit_iteratorET1_T0_S3_S2_ = comdat any - -$_ZSt12__niter_baseISt19_Bit_const_iteratorET_S1_ = comdat any - -$_ZSt12__niter_baseISt13_Bit_iteratorET_S1_ = comdat any - -$_ZNSt11__copy_moveILb0ELb0ESt26random_access_iterator_tagE8__copy_mISt19_Bit_const_iteratorSt13_Bit_iteratorEET0_T_S6_S5_ = comdat any - -$_ZNKSt13_Bit_iteratordeEv = comdat any - -$_ZNSt14_Bit_referenceaSEb = comdat any - -$_ZNSt19_Bit_const_iteratorppEv = comdat any - -$_ZNSt13_Bit_iteratorppEv = comdat any - -$_ZNSt18_Bit_iterator_base10_M_bump_upEv = comdat any - -$_ZN9__gnu_cxx14__alloc_traitsISaImEE17_S_select_on_copyERKS1_ = comdat any - -$_ZNKSt13_Bvector_baseISaIbEE20_M_get_Bit_allocatorEv = comdat any - -$_ZNSaIbEC2ImEERKSaIT_E = comdat any - -$_ZNSt13_Bvector_baseISaIbEEC2ERKS0_ = comdat any - -$_ZNSaIbED2Ev = comdat any - -$_ZNSt13_Bvector_baseISaIbEED2Ev = comdat any - -$_ZN9__gnu_cxx13new_allocatorIbEC2Ev = comdat any - -$_ZNSaImEC2IbEERKSaIT_E = comdat any - -$_ZNSt13_Bvector_baseISaIbEE13_Bvector_implC2ERKSaImE = comdat any - -$_ZNSaImED2Ev = comdat any - -$_ZNSaImEC2ERKS_ = comdat any - -$_ZN9__gnu_cxx13new_allocatorImEC2ERKS1_ = comdat any - -$_ZN9__gnu_cxx13new_allocatorIbED2Ev = comdat any - -$_ZNSt13_Bvector_baseISaIbEE13_Bvector_implD2Ev = comdat any - -$_ZNSt13_Bit_iteratorppEi = comdat any - -$_ZNSt6vectorIbSaIbEE13_M_insert_auxESt13_Bit_iteratorb = comdat any - -$_ZNSt6vectorIbSaIbEE3endEv = comdat any - -$_ZSt13copy_backwardISt13_Bit_iteratorS0_ET0_T_S2_S1_ = comdat any - -$_ZNKSt6vectorIbSaIbEE12_M_check_lenEmPKc = comdat any - -$_ZSt4copyISt13_Bit_iteratorS0_ET0_T_S2_S1_ = comdat any - -$_ZSt23__copy_move_backward_a2ILb0ESt13_Bit_iteratorS0_ET1_T0_S2_S1_ = comdat any - -$_ZSt12__miter_baseISt13_Bit_iteratorET_S1_ = comdat any - -$_ZSt22__copy_move_backward_aILb0ESt13_Bit_iteratorS0_ET1_T0_S2_S1_ = comdat any - -$_ZNSt20__copy_move_backwardILb0ELb0ESt26random_access_iterator_tagE13__copy_move_bISt13_Bit_iteratorS3_EET0_T_S5_S4_ = comdat any - -$_ZNSt13_Bit_iteratormmEv = comdat any - -$_ZNSt14_Bit_referenceaSERKS_ = comdat any - -$_ZNSt18_Bit_iterator_base12_M_bump_downEv = comdat any - -$_ZNKSt6vectorIbSaIbEE8max_sizeEv = comdat any - -$_ZN9__gnu_cxx14__alloc_traitsISaImEE8max_sizeERKS1_ = comdat any - -$_ZSt14__copy_move_a2ILb0ESt13_Bit_iteratorS0_ET1_T0_S2_S1_ = comdat any - -$_ZSt13__copy_move_aILb0ESt13_Bit_iteratorS0_ET1_T0_S2_S1_ = comdat any - -$_ZNSt11__copy_moveILb0ELb0ESt26random_access_iterator_tagE8__copy_mISt13_Bit_iteratorS3_EET0_T_S5_S4_ = comdat any - -$_ZTS5INode = comdat any - -$_ZTI5INode = comdat any - -$_ZTS8LeafNode = comdat any - -$_ZTI8LeafNode = comdat any - -$_ZTS12InternalNode = comdat any - -$_ZTI12InternalNode = comdat any - -$_ZTV8LeafNode = comdat any - -$_ZTV5INode = comdat any - -$_ZTV12InternalNode = comdat any - -@.str = private unnamed_addr constant [19 x i8] c"CUDA initialized.\0A\00", align 1 -@_ZStL8__ioinit = internal global %"class.std::ios_base::Init" zeroinitializer, align 1 -@__dso_handle = external hidden global i8 -@.str.1 = private unnamed_addr constant [3 x i8] c"rb\00", align 1 -@.str.2 = private unnamed_addr constant [23 x i8] c"Cannot read input file\00", align 1 -@stderr = external dso_local global %struct._IO_FILE*, align 8 -@.str.3 = private unnamed_addr constant [10 x i8] c"./hist.cu\00", align 1 -@_ZTVN10__cxxabiv117__class_type_infoE = external dso_local global i8* -@_ZTS5INode = linkonce_odr dso_local constant [7 x i8] c"5INode\00", comdat, align 1 -@_ZTI5INode = linkonce_odr dso_local constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv117__class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([7 x i8], [7 x i8]* @_ZTS5INode, i32 0, i32 0) }, comdat, align 8 -@_ZTVN10__cxxabiv120__si_class_type_infoE = external dso_local global i8* -@_ZTS8LeafNode = linkonce_odr dso_local constant [10 x i8] c"8LeafNode\00", comdat, align 1 -@_ZTI8LeafNode = linkonce_odr dso_local constant { i8*, i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @_ZTS8LeafNode, i32 0, i32 0), i8* bitcast ({ i8*, i8* }* @_ZTI5INode to i8*) }, comdat, align 8 -@_ZTS12InternalNode = linkonce_odr dso_local constant [15 x i8] c"12InternalNode\00", comdat, align 1 -@_ZTI12InternalNode = linkonce_odr dso_local constant { i8*, i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([15 x i8], [15 x i8]* @_ZTS12InternalNode, i32 0, i32 0), i8* bitcast ({ i8*, i8* }* @_ZTI5INode to i8*) }, comdat, align 8 -@.str.4 = private unnamed_addr constant [27 x i8] c"CUDA! Starting VLC Tests!\0A\00", align 1 -@.str.5 = private unnamed_addr constant [98 x i8] c"Parameters: num_elements: %d, num_blocks: %d, num_block_threads: %d\0A----------------------------\0A\00", align 1 -@.str.6 = private unnamed_addr constant [42 x i8] c"Cuda error in file '%s' in line %i : %s.\0A\00", align 1 -@.str.7 = private unnamed_addr constant [16 x i8] c"main_test_cu.cu\00", align 1 -@.str.8 = private unnamed_addr constant [34 x i8] c"CPU Encoding time (CPU): %f (ms)\0A\00", align 1 -@.str.9 = private unnamed_addr constant [23 x i8] c"CPU Encoded to %d [B]\0A\00", align 1 -@.str.10 = private unnamed_addr constant [40 x i8] c"Num_blocks to be passed to scan is %d.\0A\00", align 1 -@.str.11 = private unnamed_addr constant [46 x i8] c"Cuda error: %s in file '%s' in line %i : %s.\0A\00", align 1 -@.str.12 = private unnamed_addr constant [31 x i8] c"Pack2 Kernel execution failed\0A\00", align 1 -@.str.13 = private unnamed_addr constant [21 x i8] c"GPUassert: %s %s %d\0A\00", align 1 -@_ZTV8LeafNode = linkonce_odr dso_local unnamed_addr constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* bitcast ({ i8*, i8*, i8* }* @_ZTI8LeafNode to i8*), i8* bitcast (void (%class.LeafNode*)* @_ZN8LeafNodeD2Ev to i8*), i8* bitcast (void (%class.LeafNode*)* @_ZN8LeafNodeD0Ev to i8*)] }, comdat, align 8 -@_ZTV5INode = linkonce_odr dso_local unnamed_addr constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* bitcast ({ i8*, i8* }* @_ZTI5INode to i8*), i8* bitcast (void (%class.INode*)* @_ZN5INodeD2Ev to i8*), i8* bitcast (void (%class.INode*)* @_ZN5INodeD0Ev to i8*)] }, comdat, align 8 -@_ZTV12InternalNode = linkonce_odr dso_local unnamed_addr constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* bitcast ({ i8*, i8*, i8* }* @_ZTI12InternalNode to i8*), i8* bitcast (void (%class.InternalNode*)* @_ZN12InternalNodeD2Ev to i8*), i8* bitcast (void (%class.InternalNode*)* @_ZN12InternalNodeD0Ev to i8*)] }, comdat, align 8 -@.str.14 = private unnamed_addr constant [15 x i8] c"No input file\0A\00", align 1 -@.str.15 = private unnamed_addr constant [28 x i8] c"\0A%s, %u bytes, entropy %f\0A\0A\00", align 1 -@_ZL18g_numEltsAllocated = internal global i32 0, align 4 -@.str.16 = private unnamed_addr constant [24 x i8] c"g_numEltsAllocated == 0\00", align 1 -@.str.17 = private unnamed_addr constant [10 x i8] c"./scan.cu\00", align 1 -@__PRETTY_FUNCTION__._ZL17preallocBlockSumsj = private unnamed_addr constant [37 x i8] c"void preallocBlockSums(unsigned int)\00", align 1 -@_ZL15g_scanBlockSums = internal global i32** null, align 8 -@_ZL20g_numLevelsAllocated = internal global i32 0, align 4 -@.str.18 = private unnamed_addr constant [18 x i8] c"preallocBlockSums\00", align 1 -@.str.19 = private unnamed_addr constant [37 x i8] c"prescanArrayRecursive before kernels\00", align 1 -@.str.20 = private unnamed_addr constant [21 x i8] c"prescanWithBlockSums\00", align 1 -@.str.21 = private unnamed_addr constant [24 x i8] c"prescanNP2WithBlockSums\00", align 1 -@.str.22 = private unnamed_addr constant [11 x i8] c"uniformAdd\00", align 1 -@.str.23 = private unnamed_addr constant [8 x i8] c"prescan\00", align 1 -@.str.24 = private unnamed_addr constant [11 x i8] c"prescanNP2\00", align 1 -@.str.25 = private unnamed_addr constant [17 x i8] c"deallocBlockSums\00", align 1 -@.str.26 = private unnamed_addr constant [26 x i8] c"vector::_M_realloc_insert\00", align 1 -@.str.27 = private unnamed_addr constant [28 x i8] c"vector::_M_insert_aux\00", align 1 -@.str.28 = private unnamed_addr constant [21 x i8] c"Comparing vectors: \0A\00", align 1 -@.str.29 = private unnamed_addr constant [36 x i8] c"Diff: data1[%d]=%d, data1[%d]=%d.\0A\00", align 1 -@.str.30 = private unnamed_addr constant [29 x i8] c"PASS! vectors are matching!\0A\00", align 1 -@.str.31 = private unnamed_addr constant [33 x i8] c"FAIL! vectors are NOT matching!\0A\00", align 1 -@0 = private unnamed_addr constant [22 x i8] c"_Z12histo_kernelPhlPj\00", align 1 -@1 = private unnamed_addr constant [50 x i8] c"_ZL26vlc_encode_kernel_sm64huffPjPKjS1_S_S_S_S_S_\00", align 1 -@2 = private unnamed_addr constant [34 x i8] c"_ZL7prescanILb1ELb0EEvPjPKjS0_iii\00", align 1 -@3 = private unnamed_addr constant [34 x i8] c"_ZL7prescanILb1ELb1EEvPjPKjS0_iii\00", align 1 -@4 = private unnamed_addr constant [23 x i8] c"_ZL10uniformAddPjS_iii\00", align 1 -@5 = private unnamed_addr constant [34 x i8] c"_ZL7prescanILb0ELb0EEvPjPKjS0_iii\00", align 1 -@6 = private unnamed_addr constant [34 x i8] c"_ZL7prescanILb0ELb1EEvPjPKjS0_iii\00", align 1 -@7 = private unnamed_addr constant [19 x i8] c"_ZL5pack2PjS_S_S_j\00", align 1 -@8 = private constant [176489 x i8] c"P\EDU\BA\01\00\10\00X\B1\02\00\00\00\00\00\02\00\01\01@\00\00\00h\82\02\00\00\00\00\00\00\00\00\00\00\00\00\00\07\00\01\00=\00\00\00\00\00\00\00\00\00\00\00\11\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\7FELF\02\01\013\07\00\00\00\00\00\00\00\02\00\BE\00e\00\00\00\00\00\00\00\00\00\00\00\C0\81\02\00\00\00\00\00\80x\02\00\00\00\00\00=\05=\00@\008\00\03\00@\00%\00\01\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00.text._ZL7prescanILb0ELb1EEvPjPKjS0_iii\00.nv.info._ZL7prescanILb0ELb1EEvPjPKjS0_iii\00.nv.shared._ZL7prescanILb0ELb1EEvPjPKjS0_iii\00.nv.global\00.nv.constant0._ZL7prescanILb0ELb1EEvPjPKjS0_iii\00.text._ZL7prescanILb0ELb0EEvPjPKjS0_iii\00.nv.info._ZL7prescanILb0ELb0EEvPjPKjS0_iii\00.nv.shared._ZL7prescanILb0ELb0EEvPjPKjS0_iii\00.nv.constant0._ZL7prescanILb0ELb0EEvPjPKjS0_iii\00.text._ZL7prescanILb1ELb1EEvPjPKjS0_iii\00.nv.info._ZL7prescanILb1ELb1EEvPjPKjS0_iii\00.nv.shared._ZL7prescanILb1ELb1EEvPjPKjS0_iii\00.nv.constant0._ZL7prescanILb1ELb1EEvPjPKjS0_iii\00.text._ZL7prescanILb1ELb0EEvPjPKjS0_iii\00.nv.info._ZL7prescanILb1ELb0EEvPjPKjS0_iii\00.nv.shared._ZL7prescanILb1ELb0EEvPjPKjS0_iii\00.nv.constant0._ZL7prescanILb1ELb0EEvPjPKjS0_iii\00.text._ZL10uniformAddPjS_iii\00.nv.info._ZL10uniformAddPjS_iii\00.nv.shared._ZL10uniformAddPjS_iii\00.nv.constant0._ZL10uniformAddPjS_iii\00.text._ZL5pack2PjS_S_S_j\00.nv.info._ZL5pack2PjS_S_S_j\00.nv.shared._ZL5pack2PjS_S_S_j\00.nv.constant0._ZL5pack2PjS_S_S_j\00.text._ZL26vlc_encode_kernel_sm64huffPjPKjS1_S_S_S_S_S_\00.nv.info._ZL26vlc_encode_kernel_sm64huffPjPKjS1_S_S_S_S_S_\00.nv.shared._ZL26vlc_encode_kernel_sm64huffPjPKjS1_S_S_S_S_S_\00.nv.constant0._ZL26vlc_encode_kernel_sm64huffPjPKjS1_S_S_S_S_S_\00.text._Z12histo_kernelPhlPj\00.nv.info._Z12histo_kernelPhlPj\00.nv.shared._Z12histo_kernelPhlPj\00.nv.constant0._Z12histo_kernelPhlPj\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00_ZL7prescanILb0ELb1EEvPjPKjS0_iii\00.text._ZL7prescanILb0ELb1EEvPjPKjS0_iii\00.nv.info._ZL7prescanILb0ELb1EEvPjPKjS0_iii\00.nv.shared._ZL7prescanILb0ELb1EEvPjPKjS0_iii\00.nv.global\00threadIdx\00blockIdx\00blockDim\00gridDim\00$_ZL7prescanILb0ELb1EEvPjPKjS0_iii$_ZL12prescanBlockILb0EEvPjiS0_\00$_ZL7prescanILb0ELb1EEvPjPKjS0_iii$_ZL16clearLastElementILb0EEvPjS0_i\00$_ZL7prescanILb0ELb1EEvPjPKjS0_iii$_ZL16scanRootToLeavesPjj\00$_ZL7prescanILb0ELb1EEvPjPKjS0_iii$_ZL21storeSharedChunkToMemILb1EEvPjPKjiiiiiii\00$_ZL7prescanILb0ELb1EEvPjPKjS0_iii$_ZL22loadSharedChunkFromMemILb1EEvPjPKjiiRiS3_S3_S3_S3_S3_\00$_ZL7prescanILb0ELb1EEvPjPKjS0_iii$_ZL8buildSumPj\00$___ZZL7prescanILb0ELb1EEvPjPKjS0_iiiE6s_data__2725\00.nv.constant0._ZL7prescanILb0ELb1EEvPjPKjS0_iii\00_param\00_ZL7prescanILb0ELb0EEvPjPKjS0_iii\00.text._ZL7prescanILb0ELb0EEvPjPKjS0_iii\00.nv.info._ZL7prescanILb0ELb0EEvPjPKjS0_iii\00.nv.shared._ZL7prescanILb0ELb0EEvPjPKjS0_iii\00$_ZL7prescanILb0ELb0EEvPjPKjS0_iii$_ZL12prescanBlockILb0EEvPjiS0_\00$_ZL7prescanILb0ELb0EEvPjPKjS0_iii$_ZL16clearLastElementILb0EEvPjS0_i\00$_ZL7prescanILb0ELb0EEvPjPKjS0_iii$_ZL16scanRootToLeavesPjj\00$_ZL7prescanILb0ELb0EEvPjPKjS0_iii$_ZL21storeSharedChunkToMemILb0EEvPjPKjiiiiiii\00$_ZL7prescanILb0ELb0EEvPjPKjS0_iii$_ZL22loadSharedChunkFromMemILb0EEvPjPKjiiRiS3_S3_S3_S3_S3_\00$_ZL7prescanILb0ELb0EEvPjPKjS0_iii$_ZL8buildSumPj\00$___ZZL7prescanILb0ELb0EEvPjPKjS0_iiiE6s_data__2426\00.nv.constant0._ZL7prescanILb0ELb0EEvPjPKjS0_iii\00_ZL7prescanILb1ELb1EEvPjPKjS0_iii\00.text._ZL7prescanILb1ELb1EEvPjPKjS0_iii\00.nv.info._ZL7prescanILb1ELb1EEvPjPKjS0_iii\00.nv.shared._ZL7prescanILb1ELb1EEvPjPKjS0_iii\00$_ZL7prescanILb1ELb1EEvPjPKjS0_iii$_ZL12prescanBlockILb1EEvPjiS0_\00$_ZL7prescanILb1ELb1EEvPjPKjS0_iii$_ZL16clearLastElementILb1EEvPjS0_i\00$_ZL7prescanILb1ELb1EEvPjPKjS0_iii$_ZL16scanRootToLeavesPjj\00$_ZL7prescanILb1ELb1EEvPjPKjS0_iii$_ZL21storeSharedChunkToMemILb1EEvPjPKjiiiiiii\00$_ZL7prescanILb1ELb1EEvPjPKjS0_iii$_ZL22loadSharedChunkFromMemILb1EEvPjPKjiiRiS3_S3_S3_S3_S3_\00$_ZL7prescanILb1ELb1EEvPjPKjS0_iii$_ZL8buildSumPj\00$___ZZL7prescanILb1ELb1EEvPjPKjS0_iiiE6s_data__2059\00.nv.constant0._ZL7prescanILb1ELb1EEvPjPKjS0_iii\00_ZL7prescanILb1ELb0EEvPjPKjS0_iii\00.text._ZL7prescanILb1ELb0EEvPjPKjS0_iii\00.nv.info._ZL7prescanILb1ELb0EEvPjPKjS0_iii\00.nv.shared._ZL7prescanILb1ELb0EEvPjPKjS0_iii\00$_ZL7prescanILb1ELb0EEvPjPKjS0_iii$_ZL12prescanBlockILb1EEvPjiS0_\00$_ZL7prescanILb1ELb0EEvPjPKjS0_iii$_ZL16clearLastElementILb1EEvPjS0_i\00$_ZL7prescanILb1ELb0EEvPjPKjS0_iii$_ZL16scanRootToLeavesPjj\00$_ZL7prescanILb1ELb0EEvPjPKjS0_iii$_ZL21storeSharedChunkToMemILb0EEvPjPKjiiiiiii\00$_ZL7prescanILb1ELb0EEvPjPKjS0_iii$_ZL22loadSharedChunkFromMemILb0EEvPjPKjiiRiS3_S3_S3_S3_S3_\00$_ZL7prescanILb1ELb0EEvPjPKjS0_iii$_ZL8buildSumPj\00$___ZZL7prescanILb1ELb0EEvPjPKjS0_iiiE6s_data__1377\00.nv.constant0._ZL7prescanILb1ELb0EEvPjPKjS0_iii\00_ZL10uniformAddPjS_iii\00.text._ZL10uniformAddPjS_iii\00.nv.info._ZL10uniformAddPjS_iii\00.nv.shared._ZL10uniformAddPjS_iii\00$___ZZL10uniformAddPjS_iiiE3uni__1283\00.nv.constant0._ZL10uniformAddPjS_iii\00_ZL5pack2PjS_S_S_j\00.text._ZL5pack2PjS_S_S_j\00.nv.info._ZL5pack2PjS_S_S_j\00.nv.shared._ZL5pack2PjS_S_S_j\00$_ZL5pack2PjS_S_S_j$_ZL8atomicOrPjj\00.nv.constant0._ZL5pack2PjS_S_S_j\00_ZL26vlc_encode_kernel_sm64huffPjPKjS1_S_S_S_S_S_\00.text._ZL26vlc_encode_kernel_sm64huffPjPKjS1_S_S_S_S_S_\00.nv.info._ZL26vlc_encode_kernel_sm64huffPjPKjS1_S_S_S_S_S_\00.nv.shared._ZL26vlc_encode_kernel_sm64huffPjPKjS1_S_S_S_S_S_\00$_ZL26vlc_encode_kernel_sm64huffPjPKjS1_S_S_S_S_S_$_ZL8atomicOrPjj\00$___ZZL26vlc_encode_kernel_sm64huffPjPKjS1_S_S_S_S_S_E2sm__437\00$___ZZL26vlc_encode_kernel_sm64huffPjPKjS1_S_S_S_S_S_E5kcmax__439\00.nv.constant0._ZL26vlc_encode_kernel_sm64huffPjPKjS1_S_S_S_S_S_\00_Z12histo_kernelPhlPj\00.text._Z12histo_kernelPhlPj\00.nv.info._Z12histo_kernelPhlPj\00.nv.shared._Z12histo_kernelPhlPj\00$_Z12histo_kernelPhlPj$_ZL9atomicAddPjj\00$___ZZ12histo_kernelPhlPjE4temp__294\00.nv.constant0._Z12histo_kernelPhlPj\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00T\00\00\00\03\00\15\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\A7\00\00\00\03\00\1D\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\D4\00\00\00\03\00\1E\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\DF\00\00\00\01\00\1E\00\01\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\E9\00\00\00\01\00\1E\00\03\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\F2\00\00\00\01\00\1E\00\00\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\FB\00\00\00\01\00\1E\00\02\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\03\01\00\00\02\00\15\00\80\0F\00\00\00\00\00\00\A0\06\00\00\00\00\00\00E\01\00\00\02\00\15\00 \16\00\00\00\00\00\00\10\05\00\00\00\00\00\00\8B\01\00\00\02\00\15\000\1B\00\00\00\00\00\00\A0\14\00\00\00\00\00\00\C7\01\00\00\02\00\15\00\D0/\00\00\00\00\00\00p\0C\00\00\00\00\00\00\18\02\00\00\02\00\15\00@<\00\00\00\00\00\00\F0\18\00\00\00\00\00\00v\02\00\00\02\00\15\000U\00\00\00\00\00\00\90\11\00\00\00\00\00\00\DC\02\00\00\03\00\0D\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\005\03\00\00\03\00\16\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\88\03\00\00\03\00\1F\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\B5\03\00\00\02\00\16\00\80\0F\00\00\00\00\00\00\A0\06\00\00\00\00\00\00\F7\03\00\00\02\00\16\00 \16\00\00\00\00\00\00\10\05\00\00\00\00\00\00=\04\00\00\02\00\16\000\1B\00\00\00\00\00\00\A0\14\00\00\00\00\00\00y\04\00\00\02\00\16\00\D0/\00\00\00\00\00\00\88\0B\00\00\00\00\00\00\CA\04\00\00\02\00\16\00X;\00\00\00\00\00\00(\17\00\00\00\00\00\00(\05\00\00\02\00\16\00\80R\00\00\00\00\00\00\80\11\00\00\00\00\00\00\8E\05\00\00\03\00\0E\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\E0\05\00\00\03\00\17\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\003\06\00\00\03\00 \00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00`\06\00\00\02\00\17\00\80\0F\00\00\00\00\00\00\A0\06\00\00\00\00\00\00\A2\06\00\00\02\00\17\00 \16\00\00\00\00\00\00\F8\07\00\00\00\00\00\00\E8\06\00\00\02\00\17\00\18\1E\00\00\00\00\00\00\A0\14\00\00\00\00\00\00$\07\00\00\02\00\17\00\B82\00\00\00\00\00\00x\0C\00\00\00\00\00\00u\07\00\00\02\00\17\000?\00\00\00\00\00\00\E8\18\00\00\00\00\00\00\D3\07\00\00\02\00\17\00\18X\00\00\00\00\00\00h\11\00\00\00\00\00\009\08\00\00\03\00\0F\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\8B\08\00\00\03\00\18\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\DE\08\00\00\03\00!\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\0B\09\00\00\02\00\18\00\80\0F\00\00\00\00\00\00\A0\06\00\00\00\00\00\00M\09\00\00\02\00\18\00 \16\00\00\00\00\00\00\F8\07\00\00\00\00\00\00\93\09\00\00\02\00\18\00\18\1E\00\00\00\00\00\00\A0\14\00\00\00\00\00\00\CF\09\00\00\02\00\18\00\B82\00\00\00\00\00\00\88\0B\00\00\00\00\00\00 \0A\00\00\02\00\18\00@>\00\00\00\00\00\000\17\00\00\00\00\00\00~\0A\00\00\02\00\18\00pU\00\00\00\00\00\00\90\11\00\00\00\00\00\00\E4\0A\00\00\03\00\10\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00+\0B\00\00\03\00\19\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00h\0B\00\00\03\00\22\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\B0\0B\00\00\03\00\11\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\E8\0B\00\00\03\00\1A\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00;\0C\00\00\02\00\1A\00\98)\00\00\00\00\00\00h\04\00\00\00\00\00\00_\0C\00\00\03\00\12\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\B2\0C\00\00\03\00\1B\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00%\0D\00\00\03\00#\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00b\0D\00\00\02\00\1B\00\B0Z\00\00\00\00\00\00\90\04\00\00\00\00\00\00&\0E\00\00\03\00\13\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00|\0E\00\00\03\00\1C\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\B7\0E\00\00\03\00$\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\D8\0E\00\00\02\00\1C\00`\0C\00\00\00\00\00\00\A0\04\00\00\00\00\00\00%\0F\00\00\03\00\14\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\002\00\00\00\12\10\15\00\00\00\00\00\00\00\00\00\C0f\00\00\00\00\00\00\13\03\00\00\12\10\16\00\00\00\00\00\00\00\00\00\00d\00\00\00\00\00\00\BE\05\00\00\12\10\17\00\00\00\00\00\00\00\00\00\80i\00\00\00\00\00\00i\08\00\00\12\10\18\00\00\00\00\00\00\00\00\00\00g\00\00\00\00\00\00\14\0B\00\00\12\10\19\00\00\00\00\00\00\00\00\00\00\0F\00\00\00\00\00\00\D5\0B\00\00\12\10\1A\00\00\00\00\00\00\00\00\00\00.\00\00\00\00\00\00\80\0C\00\00\12\10\1B\00\00\00\00\00\00\00\00\00@_\00\00\00\00\00\00f\0E\00\00\12\10\1C\00\00\00\00\00\00\00\00\00\00\11\00\00\00\00\00\00\04/\08\00?\00\00\00\0C\00\00\00\04#\08\006\00\00\00\00\00\00\00\04\12\08\006\00\00\00\00\00\00\00\04\11\08\006\00\00\00\00\00\00\00\04#\08\00?\00\00\00\00\00\00\00\04\12\08\00?\00\00\00@\00\00\00\04\11\08\00?\00\00\00@\00\00\00\04/\08\00>\00\00\00\18\00\00\00\04#\08\002\00\00\00\00\00\00\00\04\12\08\002\00\00\00\00\00\00\00\04\11\08\002\00\00\00\00\00\00\00\04#\08\00>\00\00\00\00\00\00\00\04\12\08\00>\00\00\00\C8\00\00\00\04\11\08\00>\00\00\00\C8\00\00\00\04/\08\00=\00\00\00\0F\00\00\00\04#\08\00.\00\00\00\00\00\00\00\04\12\08\00.\00\00\00\00\00\00\00\04\11\08\00.\00\00\00\00\00\00\00\04#\08\00=\00\00\00\00\00\00\00\04\12\08\00=\00\00\00h\00\00\00\04\11\08\00=\00\00\00h\00\00\00\04/\08\00<\00\00\00\0E\00\00\00\04#\08\00<\00\00\00\00\00\00\00\04\12\08\00<\00\00\00(\00\00\00\04\11\08\00<\00\00\00(\00\00\00\04/\08\00;\00\00\00\1C\00\00\00\04#\08\00(\00\00\00\00\00\00\00\04\12\08\00(\00\00\00\00\00\00\00\04\11\08\00(\00\00\00\00\00\00\00\04#\08\00'\00\00\00\00\00\00\00\04\12\08\00'\00\00\00\00\00\00\00\04\11\08\00'\00\00\00\00\00\00\00\04#\08\00&\00\00\00\00\00\00\00\04\12\08\00&\00\00\00\00\00\00\00\04\11\08\00&\00\00\00\00\00\00\00\04#\08\00%\00\00\00\00\00\00\00\04\12\08\00%\00\00\00\00\00\00\00\04\11\08\00%\00\00\00\00\00\00\00\04#\08\00$\00\00\00\00\00\00\00\04\12\08\00$\00\00\00\00\00\00\00\04\11\08\00$\00\00\00\00\00\00\00\04#\08\00#\00\00\00\00\00\00\00\04\12\08\00#\00\00\00\00\00\00\00\04\11\08\00#\00\00\00\00\00\00\00\04#\08\00;\00\00\00\00\00\00\00\04\12\08\00;\00\00\00\A0\00\00\00\04\11\08\00;\00\00\00\A0\00\00\00\04/\08\00:\00\00\00\1C\00\00\00\04#\08\00\1F\00\00\00\00\00\00\00\04\12\08\00\1F\00\00\00\00\00\00\00\04\11\08\00\1F\00\00\00\00\00\00\00\04#\08\00\1E\00\00\00\00\00\00\00\04\12\08\00\1E\00\00\00\00\00\00\00\04\11\08\00\1E\00\00\00\00\00\00\00\04#\08\00\1D\00\00\00\00\00\00\00\04\12\08\00\1D\00\00\00\00\00\00\00\04\11\08\00\1D\00\00\00\00\00\00\00\04#\08\00\1C\00\00\00\00\00\00\00\04\12\08\00\1C\00\00\00\00\00\00\00\04\11\08\00\1C\00\00\00\00\00\00\00\04#\08\00\1B\00\00\00\00\00\00\00\04\12\08\00\1B\00\00\00\00\00\00\00\04\11\08\00\1B\00\00\00\00\00\00\00\04#\08\00\1A\00\00\00\00\00\00\00\04\12\08\00\1A\00\00\00\00\00\00\00\04\11\08\00\1A\00\00\00\00\00\00\00\04#\08\00:\00\00\00\00\00\00\00\04\12\08\00:\00\00\00\A0\00\00\00\04\11\08\00:\00\00\00\A0\00\00\00\04/\08\009\00\00\00\1C\00\00\00\04#\08\00\16\00\00\00\00\00\00\00\04\12\08\00\16\00\00\00\00\00\00\00\04\11\08\00\16\00\00\00\00\00\00\00\04#\08\00\15\00\00\00\00\00\00\00\04\12\08\00\15\00\00\00\00\00\00\00\04\11\08\00\15\00\00\00\00\00\00\00\04#\08\00\14\00\00\00\00\00\00\00\04\12\08\00\14\00\00\00\00\00\00\00\04\11\08\00\14\00\00\00\00\00\00\00\04#\08\00\13\00\00\00\00\00\00\00\04\12\08\00\13\00\00\00\00\00\00\00\04\11\08\00\13\00\00\00\00\00\00\00\04#\08\00\12\00\00\00\00\00\00\00\04\12\08\00\12\00\00\00\00\00\00\00\04\11\08\00\12\00\00\00\00\00\00\00\04#\08\00\11\00\00\00\00\00\00\00\04\12\08\00\11\00\00\00\00\00\00\00\04\11\08\00\11\00\00\00\00\00\00\00\04#\08\009\00\00\00\00\00\00\00\04\12\08\009\00\00\00\A0\00\00\00\04\11\08\009\00\00\00\A0\00\00\00\04/\08\008\00\00\00\1C\00\00\00\04#\08\00\0D\00\00\00\00\00\00\00\04\12\08\00\0D\00\00\00\00\00\00\00\04\11\08\00\0D\00\00\00\00\00\00\00\04#\08\00\0C\00\00\00\00\00\00\00\04\12\08\00\0C\00\00\00\00\00\00\00\04\11\08\00\0C\00\00\00\00\00\00\00\04#\08\00\0B\00\00\00\00\00\00\00\04\12\08\00\0B\00\00\00\00\00\00\00\04\11\08\00\0B\00\00\00\00\00\00\00\04#\08\00\0A\00\00\00\00\00\00\00\04\12\08\00\0A\00\00\00\00\00\00\00\04\11\08\00\0A\00\00\00\00\00\00\00\04#\08\00\09\00\00\00\00\00\00\00\04\12\08\00\09\00\00\00\00\00\00\00\04\11\08\00\09\00\00\00\00\00\00\00\04#\08\00\08\00\00\00\00\00\00\00\04\12\08\00\08\00\00\00\00\00\00\00\04\11\08\00\08\00\00\00\00\00\00\00\04#\08\008\00\00\00\00\00\00\00\04\12\08\008\00\00\00\A0\00\00\00\04\11\08\008\00\00\00\A0\00\00\00\010\00\00\01*\00\00\04\0A\08\00\0E\00\00\00@\01$\00\03\19$\00\04\17\0C\00\00\00\00\00\05\00 \00\00\F0\11\00\04\17\0C\00\00\00\00\00\04\00\1C\00\00\F0\11\00\04\17\0C\00\00\00\00\00\03\00\18\00\00\F0\11\00\04\17\0C\00\00\00\00\00\02\00\10\00\00\F0!\00\04\17\0C\00\00\00\00\00\01\00\08\00\00\F0!\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0!\00\03\1B\FF\00\04\1D\08\00x\06\00\00\10\14\00\00\04\1C\04\00x\0F\00\00\04\1E\04\00`\00\00\00\010\00\00\01*\00\00\04\0A\08\00\17\00\00\00@\01$\00\03\19$\00\04\17\0C\00\00\00\00\00\05\00 \00\00\F0\11\00\04\17\0C\00\00\00\00\00\04\00\1C\00\00\F0\11\00\04\17\0C\00\00\00\00\00\03\00\18\00\00\F0\11\00\04\17\0C\00\00\00\00\00\02\00\10\00\00\F0!\00\04\17\0C\00\00\00\00\00\01\00\08\00\00\F0!\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0!\00\03\1B\FF\00\04\1D\08\00x\06\00\00\10\14\00\00\04\1C\04\00x\0F\00\00\04\1E\04\00`\00\00\00\010\00\00\01*\00\00\04\0A\08\00 \00\00\00@\01$\00\03\19$\00\04\17\0C\00\00\00\00\00\05\00 \00\00\F0\11\00\04\17\0C\00\00\00\00\00\04\00\1C\00\00\F0\11\00\04\17\0C\00\00\00\00\00\03\00\18\00\00\F0\11\00\04\17\0C\00\00\00\00\00\02\00\10\00\00\F0!\00\04\17\0C\00\00\00\00\00\01\00\08\00\00\F0!\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0!\00\03\1B\FF\00\04\1D\08\00x\06\00\00\10\14\00\00\04\1C\04\00x\0F\00\00\04\1E\04\00`\00\00\00\010\00\00\01*\00\00\04\0A\08\00)\00\00\00@\01$\00\03\19$\00\04\17\0C\00\00\00\00\00\05\00 \00\00\F0\11\00\04\17\0C\00\00\00\00\00\04\00\1C\00\00\F0\11\00\04\17\0C\00\00\00\00\00\03\00\18\00\00\F0\11\00\04\17\0C\00\00\00\00\00\02\00\10\00\00\F0!\00\04\17\0C\00\00\00\00\00\01\00\08\00\00\F0!\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0!\00\03\1B\FF\00\04\1D\08\00x\06\00\00\10\14\00\00\04\1C\04\00x\0F\00\00\04\1E\04\00`\00\00\00\010\00\00\01*\00\00\04\0A\08\00,\00\00\00@\01\1C\00\03\19\1C\00\04\17\0C\00\00\00\00\00\04\00\18\00\00\F0\11\00\04\17\0C\00\00\00\00\00\03\00\14\00\00\F0\11\00\04\17\0C\00\00\00\00\00\02\00\10\00\00\F0\11\00\04\17\0C\00\00\00\00\00\01\00\08\00\00\F0!\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0!\00\03\1B\FF\00\04\1D\08\00\D0\04\00\00\B8\06\00\00\04\1C\04\00\B8\0E\00\00\04\1E\04\00 \00\00\00\010\00\00\01*\00\00\04\0A\08\00/\00\00\00@\01$\00\03\19$\00\04\17\0C\00\00\00\00\00\04\00 \00\00\F0\11\00\04\17\0C\00\00\00\00\00\03\00\18\00\00\F0!\00\04\17\0C\00\00\00\00\00\02\00\10\00\00\F0!\00\04\17\0C\00\00\00\00\00\01\00\08\00\00\F0!\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0!\00\03\1B\FF\00\04\1D\04\00\D0\04\00\00\04\1C\04\00\90)\00\00\04\1E\04\00P\00\00\00\010\00\00\01*\00\00\04\0A\08\003\00\00\00@\01@\00\03\19@\00\04\17\0C\00\00\00\00\00\07\008\00\00\F0!\00\04\17\0C\00\00\00\00\00\06\000\00\00\F0!\00\04\17\0C\00\00\00\00\00\05\00(\00\00\F0!\00\04\17\0C\00\00\00\00\00\04\00 \00\00\F0!\00\04\17\0C\00\00\00\00\00\03\00\18\00\00\F0!\00\04\17\0C\00\00\00\00\00\02\00\10\00\00\F0!\00\04\17\0C\00\00\00\00\00\01\00\08\00\00\F0!\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0!\00\03\1B\FF\00\04\1D\08\00\D0\07\00\00X9\00\00\04\1C\04\00\A8Z\00\00\04\1E\04\00P\00\00\00\010\00\00\01*\00\00\04\0A\08\007\00\00\00@\01\18\00\03\19\18\00\04\17\0C\00\00\00\00\00\02\00\10\00\00\F0!\00\04\17\0C\00\00\00\00\00\01\00\08\00\00\F0!\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0!\00\03\1B\FF\00\04\1D\04\00\98\04\00\00\04\1C\04\00X\0C\00\00\04\1E\04\00@\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\EF\1F\E0\FD\03\00\00\18 \00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\E5\04\00\00\01\00\00\00\06\00\10\00\00\00\00\00\00\00\00\00\00\00\00\00\80g\02\00\00\00\00\00\00\11\00\00\00\00\00\00\03\00\00\00?\00\00\0C \00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\85\00\00\00\08\00\00\00\03\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\80x\02\00\00\00\00\00\000\00\00\00\00\00\00\00\00\00\00\15\00\00\00\04\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\B2\00\00\00\08\00\00\00\03\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\80x\02\00\00\00\00\00\04\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00@\01\00\00\08\00\00\00\03\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\80x\02\00\00\00\00\00\000\00\00\00\00\00\00\00\00\00\00\16\00\00\00\04\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\F0\01\00\00\08\00\00\00\03\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\80x\02\00\00\00\00\00\000\00\00\00\00\00\00\00\00\00\00\17\00\00\00\04\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\A0\02\00\00\08\00\00\00\03\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\80x\02\00\00\00\00\00\000\00\00\00\00\00\00\00\00\00\00\18\00\00\00\04\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00:\03\00\00\08\00\00\00\03\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\80x\02\00\00\00\00\00\04\00\00\00\00\00\00\00\00\00\00\00\19\00\00\00\04\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00h\04\00\00\08\00\00\00\03\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\80x\02\00\00\00\00\00\040\00\00\00\00\00\00\00\00\00\00\1B\00\00\00\04\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00 \05\00\00\08\00\00\00\03\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\80x\02\00\00\00\00\00\00\04\00\00\00\00\00\00\00\00\00\00\1C\00\00\00\04\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\06\00\00\00\05\00\00\00\C0\81\02\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\A8\00\00\00\00\00\00\00\A8\00\00\00\00\00\00\00\08\00\00\00\00\00\00\00\01\00\00\00\05\00\00\00\C4$\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\A8S\02\00\00\00\00\00\A8S\02\00\00\00\00\00\08\00\00\00\00\00\00\00\01\00\00\00\06\00\00\00\80x\02\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\040\00\00\00\00\00\00\08\00\00\00\00\00\00\00\01\00\01\01H\00\00\00h.\00\00\00\00\00\00d.\00\00@\00\00\00\04\00\06\00=\00\00\00\00\00\00\00\00\00\00\00\11 \00\00\00\00\00\00\00\00\00\00\00\00\00\00&\18\01\00\00\00\00\00\00\00\00\00\00\00\00\00\F0 \0A\0A\0A\0A.version 6.4\0A.target sm_61\0A.address_size 64.\00\F0\03func (.param .b32 \12\00\F5\0E_retval0) _ZL9atomicAddPjj\0A(\0A-\00-64\1F\00\11_\1C\00H_0,\0AS\00\0F&\00\04_1\0A)\0A;\83\00\12\128>\00/Or\82\00\02\08\1E\00\0F\81\00\06\0E%\00\09\80\00\F8\1F_ZL22loadSharedChunkFromMemILb0EEvPjPKjiiRiS3_\03\00\0E\90\00\0FI\00$\0E\BB\00\0FP\002\1D1\0B\01\0FP\00+\1F2P\00<\1F3\F0\00<\1F4P\00<\1F5P\00<\1F6P\00<\1F7P\00<\1F8P\00<\1A9f\03\F1\0312prescanBlockILb1C\00>iS0J\03\0F-\00\09\0F\E9\03\02\0F4\00\13/1,h\00\1F\1B2,\04g1store\14\01-To\12\01\01\01\00\0E\1F\04\0F<\00\17\0E\12\04\0FC\00%\0E\05\04\0FC\00\1E\1F2C\00/\1F3C\00/\1F4C\00/\1F5C\00/\1F6C\00/\1F7C\00/\1F8@\07\17\9FbuildSumP?\07\01\06\1D\00\04q\02\0B\B3\03\F5\026clearLastElement\22\03>S0_\E2\02\0F1\00\0D\0E\D7\02\0F8\00\1A\0E\CC\02\0F8\00\13\1C2\D6\00\00\F1\03\CERootToLeaves]\08\0F'\00\02\0Ff\08\05\0F.\00\03\0Fo\08!\1F1o\085\1F1o\08<\1F1o\08<\1F1o\08<\1F1o\08<\1F1o\08<\1F1o\08<\1F1o\08<\1F1o\08<\1F1o\08<\1F1o\08\1E\0FU\05\07\1F1\A9\07(\1F1\A9\07/\1F1\A9\07/\1F1\A9\07/\1F1\A9\07/\1F1\A9\07/\1F1\A9\07/\1F1\A9\07/\1F1\A9\07/\1F1\A9\07\0F\0Fn\0A\01\1F0\03\0B\19\1F0\03\0B \1F0\03\0B \1E0\03\0B\0F\16\08\0F\1F0\16\08\1D\1F0\16\08$\1F0\16\08$\1F0\16\08\03\FF\14\0A.global .align 1 .b8 threadIdx[1];#\00\03\11b.\01\0F\22\00\0E?Dim\22\00\07Dgrid!\00\04\01\00o.weak \DC\09\0E\AAcudaMalloco\01\06\18\00\0EW\01\0F \00\02\00\9D\08f{\0A.loc\9E\00\118\9E\00!__\15\00\A0_depot0[16\C7\002regI\00;%SP\0F\00\15L\10\00\8932 %r<2>!\00\D3rd<3>;\0A\0Amov.u2\00\1B,e\00b;\0Acvta\8D\00\04%\00\13,\\\00\22ld\C8\00\02\18\00nrd2, [\CE\00\1E])\00\1F1)\00\01a0];\0Ast#\00\81[%SP+0],,\00\0A\16\00\128\16\00\222;\B6\00\01\D8\00a1, 999(\00\02g\00\00\EF\00\18[\9E\01\03M\00\AF1;\0Aret;\0A\0A}\D6\01\1A\FE\02FuncGetAttributes\E1\01\0D#\00\0E\EC\01\0F+\00\06\0F\F7\01\1B\1F1\F7\01Q\1F1\F7\01!\0E\D9\00\0F\02\02\0F\0E8\01\0F\0D\02\8DhDevice\B4\00\0E\0E\02\0E$\00\0F \06\03\0F,\00\0A\1F1,\00\18\1F2<\02\13\1F2<\02\1F\1D4<\02\1F2<\02\0C\1F2<\02\13\01_\00\04;\02\0F\D9\00\07\1D]4\00\1F14\00\06\0Fp\02\10\0E\9A\01\0Fq\02\12(32q\02\0B\15\00!12\16\00\09\86\02\1F3\86\02\15\1F3\86\02#2Get\CB\00\0E}\02\05\1B\00\07B\10\0F\1C\02\10?3[8W\04.\0F\1B\02\0D\1F3W\04\19\04\B3\01\0D\D0\00\0F\AA\01\06\0F\05\04W\F1\03OccupancyMaxActive\AB\09\FE\03sPerMultiprocessor\9F\01\0F;\00\16\0EB\06\0FC\00%\0EJ\04\0FC\00\1E\1F2\86\00/\1F3\88\02\13O4[32\89\02\1C\1D3\89\02\1F4\89\02\0C\1F4\89\02\19\133\89\02\0F\F1\00\1E\0F\BC\04\00\0FK\00$/2]w\07\00\0FL\00$\0F\1F\05\01\0F\98\00%\0F\A7\07\1D\097\05\186M\05\04,\00\2224-\00\183\CF\03\1F2\CF\03\15\1F2\CF\03L\9FWithFlags\D8\03(\05D\00\0E\E1\03\0FL\00'\0F\EA\030\0CL\00\1F2\98\008\1F3\98\008\1F4H\04\13O5[40\EC\08.\0FH\04\0D\1F5\EC\08\1C\0F\F9\00+\1F]\9C\040\0D\9A\01\0F\A5\040\0D:\02\0F\AE\041\0D\DB\02\0F\B7\041\0D|\03\0F\C0\04I\08-\00\1F3$\0A6\F6\15visible .entry _Z12histo_kernelPhlPj\9E\04\00\98\00\0F#\00\02\0E}\04\0F+\00\0D\1F1+\00\10\0F\8F\0C\1B\1F6\EB\07\18xpred %pu\0A\02\B5\03.19\DE\0E\100\B7\031\09.s\16\14\04\81\00\124\81\00\1FZ\B8\00\01\A0E4temp[102e\03\0F\F1\03\08\1F69\08\1D\0F'\01\02\0F(\03\0C\0F\86\01\02\0F\07\03\0C\0F\E5\01\0A\13]\B6\00#to\F5\12\04:\00\144\AC\02\01\1F\00\0A\1C\00\115\1C\00\1F4;\00\05\146*\03\0F;\00\00\117\1C\00\1F6\\\03\02\1F7\\\03\09)64\\\03+d5\D5\0Ba%tid.xt\00\00-\00\03\19\00#d8\89\03\05\B9\01Ord9,\EB\01\0A\03\B7\00\02\22\02\05H\01\110u\00\00\93\03$hlN\02311,c\00\832;\0Aadd.s\18\00'2,4\00+11\B0\08\170\00\04&rd#\0E\ABbar.sync 0\0B\04B%cta\DD\00\06\17\00\00\9F\01\13n\F3\00\81mul.lo.s\19\00#5,5\00#r4\95\00\01\17\00#6,'\01*r5\7F\04\03\96\04\186]\00\00\A5\01\16nu\00\08_\00#8,}\00+r7H\00\02\AF\01\F2\048;\0Abra.uni LBB6_1;\0A\08\004:\0Al\8C\00Ed13,}\00\01\B4\02\04f\01$4,\FB\01\92;\0Asetp.geU\014p1,9\00\01(\00\A2;\0A@%p1 brag\00\1B3w\00\132w\00'2:_\00422,p\02\17;\8E\00\192\8E\00\07\C8\01424,7\00\01'\00\02\AB\00\118\0D\00!5,\BE\01\013\00\07\13\02426,\1F\00\192u\02/27v\02 228,<\00\0A\97\00(9,\1D\00\09\BA\01\00!\01v1;\0A{ \0A\09\0B\05\00`\00\03\F0\03Ireg;\BC\09\01\0B\00\180j\06\00i\03\02\16\00\04\98\03929;\A4\09\01\0B\00\1C1\9E\06\02\16\00\04=\0F\1A33\00\03\B6\06a;\0Acall\A7\01\14(g\0B<, \0Az(R, \0A(\0A\84\00\22, \09\0071\0A)\DE\04\02\E1\05\01/\02\06\10\07\84;\0A} \0A\09ld\0B\01%6,\9A\02&;\0A\17\00\1D7\EE\01\02\18\00#8,\1E\00\00;\00\0F$\03\02\1F1\DD\02\05)3:\BD\03\07\D1\02%5,\B6\04\08\A7\01\1F9\B0\04\06\111A\02\0Bh\04$7,\1B\00\0Bi\04$8,f\00\01'\00\09r\02\1F1\E8\04!\132\B4\04\1A9r\02(1,\1D\00\187O\01\130\09\03/1]w\02=/18w\02\1F\1F0w\02Q\1F1w\02\00\04\86\09\0F\AB+\1E\0F\AA+<\0F\\\09\10\1F7G\11\1F\1F5\FF\0C\1E\1F7\FF\0C\18\02\A5\08\0E\D3\00\0F\D4\08\0A\0F(\01\03\1F]-\08\03\1F1\D0\03\03\08\AB\03\182\AA\03\05\DF\02\182-\04\0F\0F\0C\00\1F2\05\16\01\182X\00\1A3t\06\05\D5\07\05\E8\06\00\C7\00\00\F4-\06\D9\07\00P\03\143\DF\0B\0F\03\0C\0E\1F4\03\0C\0A\D3L26vlc_encode\05\04\91_sm64huff\05\1FES1_S\02\00\0C\1F\0C\0F?\00\1C\0E;\0C\0FG\00)\1F1G\003\1F2G\003\1F3G\003\1F4G\003\1F5G\003\1F6G\003\1F7z\04\13_8[168\D7\0D\1D-13\D8\0D\1E6\90\04?143\DA\0D\0A\0F\D8\00\1C\BFE2sm[12288]V\00\03\00\AD\03\0FW\00 o5kcmaxH\0E\0A\1F8H\0E\19\128\0C\05\0F\B5\01#\0F\\\05\00\1F7P\00(\1F6P\00\00\1F6P\00(\1F5P\00\00\1F5P\00(\0F%\13\01\1F4P\00(/3]\D8\0F\00\0FP\00(\0F\F4\0F\02\0FP\00(\0F<\07\02\0FP\00(\0F,\10\07\02\91\0C\1F8\F1\0F\02\06:\0F\0F<\00\03\03E\0F\1F7=\00\03\152C\0F\0F>\00\04\143P\10\0F>\00\01\1345\0E\0F>\00\06\145K\10\0F>\00\01\03\C7\10\1F5>\00\06\197\07\11\0B>\00\158,\0B\0F>\00\04\1F9a\11\08\07\87\0B\0F6\01\03\04\92\0B\0F>\00\03\03\B1\0E\0F6\01\06/23\A2\11\08\04\F6\0E\1F3\F1\08\03\1F4\A5\11\03\0F\A6\11\04/20\04\15\03*18\18\00\03\05\15+d1\1A\12\144\0C\0C\1B4\18\00\03x\00\1B1x\00\145\F1\09\09D\11\1F6D\11\03\1F7D\11\05$8,2\11\1F7\B6\0D\04\078\0E$0,2\00\1A9\0D\0A%64\94\0C\09\17\00\02\BB\00\1A9\1F\10;5, +\01\148[\01\0A\EA\12;1, Q\00#10\FD\0C\0A\E7\12\00\8B\10\0F\F9\06#\0F\1A\0E\03\157o\10\09\E9\01\03\C0\12\0D\A5\10\04\BB\10L, 105\02$12N\02\1F8\DA\10\02|7, 20485\00\168\8F\10\07`\0B\170L\0B\05\E6\0FEd31,o\01\09M\0F432, \00\0AM\0F433,P\00\01'\00\08\E5\0E\132\95\0B)3]{\00&4,\05\01\0AL\00$5,!\00\03L\00\07\F2\13\2235\AF\01\0A'\0C\1F6\80\0C\02?d37\C8\00\05$8, \00\0B\C8\00$9,Q\00\01'\00\08\C8\00\133\C8\00\199\C8\00640,\98\01\09L\00441,!\00\03L\00\07\C8\00%41\EB\11\07L\00\1F2\F0\0C\01$d4\F2\0C\1B61\13444, \00\0A\C7\00445,P\00\01'\00\08\C7\00\03$\0D:45]\0A\03\139x\19\1A4M\15\09-\03%36-\03\07\CC\11\138\CC\11\138\A9\14\03i\00&5,6\00\04\92\14\02M\00\03\92\14\00\22\00\1A3\8D\14;8_4[\00\132[\00\182[\00549,\B5\00\18;s\00\190t\00\07\FA\03!51l\004sub{\04452,\19\00\01:\00\03Z\01\03\1D\00$3,$\00\006\00%hrH\00$4,\81\00\01'\00\02!\01\148M\04\03\B6\04)54\C9\12)31\EC\02\05\09\15\101P\03\05:\00\0A\B7\12\03R\03\01#\00\0B\B9\12\03\1A\03$13\D8\15\193\04\01\03^\15L134]\EC\04\03h%*55\9F\00\195\C3\02\07\9F\00\1F6\9F\00\07%7,#\00\0C\9F\00%8,W\00\02+\00\09\9F\00\146\9F\00+8]=\01\03\DB\03*56\9E\00%9,\D4\05\06\9D\00F157,8\00\0A\9C\00\03r\03#13\A5\01\08\80\00 d1\97\03\05\0F\01F;\0Aor8\00)2,@\00/41U\06\038142\89\00\198\89\00\07\8D\02&9,d\06\09\F6\06460,!\00\01A\00\0F\93\06\04+60\0C\03\133\0C\03\183\0C\03)61\F4\02\08g\00$2,!\00\1F1\C9\03\04/62\CA\03\04>4:\0A\0D\16\02\C9\00\07\FC\04\05\19\00\1A2A\06\1F4y\05\05448, \00\0B\B2\04$9,R\00\01'\00\0A-\05\139\AE\00\0F\E8\19\05.17\E3\00\03\FE\08)17)\00\178\B7\08\07\D7\03\02\8A\01\1F8F\00\00\03\D2\03\1B9(\01\135(\01\175(\01\00y\0A\056\00\03\F2\04\22eq\8E\013p2,\22\00\110\F2\04\162\F2\04\1C1\E7\01\136\\\00\1E6\1A\17\03R\07\188_\01\07\18\00\1E9\81\00$ges\05$2,<\00\01+\00\02y\05\07\88\00\1B8\87\00\137\87\00\187\0B\02\163\E4\00\09\A5\06\1F14\08\00\03\19\00$2, \00\121o\03\03\18\00$3,\1F\00\1B1C\0A\03\E2\04\2213(\03)33\D7\02\02X\04\01$\00*-1C\04\03\DA\0A9135\9B\00\1F6\B4\00\05\1F7\B4\00\05$8, \00\1A1|\00$9, \00\1C2\B5\00\027\04\121<\0891399\00\02\0D\08\01$\00\0D\B5\00\03\DE\02)41\F8\04-24]\03\02\96\05\02?\1B\03\EF\00\0A\FA\04\03B\1B\01#\00\0CW\1D%7,W\00\02+\00\09\15\08\03\A5\09:127l\00&8,\A6\00\0Bl\00%9,#\00\0C\02\06\1B0l\00\199l\00\143\02\06\0B\17\05\02\CF\08\2214\19\02\1A4\F9\09#13\D0\03\1C4\22\08\138\A8\02\198\22\08\195\F4\01\07\DC\01\02c\04\01!\00\0F3\04\05+46c\00\139c\00\199c\00\1A7n\03\06N\04\02\E6\04\01!\00\0FP\04\05/48Q\04\04(10R\04\0AU\03\00\D0\03#neQ\04#3,!\00\02Q\04\173Q\04\0D\D5\05\05\A0\09\181\D3\1D)50b\02\07h\1F\172&\05\06\AE\0D\02\FA\0E\00\1D\00--1|\1B\02H\09)23%\02\02d\09-d50\0C\02`\09\22d5)\02\195\C0#,24\E7\1F\125\D1\17,24\D8\00\04\1D\0A812:A\00\0F\DB\01\00\135* \1C5C\00\04T\07\181U\07\00H\03\058\00\09\9C\00\08\F7\0E\0A`\05#4,9\00\00&\00\01\93\01\174\93\01\0C\EF\01$14x\00\09j\07\1C0\B7\02\05T\02\120\B7\02\1F0\B7\02\08?06;1\06\04/071\06\03)08\E8\00\0B1\06$1,<\00\01+\00\021\06\07\AB\0B,16\D5\00\05\C5\02\195\D5\00\1F9\80\05\04/10\80\05\04\03{\00\00 \00\094\06\03\C4\06/114\06\00\02\CE\12\2210\A2\05)12[\04\02\CE\12\01$\00\0C\7F\05\145\D4\0C\09&\0D/15\B4\00\05\1F6\B4\00\05$7, \00\0A4\06\03\1D\1F/174\06\00\03y\08\121\D3\06)189\00\02\C9\12\01$\00\0D\B5\00\137\07\0F\0A2\1F/094\06\04\05j\01\00\EF\00\0B\88#%1,#\00\0B\C8\05\05\8C#\03\EB\05)11n\08\131\C8\05,12i\0C\136\92\02\1A2\D3\06/13\9F\00\05&4,\D9\00\0B\9F\00%5,#\00\0C\9F\00%6,W\00\02+\00\09\9F\00\04\D3\06*16l\00\1F7\0B\01\07%8,#\00\0Cl\00\1B9l\00\0A\A2\0A#11\C5\07\1A2\FB\10\05\F1\00\1A6:\0B\01\DC\01\0F\DE\07\03\1F1\0B\01\06\03\9B\14\01#\00\0C\DE\07%3,W\00\02+\00\09\87\00\144\DE\07*3]b\02$5, \00\01\AE\00\0B]%\133\D4\00\0D5\05\04\A2\0A;16:\1A\00\045\0A\1916\0A\0BP\05\076\03\02\97\08\01!\00\0F\B3\05\04?127\B4\05\06\1F8\22\0B\04\0A!\0B\07\D9\06\1F9\D9\06\02\02\B0\08\142h\03\09g\07#5,P\00\00'\00\01\03\05\165\02\05,20\9B\00\04*\08)19j\07\1F4)\0D\04/55)\0D\04\00'\0F\03 \00\0B:\07$7,R\00\01'\00\08\19\1F\03\91\03)57/\1F\1F2\87\0E\04\02\81\0B\02E\10\193P\13\05\D3\0E\1A5F\11(34\0C\16\0C\01\08\01\22\03)34\C7\00\02\EA\0E\01\1C\00\0A\C7\00\00\95\0E\03j\00\01'\00\09z\02\126\1D\13\193\1D\13/62D\01\04/63D\01\04464, \00\0B}\00$5,R\00\01'\00\08.\01\03>\11\196\CC\0F/36D\01\05#7,5\00\00%\00\08\B1\07\02C\0C\00 \00\195Z\16?66,\FC\1C)\0F]\16\00\136\17*\1A6D\01\02\22\06\1C3y\08$20Q\13(0:^\01\1F8^\01\05\1F9^\01\04\127\DB\01\1D6\DB\01471,R\00\01'\00\08H\01\129^\01)71\DD\08\02:\0D\00\1D\00\0B`)\127\03\04\194\E6\06\1F7\09\02\05\1F7\09\02\05474, \00\0B\AB\00$5,R\00\01'\00\07N\00#41\AB\00\00\09\023and\B1\04\02$\12\00\1D\00\1B3\B0\04\127H\08)42\AC\00\0F\22\11\05\1F7\22\11\05\1377\01\1D7\AC\00$9,R\00\01'\00\09\D1*\1C33\0B\127\90\06/43\13\0A\04-44\D4\11\02\18\00\04\91\04\1A7\FB\03\006\0D(32A\15\00\DE\0C\02\18\00\006\00\037\05\14l=\0A#6,e\00\00)\00\017\05\1767\05\0C\A1\0C$21\95\02\08B\16/48\8C\00\04\1C9\8C\00#1,\17\00\005\00\09\F0\11#63\AD$\0Ap\00\04B\06>22:\B0$\02\13\01\0DE\00\1F2E\00\06+3:t,\003\00\0B\DD$\05s0\06\D2\04,80\DA\14\02\85,\1F0\9B\01\03\00F\0D\04L\00\08\FD\00\02e\0D\02K\0D#51X\03\03b\00$1,i\00\00(\00\0F\0F\16\03)d8\A6\09\1F8_\03\05583,\96\03\09\B3\02\128_\03\1D8_\03485,R\00\01'\00\08\CD\00\193\86\15\06\FD\00\184\F4\01\06\17\00\185\FC\00\06\80\05\02'\07\02\0D\07\195\EC\1B,57%\02\02l\06\02@\07(56\86\08\02\BB\06\02\94\0E/58\8B)=/85\8B)\1E/59\8B)\1C\08\FBS\0F\8A)\14/60\01,\09\05n\07\198\84\02)63\9C\02\06\85\02\02l\07\02)\03/62\D1\16\03\09i\07*65J\00\09-\15#7,\22\00\02\DC\10\177;\04\0D\D0\06\04I\0F\182I\0F.66^\00\14l}\1A#8,\22\00\123~\1A\178_\00\0C\D3\0E$25_\00\185\E5\03\136^\02\08%\04\02\02\01\0C\AD\0A$27A\00\186\DB\04\0Fk\04\06\023\05\0FE\00\06\187\86\00\02\BE\0B/64k\04\02\09\E4\19/86k\04\02/68k\04\03/69k\04\03\02\F4\07\126A\01\1A9k\04$7,i\00\00(\00\03\1F\0A#32 \00\03\15\08\198\8E\17\00x\07\181\89\03\00\A2\07\02\17\00\02`\00\06A\15\02\C3\07\137\C0\0F\06\90\07\02\C2\07\02^\00/74\E1\04\03\1A7\0B\01\0F\EA\08\04(76\DF\04\07z\00#7,\1E\00\0E\F5\12\1189\04)77\0F\05\129\19\09\1D8\19\09491,\81\00\01'\00\08}\00\1F8\0F\05\03\197\89\01\06(\02,80\DE\04\02\EB\05\02\EA\05(79U\01\02\7F\05\02X\08\1F8\E00>/91\DE\04\1E/82\DE\04O/83\DE\04\09/85\DE\04\02/86\DE\04\04\02\DD\02\02\DC\02/85\DE\04\03\1D8\FA\03\04\99\16\182\9A\16/88\F8\04\08#9,\22\00\02\F8\04\169\99\04,30^\00\04j\0E\192j\0E/92\E9\03\02/89q\04\04+90\83\03\02\A5\02\02\C4\02\198\83\03\00[\00\02 \00\0F\FF\15\00\119\BA\03#92\9C\03\03\0A\03$4,\94\00\01#\00\0F\80\08\04\199.\1F/95\A0\03\03/93\A0\03\03\02e\00\129}\08\0C\96\00\02\112\0A\94\0E497,\1C\00\0B\A0\03$8,\81\00\01'\00\08}\00\1F5\A0\03\03/96X\01\05\0D\7F\08\02^\00\02}\00\199\7F\08399,c\00\00&\00\0F\A1\03>\0F\0A2\1F\1F9\7F\08P/10\80\08\01\0A\EF\1E\04\AB\0E/30\0A\1D\05-02\E9\11\03\CC\01\1F9g\0F=\01\B1\00\01Z\00\09\81\00\04\CD\1A.00\E5\22$0,\A4\00\01*\00\02A\17\170\D5\03\0CS\0C$31\F2\00\09\A9\19?101\94\0B\04\0B\01\01\08.\14\00\9A\00\04\22\00\0BA\22504,W\00\02+\00\09\D3\00\04.\14\1B0a8\05\9D\18\1C4\85\00\1F6\A1$\04\03=\18\02\22\00\0C\85\00%8,V\00\02+\00\0A~\14\04\AD\22\1C0\0D\09$32%\01%2:\BE1\0Fc_\1D\0Fb_:\0F\BE/\10\1F984Q\1F984\1F\0E\D2\00\0F74\09\0E%\01\0F64\A7\05V\1A\0F54=\825pack2PjS\04\0D5@\0C \00\0E\F73\0F(\00\0A\1F1(\00\14\1F2(\00\14\08\9A3/32(\00\00\0F\1FD\1BO10[7}@\1E\168\EA\02\8C16 %rs<5\B72\1E9ED\1F5nO\0E\1F0\10\03\18\03\BD1\0F\FD\00\04\0FN1\01\0F1\00\09\1F3/1\01\0F1\00\09\1F2\101\01\0F1\00\09\1F1\F10\01\0F1\00\01\0F\AE@\0F\03k\18\0Fs@\0B\0F;\00\03\1E8\C6/\0E\D90\0F\C4/\04\1F0\C4/\08\04\95\1C\1F0>\00\06\1F2\C4/\08\04\A5\1B\1F2\86D\03\1F3\86/\02/11\C9\04\03\1F9\85/\03\1F7\89D\02\09\DF\0E\08l&\0A\09/\0F\1F@\03\0A\0E/\0A\B7.\0F*\1F8\1A.\03/19\93\00\04\05=2\0C\93\00\03\C7=%18\9B1\0B\16?\03\C7=\09\93\00\03\C3\08\09\F4=\188E\09\0F\83(\01\0B`\0E\04\07\1D/9;j(\00)48\9E\18$21f(\0C\9E\18\03\E6 \0A\91\1F\1F2p.\03\05\B6\1E+40\1D\01\04\DE2\0D\1D\01\195\EDA\08\E8\13\04\87\1F?25]\E21\02\09\AA\1E\192\EA.\06\17\00\184\90\1C\064\01\02w\00\03\97$\1F4*2\02)25\F1\00\1C6\0F\14\03\1E\0Fd\0FP/68r\06\0F\05\C7\09\00\1A/\0F\A4\18\09\9110uniform\02N]S_iii\A8\18\0E$\00\0E\05n\0F,\00\0F\1D1`\18\0F,\00\07\1F2,\00\18\1F3,\00\11\0F\BC\18\1C\1E1\DC\\\0F\BC\18\0E\0DaK.31\C0c/3>\09K\0B\0F\BB\00\00_E3uni\ECJ\0A/11\DF\18\18\03L\18\0F$\01\08\1E]Z]\0F4\00\0D\0F\E5\\\01\0F4\00\06\0F\A9I\0F\0FE\02\00\0F\93\1C\0F\0F\A6\02\00\0F\F1\18\0E\1E3<\18\0E\BBY\0F\F6H\03\1F5\FDG\08\05H\19\0F^\18\00\1F6]\18\03\1F4&\1D\05\0F\1EX\00\1F2r\18\02\1C3\D9X\08\9D8\0Ag\13\1E4f\13\1C1f\13\141f\13)1_\83 \1F7\8CU\03\185\CD\18\06#\06\08=V\07\8C\11\15,\FB\05\0FkZ\03\0BH'\1F,!Z\03&0,\99\1A\09\EB\0F\04\B7 \0A\0A\22\00>\19\0Fd\03\0B\0F\ED!\01\06\02%\1A\08\89F\0A\BBE+32RB\16,7\00\1C5\81A\0F\A3\1A\00&8,6\00\1F7\A3\1A\03\1F8\D4A\00\06\C6\1B\0F\B9\01\1F\06\B7L'ld[\19\01\BF\01\0B\D0\22\1F5\AE\1A\03\1E6\AE\1A\0FJX\1D\0B\A9\19\00#\00\09I\01\06\A8\19\02\D9\19\08d\02\158\A9\19\070\00\172`Y\1F01\19\01-24\E7,%2,:\00\01\DA77elpP\19\009>\10,&\00\08N\00\175)\01\09\0B\1C\03\D8<\1E4S\19/19F\01\01\05Q\19\0B\94D\02P\19\152\C9\00\0B\06\04\02\12\01\1A2@\0E\07AY\0B\84\19'2,\0DN\04\DB\1A\07s\18\00#\00\1F]\E97\03\02\18\19\0705\132O\05\1F0\DD!\0B\137Ct\01\0Au\07\B7|.S0D\09\0F/\00\0E\0EO\09\0F7\00\19\1F17\00#\0Ee\09\0F7\00\12\1F37\00#\1F47\00#\1F5\B2\09\14?2[7\EBb/.23\B2\09\1F1\11U\09\0F\C5\00\0D\85E6s_data\05U\0F\C6\09\09\1F2\A5\22\1F\0FC\01\13\0E\9D\09\1F6?\00\18\0F\10\0A\00\1F5?\00\18\0F\01#\02\0F@\00\18\0F\10#\02\0F@\00\18\0F\1F#\02\0F@\00\18/0]\02\0A\0B\1F4\02\0A\0B\0F;\00\03\1F7\F2\22\08\03\F77\0F&T\0B\1F2&T\0C\0F\D7\22\01\1F0z\0A\03\1F8\D7\22\03\1F6e\0A\02\1F5\BC\07\02\1F6\13\22\02\197\1E\06\0F\7F!\02\181_\08\0A\C0\09\05v \0E&\1E\1E8\BF\0A\1C2\BF\0A\142\BF\0A?2_1\8C\09\9A\1A2\8B\09\0B\8CE\1Dr\E7\00\143\E7\00.2:\E4(\01B\01\0CC\00\1F3C\00\06\1D3\F4\19.22C\0B\0F\F6\04\16\0FQ\0B\0D\01\AF]\07\A9%\00\EE\04*44\18\00\144\18\00\1A8\18\00\135\18\00*52\18\00\146\18\00\1A6\18\00\137\18\00*60\18\00\148\18\00\0FQ\15>/124\00\00\1F14\00\02\1F1p\12\09\1F2p\12\02\162`s\0D2\00\1F32\00\02\163\B5*\0D\97\00\1F4\97\00\02\184)'\0D4\00\1F54\00\02\1554\00\0Fh\00\01\1F64\00\02\1564\00\1F54\00\00\1F74\00\02\1574\00\1F64\00\00\1F84\00\02\1584\00\1F74\00\00\1F94\00\02\189Hc\05\C5\13\1F\0A*\8A'\0F\E5\13\01\04\09\00\142\09\00\143\09\00\144\09\00\145\09\00\146\09\00\147\09\00\148\09\00<9\0A)\BEe\1C4\EB'\0B\AC\0B\01.\0C\0F\F8\02H\0Fqd\14\0F,\02\01\1F2g\00\02\04\F8\02\02\D6Y\08\90\01\0F\D8\8A\08\0Ft\01\0A,\0A)U\18\1F2\0DW\02/15\AD\06\02(16\C7\1B\0E!g\0A#1\1F1\99\18\03\191\BE\1E\0E\DE'\0A\89#\06\0DL\0F\BE\01?/20\B6\04 \1F2\B7\04\1F\1F5\B8\04\1E/163\00\00\1F43\00\02&4+\F3Q\0D3\00\1F53\00\02\04\B8\04\0F/g\02\1F63\00\02\04\B7\04\1F1\8B'\01\1F73\00\02\04\B6\04/203\00\00\1F83\00\02\04\B5\04,21\80\04\0FC\8B\16\0Fs\04@.\0A)\B1g\0FY\92)\0FX\92\FF\FF\FF\13\0F\06\11\11?3[8\94w\1D\1F2\95w\00\1F0P3\0D\1F3^e\1D\0F\02\01/\0Fge\01\0FY\001\1F8pe\01\0FY\001\1F7ye\01\0FY\001\1F6\82e\01\0FY\001\1F5\8Be\01\0FY\001\0FS\1C\01\0FX\001\0Fw\1C\01\0FX\001\0FDf\01\0F\B1\002\0FMf\01\0F\B2\00+\0Fhy>\05l\1C\05\F1}\0F~y\08\0Cyd\1F4xd\03\1F5wd\03\1F6vd\03\0B\C1v%64\A8\12\0A\D0u\04\AE\11\0E'L.3;aE\1B2\E4%\03\08\1D\0B\8AI\199{\12\06\15\18\1E9JE\1F1\80.\03\1C6}\1C\05\F2\1C\0EoP\09\22e\07,\13\191\1A4\08]R\04w\1C\05\B3-\189\0CA\08WR\0C=;\0A\BA\1C\0A\BB>\0FPA\03\06\FD\12\0C\A0\00\183\0A\0D\09\9BY\09\02\13\0B\0C6\0A\A3%'12c\1A\00\E5&\04Y[\04\00\13\1F4\A3\1B\02+56s\00\1F5?c\00/16\AB\0D\03\04\D0>\1A6s\00\145\CD\1C\0As\00\05\FA(\0CC\01\1F7\F26\01\1B84\0E\0Bi\0F\1B4.w\04\0F\1C/9]\E3\1A\1B\1E8\E3\1A\04\097\1A2\03\02\1C2\FE<\06\BB5\0F\8F\01\01\04\BB6\0ALx/25\99\0E\02'18\CC5\072\02\179\9E\1C\02\F09\00\C0\00\05\F5\00\156\E7\1C\07\F2\00\04\12g\0D\F2\00'8,fi*27\A5\1B\1F8\E47\00/29\84\01\02+30\8B\0F\18s\DDf\00 \00\0F\DCf\1B.29\F9,/20\DCf\0B\0A\84\01\1F3K\10\03\04HW>35]\D5f\0E\0C7\04Og/6]\9C[\00\141\BE\15\0B\84\01.37\9E[\0F\06g\0F\1F4\BAf\01%39\CC\0E\0D\07\0E\0F\FA\9C\08\0F\F9\9C\8C\0Fg\0B\11\1F4X\7F2\1D0y\0B\1F7x\0B\0D\0FW\87\1A\03\14\0A\0F\F7\00\10\0EG\09\0F<\00\16\1F14\0A\01\0F=\00\15\0F\B6\08\04\1F3\D9A\01\1F48%\06(ld_\0B\04n\03\0F\17\13;\0F\EA,\1E\06h\9C\07~\11\0F\E0,\01\0F\CC)\00\0F\09&\00\0Al\06\0E\D9\05\0FEC\06\1C7\0C~\0AK\1B\1E7K\1B\1C4K\1B\144K\1B\1A4K\1B\1F1K\1B\03\1F9\D48\00\144d\1A/4_hS\01\0A\9CY\1E9\E8\1A\08A\00\0EdS\1F9\F7+I\0F\04\15\14\0F\03\15\1F\1C3\C0\16\0F\D9\9D\0B\0F\C4\16\1C\1F6x\1C\01\1F8\F6\17G\1F6\FDM\1F\0D\00\01\0EO\9E\0F\13-\02\0E|\14\0F\A9\A2\1E\0F\A8\A2\FF\FFM\0FC\08\11?5[4\0F\92\1D.16\B0y\1F92\08\0D\1F5U$\1C\0F\F4\00\22\0Fa$\00\0FK\00$\1F7m$\00\0FK\00$\1F6\D6\08\00\0FK\00$\0FB%\00\1F3K\00$\0F\0B\13\01\0FK\00$\0F\FE\12\01\0FK\00$\0F\F1\12\01\0F\97\00%\0F\E4\12\01\0F\98\00\1E\0F\D7\12T\0Bf\09\1F3o/\02\1F4\0A%1\0F\DA\84\03\08\8CL\0E\A6\07\09\BC\07\1E9H+\0E9w\0B\ED\0D\154:w\07R?\02\B6&\0D\0Aa\16,\101\09>e\03\94\0E\0A@ \1F7\03\88\02\188\EE/\0F\C0/\1F\09\F4O\0FI\12\01\1F1\97\1F\02\0F\028\03\06c%\0C\\l\0A\EB.\0B2\01\03\19\00\1B4\E4r\03\E7\1E\0D@Q\03d.\1F1k.\00\1F5k.\0B\0A:\01\0A\B1\12\0Fk.#\0E;.,15\9A\0F\0F:\A9\16\0F9\A9\09\0F\F7\98\1B.16\EA\06\0FL5 \1D7\FC\06\0F\FB\06\0E\1F6\0D\99\1D\0E\E7\00\0F\D7P\0B\0F:\8D\00\0F\C63\05\09\C7E\0F\D63\06\0A\D8\0C\05\8F(\0F\EC3\02\08\1E\0D\04D\89\00\09\00\0F\\\\\00'32\130\0A\074\1B1\FD\0D<6_6[\00\142[\00\1F2|U\04\0FKI\03\1D7}\00\0A\16F\04\F7\16\09\14F<6_4\7F\00\143\7F\00/3:#\05\00\0D:\0E\1F2\A8)\16\1F8\03\17\01\0A\A7)\1F133\08\0BuM\0Ey\04\0Az\17\05\C8\10/12\DD\9C\02\0A=\04\1F4\81$\0A.12\90\00\0C\923\0Fb3\01\1F6\A0$\01\0B\05N\1E8\B25\0F\DA\15\01\07p\14\03\BC2\06\9Bl\0B\FD\06/20\BB$\01.40\CC\14\0Ew\00\0C\CD\14\0F\E7M\00/23 L\01)40\E3\17$25\B62\09\CF\8F\0C\CD2\0F\BE\00\01\1F6\CD2\01\05\02I\0C\CD2\0A_\00\169\E42/28\BE\00\03\1F9\C3\11\09\04\B4\15\0F\86M\04\03\96\15\0D\1BG\02\0E\09\08\1F\13\07\E2J9rd5]\00\09\BF&\07\B2W\03\F0Q\0D\CAc\07]\00\1F7sj\00\0A\C0\01'32Zj\0A\98f$8]oj\0A\F0\03\144\F0\03\09\92J\1D3[\08\05\F3K\02\1E\00\0FT\05\03\1D3O\04\145_\00,5:\ECi\06\7F\07\04\D8\01\03\BAJ\0EPK\04\87\05\1F6\88\05\06\08@?\195\BD\00\0FUV\0D\0D\B5\07\0F\96\B0\0F\0F\95\B0\98\0F2\08\11O7[24_\17/\0E~=/153\08\0D\0Fp\8D \0F\FB\00\14\0F\ED<\01\0FA\00\19\0F\F9<\01\0F\82\00\12\0FK\0D;\0A\8FN\0Fb<\07\1E2X\16\1C7X\16\147.\03\1B7\C8O\0F\8B1\02\02\EA\03\0BSs\04\C6\1F\1F-\E6\0D\03\1F5\A6\08\01\0A\1A\06\02af\0B\B9\05\09\98\1F\0FX\00\00\1F8eZ\08\06{\0D\08o\00\0F\7F\0D%\1F9~\0D\09\1F8~\0D\02/16~\0D4\1E9}\0D\0E\B9\0C\1E2\E8\00\0F4\0D\1D\0A\863\0B\D8\97\06\FE\85\0A;\02\142;\02\0D\FB]\0F\EF\B4\05\0F\EE\B4L\0F\D9\04\11O8[56\0B\0D/\0E\90\9B\1F19\1C\0E\1F8\D9\04!\0F\F1\00\00\0F\8E\04\11\0EV\01\0F#^&\0Fo\04\00\0F<\0D\12\1F16\0D\11\04\E4\80/186\0D\07\0A\E1@\0A\F0y\09\C3\84\02yl\0B\F3\04\01\C2\7F\0As\00\03\B8\02\0A\C7\84\09B\0D\07\EFQ\03\9C\04\0E\1F\01\0FU\12\04\0E\1C\0D\0E?\12\0F\8F\0D\04\04B\12\09\8F\0D\0C\85\85\141z\82\1B1{\82\09\A5N\0A\B0Z\0F\91\0D\02/11:7\02\1F0\C6@L/28\93\0D\03\1F4\93\0D\03\1F5\E71\01\0E\07\99\0E\93\0D\0F\F5@\1A\0E\052\1F3\052\03/24;@\00$19d\0D\0C%@/21\93\0D\01\0FkY\08\0B\93\0D\1E4w\00\0FkY\0D)44kY\05d\0D\0F\93\0D\02\02\17\0D\0C{\07*28>}\0F\BE\00\01\09dY\1F9\93\0D\03\06\E1?\09_\00&31\F8?/30\BE\00\02/31\93\0DX\162\93\0D\0F0\\\00\0A\8Dw\1F6\8B\00\03\1F7\C1\0D\04\03H<\0D\DCh\16,p\08,8;c\0D\00\EA(\09\E0\07\1F0\E9\00\04\05\98_\0F\B7\9F\04\1F67E\08\09\A7\88,34)5\0Be(\0B\EF\00/14\F0\00\04\04\0B\15\0E\1F|\196\F6\9E\1F5\E5w\00*16\B7\0E\176\CDw\1A4\C3\00\1F6\FA\0D\02\01*\05\03\09\00\1B:\1A\00\03s\0E\0A#\86-37\F2\15\1A3MY\0F\C4\06\04\1F8\C5\06\06/6:\B1B\19\1F1\B1B\1B\1F1\B1B#\1F1\B1B#\1F1\B1B#\1F1\B1B#\1F1\B1B#\1F1\B1B*\1F9\B1Bs\1F1\B1B(\1F9\B1B.\1F1\B1B+\1F1\B1B+\1F1\B1B,\1F1\B1B,\1F1\B1B,\1F1\B1B\FF\A9\1C9\0E\11\149\C9\05\1F9\B1B\CA\149B\0C\1F9\B1B&\07C\00\0F\B1B%\1F1\B1B\FF\FF\FE\1F1\B1B\FF\FF\FF\FFo\1F1\B1B\83\0F\16\05\09\0F\9A\CC\FF\FF\FF\13\0F0\C3\11\1E0\B1B\0F\06\11 \0E\12\1B/41E\C3\0D\1F0\C3B;\1F1\C3BE\1F1\C3BE\1F1\C3BE\1F1\C3BE\1F1\C3BE\1F1\C3B\22\0F\B1\002\0F\C5/\00\0F\B0\00\10\0F\C3BE\1F1\C3BE\1F1\C3BR\1F3\AE!\02\1F4\C3B\83\1F5\C3B\05\0F\BB\22\0B\0BF_\02\F7\8A\0F\C3B\17\1F7\C3B\10\1F8\C3B\02\0E2\93\0E\B9\A7\0F\C4B\17\0A\EC\1D\1F1\C6B#\0E\BA\12\0F\C7B\01\05RB\0C\A3\00\0F\C7B\0D\1F3\C7B\11\1E4\C7B\0ETB\0F\C7B\16\1F5\C7B\11\1F6\C7B\03\03u\B7\0F\C7B\1D\1F7\C7Bu\1F8\C7B1\1F9\C7B\19.20\C7B\0FZ\1F\00\0F\C7B\02/21\C7B8\1F8\C7B\02/32\F0A\01?29]\BE\1F\01(16\EE\BB\07\8A'\05\F8_\08_\A7\1C2Vw\142Vw\00\09\00\1F:\1A\AA\09\03\17\00\1F1UC\04\04\96B\1B1UC\035p\0D\C5\A6)34Np\1C3m\17\00\22\00\0A\14\16\1E8a>\1425w\00\D8\00\1E:C\9E\0C>\00/24?\00\06\0Etw/28\BCC\02\1F0\A5C\03\0B\B5\01\03Ia\0B\CF\12/37\D3C\03\176\EDn\07\90\02&7,y!\1E6\D3C\03~.\0Ap\01\03\BAC\0D\F8'\04\A9n\03\\\AB\0A\FA'\1D4\E6\C7\0F]\D8 \0F\\\D8\FF\FFM\0FV\0E\11\1E1s6\0FA. \1F8o=\1F/21o=9\1F1o=7\1F1o=7\1F1o=7\1F1o=7\1F1o=7\1F1o=7\1F1o=8\1F1o=8\1F1o=\FF\FF\0C\0FX=\12\0Fp\0A\05\06\0A \09p\0A\0C`n\142`n\1B2`n\0F\E6=\04\0F\EF\0B\04\1E5\E6=\0F\A0l\01\0F\E6=\03\1F6\E6=/\1F7\E6=p\1C7Y\01\142Y\01\1F2\12(\16\1F0\C3j\1B\1F0\C3j#\1F0\C3j#\1F0\C3j#\1F0\C3j#\1F0\C3j#\1F0\C3j-\1F2\C3jp\1F0\C3j+\1F2\C3j+\1F0\C3j+\1F0\C3j+\1F0\C3j,\1F0\C3j,\1F0\C3j,\1F0\C3j\FF\AC\1C2\C3j\142\C3j\1F2\C3j\CA\142\80j\1F2\C3j&\08C\00\0F\C3j!\1F0\C3j\FF\FF\FF\FF\8A\1F0\C3j\FF\FF\FFd\0F\86\03\08\0F\B2\EE\8C\0Ff\0E\11\1F3\BC\\d/23\BC\\1\1F0\BC\\(\1F0\BC\\)\1F0\BC\\\FF\92,23D\0D\143D\0D\1F3\BC\\%$23]\0C\1F3\BC\\#\08A\00\0F\BC\\\DA\1F0\BC\\\FF\1A\0FB\01\0C\0Fa\F4\98\0F\85\06\11\1E4\E2K\0F\85\06S\1F4\E1K4\1F0\E1K-\1F0\E1K-\1F0\E1K}\1C29b\1429b/24\E1K\FE\092\06\0B\F4\BA\04\98\13\0AK\01\142K\01\0F\D5\19\1B\1F1\D5\19\18\0F\E7A#\0F7\00\07\1F17\00#\0F\D5\19\0B\0F\E7A#\0E7\00\0F\D5\19\13\0F\E7A-/25\D5\19s\0F\E7A(/25\D5\19.\1F1\D5\19+\1F1\D5\19(\0F\E7A,\0E\A6\02\0F\D5\19\1C\0F\E7A,\1F0\E7A\FF\AC,25\14\07\145\14\07\1F5\D5\19\CA\145\F4\0C\1F5\D5\19&\07C\00\0F\D5\19\22\0F\E7A\FF\FF\FF\FF\8A\1F0\E7A\FF\FF\FFVP;\0A\0A}\0A\00\00\00\00\00", section ".nv_fatbin", align 8 -@__cuda_fatbin_wrapper = internal constant { i32, i32, i8*, i8* } { i32 1180844977, i32 1, i8* getelementptr inbounds ([176489 x i8], [176489 x i8]* @8, i64 0, i64 0), i8* null }, section ".nvFatBinSegment", align 8 -@__cuda_gpubin_handle = internal global i8** null, align 8 -@llvm.global_ctors = appending global [2 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__sub_I_main_test_cu.cu, i8* null }, { i32, void ()*, i8* } { i32 65535, void ()* bitcast (void (i8*)* @__cuda_module_ctor to void ()*), i8* null }] - -; Function Attrs: noinline optnone uwtable -define dso_local zeroext i1 @_Z8InitCUDAv() #0 { -entry: - %call = call i32 @cudaSetDevice(i32 0) - %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([19 x i8], [19 x i8]* @.str, i64 0, i64 0)) - ret i1 true -} - -declare dso_local i32 @cudaSetDevice(i32) #1 - -declare dso_local i32 @printf(i8*, ...) #1 - -; Function Attrs: noinline uwtable -define internal void @__cxx_global_var_init() #2 section ".text.startup" { -entry: - call void @_ZNSt8ios_base4InitC1Ev(%"class.std::ios_base::Init"* @_ZStL8__ioinit) - %0 = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%"class.std::ios_base::Init"*)* @_ZNSt8ios_base4InitD1Ev to void (i8*)*), i8* getelementptr inbounds (%"class.std::ios_base::Init", %"class.std::ios_base::Init"* @_ZStL8__ioinit, i32 0, i32 0), i8* @__dso_handle) #3 - ret void -} - -declare dso_local void @_ZNSt8ios_base4InitC1Ev(%"class.std::ios_base::Init"*) unnamed_addr #1 - -declare dso_local void @_ZNSt8ios_base4InitD1Ev(%"class.std::ios_base::Init"*) unnamed_addr #1 - -; Function Attrs: nounwind -declare dso_local i32 @__cxa_atexit(void (i8*)*, i8*, i8*) #3 - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z12histo_kernelPhlPj(i8* %buffer, i64 %size, i32* %histo) #0 { -entry: - %buffer.addr = alloca i8*, align 8 - %size.addr = alloca i64, align 8 - %histo.addr = alloca i32*, align 8 - %grid_dim = alloca %struct.dim3, align 8 - %block_dim = alloca %struct.dim3, align 8 - %shmem_size = alloca i64, align 8 - %stream = alloca i8*, align 8 - %grid_dim.coerce = alloca { i64, i32 }, align 8 - %block_dim.coerce = alloca { i64, i32 }, align 8 - store i8* %buffer, i8** %buffer.addr, align 8 - store i64 %size, i64* %size.addr, align 8 - store i32* %histo, i32** %histo.addr, align 8 - %kernel_args = alloca i8*, i64 3, align 16 - %0 = bitcast i8** %buffer.addr to i8* - %1 = getelementptr i8*, i8** %kernel_args, i32 0 - store i8* %0, i8** %1 - %2 = bitcast i64* %size.addr to i8* - %3 = getelementptr i8*, i8** %kernel_args, i32 1 - store i8* %2, i8** %3 - %4 = bitcast i32** %histo.addr to i8* - %5 = getelementptr i8*, i8** %kernel_args, i32 2 - store i8* %4, i8** %5 - %6 = call i32 @__cudaPopCallConfiguration(%struct.dim3* %grid_dim, %struct.dim3* %block_dim, i64* %shmem_size, i8** %stream) - %7 = load i64, i64* %shmem_size, align 8 - %8 = load i8*, i8** %stream, align 8 - %9 = bitcast { i64, i32 }* %grid_dim.coerce to i8* - %10 = bitcast %struct.dim3* %grid_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %9, i8* align 8 %10, i64 12, i1 false) - %11 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 0 - %12 = load i64, i64* %11, align 8 - %13 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 1 - %14 = load i32, i32* %13, align 8 - %15 = bitcast { i64, i32 }* %block_dim.coerce to i8* - %16 = bitcast %struct.dim3* %block_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %15, i8* align 8 %16, i64 12, i1 false) - %17 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 0 - %18 = load i64, i64* %17, align 8 - %19 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 1 - %20 = load i32, i32* %19, align 8 - %21 = bitcast i8* %8 to %struct.CUstream_st* - %call = call i32 @cudaLaunchKernel(i8* bitcast (void (i8*, i64, i32*)* @_Z12histo_kernelPhlPj to i8*), i64 %12, i32 %14, i64 %18, i32 %20, i8** %kernel_args, i64 %7, %struct.CUstream_st* %21) - br label %setup.end - -setup.end: ; preds = %entry - ret void -} - -declare dso_local i32 @__cudaPopCallConfiguration(%struct.dim3*, %struct.dim3*, i64*, i8**) - -declare dso_local i32 @cudaLaunchKernel(i8*, i64, i32, i64, i32, i8**, i64, %struct.CUstream_st*) - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #4 - -; Function Attrs: noinline optnone uwtable -define dso_local i32 @_Z8runHistoPcPjjS0_(i8* %file, i32* %freq, i32 %memSize, i32* %source) #0 { -entry: - %file.addr = alloca i8*, align 8 - %freq.addr = alloca i32*, align 8 - %memSize.addr = alloca i32, align 4 - %source.addr = alloca i32*, align 8 - %f = alloca %struct._IO_FILE*, align 8 - %result = alloca i64, align 8 - %buffer = alloca i8*, align 8 - %blocks = alloca i32, align 4 - %partSize = alloca i32, align 4 - %totalNum = alloca i32, align 4 - %partialNum = alloca i32, align 4 - %dev_buffer0 = alloca i8*, align 8 - %dev_buffer1 = alloca i8*, align 8 - %dev_histo = alloca i32*, align 8 - %i = alloca i32, align 4 - %agg.tmp = alloca %struct.dim3, align 4 - %agg.tmp29 = alloca %struct.dim3, align 4 - %agg.tmp.coerce = alloca { i64, i32 }, align 4 - %agg.tmp29.coerce = alloca { i64, i32 }, align 4 - %agg.tmp34 = alloca %struct.dim3, align 4 - %agg.tmp36 = alloca %struct.dim3, align 4 - %agg.tmp34.coerce = alloca { i64, i32 }, align 4 - %agg.tmp36.coerce = alloca { i64, i32 }, align 4 - store i8* %file, i8** %file.addr, align 8 - store i32* %freq, i32** %freq.addr, align 8 - store i32 %memSize, i32* %memSize.addr, align 4 - store i32* %source, i32** %source.addr, align 8 - %0 = load i8*, i8** %file.addr, align 8 - %call = call %struct._IO_FILE* @fopen(i8* %0, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.1, i64 0, i64 0)) - store %struct._IO_FILE* %call, %struct._IO_FILE** %f, align 8 - %1 = load %struct._IO_FILE*, %struct._IO_FILE** %f, align 8 - %tobool = icmp ne %struct._IO_FILE* %1, null - br i1 %tobool, label %if.end, label %if.then - -if.then: ; preds = %entry - %2 = load i8*, i8** %file.addr, align 8 - call void @perror(i8* %2) - call void @exit(i32 1) #16 - unreachable - -if.end: ; preds = %entry - %3 = load %struct._IO_FILE*, %struct._IO_FILE** %f, align 8 - %call1 = call i32 @fseek(%struct._IO_FILE* %3, i64 0, i32 0) - %4 = load i32*, i32** %source.addr, align 8 - %5 = bitcast i32* %4 to i8* - %6 = load i32, i32* %memSize.addr, align 4 - %conv = zext i32 %6 to i64 - %7 = load %struct._IO_FILE*, %struct._IO_FILE** %f, align 8 - %call2 = call i64 @fread(i8* %5, i64 1, i64 %conv, %struct._IO_FILE* %7) - store i64 %call2, i64* %result, align 8 - %8 = load i64, i64* %result, align 8 - %9 = load i32, i32* %memSize.addr, align 4 - %conv3 = zext i32 %9 to i64 - %cmp = icmp ne i64 %8, %conv3 - br i1 %cmp, label %if.then4, label %if.end6 - -if.then4: ; preds = %if.end - %10 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %call5 = call i32 @fputs(i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i64 0, i64 0), %struct._IO_FILE* %10) - br label %if.end6 - -if.end6: ; preds = %if.then4, %if.end - %11 = load %struct._IO_FILE*, %struct._IO_FILE** %f, align 8 - %call7 = call i32 @fclose(%struct._IO_FILE* %11) - %12 = load i32*, i32** %source.addr, align 8 - %13 = bitcast i32* %12 to i8* - store i8* %13, i8** %buffer, align 8 - store i32 2, i32* %blocks, align 4 - %14 = load i32, i32* %memSize.addr, align 4 - %div = udiv i32 %14, 32 - store i32 %div, i32* %partSize, align 4 - %15 = load i32, i32* %memSize.addr, align 4 - %conv8 = zext i32 %15 to i64 - %div9 = udiv i64 %conv8, 4 - %conv10 = trunc i64 %div9 to i32 - store i32 %conv10, i32* %totalNum, align 4 - %16 = load i32, i32* %partSize, align 4 - %conv11 = sext i32 %16 to i64 - %div12 = udiv i64 %conv11, 4 - %conv13 = trunc i64 %div12 to i32 - store i32 %conv13, i32* %partialNum, align 4 - %17 = load i32, i32* %partSize, align 4 - %conv14 = sext i32 %17 to i64 - %call15 = call i32 @cudaMalloc(i8** %dev_buffer0, i64 %conv14) - %18 = load i32, i32* %partSize, align 4 - %conv16 = sext i32 %18 to i64 - %call17 = call i32 @cudaMalloc(i8** %dev_buffer1, i64 %conv16) - %19 = bitcast i32** %dev_histo to i8** - %call18 = call i32 @cudaMalloc(i8** %19, i64 1024) - %20 = load i32*, i32** %dev_histo, align 8 - %21 = bitcast i32* %20 to i8* - %call19 = call i32 @cudaMemset(i8* %21, i32 0, i64 1024) - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %if.end6 - %22 = load i32, i32* %i, align 4 - %23 = load i32, i32* %totalNum, align 4 - %cmp20 = icmp slt i32 %22, %23 - br i1 %cmp20, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %24 = load i8*, i8** %dev_buffer0, align 8 - %25 = load i8*, i8** %buffer, align 8 - %26 = load i32, i32* %i, align 4 - %idx.ext = sext i32 %26 to i64 - %add.ptr = getelementptr inbounds i8, i8* %25, i64 %idx.ext - %27 = load i32, i32* %partSize, align 4 - %conv21 = sext i32 %27 to i64 - %call22 = call i32 @cudaMemcpy(i8* %24, i8* %add.ptr, i64 %conv21, i32 1) - call void @_Z9gpuAssert9cudaErrorPKcib(i32 %call22, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.3, i64 0, i64 0), i32 88, i1 zeroext true) - %28 = load i8*, i8** %dev_buffer1, align 8 - %29 = load i8*, i8** %buffer, align 8 - %30 = load i32, i32* %i, align 4 - %idx.ext23 = sext i32 %30 to i64 - %add.ptr24 = getelementptr inbounds i8, i8* %29, i64 %idx.ext23 - %31 = load i32, i32* %partialNum, align 4 - %idx.ext25 = sext i32 %31 to i64 - %add.ptr26 = getelementptr inbounds i8, i8* %add.ptr24, i64 %idx.ext25 - %32 = load i32, i32* %partSize, align 4 - %conv27 = sext i32 %32 to i64 - %call28 = call i32 @cudaMemcpy(i8* %28, i8* %add.ptr26, i64 %conv27, i32 1) - call void @_Z9gpuAssert9cudaErrorPKcib(i32 %call28, i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.3, i64 0, i64 0), i32 90, i1 zeroext true) - %33 = load i32, i32* %blocks, align 4 - %mul = mul nsw i32 %33, 2 - call void @_ZN4dim3C2Ejjj(%struct.dim3* %agg.tmp, i32 %mul, i32 1, i32 1) - call void @_ZN4dim3C2Ejjj(%struct.dim3* %agg.tmp29, i32 256, i32 1, i32 1) - %34 = bitcast { i64, i32 }* %agg.tmp.coerce to i8* - %35 = bitcast %struct.dim3* %agg.tmp to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %34, i8* align 4 %35, i64 12, i1 false) - %36 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 0 - %37 = load i64, i64* %36, align 4 - %38 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 1 - %39 = load i32, i32* %38, align 4 - %40 = bitcast { i64, i32 }* %agg.tmp29.coerce to i8* - %41 = bitcast %struct.dim3* %agg.tmp29 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %40, i8* align 4 %41, i64 12, i1 false) - %42 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp29.coerce, i32 0, i32 0 - %43 = load i64, i64* %42, align 4 - %44 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp29.coerce, i32 0, i32 1 - %45 = load i32, i32* %44, align 4 - %call30 = call i32 @__cudaPushCallConfiguration(i64 %37, i32 %39, i64 %43, i32 %45, i64 0, i8* null) - %tobool31 = icmp ne i32 %call30, 0 - br i1 %tobool31, label %kcall.end, label %kcall.configok - -kcall.configok: ; preds = %for.body - %46 = load i8*, i8** %dev_buffer0, align 8 - %47 = load i32, i32* %partSize, align 4 - %conv32 = sext i32 %47 to i64 - %48 = load i32*, i32** %dev_histo, align 8 - call void @_Z12histo_kernelPhlPj(i8* %46, i64 %conv32, i32* %48) - br label %kcall.end - -kcall.end: ; preds = %kcall.configok, %for.body - %call33 = call i32 @cudaDeviceSynchronize() - %49 = load i32, i32* %blocks, align 4 - %mul35 = mul nsw i32 %49, 2 - call void @_ZN4dim3C2Ejjj(%struct.dim3* %agg.tmp34, i32 %mul35, i32 1, i32 1) - call void @_ZN4dim3C2Ejjj(%struct.dim3* %agg.tmp36, i32 256, i32 1, i32 1) - %50 = bitcast { i64, i32 }* %agg.tmp34.coerce to i8* - %51 = bitcast %struct.dim3* %agg.tmp34 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %50, i8* align 4 %51, i64 12, i1 false) - %52 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp34.coerce, i32 0, i32 0 - %53 = load i64, i64* %52, align 4 - %54 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp34.coerce, i32 0, i32 1 - %55 = load i32, i32* %54, align 4 - %56 = bitcast { i64, i32 }* %agg.tmp36.coerce to i8* - %57 = bitcast %struct.dim3* %agg.tmp36 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %56, i8* align 4 %57, i64 12, i1 false) - %58 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp36.coerce, i32 0, i32 0 - %59 = load i64, i64* %58, align 4 - %60 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp36.coerce, i32 0, i32 1 - %61 = load i32, i32* %60, align 4 - %call37 = call i32 @__cudaPushCallConfiguration(i64 %53, i32 %55, i64 %59, i32 %61, i64 0, i8* null) - %tobool38 = icmp ne i32 %call37, 0 - br i1 %tobool38, label %kcall.end41, label %kcall.configok39 - -kcall.configok39: ; preds = %kcall.end - %62 = load i8*, i8** %dev_buffer1, align 8 - %63 = load i32, i32* %partSize, align 4 - %conv40 = sext i32 %63 to i64 - %64 = load i32*, i32** %dev_histo, align 8 - call void @_Z12histo_kernelPhlPj(i8* %62, i64 %conv40, i32* %64) - br label %kcall.end41 - -kcall.end41: ; preds = %kcall.configok39, %kcall.end - %call42 = call i32 @cudaDeviceSynchronize() - br label %for.inc - -for.inc: ; preds = %kcall.end41 - %65 = load i32, i32* %partialNum, align 4 - %mul43 = mul nsw i32 %65, 2 - %66 = load i32, i32* %i, align 4 - %add = add nsw i32 %66, %mul43 - store i32 %add, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %67 = load i32*, i32** %freq.addr, align 8 - %68 = bitcast i32* %67 to i8* - %69 = load i32*, i32** %dev_histo, align 8 - %70 = bitcast i32* %69 to i8* - %call44 = call i32 @cudaMemcpy(i8* %68, i8* %70, i64 1024, i32 2) - %71 = load i32*, i32** %dev_histo, align 8 - %72 = bitcast i32* %71 to i8* - %call45 = call i32 @cudaFree(i8* %72) - %73 = load i8*, i8** %dev_buffer0, align 8 - %call46 = call i32 @cudaFree(i8* %73) - %74 = load i8*, i8** %dev_buffer1, align 8 - %call47 = call i32 @cudaFree(i8* %74) - ret i32 0 -} - -declare dso_local %struct._IO_FILE* @fopen(i8*, i8*) #1 - -declare dso_local void @perror(i8*) #1 - -; Function Attrs: noreturn nounwind -declare dso_local void @exit(i32) #5 - -declare dso_local i32 @fseek(%struct._IO_FILE*, i64, i32) #1 - -declare dso_local i64 @fread(i8*, i64, i64, %struct._IO_FILE*) #1 - -declare dso_local i32 @fputs(i8*, %struct._IO_FILE*) #1 - -declare dso_local i32 @fclose(%struct._IO_FILE*) #1 - -declare dso_local i32 @cudaMalloc(i8**, i64) #1 - -declare dso_local i32 @cudaMemset(i8*, i32, i64) #1 - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_Z9gpuAssert9cudaErrorPKcib(i32 %code, i8* %file, i32 %line, i1 zeroext %abort) #0 comdat { -entry: - %code.addr = alloca i32, align 4 - %file.addr = alloca i8*, align 8 - %line.addr = alloca i32, align 4 - %abort.addr = alloca i8, align 1 - store i32 %code, i32* %code.addr, align 4 - store i8* %file, i8** %file.addr, align 8 - store i32 %line, i32* %line.addr, align 4 - %frombool = zext i1 %abort to i8 - store i8 %frombool, i8* %abort.addr, align 1 - %0 = load i32, i32* %code.addr, align 4 - %cmp = icmp ne i32 %0, 0 - br i1 %cmp, label %if.then, label %if.end3 - -if.then: ; preds = %entry - %1 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %2 = load i32, i32* %code.addr, align 4 - %call = call i8* @cudaGetErrorString(i32 %2) - %3 = load i8*, i8** %file.addr, align 8 - %4 = load i32, i32* %line.addr, align 4 - %call1 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.13, i64 0, i64 0), i8* %call, i8* %3, i32 %4) - %5 = load i8, i8* %abort.addr, align 1 - %tobool = trunc i8 %5 to i1 - br i1 %tobool, label %if.then2, label %if.end - -if.then2: ; preds = %if.then - %6 = load i32, i32* %code.addr, align 4 - call void @exit(i32 %6) #16 - unreachable - -if.end: ; preds = %if.then - br label %if.end3 - -if.end3: ; preds = %if.end, %entry - ret void -} - -declare dso_local i32 @cudaMemcpy(i8*, i8*, i64, i32) #1 - -declare dso_local i32 @__cudaPushCallConfiguration(i64, i32, i64, i32, i64, i8*) #1 - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN4dim3C2Ejjj(%struct.dim3* %this, i32 %vx, i32 %vy, i32 %vz) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %struct.dim3*, align 8 - %vx.addr = alloca i32, align 4 - %vy.addr = alloca i32, align 4 - %vz.addr = alloca i32, align 4 - store %struct.dim3* %this, %struct.dim3** %this.addr, align 8 - store i32 %vx, i32* %vx.addr, align 4 - store i32 %vy, i32* %vy.addr, align 4 - store i32 %vz, i32* %vz.addr, align 4 - %this1 = load %struct.dim3*, %struct.dim3** %this.addr, align 8 - %x = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 0 - %0 = load i32, i32* %vx.addr, align 4 - store i32 %0, i32* %x, align 4 - %y = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 1 - %1 = load i32, i32* %vy.addr, align 4 - store i32 %1, i32* %y, align 4 - %z = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 2 - %2 = load i32, i32* %vz.addr, align 4 - store i32 %2, i32* %z, align 4 - ret void -} - -declare dso_local i32 @cudaDeviceSynchronize() #1 - -declare dso_local i32 @cudaFree(i8*) #1 - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z9printBitsji(i32 %val, i32 %numbits) #0 { -entry: - %val.addr = alloca i32, align 4 - %numbits.addr = alloca i32, align 4 - %i = alloca i32, align 4 - store i32 %val, i32* %val.addr, align 4 - store i32 %numbits, i32* %numbits.addr, align 4 - %0 = load i32, i32* %numbits.addr, align 4 - %sub = sub nsw i32 %0, 1 - store i32 %sub, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %1 = load i32, i32* %i, align 4 - %cmp = icmp sge i32 %1, 0 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %2 = load i32, i32* %val.addr, align 4 - %3 = load i32, i32* %i, align 4 - %shr = lshr i32 %2, %3 - %and = and i32 %shr, 1 - %add = add i32 48, %and - %call = call i32 @putchar(i32 %add) - br label %for.inc - -for.inc: ; preds = %for.body - %4 = load i32, i32* %i, align 4 - %dec = add nsw i32 %4, -1 - store i32 %dec, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - ret void -} - -declare dso_local i32 @putchar(i32) #1 - -; Function Attrs: noinline optnone uwtable -define dso_local %class.INode* @_Z9BuildTreeRA256_j([256 x i32]* dereferenceable(1024) %frequencies) #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -entry: - %frequencies.addr = alloca [256 x i32]*, align 8 - %trees = alloca %"class.std::priority_queue", align 8 - %ref.tmp = alloca %struct.NodeCmp, align 1 - %ref.tmp1 = alloca %"class.std::vector", align 8 - %exn.slot = alloca i8* - %ehselector.slot = alloca i32 - %i = alloca i32, align 4 - %ref.tmp4 = alloca %class.INode*, align 8 - %childR = alloca %class.INode*, align 8 - %childL = alloca %class.INode*, align 8 - %parent = alloca %class.INode*, align 8 - store [256 x i32]* %frequencies, [256 x i32]** %frequencies.addr, align 8 - call void @_ZNSt6vectorIP5INodeSaIS1_EEC2Ev(%"class.std::vector"* %ref.tmp1) - invoke void @_ZNSt14priority_queueIP5INodeSt6vectorIS1_SaIS1_EE7NodeCmpEC2ERKS5_RKS4_(%"class.std::priority_queue"* %trees, %struct.NodeCmp* dereferenceable(1) %ref.tmp, %"class.std::vector"* dereferenceable(24) %ref.tmp1) - to label %invoke.cont unwind label %lpad - -invoke.cont: ; preds = %entry - call void @_ZNSt6vectorIP5INodeSaIS1_EED2Ev(%"class.std::vector"* %ref.tmp1) - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %invoke.cont - %0 = load i32, i32* %i, align 4 - %cmp = icmp slt i32 %0, 256 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %1 = load [256 x i32]*, [256 x i32]** %frequencies.addr, align 8 - %2 = load i32, i32* %i, align 4 - %idxprom = sext i32 %2 to i64 - %arrayidx = getelementptr inbounds [256 x i32], [256 x i32]* %1, i64 0, i64 %idxprom - %3 = load i32, i32* %arrayidx, align 4 - %cmp3 = icmp ne i32 %3, 0 - br i1 %cmp3, label %if.then, label %if.end - -if.then: ; preds = %for.body - %call = invoke i8* @_Znwm(i64 16) #17 - to label %invoke.cont6 unwind label %lpad5 - -invoke.cont6: ; preds = %if.then - %4 = bitcast i8* %call to %class.LeafNode* - %5 = load [256 x i32]*, [256 x i32]** %frequencies.addr, align 8 - %6 = load i32, i32* %i, align 4 - %idxprom7 = sext i32 %6 to i64 - %arrayidx8 = getelementptr inbounds [256 x i32], [256 x i32]* %5, i64 0, i64 %idxprom7 - %7 = load i32, i32* %arrayidx8, align 4 - %8 = load i32, i32* %i, align 4 - %conv = trunc i32 %8 to i8 - invoke void @_ZN8LeafNodeC2Eic(%class.LeafNode* %4, i32 %7, i8 signext %conv) - to label %invoke.cont10 unwind label %lpad9 - -invoke.cont10: ; preds = %invoke.cont6 - %9 = bitcast %class.LeafNode* %4 to %class.INode* - store %class.INode* %9, %class.INode** %ref.tmp4, align 8 - invoke void @_ZNSt14priority_queueIP5INodeSt6vectorIS1_SaIS1_EE7NodeCmpE4pushERKS1_(%"class.std::priority_queue"* %trees, %class.INode** dereferenceable(8) %ref.tmp4) - to label %invoke.cont11 unwind label %lpad5 - -invoke.cont11: ; preds = %invoke.cont10 - br label %if.end - -lpad: ; preds = %entry - %10 = landingpad { i8*, i32 } - cleanup - %11 = extractvalue { i8*, i32 } %10, 0 - store i8* %11, i8** %exn.slot, align 8 - %12 = extractvalue { i8*, i32 } %10, 1 - store i32 %12, i32* %ehselector.slot, align 4 - invoke void @_ZNSt6vectorIP5INodeSaIS1_EED2Ev(%"class.std::vector"* %ref.tmp1) - to label %invoke.cont2 unwind label %terminate.lpad - -invoke.cont2: ; preds = %lpad - br label %eh.resume - -lpad5: ; preds = %while.end, %invoke.cont24, %invoke.cont20, %invoke.cont18, %invoke.cont17, %invoke.cont15, %while.body, %while.cond, %invoke.cont10, %if.then - %13 = landingpad { i8*, i32 } - cleanup - %14 = extractvalue { i8*, i32 } %13, 0 - store i8* %14, i8** %exn.slot, align 8 - %15 = extractvalue { i8*, i32 } %13, 1 - store i32 %15, i32* %ehselector.slot, align 4 - br label %ehcleanup - -lpad9: ; preds = %invoke.cont6 - %16 = landingpad { i8*, i32 } - cleanup - %17 = extractvalue { i8*, i32 } %16, 0 - store i8* %17, i8** %exn.slot, align 8 - %18 = extractvalue { i8*, i32 } %16, 1 - store i32 %18, i32* %ehselector.slot, align 4 - call void @_ZdlPv(i8* %call) #18 - br label %ehcleanup - -if.end: ; preds = %invoke.cont11, %for.body - br label %for.inc - -for.inc: ; preds = %if.end - %19 = load i32, i32* %i, align 4 - %inc = add nsw i32 %19, 1 - store i32 %inc, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - br label %while.cond - -while.cond: ; preds = %invoke.cont25, %for.end - %call13 = invoke i64 @_ZNKSt14priority_queueIP5INodeSt6vectorIS1_SaIS1_EE7NodeCmpE4sizeEv(%"class.std::priority_queue"* %trees) - to label %invoke.cont12 unwind label %lpad5 - -invoke.cont12: ; preds = %while.cond - %cmp14 = icmp ugt i64 %call13, 1 - br i1 %cmp14, label %while.body, label %while.end - -while.body: ; preds = %invoke.cont12 - %call16 = invoke dereferenceable(8) %class.INode** @_ZNKSt14priority_queueIP5INodeSt6vectorIS1_SaIS1_EE7NodeCmpE3topEv(%"class.std::priority_queue"* %trees) - to label %invoke.cont15 unwind label %lpad5 - -invoke.cont15: ; preds = %while.body - %20 = load %class.INode*, %class.INode** %call16, align 8 - store %class.INode* %20, %class.INode** %childR, align 8 - invoke void @_ZNSt14priority_queueIP5INodeSt6vectorIS1_SaIS1_EE7NodeCmpE3popEv(%"class.std::priority_queue"* %trees) - to label %invoke.cont17 unwind label %lpad5 - -invoke.cont17: ; preds = %invoke.cont15 - %call19 = invoke dereferenceable(8) %class.INode** @_ZNKSt14priority_queueIP5INodeSt6vectorIS1_SaIS1_EE7NodeCmpE3topEv(%"class.std::priority_queue"* %trees) - to label %invoke.cont18 unwind label %lpad5 - -invoke.cont18: ; preds = %invoke.cont17 - %21 = load %class.INode*, %class.INode** %call19, align 8 - store %class.INode* %21, %class.INode** %childL, align 8 - invoke void @_ZNSt14priority_queueIP5INodeSt6vectorIS1_SaIS1_EE7NodeCmpE3popEv(%"class.std::priority_queue"* %trees) - to label %invoke.cont20 unwind label %lpad5 - -invoke.cont20: ; preds = %invoke.cont18 - %call22 = invoke i8* @_Znwm(i64 32) #17 - to label %invoke.cont21 unwind label %lpad5 - -invoke.cont21: ; preds = %invoke.cont20 - %22 = bitcast i8* %call22 to %class.InternalNode* - %23 = load %class.INode*, %class.INode** %childR, align 8 - %24 = load %class.INode*, %class.INode** %childL, align 8 - invoke void @_ZN12InternalNodeC2EP5INodeS1_(%class.InternalNode* %22, %class.INode* %23, %class.INode* %24) - to label %invoke.cont24 unwind label %lpad23 - -invoke.cont24: ; preds = %invoke.cont21 - %25 = bitcast %class.InternalNode* %22 to %class.INode* - store %class.INode* %25, %class.INode** %parent, align 8 - invoke void @_ZNSt14priority_queueIP5INodeSt6vectorIS1_SaIS1_EE7NodeCmpE4pushERKS1_(%"class.std::priority_queue"* %trees, %class.INode** dereferenceable(8) %parent) - to label %invoke.cont25 unwind label %lpad5 - -invoke.cont25: ; preds = %invoke.cont24 - br label %while.cond - -lpad23: ; preds = %invoke.cont21 - %26 = landingpad { i8*, i32 } - cleanup - %27 = extractvalue { i8*, i32 } %26, 0 - store i8* %27, i8** %exn.slot, align 8 - %28 = extractvalue { i8*, i32 } %26, 1 - store i32 %28, i32* %ehselector.slot, align 4 - call void @_ZdlPv(i8* %call22) #18 - br label %ehcleanup - -while.end: ; preds = %invoke.cont12 - %call27 = invoke dereferenceable(8) %class.INode** @_ZNKSt14priority_queueIP5INodeSt6vectorIS1_SaIS1_EE7NodeCmpE3topEv(%"class.std::priority_queue"* %trees) - to label %invoke.cont26 unwind label %lpad5 - -invoke.cont26: ; preds = %while.end - %29 = load %class.INode*, %class.INode** %call27, align 8 - call void @_ZNSt14priority_queueIP5INodeSt6vectorIS1_SaIS1_EE7NodeCmpED2Ev(%"class.std::priority_queue"* %trees) - ret %class.INode* %29 - -ehcleanup: ; preds = %lpad23, %lpad9, %lpad5 - invoke void @_ZNSt14priority_queueIP5INodeSt6vectorIS1_SaIS1_EE7NodeCmpED2Ev(%"class.std::priority_queue"* %trees) - to label %invoke.cont28 unwind label %terminate.lpad - -invoke.cont28: ; preds = %ehcleanup - br label %eh.resume - -eh.resume: ; preds = %invoke.cont28, %invoke.cont2 - %exn = load i8*, i8** %exn.slot, align 8 - %sel = load i32, i32* %ehselector.slot, align 4 - %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0 - %lpad.val29 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1 - resume { i8*, i32 } %lpad.val29 - -terminate.lpad: ; preds = %ehcleanup, %lpad - %30 = landingpad { i8*, i32 } - catch i8* null - %31 = extractvalue { i8*, i32 } %30, 0 - call void @__clang_call_terminate(i8* %31) #16 - unreachable -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt6vectorIP5INodeSaIS1_EEC2Ev(%"class.std::vector"* %this) unnamed_addr #0 comdat align 2 { -entry: - %this.addr = alloca %"class.std::vector"*, align 8 - store %"class.std::vector"* %this, %"class.std::vector"** %this.addr, align 8 - %this1 = load %"class.std::vector"*, %"class.std::vector"** %this.addr, align 8 - %0 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - call void @_ZNSt12_Vector_baseIP5INodeSaIS1_EEC2Ev(%"struct.std::_Vector_base"* %0) - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt14priority_queueIP5INodeSt6vectorIS1_SaIS1_EE7NodeCmpEC2ERKS5_RKS4_(%"class.std::priority_queue"* %this, %struct.NodeCmp* dereferenceable(1) %__x, %"class.std::vector"* dereferenceable(24) %__s) unnamed_addr #0 comdat align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -entry: - %this.addr = alloca %"class.std::priority_queue"*, align 8 - %__x.addr = alloca %struct.NodeCmp*, align 8 - %__s.addr = alloca %"class.std::vector"*, align 8 - %agg.tmp = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %exn.slot = alloca i8* - %ehselector.slot = alloca i32 - %agg.tmp3 = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %agg.tmp8 = alloca %struct.NodeCmp, align 1 - store %"class.std::priority_queue"* %this, %"class.std::priority_queue"** %this.addr, align 8 - store %struct.NodeCmp* %__x, %struct.NodeCmp** %__x.addr, align 8 - store %"class.std::vector"* %__s, %"class.std::vector"** %__s.addr, align 8 - %this1 = load %"class.std::priority_queue"*, %"class.std::priority_queue"** %this.addr, align 8 - %c = getelementptr inbounds %"class.std::priority_queue", %"class.std::priority_queue"* %this1, i32 0, i32 0 - %0 = load %"class.std::vector"*, %"class.std::vector"** %__s.addr, align 8 - call void @_ZNSt6vectorIP5INodeSaIS1_EEC2ERKS3_(%"class.std::vector"* %c, %"class.std::vector"* dereferenceable(24) %0) - %comp = getelementptr inbounds %"class.std::priority_queue", %"class.std::priority_queue"* %this1, i32 0, i32 1 - %1 = load %struct.NodeCmp*, %struct.NodeCmp** %__x.addr, align 8 - %c2 = getelementptr inbounds %"class.std::priority_queue", %"class.std::priority_queue"* %this1, i32 0, i32 0 - %call = invoke %class.INode** @_ZNSt6vectorIP5INodeSaIS1_EE5beginEv(%"class.std::vector"* %c2) - to label %invoke.cont unwind label %lpad - -invoke.cont: ; preds = %entry - %coerce.dive = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %agg.tmp, i32 0, i32 0 - store %class.INode** %call, %class.INode*** %coerce.dive, align 8 - %c4 = getelementptr inbounds %"class.std::priority_queue", %"class.std::priority_queue"* %this1, i32 0, i32 0 - %call6 = invoke %class.INode** @_ZNSt6vectorIP5INodeSaIS1_EE3endEv(%"class.std::vector"* %c4) - to label %invoke.cont5 unwind label %lpad - -invoke.cont5: ; preds = %invoke.cont - %coerce.dive7 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %agg.tmp3, i32 0, i32 0 - store %class.INode** %call6, %class.INode*** %coerce.dive7, align 8 - %comp9 = getelementptr inbounds %"class.std::priority_queue", %"class.std::priority_queue"* %this1, i32 0, i32 1 - %coerce.dive10 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %agg.tmp, i32 0, i32 0 - %2 = load %class.INode**, %class.INode*** %coerce.dive10, align 8 - %coerce.dive11 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %agg.tmp3, i32 0, i32 0 - %3 = load %class.INode**, %class.INode*** %coerce.dive11, align 8 - invoke void @_ZSt9make_heapIN9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS3_SaIS3_EEEE7NodeCmpEvT_SA_T0_(%class.INode** %2, %class.INode** %3) - to label %invoke.cont12 unwind label %lpad - -invoke.cont12: ; preds = %invoke.cont5 - ret void - -lpad: ; preds = %invoke.cont5, %invoke.cont, %entry - %4 = landingpad { i8*, i32 } - cleanup - %5 = extractvalue { i8*, i32 } %4, 0 - store i8* %5, i8** %exn.slot, align 8 - %6 = extractvalue { i8*, i32 } %4, 1 - store i32 %6, i32* %ehselector.slot, align 4 - invoke void @_ZNSt6vectorIP5INodeSaIS1_EED2Ev(%"class.std::vector"* %c) - to label %invoke.cont13 unwind label %terminate.lpad - -invoke.cont13: ; preds = %lpad - br label %eh.resume - -eh.resume: ; preds = %invoke.cont13 - %exn = load i8*, i8** %exn.slot, align 8 - %sel = load i32, i32* %ehselector.slot, align 4 - %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0 - %lpad.val14 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1 - resume { i8*, i32 } %lpad.val14 - -terminate.lpad: ; preds = %lpad - %7 = landingpad { i8*, i32 } - catch i8* null - %8 = extractvalue { i8*, i32 } %7, 0 - call void @__clang_call_terminate(i8* %8) #16 - unreachable -} - -declare dso_local i32 @__gxx_personality_v0(...) - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt6vectorIP5INodeSaIS1_EED2Ev(%"class.std::vector"* %this) unnamed_addr #0 comdat align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -entry: - %this.addr = alloca %"class.std::vector"*, align 8 - %exn.slot = alloca i8* - %ehselector.slot = alloca i32 - store %"class.std::vector"* %this, %"class.std::vector"** %this.addr, align 8 - %this1 = load %"class.std::vector"*, %"class.std::vector"** %this.addr, align 8 - %0 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %0, i32 0, i32 0 - %_M_start = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl, i32 0, i32 0 - %1 = load %class.INode**, %class.INode*** %_M_start, align 8 - %2 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl2 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %2, i32 0, i32 0 - %_M_finish = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl2, i32 0, i32 1 - %3 = load %class.INode**, %class.INode*** %_M_finish, align 8 - %4 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %call = invoke dereferenceable(1) %"class.std::allocator"* @_ZNSt12_Vector_baseIP5INodeSaIS1_EE19_M_get_Tp_allocatorEv(%"struct.std::_Vector_base"* %4) - to label %invoke.cont unwind label %lpad - -invoke.cont: ; preds = %entry - invoke void @_ZSt8_DestroyIPP5INodeS1_EvT_S3_RSaIT0_E(%class.INode** %1, %class.INode** %3, %"class.std::allocator"* dereferenceable(1) %call) - to label %invoke.cont3 unwind label %lpad - -invoke.cont3: ; preds = %invoke.cont - %5 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - call void @_ZNSt12_Vector_baseIP5INodeSaIS1_EED2Ev(%"struct.std::_Vector_base"* %5) - ret void - -lpad: ; preds = %invoke.cont, %entry - %6 = landingpad { i8*, i32 } - cleanup - %7 = extractvalue { i8*, i32 } %6, 0 - store i8* %7, i8** %exn.slot, align 8 - %8 = extractvalue { i8*, i32 } %6, 1 - store i32 %8, i32* %ehselector.slot, align 4 - %9 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - invoke void @_ZNSt12_Vector_baseIP5INodeSaIS1_EED2Ev(%"struct.std::_Vector_base"* %9) - to label %invoke.cont4 unwind label %terminate.lpad - -invoke.cont4: ; preds = %lpad - br label %eh.resume - -eh.resume: ; preds = %invoke.cont4 - %exn = load i8*, i8** %exn.slot, align 8 - %sel = load i32, i32* %ehselector.slot, align 4 - %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0 - %lpad.val5 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1 - resume { i8*, i32 } %lpad.val5 - -terminate.lpad: ; preds = %lpad - %10 = landingpad { i8*, i32 } - catch i8* null - %11 = extractvalue { i8*, i32 } %10, 0 - call void @__clang_call_terminate(i8* %11) #16 - unreachable -} - -; Function Attrs: noinline noreturn nounwind -define linkonce_odr hidden void @__clang_call_terminate(i8* %0) #7 comdat { - %2 = call i8* @__cxa_begin_catch(i8* %0) #3 - call void @_ZSt9terminatev() #16 - unreachable -} - -declare dso_local i8* @__cxa_begin_catch(i8*) - -declare dso_local void @_ZSt9terminatev() - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt14priority_queueIP5INodeSt6vectorIS1_SaIS1_EE7NodeCmpE4pushERKS1_(%"class.std::priority_queue"* %this, %class.INode** dereferenceable(8) %__x) #0 comdat align 2 { -entry: - %this.addr = alloca %"class.std::priority_queue"*, align 8 - %__x.addr = alloca %class.INode**, align 8 - %agg.tmp = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %agg.tmp3 = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %agg.tmp7 = alloca %struct.NodeCmp, align 1 - store %"class.std::priority_queue"* %this, %"class.std::priority_queue"** %this.addr, align 8 - store %class.INode** %__x, %class.INode*** %__x.addr, align 8 - %this1 = load %"class.std::priority_queue"*, %"class.std::priority_queue"** %this.addr, align 8 - %c = getelementptr inbounds %"class.std::priority_queue", %"class.std::priority_queue"* %this1, i32 0, i32 0 - %0 = load %class.INode**, %class.INode*** %__x.addr, align 8 - call void @_ZNSt6vectorIP5INodeSaIS1_EE9push_backERKS1_(%"class.std::vector"* %c, %class.INode** dereferenceable(8) %0) - %c2 = getelementptr inbounds %"class.std::priority_queue", %"class.std::priority_queue"* %this1, i32 0, i32 0 - %call = call %class.INode** @_ZNSt6vectorIP5INodeSaIS1_EE5beginEv(%"class.std::vector"* %c2) - %coerce.dive = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %agg.tmp, i32 0, i32 0 - store %class.INode** %call, %class.INode*** %coerce.dive, align 8 - %c4 = getelementptr inbounds %"class.std::priority_queue", %"class.std::priority_queue"* %this1, i32 0, i32 0 - %call5 = call %class.INode** @_ZNSt6vectorIP5INodeSaIS1_EE3endEv(%"class.std::vector"* %c4) - %coerce.dive6 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %agg.tmp3, i32 0, i32 0 - store %class.INode** %call5, %class.INode*** %coerce.dive6, align 8 - %comp = getelementptr inbounds %"class.std::priority_queue", %"class.std::priority_queue"* %this1, i32 0, i32 1 - %coerce.dive8 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %agg.tmp, i32 0, i32 0 - %1 = load %class.INode**, %class.INode*** %coerce.dive8, align 8 - %coerce.dive9 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %agg.tmp3, i32 0, i32 0 - %2 = load %class.INode**, %class.INode*** %coerce.dive9, align 8 - call void @_ZSt9push_heapIN9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS3_SaIS3_EEEE7NodeCmpEvT_SA_T0_(%class.INode** %1, %class.INode** %2) - ret void -} - -; Function Attrs: nobuiltin -declare dso_local noalias i8* @_Znwm(i64) #8 - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZN8LeafNodeC2Eic(%class.LeafNode* %this, i32 %f, i8 signext %c) unnamed_addr #0 comdat align 2 { -entry: - %this.addr = alloca %class.LeafNode*, align 8 - %f.addr = alloca i32, align 4 - %c.addr = alloca i8, align 1 - store %class.LeafNode* %this, %class.LeafNode** %this.addr, align 8 - store i32 %f, i32* %f.addr, align 4 - store i8 %c, i8* %c.addr, align 1 - %this1 = load %class.LeafNode*, %class.LeafNode** %this.addr, align 8 - %0 = bitcast %class.LeafNode* %this1 to %class.INode* - %1 = load i32, i32* %f.addr, align 4 - call void @_ZN5INodeC2Ei(%class.INode* %0, i32 %1) - %2 = bitcast %class.LeafNode* %this1 to i32 (...)*** - store i32 (...)** bitcast (i8** getelementptr inbounds ({ [4 x i8*] }, { [4 x i8*] }* @_ZTV8LeafNode, i32 0, inrange i32 0, i32 2) to i32 (...)**), i32 (...)*** %2, align 8 - %c2 = getelementptr inbounds %class.LeafNode, %class.LeafNode* %this1, i32 0, i32 1 - %3 = load i8, i8* %c.addr, align 1 - store i8 %3, i8* %c2, align 4 - ret void -} - -; Function Attrs: nobuiltin nounwind -declare dso_local void @_ZdlPv(i8*) #9 - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local i64 @_ZNKSt14priority_queueIP5INodeSt6vectorIS1_SaIS1_EE7NodeCmpE4sizeEv(%"class.std::priority_queue"* %this) #6 comdat align 2 { -entry: - %this.addr = alloca %"class.std::priority_queue"*, align 8 - store %"class.std::priority_queue"* %this, %"class.std::priority_queue"** %this.addr, align 8 - %this1 = load %"class.std::priority_queue"*, %"class.std::priority_queue"** %this.addr, align 8 - %c = getelementptr inbounds %"class.std::priority_queue", %"class.std::priority_queue"* %this1, i32 0, i32 0 - %call = call i64 @_ZNKSt6vectorIP5INodeSaIS1_EE4sizeEv(%"class.std::vector"* %c) - ret i64 %call -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local dereferenceable(8) %class.INode** @_ZNKSt14priority_queueIP5INodeSt6vectorIS1_SaIS1_EE7NodeCmpE3topEv(%"class.std::priority_queue"* %this) #0 comdat align 2 { -entry: - %this.addr = alloca %"class.std::priority_queue"*, align 8 - store %"class.std::priority_queue"* %this, %"class.std::priority_queue"** %this.addr, align 8 - %this1 = load %"class.std::priority_queue"*, %"class.std::priority_queue"** %this.addr, align 8 - %c = getelementptr inbounds %"class.std::priority_queue", %"class.std::priority_queue"* %this1, i32 0, i32 0 - %call = call dereferenceable(8) %class.INode** @_ZNKSt6vectorIP5INodeSaIS1_EE5frontEv(%"class.std::vector"* %c) - ret %class.INode** %call -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt14priority_queueIP5INodeSt6vectorIS1_SaIS1_EE7NodeCmpE3popEv(%"class.std::priority_queue"* %this) #0 comdat align 2 { -entry: - %this.addr = alloca %"class.std::priority_queue"*, align 8 - %agg.tmp = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %agg.tmp2 = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %agg.tmp6 = alloca %struct.NodeCmp, align 1 - store %"class.std::priority_queue"* %this, %"class.std::priority_queue"** %this.addr, align 8 - %this1 = load %"class.std::priority_queue"*, %"class.std::priority_queue"** %this.addr, align 8 - %c = getelementptr inbounds %"class.std::priority_queue", %"class.std::priority_queue"* %this1, i32 0, i32 0 - %call = call %class.INode** @_ZNSt6vectorIP5INodeSaIS1_EE5beginEv(%"class.std::vector"* %c) - %coerce.dive = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %agg.tmp, i32 0, i32 0 - store %class.INode** %call, %class.INode*** %coerce.dive, align 8 - %c3 = getelementptr inbounds %"class.std::priority_queue", %"class.std::priority_queue"* %this1, i32 0, i32 0 - %call4 = call %class.INode** @_ZNSt6vectorIP5INodeSaIS1_EE3endEv(%"class.std::vector"* %c3) - %coerce.dive5 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %agg.tmp2, i32 0, i32 0 - store %class.INode** %call4, %class.INode*** %coerce.dive5, align 8 - %comp = getelementptr inbounds %"class.std::priority_queue", %"class.std::priority_queue"* %this1, i32 0, i32 1 - %coerce.dive7 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %agg.tmp, i32 0, i32 0 - %0 = load %class.INode**, %class.INode*** %coerce.dive7, align 8 - %coerce.dive8 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %agg.tmp2, i32 0, i32 0 - %1 = load %class.INode**, %class.INode*** %coerce.dive8, align 8 - call void @_ZSt8pop_heapIN9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS3_SaIS3_EEEE7NodeCmpEvT_SA_T0_(%class.INode** %0, %class.INode** %1) - %c9 = getelementptr inbounds %"class.std::priority_queue", %"class.std::priority_queue"* %this1, i32 0, i32 0 - call void @_ZNSt6vectorIP5INodeSaIS1_EE8pop_backEv(%"class.std::vector"* %c9) - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN12InternalNodeC2EP5INodeS1_(%class.InternalNode* %this, %class.INode* %c0, %class.INode* %c1) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %class.InternalNode*, align 8 - %c0.addr = alloca %class.INode*, align 8 - %c1.addr = alloca %class.INode*, align 8 - store %class.InternalNode* %this, %class.InternalNode** %this.addr, align 8 - store %class.INode* %c0, %class.INode** %c0.addr, align 8 - store %class.INode* %c1, %class.INode** %c1.addr, align 8 - %this1 = load %class.InternalNode*, %class.InternalNode** %this.addr, align 8 - %0 = bitcast %class.InternalNode* %this1 to %class.INode* - %1 = load %class.INode*, %class.INode** %c0.addr, align 8 - %f = getelementptr inbounds %class.INode, %class.INode* %1, i32 0, i32 1 - %2 = load i32, i32* %f, align 8 - %3 = load %class.INode*, %class.INode** %c1.addr, align 8 - %f2 = getelementptr inbounds %class.INode, %class.INode* %3, i32 0, i32 1 - %4 = load i32, i32* %f2, align 8 - %add = add nsw i32 %2, %4 - call void @_ZN5INodeC2Ei(%class.INode* %0, i32 %add) - %5 = bitcast %class.InternalNode* %this1 to i32 (...)*** - store i32 (...)** bitcast (i8** getelementptr inbounds ({ [4 x i8*] }, { [4 x i8*] }* @_ZTV12InternalNode, i32 0, inrange i32 0, i32 2) to i32 (...)**), i32 (...)*** %5, align 8 - %left = getelementptr inbounds %class.InternalNode, %class.InternalNode* %this1, i32 0, i32 1 - %6 = load %class.INode*, %class.INode** %c0.addr, align 8 - store %class.INode* %6, %class.INode** %left, align 8 - %right = getelementptr inbounds %class.InternalNode, %class.InternalNode* %this1, i32 0, i32 2 - %7 = load %class.INode*, %class.INode** %c1.addr, align 8 - store %class.INode* %7, %class.INode** %right, align 8 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt14priority_queueIP5INodeSt6vectorIS1_SaIS1_EE7NodeCmpED2Ev(%"class.std::priority_queue"* %this) unnamed_addr #0 comdat align 2 { -entry: - %this.addr = alloca %"class.std::priority_queue"*, align 8 - store %"class.std::priority_queue"* %this, %"class.std::priority_queue"** %this.addr, align 8 - %this1 = load %"class.std::priority_queue"*, %"class.std::priority_queue"** %this.addr, align 8 - %c = getelementptr inbounds %"class.std::priority_queue", %"class.std::priority_queue"* %this1, i32 0, i32 0 - call void @_ZNSt6vectorIP5INodeSaIS1_EED2Ev(%"class.std::vector"* %c) - ret void -} - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z13GenerateCodesPK5INodeRKSt6vectorIbSaIbEERSt3mapIhS4_St4lessIhESaISt4pairIKhS4_EEE(%class.INode* %node, %"class.std::vector.0"* dereferenceable(40) %prefix, %"class.std::map"* dereferenceable(48) %outCodes) #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -entry: - %node.addr = alloca %class.INode*, align 8 - %prefix.addr = alloca %"class.std::vector.0"*, align 8 - %outCodes.addr = alloca %"class.std::map"*, align 8 - %lf = alloca %class.LeafNode*, align 8 - %ref.tmp = alloca i8, align 1 - %in = alloca %class.InternalNode*, align 8 - %leftPrefix = alloca %"class.std::vector.0", align 8 - %exn.slot = alloca i8* - %ehselector.slot = alloca i32 - %rightPrefix = alloca %"class.std::vector.0", align 8 - store %class.INode* %node, %class.INode** %node.addr, align 8 - store %"class.std::vector.0"* %prefix, %"class.std::vector.0"** %prefix.addr, align 8 - store %"class.std::map"* %outCodes, %"class.std::map"** %outCodes.addr, align 8 - %0 = load %class.INode*, %class.INode** %node.addr, align 8 - %1 = icmp eq %class.INode* %0, null - br i1 %1, label %dynamic_cast.null, label %dynamic_cast.notnull - -dynamic_cast.notnull: ; preds = %entry - %2 = bitcast %class.INode* %0 to i8* - %3 = call i8* @__dynamic_cast(i8* %2, i8* bitcast ({ i8*, i8* }* @_ZTI5INode to i8*), i8* bitcast ({ i8*, i8*, i8* }* @_ZTI8LeafNode to i8*), i64 0) #3 - %4 = bitcast i8* %3 to %class.LeafNode* - br label %dynamic_cast.end - -dynamic_cast.null: ; preds = %entry - br label %dynamic_cast.end - -dynamic_cast.end: ; preds = %dynamic_cast.null, %dynamic_cast.notnull - %5 = phi %class.LeafNode* [ %4, %dynamic_cast.notnull ], [ null, %dynamic_cast.null ] - store %class.LeafNode* %5, %class.LeafNode** %lf, align 8 - %6 = load %class.LeafNode*, %class.LeafNode** %lf, align 8 - %tobool = icmp ne %class.LeafNode* %6, null - br i1 %tobool, label %if.then, label %if.else - -if.then: ; preds = %dynamic_cast.end - %7 = load %"class.std::vector.0"*, %"class.std::vector.0"** %prefix.addr, align 8 - %8 = load %"class.std::map"*, %"class.std::map"** %outCodes.addr, align 8 - %9 = load %class.LeafNode*, %class.LeafNode** %lf, align 8 - %c = getelementptr inbounds %class.LeafNode, %class.LeafNode* %9, i32 0, i32 1 - %10 = load i8, i8* %c, align 4 - store i8 %10, i8* %ref.tmp, align 1 - %call = call dereferenceable(40) %"class.std::vector.0"* @_ZNSt3mapIhSt6vectorIbSaIbEESt4lessIhESaISt4pairIKhS2_EEEixERS6_(%"class.std::map"* %8, i8* dereferenceable(1) %ref.tmp) - %call1 = call dereferenceable(40) %"class.std::vector.0"* @_ZNSt6vectorIbSaIbEEaSERKS1_(%"class.std::vector.0"* %call, %"class.std::vector.0"* dereferenceable(40) %7) - br label %if.end15 - -if.else: ; preds = %dynamic_cast.end - %11 = load %class.INode*, %class.INode** %node.addr, align 8 - %12 = icmp eq %class.INode* %11, null - br i1 %12, label %dynamic_cast.null3, label %dynamic_cast.notnull2 - -dynamic_cast.notnull2: ; preds = %if.else - %13 = bitcast %class.INode* %11 to i8* - %14 = call i8* @__dynamic_cast(i8* %13, i8* bitcast ({ i8*, i8* }* @_ZTI5INode to i8*), i8* bitcast ({ i8*, i8*, i8* }* @_ZTI12InternalNode to i8*), i64 0) #3 - %15 = bitcast i8* %14 to %class.InternalNode* - br label %dynamic_cast.end4 - -dynamic_cast.null3: ; preds = %if.else - br label %dynamic_cast.end4 - -dynamic_cast.end4: ; preds = %dynamic_cast.null3, %dynamic_cast.notnull2 - %16 = phi %class.InternalNode* [ %15, %dynamic_cast.notnull2 ], [ null, %dynamic_cast.null3 ] - store %class.InternalNode* %16, %class.InternalNode** %in, align 8 - %17 = load %class.InternalNode*, %class.InternalNode** %in, align 8 - %tobool5 = icmp ne %class.InternalNode* %17, null - br i1 %tobool5, label %if.then6, label %if.end - -if.then6: ; preds = %dynamic_cast.end4 - %18 = load %"class.std::vector.0"*, %"class.std::vector.0"** %prefix.addr, align 8 - call void @_ZNSt6vectorIbSaIbEEC2ERKS1_(%"class.std::vector.0"* %leftPrefix, %"class.std::vector.0"* dereferenceable(40) %18) - invoke void @_ZNSt6vectorIbSaIbEE9push_backEb(%"class.std::vector.0"* %leftPrefix, i1 zeroext false) - to label %invoke.cont unwind label %lpad - -invoke.cont: ; preds = %if.then6 - %19 = load %class.InternalNode*, %class.InternalNode** %in, align 8 - %left = getelementptr inbounds %class.InternalNode, %class.InternalNode* %19, i32 0, i32 1 - %20 = load %class.INode*, %class.INode** %left, align 8 - %21 = load %"class.std::map"*, %"class.std::map"** %outCodes.addr, align 8 - invoke void @_Z13GenerateCodesPK5INodeRKSt6vectorIbSaIbEERSt3mapIhS4_St4lessIhESaISt4pairIKhS4_EEE(%class.INode* %20, %"class.std::vector.0"* dereferenceable(40) %leftPrefix, %"class.std::map"* dereferenceable(48) %21) - to label %invoke.cont7 unwind label %lpad - -invoke.cont7: ; preds = %invoke.cont - %22 = load %"class.std::vector.0"*, %"class.std::vector.0"** %prefix.addr, align 8 - invoke void @_ZNSt6vectorIbSaIbEEC2ERKS1_(%"class.std::vector.0"* %rightPrefix, %"class.std::vector.0"* dereferenceable(40) %22) - to label %invoke.cont8 unwind label %lpad - -invoke.cont8: ; preds = %invoke.cont7 - invoke void @_ZNSt6vectorIbSaIbEE9push_backEb(%"class.std::vector.0"* %rightPrefix, i1 zeroext true) - to label %invoke.cont10 unwind label %lpad9 - -invoke.cont10: ; preds = %invoke.cont8 - %23 = load %class.InternalNode*, %class.InternalNode** %in, align 8 - %right = getelementptr inbounds %class.InternalNode, %class.InternalNode* %23, i32 0, i32 2 - %24 = load %class.INode*, %class.INode** %right, align 8 - %25 = load %"class.std::map"*, %"class.std::map"** %outCodes.addr, align 8 - invoke void @_Z13GenerateCodesPK5INodeRKSt6vectorIbSaIbEERSt3mapIhS4_St4lessIhESaISt4pairIKhS4_EEE(%class.INode* %24, %"class.std::vector.0"* dereferenceable(40) %rightPrefix, %"class.std::map"* dereferenceable(48) %25) - to label %invoke.cont11 unwind label %lpad9 - -invoke.cont11: ; preds = %invoke.cont10 - invoke void @_ZNSt6vectorIbSaIbEED2Ev(%"class.std::vector.0"* %rightPrefix) - to label %invoke.cont12 unwind label %lpad - -invoke.cont12: ; preds = %invoke.cont11 - call void @_ZNSt6vectorIbSaIbEED2Ev(%"class.std::vector.0"* %leftPrefix) - br label %if.end - -lpad: ; preds = %invoke.cont11, %invoke.cont7, %invoke.cont, %if.then6 - %26 = landingpad { i8*, i32 } - cleanup - %27 = extractvalue { i8*, i32 } %26, 0 - store i8* %27, i8** %exn.slot, align 8 - %28 = extractvalue { i8*, i32 } %26, 1 - store i32 %28, i32* %ehselector.slot, align 4 - br label %ehcleanup - -lpad9: ; preds = %invoke.cont10, %invoke.cont8 - %29 = landingpad { i8*, i32 } - cleanup - %30 = extractvalue { i8*, i32 } %29, 0 - store i8* %30, i8** %exn.slot, align 8 - %31 = extractvalue { i8*, i32 } %29, 1 - store i32 %31, i32* %ehselector.slot, align 4 - invoke void @_ZNSt6vectorIbSaIbEED2Ev(%"class.std::vector.0"* %rightPrefix) - to label %invoke.cont13 unwind label %terminate.lpad - -invoke.cont13: ; preds = %lpad9 - br label %ehcleanup - -ehcleanup: ; preds = %invoke.cont13, %lpad - invoke void @_ZNSt6vectorIbSaIbEED2Ev(%"class.std::vector.0"* %leftPrefix) - to label %invoke.cont14 unwind label %terminate.lpad - -invoke.cont14: ; preds = %ehcleanup - br label %eh.resume - -if.end: ; preds = %invoke.cont12, %dynamic_cast.end4 - br label %if.end15 - -if.end15: ; preds = %if.end, %if.then - ret void - -eh.resume: ; preds = %invoke.cont14 - %exn = load i8*, i8** %exn.slot, align 8 - %sel = load i32, i32* %ehselector.slot, align 4 - %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0 - %lpad.val16 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1 - resume { i8*, i32 } %lpad.val16 - -terminate.lpad: ; preds = %ehcleanup, %lpad9 - %32 = landingpad { i8*, i32 } - catch i8* null - %33 = extractvalue { i8*, i32 } %32, 0 - call void @__clang_call_terminate(i8* %33) #16 - unreachable -} - -; Function Attrs: nounwind readonly -declare dso_local i8* @__dynamic_cast(i8*, i8*, i8*, i64) #10 - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local dereferenceable(40) %"class.std::vector.0"* @_ZNSt3mapIhSt6vectorIbSaIbEESt4lessIhESaISt4pairIKhS2_EEEixERS6_(%"class.std::map"* %this, i8* dereferenceable(1) %__k) #0 comdat align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -entry: - %this.addr = alloca %"class.std::map"*, align 8 - %__k.addr = alloca i8*, align 8 - %__i = alloca %"struct.std::_Rb_tree_iterator", align 8 - %ref.tmp = alloca %"struct.std::_Rb_tree_iterator", align 8 - %ref.tmp5 = alloca %"struct.std::less", align 1 - %undef.agg.tmp = alloca %"struct.std::less", align 1 - %ref.tmp8 = alloca %"struct.std::_Rb_tree_iterator", align 8 - %agg.tmp = alloca %"struct.std::_Rb_tree_iterator", align 8 - %ref.tmp9 = alloca %"struct.std::pair", align 8 - %ref.tmp10 = alloca %"class.std::vector.0", align 8 - %exn.slot = alloca i8* - %ehselector.slot = alloca i32 - store %"class.std::map"* %this, %"class.std::map"** %this.addr, align 8 - store i8* %__k, i8** %__k.addr, align 8 - %this1 = load %"class.std::map"*, %"class.std::map"** %this.addr, align 8 - %0 = load i8*, i8** %__k.addr, align 8 - %call = call %"struct.std::_Rb_tree_node_base"* @_ZNSt3mapIhSt6vectorIbSaIbEESt4lessIhESaISt4pairIKhS2_EEE11lower_boundERS6_(%"class.std::map"* %this1, i8* dereferenceable(1) %0) - %coerce.dive = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %__i, i32 0, i32 0 - store %"struct.std::_Rb_tree_node_base"* %call, %"struct.std::_Rb_tree_node_base"** %coerce.dive, align 8 - %call2 = call %"struct.std::_Rb_tree_node_base"* @_ZNSt3mapIhSt6vectorIbSaIbEESt4lessIhESaISt4pairIKhS2_EEE3endEv(%"class.std::map"* %this1) - %coerce.dive3 = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %ref.tmp, i32 0, i32 0 - store %"struct.std::_Rb_tree_node_base"* %call2, %"struct.std::_Rb_tree_node_base"** %coerce.dive3, align 8 - %call4 = call zeroext i1 @_ZNKSt17_Rb_tree_iteratorISt4pairIKhSt6vectorIbSaIbEEEEeqERKS6_(%"struct.std::_Rb_tree_iterator"* %__i, %"struct.std::_Rb_tree_iterator"* dereferenceable(8) %ref.tmp) - br i1 %call4, label %lor.end, label %lor.rhs - -lor.rhs: ; preds = %entry - call void @_ZNKSt3mapIhSt6vectorIbSaIbEESt4lessIhESaISt4pairIKhS2_EEE8key_compEv(%"class.std::map"* %this1) - %1 = load i8*, i8** %__k.addr, align 8 - %call6 = call dereferenceable(48) %"struct.std::pair"* @_ZNKSt17_Rb_tree_iteratorISt4pairIKhSt6vectorIbSaIbEEEEdeEv(%"struct.std::_Rb_tree_iterator"* %__i) - %first = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %call6, i32 0, i32 0 - %call7 = call zeroext i1 @_ZNKSt4lessIhEclERKhS2_(%"struct.std::less"* %ref.tmp5, i8* dereferenceable(1) %1, i8* dereferenceable(1) %first) - br label %lor.end - -lor.end: ; preds = %lor.rhs, %entry - %2 = phi i1 [ true, %entry ], [ %call7, %lor.rhs ] - br i1 %2, label %if.then, label %if.end - -if.then: ; preds = %lor.end - %3 = bitcast %"struct.std::_Rb_tree_iterator"* %agg.tmp to i8* - %4 = bitcast %"struct.std::_Rb_tree_iterator"* %__i to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %3, i8* align 8 %4, i64 8, i1 false) - %5 = load i8*, i8** %__k.addr, align 8 - call void @_ZNSt6vectorIbSaIbEEC2Ev(%"class.std::vector.0"* %ref.tmp10) - invoke void @_ZNSt4pairIKhSt6vectorIbSaIbEEEC2ERS0_RKS3_(%"struct.std::pair"* %ref.tmp9, i8* dereferenceable(1) %5, %"class.std::vector.0"* dereferenceable(40) %ref.tmp10) - to label %invoke.cont unwind label %lpad - -invoke.cont: ; preds = %if.then - %coerce.dive11 = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %agg.tmp, i32 0, i32 0 - %6 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %coerce.dive11, align 8 - %call14 = invoke %"struct.std::_Rb_tree_node_base"* @_ZNSt3mapIhSt6vectorIbSaIbEESt4lessIhESaISt4pairIKhS2_EEE6insertESt17_Rb_tree_iteratorIS7_ERKS7_(%"class.std::map"* %this1, %"struct.std::_Rb_tree_node_base"* %6, %"struct.std::pair"* dereferenceable(48) %ref.tmp9) - to label %invoke.cont13 unwind label %lpad12 - -invoke.cont13: ; preds = %invoke.cont - %coerce.dive15 = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %ref.tmp8, i32 0, i32 0 - store %"struct.std::_Rb_tree_node_base"* %call14, %"struct.std::_Rb_tree_node_base"** %coerce.dive15, align 8 - %7 = bitcast %"struct.std::_Rb_tree_iterator"* %__i to i8* - %8 = bitcast %"struct.std::_Rb_tree_iterator"* %ref.tmp8 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %7, i8* align 8 %8, i64 8, i1 false) - invoke void @_ZNSt4pairIKhSt6vectorIbSaIbEEED2Ev(%"struct.std::pair"* %ref.tmp9) - to label %invoke.cont16 unwind label %lpad - -invoke.cont16: ; preds = %invoke.cont13 - call void @_ZNSt6vectorIbSaIbEED2Ev(%"class.std::vector.0"* %ref.tmp10) - br label %if.end - -lpad: ; preds = %invoke.cont13, %if.then - %9 = landingpad { i8*, i32 } - cleanup - %10 = extractvalue { i8*, i32 } %9, 0 - store i8* %10, i8** %exn.slot, align 8 - %11 = extractvalue { i8*, i32 } %9, 1 - store i32 %11, i32* %ehselector.slot, align 4 - br label %ehcleanup - -lpad12: ; preds = %invoke.cont - %12 = landingpad { i8*, i32 } - cleanup - %13 = extractvalue { i8*, i32 } %12, 0 - store i8* %13, i8** %exn.slot, align 8 - %14 = extractvalue { i8*, i32 } %12, 1 - store i32 %14, i32* %ehselector.slot, align 4 - invoke void @_ZNSt4pairIKhSt6vectorIbSaIbEEED2Ev(%"struct.std::pair"* %ref.tmp9) - to label %invoke.cont17 unwind label %terminate.lpad - -invoke.cont17: ; preds = %lpad12 - br label %ehcleanup - -ehcleanup: ; preds = %invoke.cont17, %lpad - invoke void @_ZNSt6vectorIbSaIbEED2Ev(%"class.std::vector.0"* %ref.tmp10) - to label %invoke.cont18 unwind label %terminate.lpad - -invoke.cont18: ; preds = %ehcleanup - br label %eh.resume - -if.end: ; preds = %invoke.cont16, %lor.end - %call19 = call dereferenceable(48) %"struct.std::pair"* @_ZNKSt17_Rb_tree_iteratorISt4pairIKhSt6vectorIbSaIbEEEEdeEv(%"struct.std::_Rb_tree_iterator"* %__i) - %second = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %call19, i32 0, i32 1 - ret %"class.std::vector.0"* %second - -eh.resume: ; preds = %invoke.cont18 - %exn = load i8*, i8** %exn.slot, align 8 - %sel = load i32, i32* %ehselector.slot, align 4 - %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0 - %lpad.val20 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1 - resume { i8*, i32 } %lpad.val20 - -terminate.lpad: ; preds = %ehcleanup, %lpad12 - %15 = landingpad { i8*, i32 } - catch i8* null - %16 = extractvalue { i8*, i32 } %15, 0 - call void @__clang_call_terminate(i8* %16) #16 - unreachable -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local dereferenceable(40) %"class.std::vector.0"* @_ZNSt6vectorIbSaIbEEaSERKS1_(%"class.std::vector.0"* %this, %"class.std::vector.0"* dereferenceable(40) %__x) #0 comdat align 2 { -entry: - %retval = alloca %"class.std::vector.0"*, align 8 - %this.addr = alloca %"class.std::vector.0"*, align 8 - %__x.addr = alloca %"class.std::vector.0"*, align 8 - %ref.tmp = alloca %"struct.std::_Bit_iterator", align 8 - %agg.tmp = alloca %"struct.std::_Bit_const_iterator", align 8 - %agg.tmp8 = alloca %"struct.std::_Bit_const_iterator", align 8 - %agg.tmp10 = alloca %"struct.std::_Bit_iterator", align 8 - store %"class.std::vector.0"* %this, %"class.std::vector.0"** %this.addr, align 8 - store %"class.std::vector.0"* %__x, %"class.std::vector.0"** %__x.addr, align 8 - %this1 = load %"class.std::vector.0"*, %"class.std::vector.0"** %this.addr, align 8 - %0 = load %"class.std::vector.0"*, %"class.std::vector.0"** %__x.addr, align 8 - %cmp = icmp eq %"class.std::vector.0"* %0, %this1 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - store %"class.std::vector.0"* %this1, %"class.std::vector.0"** %retval, align 8 - br label %return - -if.end: ; preds = %entry - %1 = load %"class.std::vector.0"*, %"class.std::vector.0"** %__x.addr, align 8 - %call = call i64 @_ZNKSt6vectorIbSaIbEE4sizeEv(%"class.std::vector.0"* %1) - %call2 = call i64 @_ZNKSt6vectorIbSaIbEE8capacityEv(%"class.std::vector.0"* %this1) - %cmp3 = icmp ugt i64 %call, %call2 - br i1 %cmp3, label %if.then4, label %if.end6 - -if.then4: ; preds = %if.end - %2 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Bvector_base"* - call void @_ZNSt13_Bvector_baseISaIbEE13_M_deallocateEv(%"struct.std::_Bvector_base"* %2) - %3 = load %"class.std::vector.0"*, %"class.std::vector.0"** %__x.addr, align 8 - %call5 = call i64 @_ZNKSt6vectorIbSaIbEE4sizeEv(%"class.std::vector.0"* %3) - call void @_ZNSt6vectorIbSaIbEE13_M_initializeEm(%"class.std::vector.0"* %this1, i64 %call5) - br label %if.end6 - -if.end6: ; preds = %if.then4, %if.end - %4 = load %"class.std::vector.0"*, %"class.std::vector.0"** %__x.addr, align 8 - %call7 = call { i64*, i32 } @_ZNKSt6vectorIbSaIbEE5beginEv(%"class.std::vector.0"* %4) - %5 = bitcast %"struct.std::_Bit_const_iterator"* %agg.tmp to { i64*, i32 }* - %6 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %5, i32 0, i32 0 - %7 = extractvalue { i64*, i32 } %call7, 0 - store i64* %7, i64** %6, align 8 - %8 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %5, i32 0, i32 1 - %9 = extractvalue { i64*, i32 } %call7, 1 - store i32 %9, i32* %8, align 8 - %10 = load %"class.std::vector.0"*, %"class.std::vector.0"** %__x.addr, align 8 - %call9 = call { i64*, i32 } @_ZNKSt6vectorIbSaIbEE3endEv(%"class.std::vector.0"* %10) - %11 = bitcast %"struct.std::_Bit_const_iterator"* %agg.tmp8 to { i64*, i32 }* - %12 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %11, i32 0, i32 0 - %13 = extractvalue { i64*, i32 } %call9, 0 - store i64* %13, i64** %12, align 8 - %14 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %11, i32 0, i32 1 - %15 = extractvalue { i64*, i32 } %call9, 1 - store i32 %15, i32* %14, align 8 - %call11 = call { i64*, i32 } @_ZNSt6vectorIbSaIbEE5beginEv(%"class.std::vector.0"* %this1) - %16 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp10 to { i64*, i32 }* - %17 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %16, i32 0, i32 0 - %18 = extractvalue { i64*, i32 } %call11, 0 - store i64* %18, i64** %17, align 8 - %19 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %16, i32 0, i32 1 - %20 = extractvalue { i64*, i32 } %call11, 1 - store i32 %20, i32* %19, align 8 - %21 = bitcast %"struct.std::_Bit_const_iterator"* %agg.tmp to { i64*, i32 }* - %22 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %21, i32 0, i32 0 - %23 = load i64*, i64** %22, align 8 - %24 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %21, i32 0, i32 1 - %25 = load i32, i32* %24, align 8 - %26 = bitcast %"struct.std::_Bit_const_iterator"* %agg.tmp8 to { i64*, i32 }* - %27 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %26, i32 0, i32 0 - %28 = load i64*, i64** %27, align 8 - %29 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %26, i32 0, i32 1 - %30 = load i32, i32* %29, align 8 - %call12 = call { i64*, i32 } @_ZNSt6vectorIbSaIbEE15_M_copy_alignedESt19_Bit_const_iteratorS2_St13_Bit_iterator(%"class.std::vector.0"* %this1, i64* %23, i32 %25, i64* %28, i32 %30, %"struct.std::_Bit_iterator"* byval(%"struct.std::_Bit_iterator") align 8 %agg.tmp10) - %31 = bitcast %"struct.std::_Bit_iterator"* %ref.tmp to { i64*, i32 }* - %32 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %31, i32 0, i32 0 - %33 = extractvalue { i64*, i32 } %call12, 0 - store i64* %33, i64** %32, align 8 - %34 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %31, i32 0, i32 1 - %35 = extractvalue { i64*, i32 } %call12, 1 - store i32 %35, i32* %34, align 8 - %36 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Bvector_base"* - %_M_impl = getelementptr inbounds %"struct.std::_Bvector_base", %"struct.std::_Bvector_base"* %36, i32 0, i32 0 - %_M_finish = getelementptr inbounds %"struct.std::_Bvector_base >::_Bvector_impl", %"struct.std::_Bvector_base >::_Bvector_impl"* %_M_impl, i32 0, i32 1 - %37 = bitcast %"struct.std::_Bit_iterator"* %_M_finish to i8* - %38 = bitcast %"struct.std::_Bit_iterator"* %ref.tmp to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %37, i8* align 8 %38, i64 12, i1 false) - store %"class.std::vector.0"* %this1, %"class.std::vector.0"** %retval, align 8 - br label %return - -return: ; preds = %if.end6, %if.then - %39 = load %"class.std::vector.0"*, %"class.std::vector.0"** %retval, align 8 - ret %"class.std::vector.0"* %39 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt6vectorIbSaIbEEC2ERKS1_(%"class.std::vector.0"* %this, %"class.std::vector.0"* dereferenceable(40) %__x) unnamed_addr #0 comdat align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -entry: - %this.addr = alloca %"class.std::vector.0"*, align 8 - %__x.addr = alloca %"class.std::vector.0"*, align 8 - %ref.tmp = alloca %"class.std::allocator.13", align 1 - %exn.slot = alloca i8* - %ehselector.slot = alloca i32 - %agg.tmp = alloca %"struct.std::_Bit_const_iterator", align 8 - %agg.tmp9 = alloca %"struct.std::_Bit_const_iterator", align 8 - %agg.tmp12 = alloca %"struct.std::_Bit_iterator", align 8 - %coerce = alloca %"struct.std::_Bit_iterator", align 8 - store %"class.std::vector.0"* %this, %"class.std::vector.0"** %this.addr, align 8 - store %"class.std::vector.0"* %__x, %"class.std::vector.0"** %__x.addr, align 8 - %this1 = load %"class.std::vector.0"*, %"class.std::vector.0"** %this.addr, align 8 - %0 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Bvector_base"* - %1 = load %"class.std::vector.0"*, %"class.std::vector.0"** %__x.addr, align 8 - %2 = bitcast %"class.std::vector.0"* %1 to %"struct.std::_Bvector_base"* - %call = call dereferenceable(1) %"class.std::allocator.1"* @_ZNKSt13_Bvector_baseISaIbEE20_M_get_Bit_allocatorEv(%"struct.std::_Bvector_base"* %2) - %call2 = call dereferenceable(1) %"class.std::allocator.1"* @_ZN9__gnu_cxx14__alloc_traitsISaImEE17_S_select_on_copyERKS1_(%"class.std::allocator.1"* dereferenceable(1) %call) - call void @_ZNSaIbEC2ImEERKSaIT_E(%"class.std::allocator.13"* %ref.tmp, %"class.std::allocator.1"* dereferenceable(1) %call2) #3 - invoke void @_ZNSt13_Bvector_baseISaIbEEC2ERKS0_(%"struct.std::_Bvector_base"* %0, %"class.std::allocator.13"* dereferenceable(1) %ref.tmp) - to label %invoke.cont unwind label %lpad - -invoke.cont: ; preds = %entry - call void @_ZNSaIbED2Ev(%"class.std::allocator.13"* %ref.tmp) #3 - %3 = load %"class.std::vector.0"*, %"class.std::vector.0"** %__x.addr, align 8 - %call5 = invoke i64 @_ZNKSt6vectorIbSaIbEE4sizeEv(%"class.std::vector.0"* %3) - to label %invoke.cont4 unwind label %lpad3 - -invoke.cont4: ; preds = %invoke.cont - invoke void @_ZNSt6vectorIbSaIbEE13_M_initializeEm(%"class.std::vector.0"* %this1, i64 %call5) - to label %invoke.cont6 unwind label %lpad3 - -invoke.cont6: ; preds = %invoke.cont4 - %4 = load %"class.std::vector.0"*, %"class.std::vector.0"** %__x.addr, align 8 - %call8 = invoke { i64*, i32 } @_ZNKSt6vectorIbSaIbEE5beginEv(%"class.std::vector.0"* %4) - to label %invoke.cont7 unwind label %lpad3 - -invoke.cont7: ; preds = %invoke.cont6 - %5 = bitcast %"struct.std::_Bit_const_iterator"* %agg.tmp to { i64*, i32 }* - %6 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %5, i32 0, i32 0 - %7 = extractvalue { i64*, i32 } %call8, 0 - store i64* %7, i64** %6, align 8 - %8 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %5, i32 0, i32 1 - %9 = extractvalue { i64*, i32 } %call8, 1 - store i32 %9, i32* %8, align 8 - %10 = load %"class.std::vector.0"*, %"class.std::vector.0"** %__x.addr, align 8 - %call11 = invoke { i64*, i32 } @_ZNKSt6vectorIbSaIbEE3endEv(%"class.std::vector.0"* %10) - to label %invoke.cont10 unwind label %lpad3 - -invoke.cont10: ; preds = %invoke.cont7 - %11 = bitcast %"struct.std::_Bit_const_iterator"* %agg.tmp9 to { i64*, i32 }* - %12 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %11, i32 0, i32 0 - %13 = extractvalue { i64*, i32 } %call11, 0 - store i64* %13, i64** %12, align 8 - %14 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %11, i32 0, i32 1 - %15 = extractvalue { i64*, i32 } %call11, 1 - store i32 %15, i32* %14, align 8 - %16 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Bvector_base"* - %_M_impl = getelementptr inbounds %"struct.std::_Bvector_base", %"struct.std::_Bvector_base"* %16, i32 0, i32 0 - %_M_start = getelementptr inbounds %"struct.std::_Bvector_base >::_Bvector_impl", %"struct.std::_Bvector_base >::_Bvector_impl"* %_M_impl, i32 0, i32 0 - %17 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp12 to i8* - %18 = bitcast %"struct.std::_Bit_iterator"* %_M_start to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %17, i8* align 8 %18, i64 16, i1 false) - %19 = bitcast %"struct.std::_Bit_const_iterator"* %agg.tmp to { i64*, i32 }* - %20 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %19, i32 0, i32 0 - %21 = load i64*, i64** %20, align 8 - %22 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %19, i32 0, i32 1 - %23 = load i32, i32* %22, align 8 - %24 = bitcast %"struct.std::_Bit_const_iterator"* %agg.tmp9 to { i64*, i32 }* - %25 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %24, i32 0, i32 0 - %26 = load i64*, i64** %25, align 8 - %27 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %24, i32 0, i32 1 - %28 = load i32, i32* %27, align 8 - %call14 = invoke { i64*, i32 } @_ZNSt6vectorIbSaIbEE15_M_copy_alignedESt19_Bit_const_iteratorS2_St13_Bit_iterator(%"class.std::vector.0"* %this1, i64* %21, i32 %23, i64* %26, i32 %28, %"struct.std::_Bit_iterator"* byval(%"struct.std::_Bit_iterator") align 8 %agg.tmp12) - to label %invoke.cont13 unwind label %lpad3 - -invoke.cont13: ; preds = %invoke.cont10 - %29 = bitcast %"struct.std::_Bit_iterator"* %coerce to { i64*, i32 }* - %30 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %29, i32 0, i32 0 - %31 = extractvalue { i64*, i32 } %call14, 0 - store i64* %31, i64** %30, align 8 - %32 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %29, i32 0, i32 1 - %33 = extractvalue { i64*, i32 } %call14, 1 - store i32 %33, i32* %32, align 8 - ret void - -lpad: ; preds = %entry - %34 = landingpad { i8*, i32 } - cleanup - %35 = extractvalue { i8*, i32 } %34, 0 - store i8* %35, i8** %exn.slot, align 8 - %36 = extractvalue { i8*, i32 } %34, 1 - store i32 %36, i32* %ehselector.slot, align 4 - call void @_ZNSaIbED2Ev(%"class.std::allocator.13"* %ref.tmp) #3 - br label %eh.resume - -lpad3: ; preds = %invoke.cont10, %invoke.cont7, %invoke.cont6, %invoke.cont4, %invoke.cont - %37 = landingpad { i8*, i32 } - cleanup - %38 = extractvalue { i8*, i32 } %37, 0 - store i8* %38, i8** %exn.slot, align 8 - %39 = extractvalue { i8*, i32 } %37, 1 - store i32 %39, i32* %ehselector.slot, align 4 - %40 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Bvector_base"* - invoke void @_ZNSt13_Bvector_baseISaIbEED2Ev(%"struct.std::_Bvector_base"* %40) - to label %invoke.cont15 unwind label %terminate.lpad - -invoke.cont15: ; preds = %lpad3 - br label %eh.resume - -eh.resume: ; preds = %invoke.cont15, %lpad - %exn = load i8*, i8** %exn.slot, align 8 - %sel = load i32, i32* %ehselector.slot, align 4 - %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0 - %lpad.val16 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1 - resume { i8*, i32 } %lpad.val16 - -terminate.lpad: ; preds = %lpad3 - %41 = landingpad { i8*, i32 } - catch i8* null - %42 = extractvalue { i8*, i32 } %41, 0 - call void @__clang_call_terminate(i8* %42) #16 - unreachable -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt6vectorIbSaIbEE9push_backEb(%"class.std::vector.0"* %this, i1 zeroext %__x) #0 comdat align 2 { -entry: - %this.addr = alloca %"class.std::vector.0"*, align 8 - %__x.addr = alloca i8, align 1 - %ref.tmp = alloca %"struct.std::_Bit_reference", align 8 - %ref.tmp3 = alloca %"struct.std::_Bit_iterator", align 8 - %agg.tmp = alloca %"struct.std::_Bit_iterator", align 8 - store %"class.std::vector.0"* %this, %"class.std::vector.0"** %this.addr, align 8 - %frombool = zext i1 %__x to i8 - store i8 %frombool, i8* %__x.addr, align 1 - %this1 = load %"class.std::vector.0"*, %"class.std::vector.0"** %this.addr, align 8 - %0 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Bvector_base"* - %_M_impl = getelementptr inbounds %"struct.std::_Bvector_base", %"struct.std::_Bvector_base"* %0, i32 0, i32 0 - %_M_finish = getelementptr inbounds %"struct.std::_Bvector_base >::_Bvector_impl", %"struct.std::_Bvector_base >::_Bvector_impl"* %_M_impl, i32 0, i32 1 - %1 = bitcast %"struct.std::_Bit_iterator"* %_M_finish to %"struct.std::_Bit_iterator_base"* - %_M_p = getelementptr inbounds %"struct.std::_Bit_iterator_base", %"struct.std::_Bit_iterator_base"* %1, i32 0, i32 0 - %2 = load i64*, i64** %_M_p, align 8 - %3 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Bvector_base"* - %_M_impl2 = getelementptr inbounds %"struct.std::_Bvector_base", %"struct.std::_Bvector_base"* %3, i32 0, i32 0 - %call = call i64* @_ZNKSt13_Bvector_baseISaIbEE13_Bvector_impl11_M_end_addrEv(%"struct.std::_Bvector_base >::_Bvector_impl"* %_M_impl2) - %cmp = icmp ne i64* %2, %call - br i1 %cmp, label %if.then, label %if.else - -if.then: ; preds = %entry - %4 = load i8, i8* %__x.addr, align 1 - %tobool = trunc i8 %4 to i1 - %5 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Bvector_base"* - %_M_impl4 = getelementptr inbounds %"struct.std::_Bvector_base", %"struct.std::_Bvector_base"* %5, i32 0, i32 0 - %_M_finish5 = getelementptr inbounds %"struct.std::_Bvector_base >::_Bvector_impl", %"struct.std::_Bvector_base >::_Bvector_impl"* %_M_impl4, i32 0, i32 1 - %call6 = call { i64*, i32 } @_ZNSt13_Bit_iteratorppEi(%"struct.std::_Bit_iterator"* %_M_finish5, i32 0) - %6 = bitcast %"struct.std::_Bit_iterator"* %ref.tmp3 to { i64*, i32 }* - %7 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %6, i32 0, i32 0 - %8 = extractvalue { i64*, i32 } %call6, 0 - store i64* %8, i64** %7, align 8 - %9 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %6, i32 0, i32 1 - %10 = extractvalue { i64*, i32 } %call6, 1 - store i32 %10, i32* %9, align 8 - %call7 = call { i64*, i64 } @_ZNKSt13_Bit_iteratordeEv(%"struct.std::_Bit_iterator"* %ref.tmp3) - %11 = bitcast %"struct.std::_Bit_reference"* %ref.tmp to { i64*, i64 }* - %12 = getelementptr inbounds { i64*, i64 }, { i64*, i64 }* %11, i32 0, i32 0 - %13 = extractvalue { i64*, i64 } %call7, 0 - store i64* %13, i64** %12, align 8 - %14 = getelementptr inbounds { i64*, i64 }, { i64*, i64 }* %11, i32 0, i32 1 - %15 = extractvalue { i64*, i64 } %call7, 1 - store i64 %15, i64* %14, align 8 - %call8 = call dereferenceable(16) %"struct.std::_Bit_reference"* @_ZNSt14_Bit_referenceaSEb(%"struct.std::_Bit_reference"* %ref.tmp, i1 zeroext %tobool) - br label %if.end - -if.else: ; preds = %entry - %call9 = call { i64*, i32 } @_ZNSt6vectorIbSaIbEE3endEv(%"class.std::vector.0"* %this1) - %16 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp to { i64*, i32 }* - %17 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %16, i32 0, i32 0 - %18 = extractvalue { i64*, i32 } %call9, 0 - store i64* %18, i64** %17, align 8 - %19 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %16, i32 0, i32 1 - %20 = extractvalue { i64*, i32 } %call9, 1 - store i32 %20, i32* %19, align 8 - %21 = load i8, i8* %__x.addr, align 1 - %tobool10 = trunc i8 %21 to i1 - %22 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp to { i64*, i32 }* - %23 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %22, i32 0, i32 0 - %24 = load i64*, i64** %23, align 8 - %25 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %22, i32 0, i32 1 - %26 = load i32, i32* %25, align 8 - call void @_ZNSt6vectorIbSaIbEE13_M_insert_auxESt13_Bit_iteratorb(%"class.std::vector.0"* %this1, i64* %24, i32 %26, i1 zeroext %tobool10) - br label %if.end - -if.end: ; preds = %if.else, %if.then - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt6vectorIbSaIbEED2Ev(%"class.std::vector.0"* %this) unnamed_addr #0 comdat align 2 { -entry: - %this.addr = alloca %"class.std::vector.0"*, align 8 - store %"class.std::vector.0"* %this, %"class.std::vector.0"** %this.addr, align 8 - %this1 = load %"class.std::vector.0"*, %"class.std::vector.0"** %this.addr, align 8 - %0 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Bvector_base"* - call void @_ZNSt13_Bvector_baseISaIbEED2Ev(%"struct.std::_Bvector_base"* %0) - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define dso_local i64 @_Z8get_timev() #6 { -entry: - %tv = alloca %struct.timeval, align 8 - %call = call i32 @gettimeofday(%struct.timeval* %tv, %struct.timezone* null) #3 - %tv_sec = getelementptr inbounds %struct.timeval, %struct.timeval* %tv, i32 0, i32 0 - %0 = load i64, i64* %tv_sec, align 8 - %mul = mul nsw i64 %0, 1000000 - %tv_usec = getelementptr inbounds %struct.timeval, %struct.timeval* %tv, i32 0, i32 1 - %1 = load i64, i64* %tv_usec, align 8 - %add = add nsw i64 %mul, %1 - ret i64 %add -} - -; Function Attrs: nounwind -declare dso_local i32 @gettimeofday(%struct.timeval*, %struct.timezone*) #11 - -; Function Attrs: noinline norecurse optnone uwtable -define dso_local i32 @main(i32 %argc, i8** %argv) #12 { -entry: - %retval = alloca i32, align 4 - %argc.addr = alloca i32, align 4 - %argv.addr = alloca i8**, align 8 - %num_block_threads = alloca i32, align 4 - %i = alloca i32, align 4 - store i32 0, i32* %retval, align 4 - store i32 %argc, i32* %argc.addr, align 4 - store i8** %argv, i8*** %argv.addr, align 8 - %call = call zeroext i1 @_Z8InitCUDAv() - br i1 %call, label %if.end, label %if.then - -if.then: ; preds = %entry - store i32 0, i32* %retval, align 4 - br label %return - -if.end: ; preds = %entry - store i32 256, i32* %num_block_threads, align 4 - %0 = load i32, i32* %argc.addr, align 4 - %cmp = icmp sgt i32 %0, 1 - br i1 %cmp, label %if.then1, label %if.else - -if.then1: ; preds = %if.end - store i32 1, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %if.then1 - %1 = load i32, i32* %i, align 4 - %2 = load i32, i32* %argc.addr, align 4 - %cmp2 = icmp slt i32 %1, %2 - br i1 %cmp2, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %3 = load i8**, i8*** %argv.addr, align 8 - %4 = load i32, i32* %i, align 4 - %idxprom = sext i32 %4 to i64 - %arrayidx = getelementptr inbounds i8*, i8** %3, i64 %idxprom - %5 = load i8*, i8** %arrayidx, align 8 - %6 = load i32, i32* %num_block_threads, align 4 - call void @_Z10runVLCTestPcjj(i8* %5, i32 %6, i32 1) - br label %for.inc - -for.inc: ; preds = %for.body - %7 = load i32, i32* %i, align 4 - %inc = add nsw i32 %7, 1 - store i32 %inc, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - br label %if.end3 - -if.else: ; preds = %if.end - %8 = load i32, i32* %num_block_threads, align 4 - call void @_Z10runVLCTestPcjj(i8* null, i32 %8, i32 1024) - br label %if.end3 - -if.end3: ; preds = %if.else, %for.end - store i32 0, i32* %retval, align 4 - br label %return - -return: ; preds = %if.end3, %if.then - %9 = load i32, i32* %retval, align 4 - ret i32 %9 -} - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z10runVLCTestPcjj(i8* %file_name, i32 %num_block_threads, i32 %num_blocks) #0 { -entry: - %file_name.addr = alloca i8*, align 8 - %num_block_threads.addr = alloca i32, align 4 - %num_blocks.addr = alloca i32, align 4 - %num_elements = alloca i32, align 4 - %mem_size = alloca i32, align 4 - %symbol_type_size = alloca i32, align 4 - %H = alloca double, align 8 - %sourceData = alloca i32*, align 8 - %destData = alloca i32*, align 8 - %crefData = alloca i32*, align 8 - %codewords = alloca i32*, align 8 - %codewordlens = alloca i32*, align 8 - %cw32 = alloca i32*, align 8 - %cw32len = alloca i32*, align 8 - %cw32idx = alloca i32*, align 8 - %cindex2 = alloca i32*, align 8 - %d_sourceData = alloca i32*, align 8 - %d_destData = alloca i32*, align 8 - %d_destDataPacked = alloca i32*, align 8 - %d_codewords = alloca i32*, align 8 - %d_codewordlens = alloca i32*, align 8 - %d_cw32 = alloca i32*, align 8 - %d_cw32len = alloca i32*, align 8 - %d_cw32idx = alloca i32*, align 8 - %d_cindex = alloca i32*, align 8 - %d_cindex2 = alloca i32*, align 8 - %err = alloca i32, align 4 - %err37 = alloca i32, align 4 - %err45 = alloca i32, align 4 - %err53 = alloca i32, align 4 - %err62 = alloca i32, align 4 - %err71 = alloca i32, align 4 - %err79 = alloca i32, align 4 - %err87 = alloca i32, align 4 - %err95 = alloca i32, align 4 - %err104 = alloca i32, align 4 - %err113 = alloca i32, align 4 - %err121 = alloca i32, align 4 - %err130 = alloca i32, align 4 - %err139 = alloca i32, align 4 - %grid_size = alloca %struct.dim3, align 4 - %block_size = alloca %struct.dim3, align 4 - %sm_size = alloca i32, align 4 - %NT = alloca i32, align 4 - %refbytesize = alloca i32, align 4 - %timer = alloca i64, align 8 - %msec = alloca float, align 4 - %num_ints = alloca i32, align 4 - %i = alloca i32, align 4 - %agg.tmp = alloca %struct.dim3, align 4 - %agg.tmp167 = alloca %struct.dim3, align 4 - %agg.tmp.coerce = alloca { i64, i32 }, align 4 - %agg.tmp167.coerce = alloca { i64, i32 }, align 4 - %num_scan_elements = alloca i32, align 4 - %agg.tmp174 = alloca %struct.dim3, align 4 - %agg.tmp176 = alloca %struct.dim3, align 4 - %agg.tmp174.coerce = alloca { i64, i32 }, align 4 - %agg.tmp176.coerce = alloca { i64, i32 }, align 4 - %err183 = alloca i32, align 4 - %err190 = alloca i32, align 4 - %err199 = alloca i32, align 4 - %err206 = alloca i32, align 4 - %err213 = alloca i32, align 4 - %err220 = alloca i32, align 4 - %err227 = alloca i32, align 4 - %err234 = alloca i32, align 4 - %err241 = alloca i32, align 4 - %err248 = alloca i32, align 4 - %err255 = alloca i32, align 4 - %err262 = alloca i32, align 4 - store i8* %file_name, i8** %file_name.addr, align 8 - store i32 %num_block_threads, i32* %num_block_threads.addr, align 4 - store i32 %num_blocks, i32* %num_blocks.addr, align 4 - %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([27 x i8], [27 x i8]* @.str.4, i64 0, i64 0)) - store i32 4, i32* %symbol_type_size, align 4 - %0 = load i8*, i8** %file_name.addr, align 8 - %1 = load i32, i32* %num_block_threads.addr, align 4 - %2 = load i32, i32* %symbol_type_size, align 4 - call void @_Z10initParamsPcjRjS0_S0_j(i8* %0, i32 %1, i32* dereferenceable(4) %num_blocks.addr, i32* dereferenceable(4) %num_elements, i32* dereferenceable(4) %mem_size, i32 %2) - %3 = load i32, i32* %num_elements, align 4 - %4 = load i32, i32* %num_blocks.addr, align 4 - %5 = load i32, i32* %num_block_threads.addr, align 4 - %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([98 x i8], [98 x i8]* @.str.5, i64 0, i64 0), i32 %3, i32 %4, i32 %5) - %6 = load i32, i32* %mem_size, align 4 - %conv = zext i32 %6 to i64 - %call2 = call noalias i8* @malloc(i64 %conv) #3 - %7 = bitcast i8* %call2 to i32* - store i32* %7, i32** %sourceData, align 8 - %8 = load i32, i32* %mem_size, align 4 - %conv3 = zext i32 %8 to i64 - %call4 = call noalias i8* @malloc(i64 %conv3) #3 - %9 = bitcast i8* %call4 to i32* - store i32* %9, i32** %destData, align 8 - %10 = load i32, i32* %mem_size, align 4 - %conv5 = zext i32 %10 to i64 - %call6 = call noalias i8* @malloc(i64 %conv5) #3 - %11 = bitcast i8* %call6 to i32* - store i32* %11, i32** %crefData, align 8 - %12 = load i32, i32* %symbol_type_size, align 4 - %mul = mul i32 256, %12 - %conv7 = zext i32 %mul to i64 - %call8 = call noalias i8* @malloc(i64 %conv7) #3 - %13 = bitcast i8* %call8 to i32* - store i32* %13, i32** %codewords, align 8 - %14 = load i32, i32* %symbol_type_size, align 4 - %mul9 = mul i32 256, %14 - %conv10 = zext i32 %mul9 to i64 - %call11 = call noalias i8* @malloc(i64 %conv10) #3 - %15 = bitcast i8* %call11 to i32* - store i32* %15, i32** %codewordlens, align 8 - %16 = load i32, i32* %mem_size, align 4 - %conv12 = zext i32 %16 to i64 - %call13 = call noalias i8* @malloc(i64 %conv12) #3 - %17 = bitcast i8* %call13 to i32* - store i32* %17, i32** %cw32, align 8 - %18 = load i32, i32* %mem_size, align 4 - %conv14 = zext i32 %18 to i64 - %call15 = call noalias i8* @malloc(i64 %conv14) #3 - %19 = bitcast i8* %call15 to i32* - store i32* %19, i32** %cw32len, align 8 - %20 = load i32, i32* %mem_size, align 4 - %conv16 = zext i32 %20 to i64 - %call17 = call noalias i8* @malloc(i64 %conv16) #3 - %21 = bitcast i8* %call17 to i32* - store i32* %21, i32** %cw32idx, align 8 - %22 = load i32, i32* %num_blocks.addr, align 4 - %conv18 = zext i32 %22 to i64 - %mul19 = mul i64 %conv18, 4 - %call20 = call noalias i8* @malloc(i64 %mul19) #3 - %23 = bitcast i8* %call20 to i32* - store i32* %23, i32** %cindex2, align 8 - %24 = load i32*, i32** %sourceData, align 8 - %25 = bitcast i32* %24 to i8* - %26 = load i32, i32* %mem_size, align 4 - %conv21 = zext i32 %26 to i64 - call void @llvm.memset.p0i8.i64(i8* align 4 %25, i8 0, i64 %conv21, i1 false) - %27 = load i32*, i32** %destData, align 8 - %28 = bitcast i32* %27 to i8* - %29 = load i32, i32* %mem_size, align 4 - %conv22 = zext i32 %29 to i64 - call void @llvm.memset.p0i8.i64(i8* align 4 %28, i8 0, i64 %conv22, i1 false) - %30 = load i32*, i32** %crefData, align 8 - %31 = bitcast i32* %30 to i8* - %32 = load i32, i32* %mem_size, align 4 - %conv23 = zext i32 %32 to i64 - call void @llvm.memset.p0i8.i64(i8* align 4 %31, i8 0, i64 %conv23, i1 false) - %33 = load i32*, i32** %cw32, align 8 - %34 = bitcast i32* %33 to i8* - %35 = load i32, i32* %mem_size, align 4 - %conv24 = zext i32 %35 to i64 - call void @llvm.memset.p0i8.i64(i8* align 4 %34, i8 0, i64 %conv24, i1 false) - %36 = load i32*, i32** %cw32len, align 8 - %37 = bitcast i32* %36 to i8* - %38 = load i32, i32* %mem_size, align 4 - %conv25 = zext i32 %38 to i64 - call void @llvm.memset.p0i8.i64(i8* align 4 %37, i8 0, i64 %conv25, i1 false) - %39 = load i32*, i32** %cw32idx, align 8 - %40 = bitcast i32* %39 to i8* - %41 = load i32, i32* %mem_size, align 4 - %conv26 = zext i32 %41 to i64 - call void @llvm.memset.p0i8.i64(i8* align 4 %40, i8 0, i64 %conv26, i1 false) - %42 = load i32*, i32** %codewords, align 8 - %43 = bitcast i32* %42 to i8* - %44 = load i32, i32* %symbol_type_size, align 4 - %mul27 = mul i32 256, %44 - %conv28 = zext i32 %mul27 to i64 - call void @llvm.memset.p0i8.i64(i8* align 4 %43, i8 0, i64 %conv28, i1 false) - %45 = load i32*, i32** %codewordlens, align 8 - %46 = bitcast i32* %45 to i8* - %47 = load i32, i32* %symbol_type_size, align 4 - %mul29 = mul i32 256, %47 - %conv30 = zext i32 %mul29 to i64 - call void @llvm.memset.p0i8.i64(i8* align 4 %46, i8 0, i64 %conv30, i1 false) - %48 = load i32*, i32** %cindex2, align 8 - %49 = bitcast i32* %48 to i8* - %50 = load i32, i32* %num_blocks.addr, align 4 - %conv31 = zext i32 %50 to i64 - %mul32 = mul i64 %conv31, 4 - call void @llvm.memset.p0i8.i64(i8* align 4 %49, i8 0, i64 %mul32, i1 false) - %51 = load i8*, i8** %file_name.addr, align 8 - %52 = load i32*, i32** %sourceData, align 8 - %53 = load i32*, i32** %codewords, align 8 - %54 = load i32*, i32** %codewordlens, align 8 - %55 = load i32, i32* %num_elements, align 4 - %56 = load i32, i32* %mem_size, align 4 - call void @_Z8loadDataPcPjS0_S0_jjRd(i8* %51, i32* %52, i32* %53, i32* %54, i32 %55, i32 %56, double* dereferenceable(8) %H) - %57 = bitcast i32** %d_sourceData to i8** - %58 = load i32, i32* %mem_size, align 4 - %conv33 = zext i32 %58 to i64 - %call34 = call i32 @cudaMalloc(i8** %57, i64 %conv33) - store i32 %call34, i32* %err, align 4 - %59 = load i32, i32* %err, align 4 - %cmp = icmp ne i32 0, %59 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - %60 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %61 = load i32, i32* %err, align 4 - %call35 = call i8* @cudaGetErrorString(i32 %61) - %call36 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %60, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str.6, i64 0, i64 0), i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.7, i64 0, i64 0), i32 107, i8* %call35) - call void @exit(i32 1) #16 - unreachable - -if.end: ; preds = %entry - %62 = bitcast i32** %d_destData to i8** - %63 = load i32, i32* %mem_size, align 4 - %conv38 = zext i32 %63 to i64 - %call39 = call i32 @cudaMalloc(i8** %62, i64 %conv38) - store i32 %call39, i32* %err37, align 4 - %64 = load i32, i32* %err37, align 4 - %cmp40 = icmp ne i32 0, %64 - br i1 %cmp40, label %if.then41, label %if.end44 - -if.then41: ; preds = %if.end - %65 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %66 = load i32, i32* %err37, align 4 - %call42 = call i8* @cudaGetErrorString(i32 %66) - %call43 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %65, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str.6, i64 0, i64 0), i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.7, i64 0, i64 0), i32 108, i8* %call42) - call void @exit(i32 1) #16 - unreachable - -if.end44: ; preds = %if.end - %67 = bitcast i32** %d_destDataPacked to i8** - %68 = load i32, i32* %mem_size, align 4 - %conv46 = zext i32 %68 to i64 - %call47 = call i32 @cudaMalloc(i8** %67, i64 %conv46) - store i32 %call47, i32* %err45, align 4 - %69 = load i32, i32* %err45, align 4 - %cmp48 = icmp ne i32 0, %69 - br i1 %cmp48, label %if.then49, label %if.end52 - -if.then49: ; preds = %if.end44 - %70 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %71 = load i32, i32* %err45, align 4 - %call50 = call i8* @cudaGetErrorString(i32 %71) - %call51 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %70, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str.6, i64 0, i64 0), i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.7, i64 0, i64 0), i32 109, i8* %call50) - call void @exit(i32 1) #16 - unreachable - -if.end52: ; preds = %if.end44 - %72 = bitcast i32** %d_codewords to i8** - %73 = load i32, i32* %symbol_type_size, align 4 - %mul54 = mul i32 256, %73 - %conv55 = zext i32 %mul54 to i64 - %call56 = call i32 @cudaMalloc(i8** %72, i64 %conv55) - store i32 %call56, i32* %err53, align 4 - %74 = load i32, i32* %err53, align 4 - %cmp57 = icmp ne i32 0, %74 - br i1 %cmp57, label %if.then58, label %if.end61 - -if.then58: ; preds = %if.end52 - %75 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %76 = load i32, i32* %err53, align 4 - %call59 = call i8* @cudaGetErrorString(i32 %76) - %call60 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %75, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str.6, i64 0, i64 0), i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.7, i64 0, i64 0), i32 112, i8* %call59) - call void @exit(i32 1) #16 - unreachable - -if.end61: ; preds = %if.end52 - %77 = bitcast i32** %d_codewordlens to i8** - %78 = load i32, i32* %symbol_type_size, align 4 - %mul63 = mul i32 256, %78 - %conv64 = zext i32 %mul63 to i64 - %call65 = call i32 @cudaMalloc(i8** %77, i64 %conv64) - store i32 %call65, i32* %err62, align 4 - %79 = load i32, i32* %err62, align 4 - %cmp66 = icmp ne i32 0, %79 - br i1 %cmp66, label %if.then67, label %if.end70 - -if.then67: ; preds = %if.end61 - %80 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %81 = load i32, i32* %err62, align 4 - %call68 = call i8* @cudaGetErrorString(i32 %81) - %call69 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %80, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str.6, i64 0, i64 0), i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.7, i64 0, i64 0), i32 114, i8* %call68) - call void @exit(i32 1) #16 - unreachable - -if.end70: ; preds = %if.end61 - %82 = bitcast i32** %d_cw32 to i8** - %83 = load i32, i32* %mem_size, align 4 - %conv72 = zext i32 %83 to i64 - %call73 = call i32 @cudaMalloc(i8** %82, i64 %conv72) - store i32 %call73, i32* %err71, align 4 - %84 = load i32, i32* %err71, align 4 - %cmp74 = icmp ne i32 0, %84 - br i1 %cmp74, label %if.then75, label %if.end78 - -if.then75: ; preds = %if.end70 - %85 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %86 = load i32, i32* %err71, align 4 - %call76 = call i8* @cudaGetErrorString(i32 %86) - %call77 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %85, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str.6, i64 0, i64 0), i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.7, i64 0, i64 0), i32 116, i8* %call76) - call void @exit(i32 1) #16 - unreachable - -if.end78: ; preds = %if.end70 - %87 = bitcast i32** %d_cw32len to i8** - %88 = load i32, i32* %mem_size, align 4 - %conv80 = zext i32 %88 to i64 - %call81 = call i32 @cudaMalloc(i8** %87, i64 %conv80) - store i32 %call81, i32* %err79, align 4 - %89 = load i32, i32* %err79, align 4 - %cmp82 = icmp ne i32 0, %89 - br i1 %cmp82, label %if.then83, label %if.end86 - -if.then83: ; preds = %if.end78 - %90 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %91 = load i32, i32* %err79, align 4 - %call84 = call i8* @cudaGetErrorString(i32 %91) - %call85 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %90, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str.6, i64 0, i64 0), i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.7, i64 0, i64 0), i32 117, i8* %call84) - call void @exit(i32 1) #16 - unreachable - -if.end86: ; preds = %if.end78 - %92 = bitcast i32** %d_cw32idx to i8** - %93 = load i32, i32* %mem_size, align 4 - %conv88 = zext i32 %93 to i64 - %call89 = call i32 @cudaMalloc(i8** %92, i64 %conv88) - store i32 %call89, i32* %err87, align 4 - %94 = load i32, i32* %err87, align 4 - %cmp90 = icmp ne i32 0, %94 - br i1 %cmp90, label %if.then91, label %if.end94 - -if.then91: ; preds = %if.end86 - %95 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %96 = load i32, i32* %err87, align 4 - %call92 = call i8* @cudaGetErrorString(i32 %96) - %call93 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %95, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str.6, i64 0, i64 0), i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.7, i64 0, i64 0), i32 118, i8* %call92) - call void @exit(i32 1) #16 - unreachable - -if.end94: ; preds = %if.end86 - %97 = bitcast i32** %d_cindex to i8** - %98 = load i32, i32* %num_blocks.addr, align 4 - %conv96 = zext i32 %98 to i64 - %mul97 = mul i64 %conv96, 4 - %call98 = call i32 @cudaMalloc(i8** %97, i64 %mul97) - store i32 %call98, i32* %err95, align 4 - %99 = load i32, i32* %err95, align 4 - %cmp99 = icmp ne i32 0, %99 - br i1 %cmp99, label %if.then100, label %if.end103 - -if.then100: ; preds = %if.end94 - %100 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %101 = load i32, i32* %err95, align 4 - %call101 = call i8* @cudaGetErrorString(i32 %101) - %call102 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %100, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str.6, i64 0, i64 0), i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.7, i64 0, i64 0), i32 121, i8* %call101) - call void @exit(i32 1) #16 - unreachable - -if.end103: ; preds = %if.end94 - %102 = bitcast i32** %d_cindex2 to i8** - %103 = load i32, i32* %num_blocks.addr, align 4 - %conv105 = zext i32 %103 to i64 - %mul106 = mul i64 %conv105, 4 - %call107 = call i32 @cudaMalloc(i8** %102, i64 %mul106) - store i32 %call107, i32* %err104, align 4 - %104 = load i32, i32* %err104, align 4 - %cmp108 = icmp ne i32 0, %104 - br i1 %cmp108, label %if.then109, label %if.end112 - -if.then109: ; preds = %if.end103 - %105 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %106 = load i32, i32* %err104, align 4 - %call110 = call i8* @cudaGetErrorString(i32 %106) - %call111 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %105, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str.6, i64 0, i64 0), i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.7, i64 0, i64 0), i32 123, i8* %call110) - call void @exit(i32 1) #16 - unreachable - -if.end112: ; preds = %if.end103 - %107 = load i32*, i32** %d_sourceData, align 8 - %108 = bitcast i32* %107 to i8* - %109 = load i32*, i32** %sourceData, align 8 - %110 = bitcast i32* %109 to i8* - %111 = load i32, i32* %mem_size, align 4 - %conv114 = zext i32 %111 to i64 - %call115 = call i32 @cudaMemcpy(i8* %108, i8* %110, i64 %conv114, i32 1) - store i32 %call115, i32* %err113, align 4 - %112 = load i32, i32* %err113, align 4 - %cmp116 = icmp ne i32 0, %112 - br i1 %cmp116, label %if.then117, label %if.end120 - -if.then117: ; preds = %if.end112 - %113 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %114 = load i32, i32* %err113, align 4 - %call118 = call i8* @cudaGetErrorString(i32 %114) - %call119 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %113, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str.6, i64 0, i64 0), i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.7, i64 0, i64 0), i32 141, i8* %call118) - call void @exit(i32 1) #16 - unreachable - -if.end120: ; preds = %if.end112 - %115 = load i32*, i32** %d_codewords, align 8 - %116 = bitcast i32* %115 to i8* - %117 = load i32*, i32** %codewords, align 8 - %118 = bitcast i32* %117 to i8* - %119 = load i32, i32* %symbol_type_size, align 4 - %mul122 = mul i32 256, %119 - %conv123 = zext i32 %mul122 to i64 - %call124 = call i32 @cudaMemcpy(i8* %116, i8* %118, i64 %conv123, i32 1) - store i32 %call124, i32* %err121, align 4 - %120 = load i32, i32* %err121, align 4 - %cmp125 = icmp ne i32 0, %120 - br i1 %cmp125, label %if.then126, label %if.end129 - -if.then126: ; preds = %if.end120 - %121 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %122 = load i32, i32* %err121, align 4 - %call127 = call i8* @cudaGetErrorString(i32 %122) - %call128 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %121, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str.6, i64 0, i64 0), i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.7, i64 0, i64 0), i32 144, i8* %call127) - call void @exit(i32 1) #16 - unreachable - -if.end129: ; preds = %if.end120 - %123 = load i32*, i32** %d_codewordlens, align 8 - %124 = bitcast i32* %123 to i8* - %125 = load i32*, i32** %codewordlens, align 8 - %126 = bitcast i32* %125 to i8* - %127 = load i32, i32* %symbol_type_size, align 4 - %mul131 = mul i32 256, %127 - %conv132 = zext i32 %mul131 to i64 - %call133 = call i32 @cudaMemcpy(i8* %124, i8* %126, i64 %conv132, i32 1) - store i32 %call133, i32* %err130, align 4 - %128 = load i32, i32* %err130, align 4 - %cmp134 = icmp ne i32 0, %128 - br i1 %cmp134, label %if.then135, label %if.end138 - -if.then135: ; preds = %if.end129 - %129 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %130 = load i32, i32* %err130, align 4 - %call136 = call i8* @cudaGetErrorString(i32 %130) - %call137 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %129, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str.6, i64 0, i64 0), i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.7, i64 0, i64 0), i32 147, i8* %call136) - call void @exit(i32 1) #16 - unreachable - -if.end138: ; preds = %if.end129 - %131 = load i32*, i32** %d_destData, align 8 - %132 = bitcast i32* %131 to i8* - %133 = load i32*, i32** %destData, align 8 - %134 = bitcast i32* %133 to i8* - %135 = load i32, i32* %mem_size, align 4 - %conv140 = zext i32 %135 to i64 - %call141 = call i32 @cudaMemcpy(i8* %132, i8* %134, i64 %conv140, i32 1) - store i32 %call141, i32* %err139, align 4 - %136 = load i32, i32* %err139, align 4 - %cmp142 = icmp ne i32 0, %136 - br i1 %cmp142, label %if.then143, label %if.end146 - -if.then143: ; preds = %if.end138 - %137 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %138 = load i32, i32* %err139, align 4 - %call144 = call i8* @cudaGetErrorString(i32 %138) - %call145 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %137, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str.6, i64 0, i64 0), i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.7, i64 0, i64 0), i32 149, i8* %call144) - call void @exit(i32 1) #16 - unreachable - -if.end146: ; preds = %if.end138 - %139 = load i32, i32* %num_blocks.addr, align 4 - call void @_ZN4dim3C2Ejjj(%struct.dim3* %grid_size, i32 %139, i32 1, i32 1) - %140 = load i32, i32* %num_block_threads.addr, align 4 - call void @_ZN4dim3C2Ejjj(%struct.dim3* %block_size, i32 %140, i32 1, i32 1) - store i32 10, i32* %NT, align 4 - %call147 = call i64 @_Z8get_timev() - store i64 %call147, i64* %timer, align 8 - %141 = load i32*, i32** %sourceData, align 8 - %142 = load i32, i32* %num_elements, align 4 - %143 = load i32*, i32** %crefData, align 8 - %144 = load i32*, i32** %codewords, align 8 - %145 = load i32*, i32** %codewordlens, align 8 - call void @cpu_vlc_encode(i32* %141, i32 %142, i32* %143, i32* %refbytesize, i32* %144, i32* %145) - %call148 = call i64 @_Z8get_timev() - %146 = load i64, i64* %timer, align 8 - %sub = sub nsw i64 %call148, %146 - %conv149 = sitofp i64 %sub to double - %div = fdiv double %conv149, 1.000000e+03 - %conv150 = fptrunc double %div to float - store float %conv150, float* %msec, align 4 - %147 = load float, float* %msec, align 4 - %conv151 = fpext float %147 to double - %call152 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([34 x i8], [34 x i8]* @.str.8, i64 0, i64 0), double %conv151) - %148 = load i32, i32* %refbytesize, align 4 - %call153 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str.9, i64 0, i64 0), i32 %148) - %149 = load i32, i32* %refbytesize, align 4 - %div154 = udiv i32 %149, 4 - %150 = load i32, i32* %refbytesize, align 4 - %rem = urem i32 %150, 4 - %cmp155 = icmp eq i32 %rem, 0 - %151 = zext i1 %cmp155 to i64 - %cond = select i1 %cmp155, i32 0, i32 1 - %add = add i32 %div154, %cond - store i32 %add, i32* %num_ints, align 4 - %152 = load i32, i32* %num_blocks.addr, align 4 - %x = getelementptr inbounds %struct.dim3, %struct.dim3* %grid_size, i32 0, i32 0 - store i32 %152, i32* %x, align 4 - %153 = load i32, i32* %num_block_threads.addr, align 4 - %x156 = getelementptr inbounds %struct.dim3, %struct.dim3* %block_size, i32 0, i32 0 - store i32 %153, i32* %x156, align 4 - %x157 = getelementptr inbounds %struct.dim3, %struct.dim3* %block_size, i32 0, i32 0 - %154 = load i32, i32* %x157, align 4 - %conv158 = zext i32 %154 to i64 - %mul159 = mul i64 %conv158, 4 - %conv160 = trunc i64 %mul159 to i32 - store i32 %conv160, i32* %sm_size, align 4 - %x161 = getelementptr inbounds %struct.dim3, %struct.dim3* %block_size, i32 0, i32 0 - %155 = load i32, i32* %x161, align 4 - %conv162 = zext i32 %155 to i64 - %mul163 = mul i64 %conv162, 4 - %add164 = add i64 2048, %mul163 - %conv165 = trunc i64 %add164 to i32 - store i32 %conv165, i32* %sm_size, align 4 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %if.end146 - %156 = load i32, i32* %i, align 4 - %157 = load i32, i32* %NT, align 4 - %cmp166 = icmp ult i32 %156, %157 - br i1 %cmp166, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %158 = bitcast %struct.dim3* %agg.tmp to i8* - %159 = bitcast %struct.dim3* %grid_size to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %158, i8* align 4 %159, i64 12, i1 false) - %160 = bitcast %struct.dim3* %agg.tmp167 to i8* - %161 = bitcast %struct.dim3* %block_size to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %160, i8* align 4 %161, i64 12, i1 false) - %162 = bitcast { i64, i32 }* %agg.tmp.coerce to i8* - %163 = bitcast %struct.dim3* %agg.tmp to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %162, i8* align 4 %163, i64 12, i1 false) - %164 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 0 - %165 = load i64, i64* %164, align 4 - %166 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 1 - %167 = load i32, i32* %166, align 4 - %168 = bitcast { i64, i32 }* %agg.tmp167.coerce to i8* - %169 = bitcast %struct.dim3* %agg.tmp167 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %168, i8* align 4 %169, i64 12, i1 false) - %170 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp167.coerce, i32 0, i32 0 - %171 = load i64, i64* %170, align 4 - %172 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp167.coerce, i32 0, i32 1 - %173 = load i32, i32* %172, align 4 - %call168 = call i32 @__cudaPushCallConfiguration(i64 %165, i32 %167, i64 %171, i32 %173, i64 0, i8* null) - %tobool = icmp ne i32 %call168, 0 - br i1 %tobool, label %kcall.end, label %kcall.configok - -kcall.configok: ; preds = %for.body - %174 = load i32*, i32** %d_sourceData, align 8 - %175 = load i32*, i32** %d_codewords, align 8 - %176 = load i32*, i32** %d_codewordlens, align 8 - %177 = load i32*, i32** %d_cw32, align 8 - %178 = load i32*, i32** %d_cw32len, align 8 - %179 = load i32*, i32** %d_cw32idx, align 8 - %180 = load i32*, i32** %d_destData, align 8 - %181 = load i32*, i32** %d_cindex, align 8 - call void @_ZL26vlc_encode_kernel_sm64huffPjPKjS1_S_S_S_S_S_(i32* %174, i32* %175, i32* %176, i32* %177, i32* %178, i32* %179, i32* %180, i32* %181) - br label %kcall.end - -kcall.end: ; preds = %kcall.configok, %for.body - %call169 = call i32 @cudaThreadSynchronize() - br label %for.inc - -for.inc: ; preds = %kcall.end - %182 = load i32, i32* %i, align 4 - %inc = add nsw i32 %182, 1 - store i32 %inc, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %x170 = getelementptr inbounds %struct.dim3, %struct.dim3* %grid_size, i32 0, i32 0 - %183 = load i32, i32* %x170, align 4 - store i32 %183, i32* %num_scan_elements, align 4 - %184 = load i32, i32* %num_scan_elements, align 4 - call void @_ZL17preallocBlockSumsj(i32 %184) - %185 = load i32*, i32** %d_destDataPacked, align 8 - %186 = bitcast i32* %185 to i8* - %187 = load i32, i32* %mem_size, align 4 - %conv171 = zext i32 %187 to i64 - %call172 = call i32 @cudaMemset(i8* %186, i32 0, i64 %conv171) - %188 = load i32, i32* %num_scan_elements, align 4 - %call173 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([40 x i8], [40 x i8]* @.str.10, i64 0, i64 0), i32 %188) - %189 = load i32*, i32** %d_cindex2, align 8 - %190 = load i32*, i32** %d_cindex, align 8 - %191 = load i32, i32* %num_scan_elements, align 4 - call void @_ZL12prescanArrayPjS_i(i32* %189, i32* %190, i32 %191) - %192 = load i32, i32* %num_scan_elements, align 4 - %div175 = udiv i32 %192, 32 - call void @_ZN4dim3C2Ejjj(%struct.dim3* %agg.tmp174, i32 %div175, i32 1, i32 1) - call void @_ZN4dim3C2Ejjj(%struct.dim3* %agg.tmp176, i32 32, i32 1, i32 1) - %193 = bitcast { i64, i32 }* %agg.tmp174.coerce to i8* - %194 = bitcast %struct.dim3* %agg.tmp174 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %193, i8* align 4 %194, i64 12, i1 false) - %195 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp174.coerce, i32 0, i32 0 - %196 = load i64, i64* %195, align 4 - %197 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp174.coerce, i32 0, i32 1 - %198 = load i32, i32* %197, align 4 - %199 = bitcast { i64, i32 }* %agg.tmp176.coerce to i8* - %200 = bitcast %struct.dim3* %agg.tmp176 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %199, i8* align 4 %200, i64 12, i1 false) - %201 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp176.coerce, i32 0, i32 0 - %202 = load i64, i64* %201, align 4 - %203 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp176.coerce, i32 0, i32 1 - %204 = load i32, i32* %203, align 4 - %call177 = call i32 @__cudaPushCallConfiguration(i64 %196, i32 %198, i64 %202, i32 %204, i64 0, i8* null) - %tobool178 = icmp ne i32 %call177, 0 - br i1 %tobool178, label %kcall.end181, label %kcall.configok179 - -kcall.configok179: ; preds = %for.end - %205 = load i32*, i32** %d_destData, align 8 - %206 = load i32*, i32** %d_cindex, align 8 - %207 = load i32*, i32** %d_cindex2, align 8 - %208 = load i32*, i32** %d_destDataPacked, align 8 - %209 = load i32, i32* %num_elements, align 4 - %210 = load i32, i32* %num_scan_elements, align 4 - %div180 = udiv i32 %209, %210 - call void @_ZL5pack2PjS_S_S_j(i32* %205, i32* %206, i32* %207, i32* %208, i32 %div180) - br label %kcall.end181 - -kcall.end181: ; preds = %kcall.configok179, %for.end - %call182 = call i32 @cudaThreadSynchronize() - %call184 = call i32 @cudaGetLastError() - store i32 %call184, i32* %err183, align 4 - %211 = load i32, i32* %err183, align 4 - %cmp185 = icmp ne i32 0, %211 - br i1 %cmp185, label %if.then186, label %if.end189 - -if.then186: ; preds = %kcall.end181 - %212 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %213 = load i32, i32* %err183, align 4 - %call187 = call i8* @cudaGetErrorString(i32 %213) - %call188 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %212, i8* getelementptr inbounds ([46 x i8], [46 x i8]* @.str.11, i64 0, i64 0), i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str.12, i64 0, i64 0), i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.7, i64 0, i64 0), i32 198, i8* %call187) - call void @exit(i32 1) #16 - unreachable - -if.end189: ; preds = %kcall.end181 - call void @_ZL16deallocBlockSumsv() - %214 = load i32*, i32** %destData, align 8 - %215 = bitcast i32* %214 to i8* - %216 = load i32*, i32** %d_destDataPacked, align 8 - %217 = bitcast i32* %216 to i8* - %218 = load i32, i32* %mem_size, align 4 - %conv191 = zext i32 %218 to i64 - %call192 = call i32 @cudaMemcpy(i8* %215, i8* %217, i64 %conv191, i32 2) - store i32 %call192, i32* %err190, align 4 - %219 = load i32, i32* %err190, align 4 - %cmp193 = icmp ne i32 0, %219 - br i1 %cmp193, label %if.then194, label %if.end197 - -if.then194: ; preds = %if.end189 - %220 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %221 = load i32, i32* %err190, align 4 - %call195 = call i8* @cudaGetErrorString(i32 %221) - %call196 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %220, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str.6, i64 0, i64 0), i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.7, i64 0, i64 0), i32 203, i8* %call195) - call void @exit(i32 1) #16 - unreachable - -if.end197: ; preds = %if.end189 - %222 = load i32*, i32** %crefData, align 8 - %223 = load i32*, i32** %destData, align 8 - %224 = load i32, i32* %num_ints, align 4 - %call198 = call i32 @_Z15compare_vectorsIjEiPT_S1_j(i32* %222, i32* %223, i32 %224) - %225 = load i32*, i32** %sourceData, align 8 - %226 = bitcast i32* %225 to i8* - call void @free(i8* %226) #3 - %227 = load i32*, i32** %destData, align 8 - %228 = bitcast i32* %227 to i8* - call void @free(i8* %228) #3 - %229 = load i32*, i32** %codewords, align 8 - %230 = bitcast i32* %229 to i8* - call void @free(i8* %230) #3 - %231 = load i32*, i32** %codewordlens, align 8 - %232 = bitcast i32* %231 to i8* - call void @free(i8* %232) #3 - %233 = load i32*, i32** %cw32, align 8 - %234 = bitcast i32* %233 to i8* - call void @free(i8* %234) #3 - %235 = load i32*, i32** %cw32len, align 8 - %236 = bitcast i32* %235 to i8* - call void @free(i8* %236) #3 - %237 = load i32*, i32** %crefData, align 8 - %238 = bitcast i32* %237 to i8* - call void @free(i8* %238) #3 - %239 = load i32*, i32** %d_sourceData, align 8 - %240 = bitcast i32* %239 to i8* - %call200 = call i32 @cudaFree(i8* %240) - store i32 %call200, i32* %err199, align 4 - %241 = load i32, i32* %err199, align 4 - %cmp201 = icmp ne i32 0, %241 - br i1 %cmp201, label %if.then202, label %if.end205 - -if.then202: ; preds = %if.end197 - %242 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %243 = load i32, i32* %err199, align 4 - %call203 = call i8* @cudaGetErrorString(i32 %243) - %call204 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %242, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str.6, i64 0, i64 0), i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.7, i64 0, i64 0), i32 214, i8* %call203) - call void @exit(i32 1) #16 - unreachable - -if.end205: ; preds = %if.end197 - %244 = load i32*, i32** %d_destData, align 8 - %245 = bitcast i32* %244 to i8* - %call207 = call i32 @cudaFree(i8* %245) - store i32 %call207, i32* %err206, align 4 - %246 = load i32, i32* %err206, align 4 - %cmp208 = icmp ne i32 0, %246 - br i1 %cmp208, label %if.then209, label %if.end212 - -if.then209: ; preds = %if.end205 - %247 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %248 = load i32, i32* %err206, align 4 - %call210 = call i8* @cudaGetErrorString(i32 %248) - %call211 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %247, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str.6, i64 0, i64 0), i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.7, i64 0, i64 0), i32 215, i8* %call210) - call void @exit(i32 1) #16 - unreachable - -if.end212: ; preds = %if.end205 - %249 = load i32*, i32** %d_destDataPacked, align 8 - %250 = bitcast i32* %249 to i8* - %call214 = call i32 @cudaFree(i8* %250) - store i32 %call214, i32* %err213, align 4 - %251 = load i32, i32* %err213, align 4 - %cmp215 = icmp ne i32 0, %251 - br i1 %cmp215, label %if.then216, label %if.end219 - -if.then216: ; preds = %if.end212 - %252 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %253 = load i32, i32* %err213, align 4 - %call217 = call i8* @cudaGetErrorString(i32 %253) - %call218 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %252, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str.6, i64 0, i64 0), i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.7, i64 0, i64 0), i32 216, i8* %call217) - call void @exit(i32 1) #16 - unreachable - -if.end219: ; preds = %if.end212 - %254 = load i32*, i32** %d_codewords, align 8 - %255 = bitcast i32* %254 to i8* - %call221 = call i32 @cudaFree(i8* %255) - store i32 %call221, i32* %err220, align 4 - %256 = load i32, i32* %err220, align 4 - %cmp222 = icmp ne i32 0, %256 - br i1 %cmp222, label %if.then223, label %if.end226 - -if.then223: ; preds = %if.end219 - %257 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %258 = load i32, i32* %err220, align 4 - %call224 = call i8* @cudaGetErrorString(i32 %258) - %call225 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %257, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str.6, i64 0, i64 0), i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.7, i64 0, i64 0), i32 217, i8* %call224) - call void @exit(i32 1) #16 - unreachable - -if.end226: ; preds = %if.end219 - %259 = load i32*, i32** %d_codewordlens, align 8 - %260 = bitcast i32* %259 to i8* - %call228 = call i32 @cudaFree(i8* %260) - store i32 %call228, i32* %err227, align 4 - %261 = load i32, i32* %err227, align 4 - %cmp229 = icmp ne i32 0, %261 - br i1 %cmp229, label %if.then230, label %if.end233 - -if.then230: ; preds = %if.end226 - %262 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %263 = load i32, i32* %err227, align 4 - %call231 = call i8* @cudaGetErrorString(i32 %263) - %call232 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %262, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str.6, i64 0, i64 0), i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.7, i64 0, i64 0), i32 218, i8* %call231) - call void @exit(i32 1) #16 - unreachable - -if.end233: ; preds = %if.end226 - %264 = load i32*, i32** %d_cw32, align 8 - %265 = bitcast i32* %264 to i8* - %call235 = call i32 @cudaFree(i8* %265) - store i32 %call235, i32* %err234, align 4 - %266 = load i32, i32* %err234, align 4 - %cmp236 = icmp ne i32 0, %266 - br i1 %cmp236, label %if.then237, label %if.end240 - -if.then237: ; preds = %if.end233 - %267 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %268 = load i32, i32* %err234, align 4 - %call238 = call i8* @cudaGetErrorString(i32 %268) - %call239 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %267, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str.6, i64 0, i64 0), i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.7, i64 0, i64 0), i32 219, i8* %call238) - call void @exit(i32 1) #16 - unreachable - -if.end240: ; preds = %if.end233 - %269 = load i32*, i32** %d_cw32len, align 8 - %270 = bitcast i32* %269 to i8* - %call242 = call i32 @cudaFree(i8* %270) - store i32 %call242, i32* %err241, align 4 - %271 = load i32, i32* %err241, align 4 - %cmp243 = icmp ne i32 0, %271 - br i1 %cmp243, label %if.then244, label %if.end247 - -if.then244: ; preds = %if.end240 - %272 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %273 = load i32, i32* %err241, align 4 - %call245 = call i8* @cudaGetErrorString(i32 %273) - %call246 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %272, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str.6, i64 0, i64 0), i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.7, i64 0, i64 0), i32 220, i8* %call245) - call void @exit(i32 1) #16 - unreachable - -if.end247: ; preds = %if.end240 - %274 = load i32*, i32** %d_cw32idx, align 8 - %275 = bitcast i32* %274 to i8* - %call249 = call i32 @cudaFree(i8* %275) - store i32 %call249, i32* %err248, align 4 - %276 = load i32, i32* %err248, align 4 - %cmp250 = icmp ne i32 0, %276 - br i1 %cmp250, label %if.then251, label %if.end254 - -if.then251: ; preds = %if.end247 - %277 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %278 = load i32, i32* %err248, align 4 - %call252 = call i8* @cudaGetErrorString(i32 %278) - %call253 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %277, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str.6, i64 0, i64 0), i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.7, i64 0, i64 0), i32 221, i8* %call252) - call void @exit(i32 1) #16 - unreachable - -if.end254: ; preds = %if.end247 - %279 = load i32*, i32** %d_cindex, align 8 - %280 = bitcast i32* %279 to i8* - %call256 = call i32 @cudaFree(i8* %280) - store i32 %call256, i32* %err255, align 4 - %281 = load i32, i32* %err255, align 4 - %cmp257 = icmp ne i32 0, %281 - br i1 %cmp257, label %if.then258, label %if.end261 - -if.then258: ; preds = %if.end254 - %282 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %283 = load i32, i32* %err255, align 4 - %call259 = call i8* @cudaGetErrorString(i32 %283) - %call260 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %282, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str.6, i64 0, i64 0), i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.7, i64 0, i64 0), i32 222, i8* %call259) - call void @exit(i32 1) #16 - unreachable - -if.end261: ; preds = %if.end254 - %284 = load i32*, i32** %d_cindex2, align 8 - %285 = bitcast i32* %284 to i8* - %call263 = call i32 @cudaFree(i8* %285) - store i32 %call263, i32* %err262, align 4 - %286 = load i32, i32* %err262, align 4 - %cmp264 = icmp ne i32 0, %286 - br i1 %cmp264, label %if.then265, label %if.end268 - -if.then265: ; preds = %if.end261 - %287 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %288 = load i32, i32* %err262, align 4 - %call266 = call i8* @cudaGetErrorString(i32 %288) - %call267 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %287, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str.6, i64 0, i64 0), i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.7, i64 0, i64 0), i32 223, i8* %call266) - call void @exit(i32 1) #16 - unreachable - -if.end268: ; preds = %if.end261 - %289 = load i32*, i32** %cindex2, align 8 - %290 = bitcast i32* %289 to i8* - call void @free(i8* %290) #3 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_Z10initParamsPcjRjS0_S0_j(i8* %file_name, i32 %num_block_threads, i32* dereferenceable(4) %num_blocks, i32* dereferenceable(4) %num_elements, i32* dereferenceable(4) %mem_size, i32 %symbol_type_size) #0 comdat { -entry: - %file_name.addr = alloca i8*, align 8 - %num_block_threads.addr = alloca i32, align 4 - %num_blocks.addr = alloca i32*, align 8 - %num_elements.addr = alloca i32*, align 8 - %mem_size.addr = alloca i32*, align 8 - %symbol_type_size.addr = alloca i32, align 4 - %f = alloca %struct._IO_FILE*, align 8 - store i8* %file_name, i8** %file_name.addr, align 8 - store i32 %num_block_threads, i32* %num_block_threads.addr, align 4 - store i32* %num_blocks, i32** %num_blocks.addr, align 8 - store i32* %num_elements, i32** %num_elements.addr, align 8 - store i32* %mem_size, i32** %mem_size.addr, align 8 - store i32 %symbol_type_size, i32* %symbol_type_size.addr, align 4 - %0 = load i8*, i8** %file_name.addr, align 8 - %cmp = icmp eq i8* %0, null - br i1 %cmp, label %if.then, label %if.else - -if.then: ; preds = %entry - %1 = load i32*, i32** %num_blocks.addr, align 8 - %2 = load i32, i32* %1, align 4 - %3 = load i32, i32* %num_block_threads.addr, align 4 - %mul = mul i32 %2, %3 - %4 = load i32*, i32** %num_elements.addr, align 8 - store i32 %mul, i32* %4, align 4 - %5 = load i32*, i32** %num_elements.addr, align 8 - %6 = load i32, i32* %5, align 4 - %7 = load i32, i32* %symbol_type_size.addr, align 4 - %mul1 = mul i32 %6, %7 - %8 = load i32*, i32** %mem_size.addr, align 8 - store i32 %mul1, i32* %8, align 4 - br label %if.end7 - -if.else: ; preds = %entry - %9 = load i8*, i8** %file_name.addr, align 8 - %call = call %struct._IO_FILE* @fopen(i8* %9, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.1, i64 0, i64 0)) - store %struct._IO_FILE* %call, %struct._IO_FILE** %f, align 8 - %10 = load %struct._IO_FILE*, %struct._IO_FILE** %f, align 8 - %tobool = icmp ne %struct._IO_FILE* %10, null - br i1 %tobool, label %if.end, label %if.then2 - -if.then2: ; preds = %if.else - %11 = load i8*, i8** %file_name.addr, align 8 - call void @perror(i8* %11) - call void @exit(i32 1) #16 - unreachable - -if.end: ; preds = %if.else - %12 = load %struct._IO_FILE*, %struct._IO_FILE** %f, align 8 - %call3 = call i32 @fseek(%struct._IO_FILE* %12, i64 0, i32 2) - %13 = load %struct._IO_FILE*, %struct._IO_FILE** %f, align 8 - %call4 = call i64 @ftell(%struct._IO_FILE* %13) - %conv = trunc i64 %call4 to i32 - %14 = load i32*, i32** %mem_size.addr, align 8 - store i32 %conv, i32* %14, align 4 - %15 = load %struct._IO_FILE*, %struct._IO_FILE** %f, align 8 - %call5 = call i32 @fclose(%struct._IO_FILE* %15) - %16 = load i32*, i32** %mem_size.addr, align 8 - %17 = load i32, i32* %16, align 4 - %18 = load i32, i32* %symbol_type_size.addr, align 4 - %div = udiv i32 %17, %18 - %19 = load i32*, i32** %num_elements.addr, align 8 - store i32 %div, i32* %19, align 4 - %20 = load i32*, i32** %num_elements.addr, align 8 - %21 = load i32, i32* %20, align 4 - %22 = load i32, i32* %num_block_threads.addr, align 4 - %div6 = udiv i32 %21, %22 - %23 = load i32*, i32** %num_blocks.addr, align 8 - store i32 %div6, i32* %23, align 4 - br label %if.end7 - -if.end7: ; preds = %if.end, %if.then - ret void -} - -; Function Attrs: nounwind -declare dso_local noalias i8* @malloc(i64) #11 - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #4 - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_Z8loadDataPcPjS0_S0_jjRd(i8* %file_name, i32* %sourceData, i32* %codewords, i32* %codewordlens, i32 %num_elements, i32 %mem_size, double* dereferenceable(8) %H) #0 comdat personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -entry: - %file_name.addr = alloca i8*, align 8 - %sourceData.addr = alloca i32*, align 8 - %codewords.addr = alloca i32*, align 8 - %codewordlens.addr = alloca i32*, align 8 - %num_elements.addr = alloca i32, align 4 - %mem_size.addr = alloca i32, align 4 - %H.addr = alloca double*, align 8 - %freqs = alloca [256 x i32], align 16 - %root = alloca %class.INode*, align 8 - %codes = alloca %"class.std::map", align 8 - %ref.tmp = alloca %"class.std::vector.0", align 8 - %exn.slot = alloca i8* - %ehselector.slot = alloca i32 - %it = alloca %"struct.std::_Rb_tree_const_iterator", align 8 - %ref.tmp8 = alloca %"struct.std::_Rb_tree_iterator", align 8 - %ref.tmp12 = alloca %"struct.std::_Rb_tree_const_iterator", align 8 - %ref.tmp13 = alloca %"struct.std::_Rb_tree_iterator", align 8 - %count = alloca i32, align 4 - %agg.tmp = alloca %"struct.std::_Bit_const_iterator", align 8 - %agg.tmp24 = alloca %"struct.std::_Bit_const_iterator", align 8 - %i = alloca i32, align 4 - %i59 = alloca i32, align 4 - %p = alloca double, align 8 - store i8* %file_name, i8** %file_name.addr, align 8 - store i32* %sourceData, i32** %sourceData.addr, align 8 - store i32* %codewords, i32** %codewords.addr, align 8 - store i32* %codewordlens, i32** %codewordlens.addr, align 8 - store i32 %num_elements, i32* %num_elements.addr, align 4 - store i32 %mem_size, i32* %mem_size.addr, align 4 - store double* %H, double** %H.addr, align 8 - %0 = load i8*, i8** %file_name.addr, align 8 - %cmp = icmp eq i8* %0, null - br i1 %cmp, label %if.then, label %if.else - -if.then: ; preds = %entry - %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str.14, i64 0, i64 0)) - call void @exit(i32 -1) #16 - unreachable - -if.else: ; preds = %entry - %1 = bitcast [256 x i32]* %freqs to i8* - call void @llvm.memset.p0i8.i64(i8* align 16 %1, i8 0, i64 1024, i1 false) - %2 = load i8*, i8** %file_name.addr, align 8 - %arraydecay = getelementptr inbounds [256 x i32], [256 x i32]* %freqs, i64 0, i64 0 - %3 = load i32, i32* %mem_size.addr, align 4 - %4 = load i32*, i32** %sourceData.addr, align 8 - %call1 = call i32 @_Z8runHistoPcPjjS0_(i8* %2, i32* %arraydecay, i32 %3, i32* %4) - %call2 = call %class.INode* @_Z9BuildTreeRA256_j([256 x i32]* dereferenceable(1024) %freqs) - store %class.INode* %call2, %class.INode** %root, align 8 - call void @_ZNSt3mapIhSt6vectorIbSaIbEESt4lessIhESaISt4pairIKhS2_EEEC2Ev(%"class.std::map"* %codes) - %5 = load %class.INode*, %class.INode** %root, align 8 - invoke void @_ZNSt6vectorIbSaIbEEC2Ev(%"class.std::vector.0"* %ref.tmp) - to label %invoke.cont unwind label %lpad - -invoke.cont: ; preds = %if.else - invoke void @_Z13GenerateCodesPK5INodeRKSt6vectorIbSaIbEERSt3mapIhS4_St4lessIhESaISt4pairIKhS4_EEE(%class.INode* %5, %"class.std::vector.0"* dereferenceable(40) %ref.tmp, %"class.std::map"* dereferenceable(48) %codes) - to label %invoke.cont4 unwind label %lpad3 - -invoke.cont4: ; preds = %invoke.cont - invoke void @_ZNSt6vectorIbSaIbEED2Ev(%"class.std::vector.0"* %ref.tmp) - to label %invoke.cont5 unwind label %lpad - -invoke.cont5: ; preds = %invoke.cont4 - %6 = load %class.INode*, %class.INode** %root, align 8 - %isnull = icmp eq %class.INode* %6, null - br i1 %isnull, label %delete.end, label %delete.notnull - -delete.notnull: ; preds = %invoke.cont5 - %7 = bitcast %class.INode* %6 to void (%class.INode*)*** - %vtable = load void (%class.INode*)**, void (%class.INode*)*** %7, align 8 - %vfn = getelementptr inbounds void (%class.INode*)*, void (%class.INode*)** %vtable, i64 1 - %8 = load void (%class.INode*)*, void (%class.INode*)** %vfn, align 8 - invoke void %8(%class.INode* %6) - to label %invoke.cont7 unwind label %lpad - -invoke.cont7: ; preds = %delete.notnull - br label %delete.end - -delete.end: ; preds = %invoke.cont7, %invoke.cont5 - %call10 = invoke %"struct.std::_Rb_tree_node_base"* @_ZNSt3mapIhSt6vectorIbSaIbEESt4lessIhESaISt4pairIKhS2_EEE5beginEv(%"class.std::map"* %codes) - to label %invoke.cont9 unwind label %lpad - -invoke.cont9: ; preds = %delete.end - %coerce.dive = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %ref.tmp8, i32 0, i32 0 - store %"struct.std::_Rb_tree_node_base"* %call10, %"struct.std::_Rb_tree_node_base"** %coerce.dive, align 8 - invoke void @_ZNSt23_Rb_tree_const_iteratorISt4pairIKhSt6vectorIbSaIbEEEEC2ERKSt17_Rb_tree_iteratorIS5_E(%"struct.std::_Rb_tree_const_iterator"* %it, %"struct.std::_Rb_tree_iterator"* dereferenceable(8) %ref.tmp8) - to label %invoke.cont11 unwind label %lpad - -invoke.cont11: ; preds = %invoke.cont9 - br label %for.cond - -for.cond: ; preds = %invoke.cont56, %invoke.cont11 - %call15 = invoke %"struct.std::_Rb_tree_node_base"* @_ZNSt3mapIhSt6vectorIbSaIbEESt4lessIhESaISt4pairIKhS2_EEE3endEv(%"class.std::map"* %codes) - to label %invoke.cont14 unwind label %lpad - -invoke.cont14: ; preds = %for.cond - %coerce.dive16 = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %ref.tmp13, i32 0, i32 0 - store %"struct.std::_Rb_tree_node_base"* %call15, %"struct.std::_Rb_tree_node_base"** %coerce.dive16, align 8 - invoke void @_ZNSt23_Rb_tree_const_iteratorISt4pairIKhSt6vectorIbSaIbEEEEC2ERKSt17_Rb_tree_iteratorIS5_E(%"struct.std::_Rb_tree_const_iterator"* %ref.tmp12, %"struct.std::_Rb_tree_iterator"* dereferenceable(8) %ref.tmp13) - to label %invoke.cont17 unwind label %lpad - -invoke.cont17: ; preds = %invoke.cont14 - %call19 = invoke zeroext i1 @_ZNKSt23_Rb_tree_const_iteratorISt4pairIKhSt6vectorIbSaIbEEEEneERKS6_(%"struct.std::_Rb_tree_const_iterator"* %it, %"struct.std::_Rb_tree_const_iterator"* dereferenceable(8) %ref.tmp12) - to label %invoke.cont18 unwind label %lpad - -invoke.cont18: ; preds = %invoke.cont17 - br i1 %call19, label %for.body, label %for.end58 - -for.body: ; preds = %invoke.cont18 - %call21 = invoke %"struct.std::pair"* @_ZNKSt23_Rb_tree_const_iteratorISt4pairIKhSt6vectorIbSaIbEEEEptEv(%"struct.std::_Rb_tree_const_iterator"* %it) - to label %invoke.cont20 unwind label %lpad - -invoke.cont20: ; preds = %for.body - %second = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %call21, i32 0, i32 1 - %call23 = invoke { i64*, i32 } @_ZNKSt6vectorIbSaIbEE5beginEv(%"class.std::vector.0"* %second) - to label %invoke.cont22 unwind label %lpad - -invoke.cont22: ; preds = %invoke.cont20 - %9 = bitcast %"struct.std::_Bit_const_iterator"* %agg.tmp to { i64*, i32 }* - %10 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %9, i32 0, i32 0 - %11 = extractvalue { i64*, i32 } %call23, 0 - store i64* %11, i64** %10, align 8 - %12 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %9, i32 0, i32 1 - %13 = extractvalue { i64*, i32 } %call23, 1 - store i32 %13, i32* %12, align 8 - %call26 = invoke %"struct.std::pair"* @_ZNKSt23_Rb_tree_const_iteratorISt4pairIKhSt6vectorIbSaIbEEEEptEv(%"struct.std::_Rb_tree_const_iterator"* %it) - to label %invoke.cont25 unwind label %lpad - -invoke.cont25: ; preds = %invoke.cont22 - %second27 = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %call26, i32 0, i32 1 - %call29 = invoke { i64*, i32 } @_ZNKSt6vectorIbSaIbEE3endEv(%"class.std::vector.0"* %second27) - to label %invoke.cont28 unwind label %lpad - -invoke.cont28: ; preds = %invoke.cont25 - %14 = bitcast %"struct.std::_Bit_const_iterator"* %agg.tmp24 to { i64*, i32 }* - %15 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %14, i32 0, i32 0 - %16 = extractvalue { i64*, i32 } %call29, 0 - store i64* %16, i64** %15, align 8 - %17 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %14, i32 0, i32 1 - %18 = extractvalue { i64*, i32 } %call29, 1 - store i32 %18, i32* %17, align 8 - %19 = bitcast %"struct.std::_Bit_const_iterator"* %agg.tmp to { i64*, i32 }* - %20 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %19, i32 0, i32 0 - %21 = load i64*, i64** %20, align 8 - %22 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %19, i32 0, i32 1 - %23 = load i32, i32* %22, align 8 - %24 = bitcast %"struct.std::_Bit_const_iterator"* %agg.tmp24 to { i64*, i32 }* - %25 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %24, i32 0, i32 0 - %26 = load i64*, i64** %25, align 8 - %27 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %24, i32 0, i32 1 - %28 = load i32, i32* %27, align 8 - %call31 = invoke i64 @_ZSt8distanceISt19_Bit_const_iteratorENSt15iterator_traitsIT_E15difference_typeES2_S2_(i64* %21, i32 %23, i64* %26, i32 %28) - to label %invoke.cont30 unwind label %lpad - -invoke.cont30: ; preds = %invoke.cont28 - %conv = trunc i64 %call31 to i32 - store i32 %conv, i32* %count, align 4 - store i32 0, i32* %i, align 4 - br label %for.cond32 - -for.cond32: ; preds = %for.inc, %invoke.cont30 - %29 = load i32, i32* %i, align 4 - %30 = load i32, i32* %count, align 4 - %cmp33 = icmp ult i32 %29, %30 - br i1 %cmp33, label %for.body34, label %for.end - -for.body34: ; preds = %for.cond32 - %call36 = invoke %"struct.std::pair"* @_ZNKSt23_Rb_tree_const_iteratorISt4pairIKhSt6vectorIbSaIbEEEEptEv(%"struct.std::_Rb_tree_const_iterator"* %it) - to label %invoke.cont35 unwind label %lpad - -invoke.cont35: ; preds = %for.body34 - %second37 = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %call36, i32 0, i32 1 - %31 = load i32, i32* %i, align 4 - %conv38 = sext i32 %31 to i64 - %call40 = invoke zeroext i1 @_ZNKSt6vectorIbSaIbEEixEm(%"class.std::vector.0"* %second37, i64 %conv38) - to label %invoke.cont39 unwind label %lpad - -invoke.cont39: ; preds = %invoke.cont35 - br i1 %call40, label %if.then41, label %if.end - -if.then41: ; preds = %invoke.cont39 - %32 = load i32, i32* %count, align 4 - %33 = load i32, i32* %i, align 4 - %sub = sub nsw i32 %32, %33 - %sub42 = sub nsw i32 %sub, 1 - %call44 = invoke float @_ZSt3powfi(float 2.000000e+00, i32 %sub42) - to label %invoke.cont43 unwind label %lpad - -invoke.cont43: ; preds = %if.then41 - %conv45 = fptoui float %call44 to i32 - %34 = load i32*, i32** %codewords.addr, align 8 - %call47 = invoke %"struct.std::pair"* @_ZNKSt23_Rb_tree_const_iteratorISt4pairIKhSt6vectorIbSaIbEEEEptEv(%"struct.std::_Rb_tree_const_iterator"* %it) - to label %invoke.cont46 unwind label %lpad - -invoke.cont46: ; preds = %invoke.cont43 - %first = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %call47, i32 0, i32 0 - %35 = load i8, i8* %first, align 8 - %conv48 = zext i8 %35 to i32 - %idxprom = zext i32 %conv48 to i64 - %arrayidx = getelementptr inbounds i32, i32* %34, i64 %idxprom - %36 = load i32, i32* %arrayidx, align 4 - %add = add i32 %36, %conv45 - store i32 %add, i32* %arrayidx, align 4 - br label %if.end - -lpad: ; preds = %for.end78, %for.inc55, %for.end, %invoke.cont43, %if.then41, %invoke.cont35, %for.body34, %invoke.cont28, %invoke.cont25, %invoke.cont22, %invoke.cont20, %for.body, %invoke.cont17, %invoke.cont14, %for.cond, %invoke.cont9, %delete.end, %delete.notnull, %invoke.cont4, %if.else - %37 = landingpad { i8*, i32 } - cleanup - %38 = extractvalue { i8*, i32 } %37, 0 - store i8* %38, i8** %exn.slot, align 8 - %39 = extractvalue { i8*, i32 } %37, 1 - store i32 %39, i32* %ehselector.slot, align 4 - br label %ehcleanup - -lpad3: ; preds = %invoke.cont - %40 = landingpad { i8*, i32 } - cleanup - %41 = extractvalue { i8*, i32 } %40, 0 - store i8* %41, i8** %exn.slot, align 8 - %42 = extractvalue { i8*, i32 } %40, 1 - store i32 %42, i32* %ehselector.slot, align 4 - invoke void @_ZNSt6vectorIbSaIbEED2Ev(%"class.std::vector.0"* %ref.tmp) - to label %invoke.cont6 unwind label %terminate.lpad - -invoke.cont6: ; preds = %lpad3 - br label %ehcleanup - -if.end: ; preds = %invoke.cont46, %invoke.cont39 - br label %for.inc - -for.inc: ; preds = %if.end - %43 = load i32, i32* %i, align 4 - %inc = add nsw i32 %43, 1 - store i32 %inc, i32* %i, align 4 - br label %for.cond32 - -for.end: ; preds = %for.cond32 - %44 = load i32, i32* %count, align 4 - %45 = load i32*, i32** %codewordlens.addr, align 8 - %call50 = invoke %"struct.std::pair"* @_ZNKSt23_Rb_tree_const_iteratorISt4pairIKhSt6vectorIbSaIbEEEEptEv(%"struct.std::_Rb_tree_const_iterator"* %it) - to label %invoke.cont49 unwind label %lpad - -invoke.cont49: ; preds = %for.end - %first51 = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %call50, i32 0, i32 0 - %46 = load i8, i8* %first51, align 8 - %conv52 = zext i8 %46 to i32 - %idxprom53 = zext i32 %conv52 to i64 - %arrayidx54 = getelementptr inbounds i32, i32* %45, i64 %idxprom53 - store i32 %44, i32* %arrayidx54, align 4 - br label %for.inc55 - -for.inc55: ; preds = %invoke.cont49 - %call57 = invoke dereferenceable(8) %"struct.std::_Rb_tree_const_iterator"* @_ZNSt23_Rb_tree_const_iteratorISt4pairIKhSt6vectorIbSaIbEEEEppEv(%"struct.std::_Rb_tree_const_iterator"* %it) - to label %invoke.cont56 unwind label %lpad - -invoke.cont56: ; preds = %for.inc55 - br label %for.cond - -for.end58: ; preds = %invoke.cont18 - %47 = load double*, double** %H.addr, align 8 - store double 0.000000e+00, double* %47, align 8 - store i32 0, i32* %i59, align 4 - br label %for.cond60 - -for.cond60: ; preds = %for.inc76, %for.end58 - %48 = load i32, i32* %i59, align 4 - %cmp61 = icmp ult i32 %48, 256 - br i1 %cmp61, label %for.body62, label %for.end78 - -for.body62: ; preds = %for.cond60 - %49 = load i32, i32* %i59, align 4 - %idxprom63 = zext i32 %49 to i64 - %arrayidx64 = getelementptr inbounds [256 x i32], [256 x i32]* %freqs, i64 0, i64 %idxprom63 - %50 = load i32, i32* %arrayidx64, align 4 - %cmp65 = icmp ugt i32 %50, 0 - br i1 %cmp65, label %if.then66, label %if.end75 - -if.then66: ; preds = %for.body62 - %51 = load i32, i32* %i59, align 4 - %idxprom67 = zext i32 %51 to i64 - %arrayidx68 = getelementptr inbounds [256 x i32], [256 x i32]* %freqs, i64 0, i64 %idxprom67 - %52 = load i32, i32* %arrayidx68, align 4 - %conv69 = uitofp i32 %52 to double - %53 = load i32, i32* %mem_size.addr, align 4 - %conv70 = uitofp i32 %53 to double - %div = fdiv double %conv69, %conv70 - store double %div, double* %p, align 8 - %54 = load double, double* %p, align 8 - %55 = load double, double* %p, align 8 - %call71 = call double @log(double %55) #3 - %mul = fmul contract double %54, %call71 - %call72 = call double @log(double 2.000000e+00) #3 - %div73 = fdiv double %mul, %call72 - %56 = load double*, double** %H.addr, align 8 - %57 = load double, double* %56, align 8 - %add74 = fadd contract double %57, %div73 - store double %add74, double* %56, align 8 - br label %if.end75 - -if.end75: ; preds = %if.then66, %for.body62 - br label %for.inc76 - -for.inc76: ; preds = %if.end75 - %58 = load i32, i32* %i59, align 4 - %inc77 = add i32 %58, 1 - store i32 %inc77, i32* %i59, align 4 - br label %for.cond60 - -for.end78: ; preds = %for.cond60 - %59 = load double*, double** %H.addr, align 8 - %60 = load double, double* %59, align 8 - %fneg = fneg double %60 - %61 = load double*, double** %H.addr, align 8 - store double %fneg, double* %61, align 8 - %62 = load i8*, i8** %file_name.addr, align 8 - %63 = load i32, i32* %mem_size.addr, align 4 - %64 = load double*, double** %H.addr, align 8 - %65 = load double, double* %64, align 8 - %call80 = invoke i32 (i8*, ...) @printf(i8* getelementptr inbounds ([28 x i8], [28 x i8]* @.str.15, i64 0, i64 0), i8* %62, i32 %63, double %65) - to label %invoke.cont79 unwind label %lpad - -invoke.cont79: ; preds = %for.end78 - call void @_ZNSt3mapIhSt6vectorIbSaIbEESt4lessIhESaISt4pairIKhS2_EEED2Ev(%"class.std::map"* %codes) - br label %if.end82 - -ehcleanup: ; preds = %invoke.cont6, %lpad - invoke void @_ZNSt3mapIhSt6vectorIbSaIbEESt4lessIhESaISt4pairIKhS2_EEED2Ev(%"class.std::map"* %codes) - to label %invoke.cont81 unwind label %terminate.lpad - -invoke.cont81: ; preds = %ehcleanup - br label %eh.resume - -if.end82: ; preds = %invoke.cont79 - ret void - -eh.resume: ; preds = %invoke.cont81 - %exn = load i8*, i8** %exn.slot, align 8 - %sel = load i32, i32* %ehselector.slot, align 4 - %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0 - %lpad.val83 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1 - resume { i8*, i32 } %lpad.val83 - -terminate.lpad: ; preds = %ehcleanup, %lpad3 - %66 = landingpad { i8*, i32 } - catch i8* null - %67 = extractvalue { i8*, i32 } %66, 0 - call void @__clang_call_terminate(i8* %67) #16 - unreachable -} - -declare dso_local i32 @fprintf(%struct._IO_FILE*, i8*, ...) #1 - -declare dso_local i8* @cudaGetErrorString(i32) #1 - -declare dso_local void @cpu_vlc_encode(i32*, i32, i32*, i32*, i32*, i32*) #1 - -; Function Attrs: noinline optnone uwtable -define internal void @_ZL26vlc_encode_kernel_sm64huffPjPKjS1_S_S_S_S_S_(i32* %data, i32* %gm_codewords, i32* %gm_codewordlens, i32* %cw32, i32* %cw32len, i32* %cw32idx, i32* %out, i32* %outidx) #0 { -entry: - %data.addr = alloca i32*, align 8 - %gm_codewords.addr = alloca i32*, align 8 - %gm_codewordlens.addr = alloca i32*, align 8 - %cw32.addr = alloca i32*, align 8 - %cw32len.addr = alloca i32*, align 8 - %cw32idx.addr = alloca i32*, align 8 - %out.addr = alloca i32*, align 8 - %outidx.addr = alloca i32*, align 8 - %grid_dim = alloca %struct.dim3, align 8 - %block_dim = alloca %struct.dim3, align 8 - %shmem_size = alloca i64, align 8 - %stream = alloca i8*, align 8 - %grid_dim.coerce = alloca { i64, i32 }, align 8 - %block_dim.coerce = alloca { i64, i32 }, align 8 - store i32* %data, i32** %data.addr, align 8 - store i32* %gm_codewords, i32** %gm_codewords.addr, align 8 - store i32* %gm_codewordlens, i32** %gm_codewordlens.addr, align 8 - store i32* %cw32, i32** %cw32.addr, align 8 - store i32* %cw32len, i32** %cw32len.addr, align 8 - store i32* %cw32idx, i32** %cw32idx.addr, align 8 - store i32* %out, i32** %out.addr, align 8 - store i32* %outidx, i32** %outidx.addr, align 8 - %kernel_args = alloca i8*, i64 8, align 16 - %0 = bitcast i32** %data.addr to i8* - %1 = getelementptr i8*, i8** %kernel_args, i32 0 - store i8* %0, i8** %1 - %2 = bitcast i32** %gm_codewords.addr to i8* - %3 = getelementptr i8*, i8** %kernel_args, i32 1 - store i8* %2, i8** %3 - %4 = bitcast i32** %gm_codewordlens.addr to i8* - %5 = getelementptr i8*, i8** %kernel_args, i32 2 - store i8* %4, i8** %5 - %6 = bitcast i32** %cw32.addr to i8* - %7 = getelementptr i8*, i8** %kernel_args, i32 3 - store i8* %6, i8** %7 - %8 = bitcast i32** %cw32len.addr to i8* - %9 = getelementptr i8*, i8** %kernel_args, i32 4 - store i8* %8, i8** %9 - %10 = bitcast i32** %cw32idx.addr to i8* - %11 = getelementptr i8*, i8** %kernel_args, i32 5 - store i8* %10, i8** %11 - %12 = bitcast i32** %out.addr to i8* - %13 = getelementptr i8*, i8** %kernel_args, i32 6 - store i8* %12, i8** %13 - %14 = bitcast i32** %outidx.addr to i8* - %15 = getelementptr i8*, i8** %kernel_args, i32 7 - store i8* %14, i8** %15 - %16 = call i32 @__cudaPopCallConfiguration(%struct.dim3* %grid_dim, %struct.dim3* %block_dim, i64* %shmem_size, i8** %stream) - %17 = load i64, i64* %shmem_size, align 8 - %18 = load i8*, i8** %stream, align 8 - %19 = bitcast { i64, i32 }* %grid_dim.coerce to i8* - %20 = bitcast %struct.dim3* %grid_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %19, i8* align 8 %20, i64 12, i1 false) - %21 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 0 - %22 = load i64, i64* %21, align 8 - %23 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 1 - %24 = load i32, i32* %23, align 8 - %25 = bitcast { i64, i32 }* %block_dim.coerce to i8* - %26 = bitcast %struct.dim3* %block_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %25, i8* align 8 %26, i64 12, i1 false) - %27 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 0 - %28 = load i64, i64* %27, align 8 - %29 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 1 - %30 = load i32, i32* %29, align 8 - %31 = bitcast i8* %18 to %struct.CUstream_st* - %call = call i32 @cudaLaunchKernel(i8* bitcast (void (i32*, i32*, i32*, i32*, i32*, i32*, i32*, i32*)* @_ZL26vlc_encode_kernel_sm64huffPjPKjS1_S_S_S_S_S_ to i8*), i64 %22, i32 %24, i64 %28, i32 %30, i8** %kernel_args, i64 %17, %struct.CUstream_st* %31) - br label %setup.end - -setup.end: ; preds = %entry - ret void -} - -declare dso_local i32 @cudaThreadSynchronize() #1 - -; Function Attrs: noinline optnone uwtable -define internal void @_ZL17preallocBlockSumsj(i32 %maxNumElements) #0 { -entry: - %maxNumElements.addr = alloca i32, align 4 - %blockSize = alloca i32, align 4 - %numElts = alloca i32, align 4 - %level = alloca i32, align 4 - %numBlocks = alloca i32, align 4 - %numBlocks19 = alloca i32, align 4 - %err = alloca i32, align 4 - %err52 = alloca i32, align 4 - store i32 %maxNumElements, i32* %maxNumElements.addr, align 4 - %0 = load i32, i32* @_ZL18g_numEltsAllocated, align 4 - %cmp = icmp eq i32 %0, 0 - br i1 %cmp, label %cond.true, label %cond.false - -cond.true: ; preds = %entry - br label %cond.end - -cond.false: ; preds = %entry - call void @__assert_fail(i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.16, i64 0, i64 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.17, i64 0, i64 0), i32 63, i8* getelementptr inbounds ([37 x i8], [37 x i8]* @__PRETTY_FUNCTION__._ZL17preallocBlockSumsj, i64 0, i64 0)) #16 - unreachable - -1: ; No predecessors! - br label %cond.end - -cond.end: ; preds = %1, %cond.true - %2 = load i32, i32* %maxNumElements.addr, align 4 - store i32 %2, i32* @_ZL18g_numEltsAllocated, align 4 - store i32 256, i32* %blockSize, align 4 - %3 = load i32, i32* %maxNumElements.addr, align 4 - store i32 %3, i32* %numElts, align 4 - store i32 0, i32* %level, align 4 - br label %do.body - -do.body: ; preds = %do.cond, %cond.end - %4 = load i32, i32* %numElts, align 4 - %conv = uitofp i32 %4 to float - %5 = load i32, i32* %blockSize, align 4 - %conv1 = uitofp i32 %5 to float - %mul = fmul contract float 2.000000e+00, %conv1 - %div = fdiv float %conv, %mul - %call = call float @_ZSt4ceilf(float %div) - %conv2 = fptosi float %call to i32 - %cmp3 = icmp sgt i32 1, %conv2 - br i1 %cmp3, label %cond.true4, label %cond.false5 - -cond.true4: ; preds = %do.body - br label %cond.end12 - -cond.false5: ; preds = %do.body - %6 = load i32, i32* %numElts, align 4 - %conv6 = uitofp i32 %6 to float - %7 = load i32, i32* %blockSize, align 4 - %conv7 = uitofp i32 %7 to float - %mul8 = fmul contract float 2.000000e+00, %conv7 - %div9 = fdiv float %conv6, %mul8 - %call10 = call float @_ZSt4ceilf(float %div9) - %conv11 = fptosi float %call10 to i32 - br label %cond.end12 - -cond.end12: ; preds = %cond.false5, %cond.true4 - %cond = phi i32 [ 1, %cond.true4 ], [ %conv11, %cond.false5 ] - store i32 %cond, i32* %numBlocks, align 4 - %8 = load i32, i32* %numBlocks, align 4 - %cmp13 = icmp ugt i32 %8, 1 - br i1 %cmp13, label %if.then, label %if.end - -if.then: ; preds = %cond.end12 - %9 = load i32, i32* %level, align 4 - %inc = add nsw i32 %9, 1 - store i32 %inc, i32* %level, align 4 - br label %if.end - -if.end: ; preds = %if.then, %cond.end12 - %10 = load i32, i32* %numBlocks, align 4 - store i32 %10, i32* %numElts, align 4 - br label %do.cond - -do.cond: ; preds = %if.end - %11 = load i32, i32* %numElts, align 4 - %cmp14 = icmp ugt i32 %11, 1 - br i1 %cmp14, label %do.body, label %do.end - -do.end: ; preds = %do.cond - %12 = load i32, i32* %level, align 4 - %conv15 = sext i32 %12 to i64 - %mul16 = mul i64 %conv15, 8 - %call17 = call noalias i8* @malloc(i64 %mul16) #3 - %13 = bitcast i8* %call17 to i32** - store i32** %13, i32*** @_ZL15g_scanBlockSums, align 8 - %14 = load i32, i32* %level, align 4 - store i32 %14, i32* @_ZL20g_numLevelsAllocated, align 4 - %15 = load i32, i32* %maxNumElements.addr, align 4 - store i32 %15, i32* %numElts, align 4 - store i32 0, i32* %level, align 4 - br label %do.body18 - -do.body18: ; preds = %do.cond49, %do.end - %16 = load i32, i32* %numElts, align 4 - %conv20 = uitofp i32 %16 to float - %17 = load i32, i32* %blockSize, align 4 - %conv21 = uitofp i32 %17 to float - %mul22 = fmul contract float 2.000000e+00, %conv21 - %div23 = fdiv float %conv20, %mul22 - %call24 = call float @_ZSt4ceilf(float %div23) - %conv25 = fptosi float %call24 to i32 - %cmp26 = icmp sgt i32 1, %conv25 - br i1 %cmp26, label %cond.true27, label %cond.false28 - -cond.true27: ; preds = %do.body18 - br label %cond.end35 - -cond.false28: ; preds = %do.body18 - %18 = load i32, i32* %numElts, align 4 - %conv29 = uitofp i32 %18 to float - %19 = load i32, i32* %blockSize, align 4 - %conv30 = uitofp i32 %19 to float - %mul31 = fmul contract float 2.000000e+00, %conv30 - %div32 = fdiv float %conv29, %mul31 - %call33 = call float @_ZSt4ceilf(float %div32) - %conv34 = fptosi float %call33 to i32 - br label %cond.end35 - -cond.end35: ; preds = %cond.false28, %cond.true27 - %cond36 = phi i32 [ 1, %cond.true27 ], [ %conv34, %cond.false28 ] - store i32 %cond36, i32* %numBlocks19, align 4 - %20 = load i32, i32* %numBlocks19, align 4 - %cmp37 = icmp ugt i32 %20, 1 - br i1 %cmp37, label %if.then38, label %if.end48 - -if.then38: ; preds = %cond.end35 - %21 = load i32**, i32*** @_ZL15g_scanBlockSums, align 8 - %22 = load i32, i32* %level, align 4 - %inc39 = add nsw i32 %22, 1 - store i32 %inc39, i32* %level, align 4 - %idxprom = sext i32 %22 to i64 - %arrayidx = getelementptr inbounds i32*, i32** %21, i64 %idxprom - %23 = bitcast i32** %arrayidx to i8** - %24 = load i32, i32* %numBlocks19, align 4 - %conv40 = zext i32 %24 to i64 - %mul41 = mul i64 %conv40, 4 - %call42 = call i32 @cudaMalloc(i8** %23, i64 %mul41) - store i32 %call42, i32* %err, align 4 - %25 = load i32, i32* %err, align 4 - %cmp43 = icmp ne i32 0, %25 - br i1 %cmp43, label %if.then44, label %if.end47 - -if.then44: ; preds = %if.then38 - %26 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %27 = load i32, i32* %err, align 4 - %call45 = call i8* @cudaGetErrorString(i32 %27) - %call46 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %26, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str.6, i64 0, i64 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.17, i64 0, i64 0), i32 89, i8* %call45) - call void @exit(i32 1) #16 - unreachable - -if.end47: ; preds = %if.then38 - br label %if.end48 - -if.end48: ; preds = %if.end47, %cond.end35 - %28 = load i32, i32* %numBlocks19, align 4 - store i32 %28, i32* %numElts, align 4 - br label %do.cond49 - -do.cond49: ; preds = %if.end48 - %29 = load i32, i32* %numElts, align 4 - %cmp50 = icmp ugt i32 %29, 1 - br i1 %cmp50, label %do.body18, label %do.end51 - -do.end51: ; preds = %do.cond49 - %call53 = call i32 @cudaGetLastError() - store i32 %call53, i32* %err52, align 4 - %30 = load i32, i32* %err52, align 4 - %cmp54 = icmp ne i32 0, %30 - br i1 %cmp54, label %if.then55, label %if.end58 - -if.then55: ; preds = %do.end51 - %31 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %32 = load i32, i32* %err52, align 4 - %call56 = call i8* @cudaGetErrorString(i32 %32) - %call57 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %31, i8* getelementptr inbounds ([46 x i8], [46 x i8]* @.str.11, i64 0, i64 0), i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str.18, i64 0, i64 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.17, i64 0, i64 0), i32 93, i8* %call56) - call void @exit(i32 1) #16 - unreachable - -if.end58: ; preds = %do.end51 - ret void -} - -; Function Attrs: noinline optnone uwtable -define internal void @_ZL12prescanArrayPjS_i(i32* %outArray, i32* %inArray, i32 %numElements) #0 { -entry: - %outArray.addr = alloca i32*, align 8 - %inArray.addr = alloca i32*, align 8 - %numElements.addr = alloca i32, align 4 - store i32* %outArray, i32** %outArray.addr, align 8 - store i32* %inArray, i32** %inArray.addr, align 8 - store i32 %numElements, i32* %numElements.addr, align 4 - %0 = load i32*, i32** %outArray.addr, align 8 - %1 = load i32*, i32** %inArray.addr, align 8 - %2 = load i32, i32* %numElements.addr, align 4 - call void @_ZL21prescanArrayRecursivePjPKjii(i32* %0, i32* %1, i32 %2, i32 0) - ret void -} - -; Function Attrs: noinline optnone uwtable -define internal void @_ZL5pack2PjS_S_S_j(i32* %srcData, i32* %cindex, i32* %cindex2, i32* %dstData, i32 %original_num_block_elements) #0 { -entry: - %srcData.addr = alloca i32*, align 8 - %cindex.addr = alloca i32*, align 8 - %cindex2.addr = alloca i32*, align 8 - %dstData.addr = alloca i32*, align 8 - %original_num_block_elements.addr = alloca i32, align 4 - %grid_dim = alloca %struct.dim3, align 8 - %block_dim = alloca %struct.dim3, align 8 - %shmem_size = alloca i64, align 8 - %stream = alloca i8*, align 8 - %grid_dim.coerce = alloca { i64, i32 }, align 8 - %block_dim.coerce = alloca { i64, i32 }, align 8 - store i32* %srcData, i32** %srcData.addr, align 8 - store i32* %cindex, i32** %cindex.addr, align 8 - store i32* %cindex2, i32** %cindex2.addr, align 8 - store i32* %dstData, i32** %dstData.addr, align 8 - store i32 %original_num_block_elements, i32* %original_num_block_elements.addr, align 4 - %kernel_args = alloca i8*, i64 5, align 16 - %0 = bitcast i32** %srcData.addr to i8* - %1 = getelementptr i8*, i8** %kernel_args, i32 0 - store i8* %0, i8** %1 - %2 = bitcast i32** %cindex.addr to i8* - %3 = getelementptr i8*, i8** %kernel_args, i32 1 - store i8* %2, i8** %3 - %4 = bitcast i32** %cindex2.addr to i8* - %5 = getelementptr i8*, i8** %kernel_args, i32 2 - store i8* %4, i8** %5 - %6 = bitcast i32** %dstData.addr to i8* - %7 = getelementptr i8*, i8** %kernel_args, i32 3 - store i8* %6, i8** %7 - %8 = bitcast i32* %original_num_block_elements.addr to i8* - %9 = getelementptr i8*, i8** %kernel_args, i32 4 - store i8* %8, i8** %9 - %10 = call i32 @__cudaPopCallConfiguration(%struct.dim3* %grid_dim, %struct.dim3* %block_dim, i64* %shmem_size, i8** %stream) - %11 = load i64, i64* %shmem_size, align 8 - %12 = load i8*, i8** %stream, align 8 - %13 = bitcast { i64, i32 }* %grid_dim.coerce to i8* - %14 = bitcast %struct.dim3* %grid_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %13, i8* align 8 %14, i64 12, i1 false) - %15 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 0 - %16 = load i64, i64* %15, align 8 - %17 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 1 - %18 = load i32, i32* %17, align 8 - %19 = bitcast { i64, i32 }* %block_dim.coerce to i8* - %20 = bitcast %struct.dim3* %block_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %19, i8* align 8 %20, i64 12, i1 false) - %21 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 0 - %22 = load i64, i64* %21, align 8 - %23 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 1 - %24 = load i32, i32* %23, align 8 - %25 = bitcast i8* %12 to %struct.CUstream_st* - %call = call i32 @cudaLaunchKernel(i8* bitcast (void (i32*, i32*, i32*, i32*, i32)* @_ZL5pack2PjS_S_S_j to i8*), i64 %16, i32 %18, i64 %22, i32 %24, i8** %kernel_args, i64 %11, %struct.CUstream_st* %25) - br label %setup.end - -setup.end: ; preds = %entry - ret void -} - -declare dso_local i32 @cudaGetLastError() #1 - -; Function Attrs: noinline optnone uwtable -define internal void @_ZL16deallocBlockSumsv() #0 { -entry: - %i = alloca i32, align 4 - %err = alloca i32, align 4 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = load i32, i32* %i, align 4 - %1 = load i32, i32* @_ZL20g_numLevelsAllocated, align 4 - %cmp = icmp ult i32 %0, %1 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %2 = load i32**, i32*** @_ZL15g_scanBlockSums, align 8 - %3 = load i32, i32* %i, align 4 - %idxprom = zext i32 %3 to i64 - %arrayidx = getelementptr inbounds i32*, i32** %2, i64 %idxprom - %4 = load i32*, i32** %arrayidx, align 8 - %5 = bitcast i32* %4 to i8* - %call = call i32 @cudaFree(i8* %5) - br label %for.inc - -for.inc: ; preds = %for.body - %6 = load i32, i32* %i, align 4 - %inc = add i32 %6, 1 - store i32 %inc, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %call1 = call i32 @cudaGetLastError() - store i32 %call1, i32* %err, align 4 - %7 = load i32, i32* %err, align 4 - %cmp2 = icmp ne i32 0, %7 - br i1 %cmp2, label %if.then, label %if.end - -if.then: ; preds = %for.end - %8 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %9 = load i32, i32* %err, align 4 - %call3 = call i8* @cudaGetErrorString(i32 %9) - %call4 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %8, i8* getelementptr inbounds ([46 x i8], [46 x i8]* @.str.11, i64 0, i64 0), i8* getelementptr inbounds ([17 x i8], [17 x i8]* @.str.25, i64 0, i64 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.17, i64 0, i64 0), i32 101, i8* %call3) - call void @exit(i32 1) #16 - unreachable - -if.end: ; preds = %for.end - %10 = load i32**, i32*** @_ZL15g_scanBlockSums, align 8 - %11 = bitcast i32** %10 to i8** - %12 = bitcast i8** %11 to i8* - call void @free(i8* %12) #3 - store i32** null, i32*** @_ZL15g_scanBlockSums, align 8 - store i32 0, i32* @_ZL18g_numEltsAllocated, align 4 - store i32 0, i32* @_ZL20g_numLevelsAllocated, align 4 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local i32 @_Z15compare_vectorsIjEiPT_S1_j(i32* %data1, i32* %data2, i32 %size) #0 comdat { -entry: - %data1.addr = alloca i32*, align 8 - %data2.addr = alloca i32*, align 8 - %size.addr = alloca i32, align 4 - %match = alloca i8, align 1 - %i = alloca i32, align 4 - store i32* %data1, i32** %data1.addr, align 8 - store i32* %data2, i32** %data2.addr, align 8 - store i32 %size, i32* %size.addr, align 4 - %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.28, i64 0, i64 0)) - store i8 1, i8* %match, align 1 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = load i32, i32* %i, align 4 - %1 = load i32, i32* %size.addr, align 4 - %cmp = icmp ult i32 %0, %1 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %2 = load i32*, i32** %data1.addr, align 8 - %3 = load i32, i32* %i, align 4 - %idxprom = zext i32 %3 to i64 - %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom - %4 = load i32, i32* %arrayidx, align 4 - %5 = load i32*, i32** %data2.addr, align 8 - %6 = load i32, i32* %i, align 4 - %idxprom1 = zext i32 %6 to i64 - %arrayidx2 = getelementptr inbounds i32, i32* %5, i64 %idxprom1 - %7 = load i32, i32* %arrayidx2, align 4 - %cmp3 = icmp ne i32 %4, %7 - br i1 %cmp3, label %if.then, label %if.end - -if.then: ; preds = %for.body - store i8 0, i8* %match, align 1 - %8 = load i32, i32* %i, align 4 - %9 = load i32*, i32** %data1.addr, align 8 - %10 = load i32, i32* %i, align 4 - %idxprom4 = zext i32 %10 to i64 - %arrayidx5 = getelementptr inbounds i32, i32* %9, i64 %idxprom4 - %11 = load i32, i32* %arrayidx5, align 4 - %12 = load i32, i32* %i, align 4 - %13 = load i32*, i32** %data2.addr, align 8 - %14 = load i32, i32* %i, align 4 - %idxprom6 = zext i32 %14 to i64 - %arrayidx7 = getelementptr inbounds i32, i32* %13, i64 %idxprom6 - %15 = load i32, i32* %arrayidx7, align 4 - %call8 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str.29, i64 0, i64 0), i32 %8, i32 %11, i32 %12, i32 %15) - br label %if.end - -if.end: ; preds = %if.then, %for.body - br label %for.inc - -for.inc: ; preds = %if.end - %16 = load i32, i32* %i, align 4 - %inc = add i32 %16, 1 - store i32 %inc, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %17 = load i8, i8* %match, align 1 - %tobool = trunc i8 %17 to i1 - br i1 %tobool, label %if.then9, label %if.else - -if.then9: ; preds = %for.end - %call10 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([29 x i8], [29 x i8]* @.str.30, i64 0, i64 0)) - ret i32 0 - -if.else: ; preds = %for.end - %call11 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([33 x i8], [33 x i8]* @.str.31, i64 0, i64 0)) - call void @exit(i32 1) #16 - unreachable -} - -; Function Attrs: nounwind -declare dso_local void @free(i8*) #11 - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN5INodeC2Ei(%class.INode* %this, i32 %f) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %class.INode*, align 8 - %f.addr = alloca i32, align 4 - store %class.INode* %this, %class.INode** %this.addr, align 8 - store i32 %f, i32* %f.addr, align 4 - %this1 = load %class.INode*, %class.INode** %this.addr, align 8 - %0 = bitcast %class.INode* %this1 to i32 (...)*** - store i32 (...)** bitcast (i8** getelementptr inbounds ({ [4 x i8*] }, { [4 x i8*] }* @_ZTV5INode, i32 0, inrange i32 0, i32 2) to i32 (...)**), i32 (...)*** %0, align 8 - %f2 = getelementptr inbounds %class.INode, %class.INode* %this1, i32 0, i32 1 - %1 = load i32, i32* %f.addr, align 4 - store i32 %1, i32* %f2, align 8 - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN8LeafNodeD2Ev(%class.LeafNode* %this) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %class.LeafNode*, align 8 - store %class.LeafNode* %this, %class.LeafNode** %this.addr, align 8 - %this1 = load %class.LeafNode*, %class.LeafNode** %this.addr, align 8 - %0 = bitcast %class.LeafNode* %this1 to %class.INode* - call void @_ZN5INodeD2Ev(%class.INode* %0) - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZN8LeafNodeD0Ev(%class.LeafNode* %this) unnamed_addr #0 comdat align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -entry: - %this.addr = alloca %class.LeafNode*, align 8 - %exn.slot = alloca i8* - %ehselector.slot = alloca i32 - store %class.LeafNode* %this, %class.LeafNode** %this.addr, align 8 - %this1 = load %class.LeafNode*, %class.LeafNode** %this.addr, align 8 - invoke void @_ZN8LeafNodeD2Ev(%class.LeafNode* %this1) - to label %invoke.cont unwind label %lpad - -invoke.cont: ; preds = %entry - %0 = bitcast %class.LeafNode* %this1 to i8* - call void @_ZdlPv(i8* %0) #18 - ret void - -lpad: ; preds = %entry - %1 = landingpad { i8*, i32 } - cleanup - %2 = extractvalue { i8*, i32 } %1, 0 - store i8* %2, i8** %exn.slot, align 8 - %3 = extractvalue { i8*, i32 } %1, 1 - store i32 %3, i32* %ehselector.slot, align 4 - %4 = bitcast %class.LeafNode* %this1 to i8* - call void @_ZdlPv(i8* %4) #18 - br label %eh.resume - -eh.resume: ; preds = %lpad - %exn = load i8*, i8** %exn.slot, align 8 - %sel = load i32, i32* %ehselector.slot, align 4 - %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0 - %lpad.val2 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1 - resume { i8*, i32 } %lpad.val2 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN5INodeD2Ev(%class.INode* %this) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %class.INode*, align 8 - store %class.INode* %this, %class.INode** %this.addr, align 8 - %this1 = load %class.INode*, %class.INode** %this.addr, align 8 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZN5INodeD0Ev(%class.INode* %this) unnamed_addr #0 comdat align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -entry: - %this.addr = alloca %class.INode*, align 8 - %exn.slot = alloca i8* - %ehselector.slot = alloca i32 - store %class.INode* %this, %class.INode** %this.addr, align 8 - %this1 = load %class.INode*, %class.INode** %this.addr, align 8 - invoke void @_ZN5INodeD2Ev(%class.INode* %this1) - to label %invoke.cont unwind label %lpad - -invoke.cont: ; preds = %entry - %0 = bitcast %class.INode* %this1 to i8* - call void @_ZdlPv(i8* %0) #18 - ret void - -lpad: ; preds = %entry - %1 = landingpad { i8*, i32 } - cleanup - %2 = extractvalue { i8*, i32 } %1, 0 - store i8* %2, i8** %exn.slot, align 8 - %3 = extractvalue { i8*, i32 } %1, 1 - store i32 %3, i32* %ehselector.slot, align 4 - %4 = bitcast %class.INode* %this1 to i8* - call void @_ZdlPv(i8* %4) #18 - br label %eh.resume - -eh.resume: ; preds = %lpad - %exn = load i8*, i8** %exn.slot, align 8 - %sel = load i32, i32* %ehselector.slot, align 4 - %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0 - %lpad.val2 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1 - resume { i8*, i32 } %lpad.val2 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZN12InternalNodeD2Ev(%class.InternalNode* %this) unnamed_addr #0 comdat align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -entry: - %this.addr = alloca %class.InternalNode*, align 8 - %exn.slot = alloca i8* - %ehselector.slot = alloca i32 - store %class.InternalNode* %this, %class.InternalNode** %this.addr, align 8 - %this1 = load %class.InternalNode*, %class.InternalNode** %this.addr, align 8 - %0 = bitcast %class.InternalNode* %this1 to i32 (...)*** - store i32 (...)** bitcast (i8** getelementptr inbounds ({ [4 x i8*] }, { [4 x i8*] }* @_ZTV12InternalNode, i32 0, inrange i32 0, i32 2) to i32 (...)**), i32 (...)*** %0, align 8 - %left = getelementptr inbounds %class.InternalNode, %class.InternalNode* %this1, i32 0, i32 1 - %1 = load %class.INode*, %class.INode** %left, align 8 - %isnull = icmp eq %class.INode* %1, null - br i1 %isnull, label %delete.end, label %delete.notnull - -delete.notnull: ; preds = %entry - %2 = bitcast %class.INode* %1 to void (%class.INode*)*** - %vtable = load void (%class.INode*)**, void (%class.INode*)*** %2, align 8 - %vfn = getelementptr inbounds void (%class.INode*)*, void (%class.INode*)** %vtable, i64 1 - %3 = load void (%class.INode*)*, void (%class.INode*)** %vfn, align 8 - invoke void %3(%class.INode* %1) - to label %invoke.cont unwind label %lpad - -invoke.cont: ; preds = %delete.notnull - br label %delete.end - -delete.end: ; preds = %invoke.cont, %entry - %right = getelementptr inbounds %class.InternalNode, %class.InternalNode* %this1, i32 0, i32 2 - %4 = load %class.INode*, %class.INode** %right, align 8 - %isnull2 = icmp eq %class.INode* %4, null - br i1 %isnull2, label %delete.end7, label %delete.notnull3 - -delete.notnull3: ; preds = %delete.end - %5 = bitcast %class.INode* %4 to void (%class.INode*)*** - %vtable4 = load void (%class.INode*)**, void (%class.INode*)*** %5, align 8 - %vfn5 = getelementptr inbounds void (%class.INode*)*, void (%class.INode*)** %vtable4, i64 1 - %6 = load void (%class.INode*)*, void (%class.INode*)** %vfn5, align 8 - invoke void %6(%class.INode* %4) - to label %invoke.cont6 unwind label %lpad - -invoke.cont6: ; preds = %delete.notnull3 - br label %delete.end7 - -delete.end7: ; preds = %invoke.cont6, %delete.end - %7 = bitcast %class.InternalNode* %this1 to %class.INode* - call void @_ZN5INodeD2Ev(%class.INode* %7) - ret void - -lpad: ; preds = %delete.notnull3, %delete.notnull - %8 = landingpad { i8*, i32 } - cleanup - %9 = extractvalue { i8*, i32 } %8, 0 - store i8* %9, i8** %exn.slot, align 8 - %10 = extractvalue { i8*, i32 } %8, 1 - store i32 %10, i32* %ehselector.slot, align 4 - %11 = bitcast %class.InternalNode* %this1 to %class.INode* - invoke void @_ZN5INodeD2Ev(%class.INode* %11) - to label %invoke.cont8 unwind label %terminate.lpad - -invoke.cont8: ; preds = %lpad - br label %eh.resume - -eh.resume: ; preds = %invoke.cont8 - %exn = load i8*, i8** %exn.slot, align 8 - %sel = load i32, i32* %ehselector.slot, align 4 - %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0 - %lpad.val9 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1 - resume { i8*, i32 } %lpad.val9 - -terminate.lpad: ; preds = %lpad - %12 = landingpad { i8*, i32 } - catch i8* null - %13 = extractvalue { i8*, i32 } %12, 0 - call void @__clang_call_terminate(i8* %13) #16 - unreachable -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZN12InternalNodeD0Ev(%class.InternalNode* %this) unnamed_addr #0 comdat align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -entry: - %this.addr = alloca %class.InternalNode*, align 8 - %exn.slot = alloca i8* - %ehselector.slot = alloca i32 - store %class.InternalNode* %this, %class.InternalNode** %this.addr, align 8 - %this1 = load %class.InternalNode*, %class.InternalNode** %this.addr, align 8 - invoke void @_ZN12InternalNodeD2Ev(%class.InternalNode* %this1) - to label %invoke.cont unwind label %lpad - -invoke.cont: ; preds = %entry - %0 = bitcast %class.InternalNode* %this1 to i8* - call void @_ZdlPv(i8* %0) #18 - ret void - -lpad: ; preds = %entry - %1 = landingpad { i8*, i32 } - cleanup - %2 = extractvalue { i8*, i32 } %1, 0 - store i8* %2, i8** %exn.slot, align 8 - %3 = extractvalue { i8*, i32 } %1, 1 - store i32 %3, i32* %ehselector.slot, align 4 - %4 = bitcast %class.InternalNode* %this1 to i8* - call void @_ZdlPv(i8* %4) #18 - br label %eh.resume - -eh.resume: ; preds = %lpad - %exn = load i8*, i8** %exn.slot, align 8 - %sel = load i32, i32* %ehselector.slot, align 4 - %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0 - %lpad.val2 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1 - resume { i8*, i32 } %lpad.val2 -} - -declare dso_local i64 @ftell(%struct._IO_FILE*) #1 - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt3mapIhSt6vectorIbSaIbEESt4lessIhESaISt4pairIKhS2_EEEC2Ev(%"class.std::map"* %this) unnamed_addr #0 comdat align 2 { -entry: - %this.addr = alloca %"class.std::map"*, align 8 - store %"class.std::map"* %this, %"class.std::map"** %this.addr, align 8 - %this1 = load %"class.std::map"*, %"class.std::map"** %this.addr, align 8 - %_M_t = getelementptr inbounds %"class.std::map", %"class.std::map"* %this1, i32 0, i32 0 - call void @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EEC2Ev(%"class.std::_Rb_tree"* %_M_t) - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt6vectorIbSaIbEEC2Ev(%"class.std::vector.0"* %this) unnamed_addr #0 comdat align 2 { -entry: - %this.addr = alloca %"class.std::vector.0"*, align 8 - store %"class.std::vector.0"* %this, %"class.std::vector.0"** %this.addr, align 8 - %this1 = load %"class.std::vector.0"*, %"class.std::vector.0"** %this.addr, align 8 - %0 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Bvector_base"* - call void @_ZNSt13_Bvector_baseISaIbEEC2Ev(%"struct.std::_Bvector_base"* %0) - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %"struct.std::_Rb_tree_node_base"* @_ZNSt3mapIhSt6vectorIbSaIbEESt4lessIhESaISt4pairIKhS2_EEE5beginEv(%"class.std::map"* %this) #0 comdat align 2 { -entry: - %retval = alloca %"struct.std::_Rb_tree_iterator", align 8 - %this.addr = alloca %"class.std::map"*, align 8 - store %"class.std::map"* %this, %"class.std::map"** %this.addr, align 8 - %this1 = load %"class.std::map"*, %"class.std::map"** %this.addr, align 8 - %_M_t = getelementptr inbounds %"class.std::map", %"class.std::map"* %this1, i32 0, i32 0 - %call = call %"struct.std::_Rb_tree_node_base"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE5beginEv(%"class.std::_Rb_tree"* %_M_t) - %coerce.dive = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %retval, i32 0, i32 0 - store %"struct.std::_Rb_tree_node_base"* %call, %"struct.std::_Rb_tree_node_base"** %coerce.dive, align 8 - %coerce.dive2 = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %retval, i32 0, i32 0 - %0 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %coerce.dive2, align 8 - ret %"struct.std::_Rb_tree_node_base"* %0 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSt23_Rb_tree_const_iteratorISt4pairIKhSt6vectorIbSaIbEEEEC2ERKSt17_Rb_tree_iteratorIS5_E(%"struct.std::_Rb_tree_const_iterator"* %this, %"struct.std::_Rb_tree_iterator"* dereferenceable(8) %__it) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Rb_tree_const_iterator"*, align 8 - %__it.addr = alloca %"struct.std::_Rb_tree_iterator"*, align 8 - store %"struct.std::_Rb_tree_const_iterator"* %this, %"struct.std::_Rb_tree_const_iterator"** %this.addr, align 8 - store %"struct.std::_Rb_tree_iterator"* %__it, %"struct.std::_Rb_tree_iterator"** %__it.addr, align 8 - %this1 = load %"struct.std::_Rb_tree_const_iterator"*, %"struct.std::_Rb_tree_const_iterator"** %this.addr, align 8 - %_M_node = getelementptr inbounds %"struct.std::_Rb_tree_const_iterator", %"struct.std::_Rb_tree_const_iterator"* %this1, i32 0, i32 0 - %0 = load %"struct.std::_Rb_tree_iterator"*, %"struct.std::_Rb_tree_iterator"** %__it.addr, align 8 - %_M_node2 = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %0, i32 0, i32 0 - %1 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %_M_node2, align 8 - store %"struct.std::_Rb_tree_node_base"* %1, %"struct.std::_Rb_tree_node_base"** %_M_node, align 8 - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local zeroext i1 @_ZNKSt23_Rb_tree_const_iteratorISt4pairIKhSt6vectorIbSaIbEEEEneERKS6_(%"struct.std::_Rb_tree_const_iterator"* %this, %"struct.std::_Rb_tree_const_iterator"* dereferenceable(8) %__x) #6 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Rb_tree_const_iterator"*, align 8 - %__x.addr = alloca %"struct.std::_Rb_tree_const_iterator"*, align 8 - store %"struct.std::_Rb_tree_const_iterator"* %this, %"struct.std::_Rb_tree_const_iterator"** %this.addr, align 8 - store %"struct.std::_Rb_tree_const_iterator"* %__x, %"struct.std::_Rb_tree_const_iterator"** %__x.addr, align 8 - %this1 = load %"struct.std::_Rb_tree_const_iterator"*, %"struct.std::_Rb_tree_const_iterator"** %this.addr, align 8 - %_M_node = getelementptr inbounds %"struct.std::_Rb_tree_const_iterator", %"struct.std::_Rb_tree_const_iterator"* %this1, i32 0, i32 0 - %0 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %_M_node, align 8 - %1 = load %"struct.std::_Rb_tree_const_iterator"*, %"struct.std::_Rb_tree_const_iterator"** %__x.addr, align 8 - %_M_node2 = getelementptr inbounds %"struct.std::_Rb_tree_const_iterator", %"struct.std::_Rb_tree_const_iterator"* %1, i32 0, i32 0 - %2 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %_M_node2, align 8 - %cmp = icmp ne %"struct.std::_Rb_tree_node_base"* %0, %2 - ret i1 %cmp -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %"struct.std::_Rb_tree_node_base"* @_ZNSt3mapIhSt6vectorIbSaIbEESt4lessIhESaISt4pairIKhS2_EEE3endEv(%"class.std::map"* %this) #0 comdat align 2 { -entry: - %retval = alloca %"struct.std::_Rb_tree_iterator", align 8 - %this.addr = alloca %"class.std::map"*, align 8 - store %"class.std::map"* %this, %"class.std::map"** %this.addr, align 8 - %this1 = load %"class.std::map"*, %"class.std::map"** %this.addr, align 8 - %_M_t = getelementptr inbounds %"class.std::map", %"class.std::map"* %this1, i32 0, i32 0 - %call = call %"struct.std::_Rb_tree_node_base"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE3endEv(%"class.std::_Rb_tree"* %_M_t) - %coerce.dive = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %retval, i32 0, i32 0 - store %"struct.std::_Rb_tree_node_base"* %call, %"struct.std::_Rb_tree_node_base"** %coerce.dive, align 8 - %coerce.dive2 = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %retval, i32 0, i32 0 - %0 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %coerce.dive2, align 8 - ret %"struct.std::_Rb_tree_node_base"* %0 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local i64 @_ZSt8distanceISt19_Bit_const_iteratorENSt15iterator_traitsIT_E15difference_typeES2_S2_(i64* %__first.coerce0, i32 %__first.coerce1, i64* %__last.coerce0, i32 %__last.coerce1) #0 comdat { -entry: - %__first = alloca %"struct.std::_Bit_const_iterator", align 8 - %__last = alloca %"struct.std::_Bit_const_iterator", align 8 - %agg.tmp = alloca %"struct.std::_Bit_const_iterator", align 8 - %agg.tmp1 = alloca %"struct.std::_Bit_const_iterator", align 8 - %agg.tmp2 = alloca %"struct.std::random_access_iterator_tag", align 1 - %undef.agg.tmp = alloca %"struct.std::random_access_iterator_tag", align 1 - %0 = bitcast %"struct.std::_Bit_const_iterator"* %__first to { i64*, i32 }* - %1 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %0, i32 0, i32 0 - store i64* %__first.coerce0, i64** %1, align 8 - %2 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %0, i32 0, i32 1 - store i32 %__first.coerce1, i32* %2, align 8 - %3 = bitcast %"struct.std::_Bit_const_iterator"* %__last to { i64*, i32 }* - %4 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %3, i32 0, i32 0 - store i64* %__last.coerce0, i64** %4, align 8 - %5 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %3, i32 0, i32 1 - store i32 %__last.coerce1, i32* %5, align 8 - %6 = bitcast %"struct.std::_Bit_const_iterator"* %agg.tmp to i8* - %7 = bitcast %"struct.std::_Bit_const_iterator"* %__first to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %6, i8* align 8 %7, i64 16, i1 false) - %8 = bitcast %"struct.std::_Bit_const_iterator"* %agg.tmp1 to i8* - %9 = bitcast %"struct.std::_Bit_const_iterator"* %__last to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %8, i8* align 8 %9, i64 16, i1 false) - call void @_ZSt19__iterator_categoryISt19_Bit_const_iteratorENSt15iterator_traitsIT_E17iterator_categoryERKS2_(%"struct.std::_Bit_const_iterator"* dereferenceable(16) %__first) - %10 = bitcast %"struct.std::_Bit_const_iterator"* %agg.tmp to { i64*, i32 }* - %11 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %10, i32 0, i32 0 - %12 = load i64*, i64** %11, align 8 - %13 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %10, i32 0, i32 1 - %14 = load i32, i32* %13, align 8 - %15 = bitcast %"struct.std::_Bit_const_iterator"* %agg.tmp1 to { i64*, i32 }* - %16 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %15, i32 0, i32 0 - %17 = load i64*, i64** %16, align 8 - %18 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %15, i32 0, i32 1 - %19 = load i32, i32* %18, align 8 - %call = call i64 @_ZSt10__distanceISt19_Bit_const_iteratorENSt15iterator_traitsIT_E15difference_typeES2_S2_St26random_access_iterator_tag(i64* %12, i32 %14, i64* %17, i32 %19) - ret i64 %call -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %"struct.std::pair"* @_ZNKSt23_Rb_tree_const_iteratorISt4pairIKhSt6vectorIbSaIbEEEEptEv(%"struct.std::_Rb_tree_const_iterator"* %this) #0 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Rb_tree_const_iterator"*, align 8 - store %"struct.std::_Rb_tree_const_iterator"* %this, %"struct.std::_Rb_tree_const_iterator"** %this.addr, align 8 - %this1 = load %"struct.std::_Rb_tree_const_iterator"*, %"struct.std::_Rb_tree_const_iterator"** %this.addr, align 8 - %_M_node = getelementptr inbounds %"struct.std::_Rb_tree_const_iterator", %"struct.std::_Rb_tree_const_iterator"* %this1, i32 0, i32 0 - %0 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %_M_node, align 8 - %1 = bitcast %"struct.std::_Rb_tree_node_base"* %0 to %"struct.std::_Rb_tree_node"* - %call = call %"struct.std::pair"* @_ZNKSt13_Rb_tree_nodeISt4pairIKhSt6vectorIbSaIbEEEE9_M_valptrEv(%"struct.std::_Rb_tree_node"* %1) - ret %"struct.std::pair"* %call -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local { i64*, i32 } @_ZNKSt6vectorIbSaIbEE5beginEv(%"class.std::vector.0"* %this) #0 comdat align 2 { -entry: - %retval = alloca %"struct.std::_Bit_const_iterator", align 8 - %this.addr = alloca %"class.std::vector.0"*, align 8 - store %"class.std::vector.0"* %this, %"class.std::vector.0"** %this.addr, align 8 - %this1 = load %"class.std::vector.0"*, %"class.std::vector.0"** %this.addr, align 8 - %0 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Bvector_base"* - %_M_impl = getelementptr inbounds %"struct.std::_Bvector_base", %"struct.std::_Bvector_base"* %0, i32 0, i32 0 - %_M_start = getelementptr inbounds %"struct.std::_Bvector_base >::_Bvector_impl", %"struct.std::_Bvector_base >::_Bvector_impl"* %_M_impl, i32 0, i32 0 - call void @_ZNSt19_Bit_const_iteratorC2ERKSt13_Bit_iterator(%"struct.std::_Bit_const_iterator"* %retval, %"struct.std::_Bit_iterator"* dereferenceable(16) %_M_start) - %1 = bitcast %"struct.std::_Bit_const_iterator"* %retval to { i64*, i32 }* - %2 = load { i64*, i32 }, { i64*, i32 }* %1, align 8 - ret { i64*, i32 } %2 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local { i64*, i32 } @_ZNKSt6vectorIbSaIbEE3endEv(%"class.std::vector.0"* %this) #0 comdat align 2 { -entry: - %retval = alloca %"struct.std::_Bit_const_iterator", align 8 - %this.addr = alloca %"class.std::vector.0"*, align 8 - store %"class.std::vector.0"* %this, %"class.std::vector.0"** %this.addr, align 8 - %this1 = load %"class.std::vector.0"*, %"class.std::vector.0"** %this.addr, align 8 - %0 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Bvector_base"* - %_M_impl = getelementptr inbounds %"struct.std::_Bvector_base", %"struct.std::_Bvector_base"* %0, i32 0, i32 0 - %_M_finish = getelementptr inbounds %"struct.std::_Bvector_base >::_Bvector_impl", %"struct.std::_Bvector_base >::_Bvector_impl"* %_M_impl, i32 0, i32 1 - call void @_ZNSt19_Bit_const_iteratorC2ERKSt13_Bit_iterator(%"struct.std::_Bit_const_iterator"* %retval, %"struct.std::_Bit_iterator"* dereferenceable(16) %_M_finish) - %1 = bitcast %"struct.std::_Bit_const_iterator"* %retval to { i64*, i32 }* - %2 = load { i64*, i32 }, { i64*, i32 }* %1, align 8 - ret { i64*, i32 } %2 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local zeroext i1 @_ZNKSt6vectorIbSaIbEEixEm(%"class.std::vector.0"* %this, i64 %__n) #0 comdat align 2 { -entry: - %this.addr = alloca %"class.std::vector.0"*, align 8 - %__n.addr = alloca i64, align 8 - %ref.tmp = alloca %"struct.std::_Bit_const_iterator", align 8 - store %"class.std::vector.0"* %this, %"class.std::vector.0"** %this.addr, align 8 - store i64 %__n, i64* %__n.addr, align 8 - %this1 = load %"class.std::vector.0"*, %"class.std::vector.0"** %this.addr, align 8 - %0 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Bvector_base"* - %_M_impl = getelementptr inbounds %"struct.std::_Bvector_base", %"struct.std::_Bvector_base"* %0, i32 0, i32 0 - %_M_start = getelementptr inbounds %"struct.std::_Bvector_base >::_Bvector_impl", %"struct.std::_Bvector_base >::_Bvector_impl"* %_M_impl, i32 0, i32 0 - %1 = bitcast %"struct.std::_Bit_iterator"* %_M_start to %"struct.std::_Bit_iterator_base"* - %_M_p = getelementptr inbounds %"struct.std::_Bit_iterator_base", %"struct.std::_Bit_iterator_base"* %1, i32 0, i32 0 - %2 = load i64*, i64** %_M_p, align 8 - %3 = load i64, i64* %__n.addr, align 8 - %div = udiv i64 %3, 64 - %add.ptr = getelementptr inbounds i64, i64* %2, i64 %div - %4 = load i64, i64* %__n.addr, align 8 - %rem = urem i64 %4, 64 - %conv = trunc i64 %rem to i32 - call void @_ZNSt19_Bit_const_iteratorC2EPmj(%"struct.std::_Bit_const_iterator"* %ref.tmp, i64* %add.ptr, i32 %conv) - %call = call zeroext i1 @_ZNKSt19_Bit_const_iteratordeEv(%"struct.std::_Bit_const_iterator"* %ref.tmp) - ret i1 %call -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local float @_ZSt3powfi(float %__x, i32 %__n) #6 comdat { -entry: - %__x.addr = alloca float, align 4 - %__n.addr = alloca i32, align 4 - store float %__x, float* %__x.addr, align 4 - store i32 %__n, i32* %__n.addr, align 4 - %0 = load float, float* %__x.addr, align 4 - %1 = load i32, i32* %__n.addr, align 4 - %2 = call float @llvm.powi.f32(float %0, i32 %1) - ret float %2 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local dereferenceable(8) %"struct.std::_Rb_tree_const_iterator"* @_ZNSt23_Rb_tree_const_iteratorISt4pairIKhSt6vectorIbSaIbEEEEppEv(%"struct.std::_Rb_tree_const_iterator"* %this) #6 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Rb_tree_const_iterator"*, align 8 - store %"struct.std::_Rb_tree_const_iterator"* %this, %"struct.std::_Rb_tree_const_iterator"** %this.addr, align 8 - %this1 = load %"struct.std::_Rb_tree_const_iterator"*, %"struct.std::_Rb_tree_const_iterator"** %this.addr, align 8 - %_M_node = getelementptr inbounds %"struct.std::_Rb_tree_const_iterator", %"struct.std::_Rb_tree_const_iterator"* %this1, i32 0, i32 0 - %0 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %_M_node, align 8 - %call = call %"struct.std::_Rb_tree_node_base"* @_ZSt18_Rb_tree_incrementPKSt18_Rb_tree_node_base(%"struct.std::_Rb_tree_node_base"* %0) #10 - %_M_node2 = getelementptr inbounds %"struct.std::_Rb_tree_const_iterator", %"struct.std::_Rb_tree_const_iterator"* %this1, i32 0, i32 0 - store %"struct.std::_Rb_tree_node_base"* %call, %"struct.std::_Rb_tree_node_base"** %_M_node2, align 8 - ret %"struct.std::_Rb_tree_const_iterator"* %this1 -} - -; Function Attrs: nounwind -declare dso_local double @log(double) #11 - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt3mapIhSt6vectorIbSaIbEESt4lessIhESaISt4pairIKhS2_EEED2Ev(%"class.std::map"* %this) unnamed_addr #0 comdat align 2 { -entry: - %this.addr = alloca %"class.std::map"*, align 8 - store %"class.std::map"* %this, %"class.std::map"** %this.addr, align 8 - %this1 = load %"class.std::map"*, %"class.std::map"** %this.addr, align 8 - %_M_t = getelementptr inbounds %"class.std::map", %"class.std::map"* %this1, i32 0, i32 0 - call void @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EED2Ev(%"class.std::_Rb_tree"* %_M_t) - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EEC2Ev(%"class.std::_Rb_tree"* %this) unnamed_addr #0 comdat align 2 { -entry: - %this.addr = alloca %"class.std::_Rb_tree"*, align 8 - store %"class.std::_Rb_tree"* %this, %"class.std::_Rb_tree"** %this.addr, align 8 - %this1 = load %"class.std::_Rb_tree"*, %"class.std::_Rb_tree"** %this.addr, align 8 - %_M_impl = getelementptr inbounds %"class.std::_Rb_tree", %"class.std::_Rb_tree"* %this1, i32 0, i32 0 - call void @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE13_Rb_tree_implIS9_Lb0EEC2Ev(%"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl"* %_M_impl) - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE13_Rb_tree_implIS9_Lb0EEC2Ev(%"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl"* %this) unnamed_addr #0 comdat align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -entry: - %this.addr = alloca %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl"*, align 8 - %exn.slot = alloca i8* - %ehselector.slot = alloca i32 - store %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl"* %this, %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl"** %this.addr, align 8 - %this1 = load %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl"*, %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl"** %this.addr, align 8 - %0 = bitcast %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl"* %this1 to %"class.std::allocator.4"* - call void @_ZNSaISt13_Rb_tree_nodeISt4pairIKhSt6vectorIbSaIbEEEEEC2Ev(%"class.std::allocator.4"* %0) #3 - %1 = bitcast %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl"* %this1 to %"struct.std::_Rb_tree_key_compare"* - invoke void @_ZNSt20_Rb_tree_key_compareISt4lessIhEEC2Ev(%"struct.std::_Rb_tree_key_compare"* %1) - to label %invoke.cont unwind label %lpad - -invoke.cont: ; preds = %entry - %2 = bitcast %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl"* %this1 to i8* - %3 = getelementptr inbounds i8, i8* %2, i64 8 - %4 = bitcast i8* %3 to %"struct.std::_Rb_tree_header"* - invoke void @_ZNSt15_Rb_tree_headerC2Ev(%"struct.std::_Rb_tree_header"* %4) - to label %invoke.cont2 unwind label %lpad - -invoke.cont2: ; preds = %invoke.cont - ret void - -lpad: ; preds = %invoke.cont, %entry - %5 = landingpad { i8*, i32 } - cleanup - %6 = extractvalue { i8*, i32 } %5, 0 - store i8* %6, i8** %exn.slot, align 8 - %7 = extractvalue { i8*, i32 } %5, 1 - store i32 %7, i32* %ehselector.slot, align 4 - %8 = bitcast %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl"* %this1 to %"class.std::allocator.4"* - call void @_ZNSaISt13_Rb_tree_nodeISt4pairIKhSt6vectorIbSaIbEEEEED2Ev(%"class.std::allocator.4"* %8) #3 - br label %eh.resume - -eh.resume: ; preds = %lpad - %exn = load i8*, i8** %exn.slot, align 8 - %sel = load i32, i32* %ehselector.slot, align 4 - %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0 - %lpad.val3 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1 - resume { i8*, i32 } %lpad.val3 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSaISt13_Rb_tree_nodeISt4pairIKhSt6vectorIbSaIbEEEEEC2Ev(%"class.std::allocator.4"* %this) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"class.std::allocator.4"*, align 8 - store %"class.std::allocator.4"* %this, %"class.std::allocator.4"** %this.addr, align 8 - %this1 = load %"class.std::allocator.4"*, %"class.std::allocator.4"** %this.addr, align 8 - %0 = bitcast %"class.std::allocator.4"* %this1 to %"class.__gnu_cxx::new_allocator.5"* - call void @_ZN9__gnu_cxx13new_allocatorISt13_Rb_tree_nodeISt4pairIKhSt6vectorIbSaIbEEEEEC2Ev(%"class.__gnu_cxx::new_allocator.5"* %0) #3 - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSt20_Rb_tree_key_compareISt4lessIhEEC2Ev(%"struct.std::_Rb_tree_key_compare"* %this) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Rb_tree_key_compare"*, align 8 - store %"struct.std::_Rb_tree_key_compare"* %this, %"struct.std::_Rb_tree_key_compare"** %this.addr, align 8 - %this1 = load %"struct.std::_Rb_tree_key_compare"*, %"struct.std::_Rb_tree_key_compare"** %this.addr, align 8 - %_M_key_compare = getelementptr inbounds %"struct.std::_Rb_tree_key_compare", %"struct.std::_Rb_tree_key_compare"* %this1, i32 0, i32 0 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt15_Rb_tree_headerC2Ev(%"struct.std::_Rb_tree_header"* %this) unnamed_addr #0 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Rb_tree_header"*, align 8 - store %"struct.std::_Rb_tree_header"* %this, %"struct.std::_Rb_tree_header"** %this.addr, align 8 - %this1 = load %"struct.std::_Rb_tree_header"*, %"struct.std::_Rb_tree_header"** %this.addr, align 8 - %_M_header = getelementptr inbounds %"struct.std::_Rb_tree_header", %"struct.std::_Rb_tree_header"* %this1, i32 0, i32 0 - %_M_header2 = getelementptr inbounds %"struct.std::_Rb_tree_header", %"struct.std::_Rb_tree_header"* %this1, i32 0, i32 0 - %_M_color = getelementptr inbounds %"struct.std::_Rb_tree_node_base", %"struct.std::_Rb_tree_node_base"* %_M_header2, i32 0, i32 0 - store i32 0, i32* %_M_color, align 8 - call void @_ZNSt15_Rb_tree_header8_M_resetEv(%"struct.std::_Rb_tree_header"* %this1) - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSaISt13_Rb_tree_nodeISt4pairIKhSt6vectorIbSaIbEEEEED2Ev(%"class.std::allocator.4"* %this) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"class.std::allocator.4"*, align 8 - store %"class.std::allocator.4"* %this, %"class.std::allocator.4"** %this.addr, align 8 - %this1 = load %"class.std::allocator.4"*, %"class.std::allocator.4"** %this.addr, align 8 - %0 = bitcast %"class.std::allocator.4"* %this1 to %"class.__gnu_cxx::new_allocator.5"* - call void @_ZN9__gnu_cxx13new_allocatorISt13_Rb_tree_nodeISt4pairIKhSt6vectorIbSaIbEEEEED2Ev(%"class.__gnu_cxx::new_allocator.5"* %0) #3 - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx13new_allocatorISt13_Rb_tree_nodeISt4pairIKhSt6vectorIbSaIbEEEEEC2Ev(%"class.__gnu_cxx::new_allocator.5"* %this) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::new_allocator.5"*, align 8 - store %"class.__gnu_cxx::new_allocator.5"* %this, %"class.__gnu_cxx::new_allocator.5"** %this.addr, align 8 - %this1 = load %"class.__gnu_cxx::new_allocator.5"*, %"class.__gnu_cxx::new_allocator.5"** %this.addr, align 8 - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSt15_Rb_tree_header8_M_resetEv(%"struct.std::_Rb_tree_header"* %this) #6 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Rb_tree_header"*, align 8 - store %"struct.std::_Rb_tree_header"* %this, %"struct.std::_Rb_tree_header"** %this.addr, align 8 - %this1 = load %"struct.std::_Rb_tree_header"*, %"struct.std::_Rb_tree_header"** %this.addr, align 8 - %_M_header = getelementptr inbounds %"struct.std::_Rb_tree_header", %"struct.std::_Rb_tree_header"* %this1, i32 0, i32 0 - %_M_parent = getelementptr inbounds %"struct.std::_Rb_tree_node_base", %"struct.std::_Rb_tree_node_base"* %_M_header, i32 0, i32 1 - store %"struct.std::_Rb_tree_node_base"* null, %"struct.std::_Rb_tree_node_base"** %_M_parent, align 8 - %_M_header2 = getelementptr inbounds %"struct.std::_Rb_tree_header", %"struct.std::_Rb_tree_header"* %this1, i32 0, i32 0 - %_M_header3 = getelementptr inbounds %"struct.std::_Rb_tree_header", %"struct.std::_Rb_tree_header"* %this1, i32 0, i32 0 - %_M_left = getelementptr inbounds %"struct.std::_Rb_tree_node_base", %"struct.std::_Rb_tree_node_base"* %_M_header3, i32 0, i32 2 - store %"struct.std::_Rb_tree_node_base"* %_M_header2, %"struct.std::_Rb_tree_node_base"** %_M_left, align 8 - %_M_header4 = getelementptr inbounds %"struct.std::_Rb_tree_header", %"struct.std::_Rb_tree_header"* %this1, i32 0, i32 0 - %_M_header5 = getelementptr inbounds %"struct.std::_Rb_tree_header", %"struct.std::_Rb_tree_header"* %this1, i32 0, i32 0 - %_M_right = getelementptr inbounds %"struct.std::_Rb_tree_node_base", %"struct.std::_Rb_tree_node_base"* %_M_header5, i32 0, i32 3 - store %"struct.std::_Rb_tree_node_base"* %_M_header4, %"struct.std::_Rb_tree_node_base"** %_M_right, align 8 - %_M_node_count = getelementptr inbounds %"struct.std::_Rb_tree_header", %"struct.std::_Rb_tree_header"* %this1, i32 0, i32 1 - store i64 0, i64* %_M_node_count, align 8 - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx13new_allocatorISt13_Rb_tree_nodeISt4pairIKhSt6vectorIbSaIbEEEEED2Ev(%"class.__gnu_cxx::new_allocator.5"* %this) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::new_allocator.5"*, align 8 - store %"class.__gnu_cxx::new_allocator.5"* %this, %"class.__gnu_cxx::new_allocator.5"** %this.addr, align 8 - %this1 = load %"class.__gnu_cxx::new_allocator.5"*, %"class.__gnu_cxx::new_allocator.5"** %this.addr, align 8 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt13_Bvector_baseISaIbEEC2Ev(%"struct.std::_Bvector_base"* %this) unnamed_addr #0 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Bvector_base"*, align 8 - store %"struct.std::_Bvector_base"* %this, %"struct.std::_Bvector_base"** %this.addr, align 8 - %this1 = load %"struct.std::_Bvector_base"*, %"struct.std::_Bvector_base"** %this.addr, align 8 - %_M_impl = getelementptr inbounds %"struct.std::_Bvector_base", %"struct.std::_Bvector_base"* %this1, i32 0, i32 0 - call void @_ZNSt13_Bvector_baseISaIbEE13_Bvector_implC2Ev(%"struct.std::_Bvector_base >::_Bvector_impl"* %_M_impl) - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt13_Bvector_baseISaIbEE13_Bvector_implC2Ev(%"struct.std::_Bvector_base >::_Bvector_impl"* %this) unnamed_addr #0 comdat align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -entry: - %this.addr = alloca %"struct.std::_Bvector_base >::_Bvector_impl"*, align 8 - %exn.slot = alloca i8* - %ehselector.slot = alloca i32 - store %"struct.std::_Bvector_base >::_Bvector_impl"* %this, %"struct.std::_Bvector_base >::_Bvector_impl"** %this.addr, align 8 - %this1 = load %"struct.std::_Bvector_base >::_Bvector_impl"*, %"struct.std::_Bvector_base >::_Bvector_impl"** %this.addr, align 8 - %0 = bitcast %"struct.std::_Bvector_base >::_Bvector_impl"* %this1 to %"class.std::allocator.1"* - call void @_ZNSaImEC2Ev(%"class.std::allocator.1"* %0) #3 - %_M_start = getelementptr inbounds %"struct.std::_Bvector_base >::_Bvector_impl", %"struct.std::_Bvector_base >::_Bvector_impl"* %this1, i32 0, i32 0 - invoke void @_ZNSt13_Bit_iteratorC2Ev(%"struct.std::_Bit_iterator"* %_M_start) - to label %invoke.cont unwind label %lpad - -invoke.cont: ; preds = %entry - %_M_finish = getelementptr inbounds %"struct.std::_Bvector_base >::_Bvector_impl", %"struct.std::_Bvector_base >::_Bvector_impl"* %this1, i32 0, i32 1 - invoke void @_ZNSt13_Bit_iteratorC2Ev(%"struct.std::_Bit_iterator"* %_M_finish) - to label %invoke.cont2 unwind label %lpad - -invoke.cont2: ; preds = %invoke.cont - %_M_end_of_storage = getelementptr inbounds %"struct.std::_Bvector_base >::_Bvector_impl", %"struct.std::_Bvector_base >::_Bvector_impl"* %this1, i32 0, i32 2 - store i64* null, i64** %_M_end_of_storage, align 8 - ret void - -lpad: ; preds = %invoke.cont, %entry - %1 = landingpad { i8*, i32 } - cleanup - %2 = extractvalue { i8*, i32 } %1, 0 - store i8* %2, i8** %exn.slot, align 8 - %3 = extractvalue { i8*, i32 } %1, 1 - store i32 %3, i32* %ehselector.slot, align 4 - %4 = bitcast %"struct.std::_Bvector_base >::_Bvector_impl"* %this1 to %"class.std::allocator.1"* - call void @_ZNSaImED2Ev(%"class.std::allocator.1"* %4) #3 - br label %eh.resume - -eh.resume: ; preds = %lpad - %exn = load i8*, i8** %exn.slot, align 8 - %sel = load i32, i32* %ehselector.slot, align 4 - %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0 - %lpad.val3 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1 - resume { i8*, i32 } %lpad.val3 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSaImEC2Ev(%"class.std::allocator.1"* %this) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"class.std::allocator.1"*, align 8 - store %"class.std::allocator.1"* %this, %"class.std::allocator.1"** %this.addr, align 8 - %this1 = load %"class.std::allocator.1"*, %"class.std::allocator.1"** %this.addr, align 8 - %0 = bitcast %"class.std::allocator.1"* %this1 to %"class.__gnu_cxx::new_allocator.2"* - call void @_ZN9__gnu_cxx13new_allocatorImEC2Ev(%"class.__gnu_cxx::new_allocator.2"* %0) #3 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt13_Bit_iteratorC2Ev(%"struct.std::_Bit_iterator"* %this) unnamed_addr #0 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Bit_iterator"*, align 8 - store %"struct.std::_Bit_iterator"* %this, %"struct.std::_Bit_iterator"** %this.addr, align 8 - %this1 = load %"struct.std::_Bit_iterator"*, %"struct.std::_Bit_iterator"** %this.addr, align 8 - %0 = bitcast %"struct.std::_Bit_iterator"* %this1 to %"struct.std::_Bit_iterator_base"* - call void @_ZNSt18_Bit_iterator_baseC2EPmj(%"struct.std::_Bit_iterator_base"* %0, i64* null, i32 0) - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx13new_allocatorImEC2Ev(%"class.__gnu_cxx::new_allocator.2"* %this) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::new_allocator.2"*, align 8 - store %"class.__gnu_cxx::new_allocator.2"* %this, %"class.__gnu_cxx::new_allocator.2"** %this.addr, align 8 - %this1 = load %"class.__gnu_cxx::new_allocator.2"*, %"class.__gnu_cxx::new_allocator.2"** %this.addr, align 8 - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSt18_Bit_iterator_baseC2EPmj(%"struct.std::_Bit_iterator_base"* %this, i64* %__x, i32 %__y) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Bit_iterator_base"*, align 8 - %__x.addr = alloca i64*, align 8 - %__y.addr = alloca i32, align 4 - store %"struct.std::_Bit_iterator_base"* %this, %"struct.std::_Bit_iterator_base"** %this.addr, align 8 - store i64* %__x, i64** %__x.addr, align 8 - store i32 %__y, i32* %__y.addr, align 4 - %this1 = load %"struct.std::_Bit_iterator_base"*, %"struct.std::_Bit_iterator_base"** %this.addr, align 8 - %0 = bitcast %"struct.std::_Bit_iterator_base"* %this1 to %"struct.std::iterator"* - %_M_p = getelementptr inbounds %"struct.std::_Bit_iterator_base", %"struct.std::_Bit_iterator_base"* %this1, i32 0, i32 0 - %1 = load i64*, i64** %__x.addr, align 8 - store i64* %1, i64** %_M_p, align 8 - %_M_offset = getelementptr inbounds %"struct.std::_Bit_iterator_base", %"struct.std::_Bit_iterator_base"* %this1, i32 0, i32 1 - %2 = load i32, i32* %__y.addr, align 4 - store i32 %2, i32* %_M_offset, align 8 - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx13new_allocatorImED2Ev(%"class.__gnu_cxx::new_allocator.2"* %this) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::new_allocator.2"*, align 8 - store %"class.__gnu_cxx::new_allocator.2"* %this, %"class.__gnu_cxx::new_allocator.2"** %this.addr, align 8 - %this1 = load %"class.__gnu_cxx::new_allocator.2"*, %"class.__gnu_cxx::new_allocator.2"** %this.addr, align 8 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %"struct.std::_Rb_tree_node_base"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE5beginEv(%"class.std::_Rb_tree"* %this) #0 comdat align 2 { -entry: - %retval = alloca %"struct.std::_Rb_tree_iterator", align 8 - %this.addr = alloca %"class.std::_Rb_tree"*, align 8 - store %"class.std::_Rb_tree"* %this, %"class.std::_Rb_tree"** %this.addr, align 8 - %this1 = load %"class.std::_Rb_tree"*, %"class.std::_Rb_tree"** %this.addr, align 8 - %_M_impl = getelementptr inbounds %"class.std::_Rb_tree", %"class.std::_Rb_tree"* %this1, i32 0, i32 0 - %0 = bitcast %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl"* %_M_impl to i8* - %add.ptr = getelementptr inbounds i8, i8* %0, i64 8 - %1 = bitcast i8* %add.ptr to %"struct.std::_Rb_tree_header"* - %_M_header = getelementptr inbounds %"struct.std::_Rb_tree_header", %"struct.std::_Rb_tree_header"* %1, i32 0, i32 0 - %_M_left = getelementptr inbounds %"struct.std::_Rb_tree_node_base", %"struct.std::_Rb_tree_node_base"* %_M_header, i32 0, i32 2 - %2 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %_M_left, align 8 - call void @_ZNSt17_Rb_tree_iteratorISt4pairIKhSt6vectorIbSaIbEEEEC2EPSt18_Rb_tree_node_base(%"struct.std::_Rb_tree_iterator"* %retval, %"struct.std::_Rb_tree_node_base"* %2) - %coerce.dive = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %retval, i32 0, i32 0 - %3 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %coerce.dive, align 8 - ret %"struct.std::_Rb_tree_node_base"* %3 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSt17_Rb_tree_iteratorISt4pairIKhSt6vectorIbSaIbEEEEC2EPSt18_Rb_tree_node_base(%"struct.std::_Rb_tree_iterator"* %this, %"struct.std::_Rb_tree_node_base"* %__x) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Rb_tree_iterator"*, align 8 - %__x.addr = alloca %"struct.std::_Rb_tree_node_base"*, align 8 - store %"struct.std::_Rb_tree_iterator"* %this, %"struct.std::_Rb_tree_iterator"** %this.addr, align 8 - store %"struct.std::_Rb_tree_node_base"* %__x, %"struct.std::_Rb_tree_node_base"** %__x.addr, align 8 - %this1 = load %"struct.std::_Rb_tree_iterator"*, %"struct.std::_Rb_tree_iterator"** %this.addr, align 8 - %_M_node = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %this1, i32 0, i32 0 - %0 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %__x.addr, align 8 - store %"struct.std::_Rb_tree_node_base"* %0, %"struct.std::_Rb_tree_node_base"** %_M_node, align 8 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %"struct.std::_Rb_tree_node_base"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE3endEv(%"class.std::_Rb_tree"* %this) #0 comdat align 2 { -entry: - %retval = alloca %"struct.std::_Rb_tree_iterator", align 8 - %this.addr = alloca %"class.std::_Rb_tree"*, align 8 - store %"class.std::_Rb_tree"* %this, %"class.std::_Rb_tree"** %this.addr, align 8 - %this1 = load %"class.std::_Rb_tree"*, %"class.std::_Rb_tree"** %this.addr, align 8 - %_M_impl = getelementptr inbounds %"class.std::_Rb_tree", %"class.std::_Rb_tree"* %this1, i32 0, i32 0 - %0 = bitcast %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl"* %_M_impl to i8* - %add.ptr = getelementptr inbounds i8, i8* %0, i64 8 - %1 = bitcast i8* %add.ptr to %"struct.std::_Rb_tree_header"* - %_M_header = getelementptr inbounds %"struct.std::_Rb_tree_header", %"struct.std::_Rb_tree_header"* %1, i32 0, i32 0 - call void @_ZNSt17_Rb_tree_iteratorISt4pairIKhSt6vectorIbSaIbEEEEC2EPSt18_Rb_tree_node_base(%"struct.std::_Rb_tree_iterator"* %retval, %"struct.std::_Rb_tree_node_base"* %_M_header) - %coerce.dive = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %retval, i32 0, i32 0 - %2 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %coerce.dive, align 8 - ret %"struct.std::_Rb_tree_node_base"* %2 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local i64 @_ZSt10__distanceISt19_Bit_const_iteratorENSt15iterator_traitsIT_E15difference_typeES2_S2_St26random_access_iterator_tag(i64* %__first.coerce0, i32 %__first.coerce1, i64* %__last.coerce0, i32 %__last.coerce1) #0 comdat { -entry: - %__first = alloca %"struct.std::_Bit_const_iterator", align 8 - %__last = alloca %"struct.std::_Bit_const_iterator", align 8 - %0 = alloca %"struct.std::random_access_iterator_tag", align 1 - %1 = bitcast %"struct.std::_Bit_const_iterator"* %__first to { i64*, i32 }* - %2 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %1, i32 0, i32 0 - store i64* %__first.coerce0, i64** %2, align 8 - %3 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %1, i32 0, i32 1 - store i32 %__first.coerce1, i32* %3, align 8 - %4 = bitcast %"struct.std::_Bit_const_iterator"* %__last to { i64*, i32 }* - %5 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %4, i32 0, i32 0 - store i64* %__last.coerce0, i64** %5, align 8 - %6 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %4, i32 0, i32 1 - store i32 %__last.coerce1, i32* %6, align 8 - %7 = bitcast %"struct.std::_Bit_const_iterator"* %__last to %"struct.std::_Bit_iterator_base"* - %8 = bitcast %"struct.std::_Bit_const_iterator"* %__first to %"struct.std::_Bit_iterator_base"* - %call = call i64 @_ZStmiRKSt18_Bit_iterator_baseS1_(%"struct.std::_Bit_iterator_base"* dereferenceable(16) %7, %"struct.std::_Bit_iterator_base"* dereferenceable(16) %8) - ret i64 %call -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZSt19__iterator_categoryISt19_Bit_const_iteratorENSt15iterator_traitsIT_E17iterator_categoryERKS2_(%"struct.std::_Bit_const_iterator"* dereferenceable(16) %0) #6 comdat { -entry: - %.addr = alloca %"struct.std::_Bit_const_iterator"*, align 8 - store %"struct.std::_Bit_const_iterator"* %0, %"struct.std::_Bit_const_iterator"** %.addr, align 8 - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local i64 @_ZStmiRKSt18_Bit_iterator_baseS1_(%"struct.std::_Bit_iterator_base"* dereferenceable(16) %__x, %"struct.std::_Bit_iterator_base"* dereferenceable(16) %__y) #6 comdat { -entry: - %__x.addr = alloca %"struct.std::_Bit_iterator_base"*, align 8 - %__y.addr = alloca %"struct.std::_Bit_iterator_base"*, align 8 - store %"struct.std::_Bit_iterator_base"* %__x, %"struct.std::_Bit_iterator_base"** %__x.addr, align 8 - store %"struct.std::_Bit_iterator_base"* %__y, %"struct.std::_Bit_iterator_base"** %__y.addr, align 8 - %0 = load %"struct.std::_Bit_iterator_base"*, %"struct.std::_Bit_iterator_base"** %__x.addr, align 8 - %_M_p = getelementptr inbounds %"struct.std::_Bit_iterator_base", %"struct.std::_Bit_iterator_base"* %0, i32 0, i32 0 - %1 = load i64*, i64** %_M_p, align 8 - %2 = load %"struct.std::_Bit_iterator_base"*, %"struct.std::_Bit_iterator_base"** %__y.addr, align 8 - %_M_p1 = getelementptr inbounds %"struct.std::_Bit_iterator_base", %"struct.std::_Bit_iterator_base"* %2, i32 0, i32 0 - %3 = load i64*, i64** %_M_p1, align 8 - %sub.ptr.lhs.cast = ptrtoint i64* %1 to i64 - %sub.ptr.rhs.cast = ptrtoint i64* %3 to i64 - %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast - %sub.ptr.div = sdiv exact i64 %sub.ptr.sub, 8 - %mul = mul nsw i64 64, %sub.ptr.div - %4 = load %"struct.std::_Bit_iterator_base"*, %"struct.std::_Bit_iterator_base"** %__x.addr, align 8 - %_M_offset = getelementptr inbounds %"struct.std::_Bit_iterator_base", %"struct.std::_Bit_iterator_base"* %4, i32 0, i32 1 - %5 = load i32, i32* %_M_offset, align 8 - %conv = zext i32 %5 to i64 - %add = add nsw i64 %mul, %conv - %6 = load %"struct.std::_Bit_iterator_base"*, %"struct.std::_Bit_iterator_base"** %__y.addr, align 8 - %_M_offset2 = getelementptr inbounds %"struct.std::_Bit_iterator_base", %"struct.std::_Bit_iterator_base"* %6, i32 0, i32 1 - %7 = load i32, i32* %_M_offset2, align 8 - %conv3 = zext i32 %7 to i64 - %sub = sub nsw i64 %add, %conv3 - ret i64 %sub -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %"struct.std::pair"* @_ZNKSt13_Rb_tree_nodeISt4pairIKhSt6vectorIbSaIbEEEE9_M_valptrEv(%"struct.std::_Rb_tree_node"* %this) #0 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Rb_tree_node"*, align 8 - store %"struct.std::_Rb_tree_node"* %this, %"struct.std::_Rb_tree_node"** %this.addr, align 8 - %this1 = load %"struct.std::_Rb_tree_node"*, %"struct.std::_Rb_tree_node"** %this.addr, align 8 - %_M_value_field = getelementptr inbounds %"struct.std::_Rb_tree_node", %"struct.std::_Rb_tree_node"* %this1, i32 0, i32 1 - %call = call %"struct.std::pair"* @_ZSt11__addressofIKSt4pairIKhSt6vectorIbSaIbEEEEPT_RS7_(%"struct.std::pair"* dereferenceable(48) %_M_value_field) - ret %"struct.std::pair"* %call -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local %"struct.std::pair"* @_ZSt11__addressofIKSt4pairIKhSt6vectorIbSaIbEEEEPT_RS7_(%"struct.std::pair"* dereferenceable(48) %__r) #6 comdat { -entry: - %__r.addr = alloca %"struct.std::pair"*, align 8 - store %"struct.std::pair"* %__r, %"struct.std::pair"** %__r.addr, align 8 - %0 = load %"struct.std::pair"*, %"struct.std::pair"** %__r.addr, align 8 - ret %"struct.std::pair"* %0 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSt19_Bit_const_iteratorC2ERKSt13_Bit_iterator(%"struct.std::_Bit_const_iterator"* %this, %"struct.std::_Bit_iterator"* dereferenceable(16) %__x) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Bit_const_iterator"*, align 8 - %__x.addr = alloca %"struct.std::_Bit_iterator"*, align 8 - store %"struct.std::_Bit_const_iterator"* %this, %"struct.std::_Bit_const_iterator"** %this.addr, align 8 - store %"struct.std::_Bit_iterator"* %__x, %"struct.std::_Bit_iterator"** %__x.addr, align 8 - %this1 = load %"struct.std::_Bit_const_iterator"*, %"struct.std::_Bit_const_iterator"** %this.addr, align 8 - %0 = bitcast %"struct.std::_Bit_const_iterator"* %this1 to %"struct.std::_Bit_iterator_base"* - %1 = load %"struct.std::_Bit_iterator"*, %"struct.std::_Bit_iterator"** %__x.addr, align 8 - %2 = bitcast %"struct.std::_Bit_iterator"* %1 to %"struct.std::_Bit_iterator_base"* - %_M_p = getelementptr inbounds %"struct.std::_Bit_iterator_base", %"struct.std::_Bit_iterator_base"* %2, i32 0, i32 0 - %3 = load i64*, i64** %_M_p, align 8 - %4 = load %"struct.std::_Bit_iterator"*, %"struct.std::_Bit_iterator"** %__x.addr, align 8 - %5 = bitcast %"struct.std::_Bit_iterator"* %4 to %"struct.std::_Bit_iterator_base"* - %_M_offset = getelementptr inbounds %"struct.std::_Bit_iterator_base", %"struct.std::_Bit_iterator_base"* %5, i32 0, i32 1 - %6 = load i32, i32* %_M_offset, align 8 - call void @_ZNSt18_Bit_iterator_baseC2EPmj(%"struct.std::_Bit_iterator_base"* %0, i64* %3, i32 %6) - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSt19_Bit_const_iteratorC2EPmj(%"struct.std::_Bit_const_iterator"* %this, i64* %__x, i32 %__y) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Bit_const_iterator"*, align 8 - %__x.addr = alloca i64*, align 8 - %__y.addr = alloca i32, align 4 - store %"struct.std::_Bit_const_iterator"* %this, %"struct.std::_Bit_const_iterator"** %this.addr, align 8 - store i64* %__x, i64** %__x.addr, align 8 - store i32 %__y, i32* %__y.addr, align 4 - %this1 = load %"struct.std::_Bit_const_iterator"*, %"struct.std::_Bit_const_iterator"** %this.addr, align 8 - %0 = bitcast %"struct.std::_Bit_const_iterator"* %this1 to %"struct.std::_Bit_iterator_base"* - %1 = load i64*, i64** %__x.addr, align 8 - %2 = load i32, i32* %__y.addr, align 4 - call void @_ZNSt18_Bit_iterator_baseC2EPmj(%"struct.std::_Bit_iterator_base"* %0, i64* %1, i32 %2) - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local zeroext i1 @_ZNKSt19_Bit_const_iteratordeEv(%"struct.std::_Bit_const_iterator"* %this) #0 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Bit_const_iterator"*, align 8 - %ref.tmp = alloca %"struct.std::_Bit_reference", align 8 - store %"struct.std::_Bit_const_iterator"* %this, %"struct.std::_Bit_const_iterator"** %this.addr, align 8 - %this1 = load %"struct.std::_Bit_const_iterator"*, %"struct.std::_Bit_const_iterator"** %this.addr, align 8 - %0 = bitcast %"struct.std::_Bit_const_iterator"* %this1 to %"struct.std::_Bit_iterator_base"* - %_M_p = getelementptr inbounds %"struct.std::_Bit_iterator_base", %"struct.std::_Bit_iterator_base"* %0, i32 0, i32 0 - %1 = load i64*, i64** %_M_p, align 8 - %2 = bitcast %"struct.std::_Bit_const_iterator"* %this1 to %"struct.std::_Bit_iterator_base"* - %_M_offset = getelementptr inbounds %"struct.std::_Bit_iterator_base", %"struct.std::_Bit_iterator_base"* %2, i32 0, i32 1 - %3 = load i32, i32* %_M_offset, align 8 - %sh_prom = zext i32 %3 to i64 - %shl = shl i64 1, %sh_prom - call void @_ZNSt14_Bit_referenceC2EPmm(%"struct.std::_Bit_reference"* %ref.tmp, i64* %1, i64 %shl) - %call = call zeroext i1 @_ZNKSt14_Bit_referencecvbEv(%"struct.std::_Bit_reference"* %ref.tmp) - ret i1 %call -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSt14_Bit_referenceC2EPmm(%"struct.std::_Bit_reference"* %this, i64* %__x, i64 %__y) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Bit_reference"*, align 8 - %__x.addr = alloca i64*, align 8 - %__y.addr = alloca i64, align 8 - store %"struct.std::_Bit_reference"* %this, %"struct.std::_Bit_reference"** %this.addr, align 8 - store i64* %__x, i64** %__x.addr, align 8 - store i64 %__y, i64* %__y.addr, align 8 - %this1 = load %"struct.std::_Bit_reference"*, %"struct.std::_Bit_reference"** %this.addr, align 8 - %_M_p = getelementptr inbounds %"struct.std::_Bit_reference", %"struct.std::_Bit_reference"* %this1, i32 0, i32 0 - %0 = load i64*, i64** %__x.addr, align 8 - store i64* %0, i64** %_M_p, align 8 - %_M_mask = getelementptr inbounds %"struct.std::_Bit_reference", %"struct.std::_Bit_reference"* %this1, i32 0, i32 1 - %1 = load i64, i64* %__y.addr, align 8 - store i64 %1, i64* %_M_mask, align 8 - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local zeroext i1 @_ZNKSt14_Bit_referencecvbEv(%"struct.std::_Bit_reference"* %this) #6 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Bit_reference"*, align 8 - store %"struct.std::_Bit_reference"* %this, %"struct.std::_Bit_reference"** %this.addr, align 8 - %this1 = load %"struct.std::_Bit_reference"*, %"struct.std::_Bit_reference"** %this.addr, align 8 - %_M_p = getelementptr inbounds %"struct.std::_Bit_reference", %"struct.std::_Bit_reference"* %this1, i32 0, i32 0 - %0 = load i64*, i64** %_M_p, align 8 - %1 = load i64, i64* %0, align 8 - %_M_mask = getelementptr inbounds %"struct.std::_Bit_reference", %"struct.std::_Bit_reference"* %this1, i32 0, i32 1 - %2 = load i64, i64* %_M_mask, align 8 - %and = and i64 %1, %2 - %tobool = icmp ne i64 %and, 0 - %lnot = xor i1 %tobool, true - %lnot2 = xor i1 %lnot, true - ret i1 %lnot2 -} - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.powi.f32(float, i32) #13 - -; Function Attrs: nounwind readonly -declare dso_local %"struct.std::_Rb_tree_node_base"* @_ZSt18_Rb_tree_incrementPKSt18_Rb_tree_node_base(%"struct.std::_Rb_tree_node_base"*) #14 - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EED2Ev(%"class.std::_Rb_tree"* %this) unnamed_addr #0 comdat align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -entry: - %this.addr = alloca %"class.std::_Rb_tree"*, align 8 - %exn.slot = alloca i8* - %ehselector.slot = alloca i32 - store %"class.std::_Rb_tree"* %this, %"class.std::_Rb_tree"** %this.addr, align 8 - %this1 = load %"class.std::_Rb_tree"*, %"class.std::_Rb_tree"** %this.addr, align 8 - %call = invoke %"struct.std::_Rb_tree_node"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE8_M_beginEv(%"class.std::_Rb_tree"* %this1) - to label %invoke.cont unwind label %lpad - -invoke.cont: ; preds = %entry - invoke void @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE8_M_eraseEPSt13_Rb_tree_nodeIS5_E(%"class.std::_Rb_tree"* %this1, %"struct.std::_Rb_tree_node"* %call) - to label %invoke.cont2 unwind label %lpad - -invoke.cont2: ; preds = %invoke.cont - %_M_impl = getelementptr inbounds %"class.std::_Rb_tree", %"class.std::_Rb_tree"* %this1, i32 0, i32 0 - call void @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE13_Rb_tree_implIS9_Lb0EED2Ev(%"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl"* %_M_impl) #3 - ret void - -lpad: ; preds = %invoke.cont, %entry - %0 = landingpad { i8*, i32 } - cleanup - %1 = extractvalue { i8*, i32 } %0, 0 - store i8* %1, i8** %exn.slot, align 8 - %2 = extractvalue { i8*, i32 } %0, 1 - store i32 %2, i32* %ehselector.slot, align 4 - %_M_impl3 = getelementptr inbounds %"class.std::_Rb_tree", %"class.std::_Rb_tree"* %this1, i32 0, i32 0 - call void @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE13_Rb_tree_implIS9_Lb0EED2Ev(%"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl"* %_M_impl3) #3 - br label %eh.resume - -eh.resume: ; preds = %lpad - %exn = load i8*, i8** %exn.slot, align 8 - %sel = load i32, i32* %ehselector.slot, align 4 - %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0 - %lpad.val4 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1 - resume { i8*, i32 } %lpad.val4 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE8_M_eraseEPSt13_Rb_tree_nodeIS5_E(%"class.std::_Rb_tree"* %this, %"struct.std::_Rb_tree_node"* %__x) #0 comdat align 2 { -entry: - %this.addr = alloca %"class.std::_Rb_tree"*, align 8 - %__x.addr = alloca %"struct.std::_Rb_tree_node"*, align 8 - %__y = alloca %"struct.std::_Rb_tree_node"*, align 8 - store %"class.std::_Rb_tree"* %this, %"class.std::_Rb_tree"** %this.addr, align 8 - store %"struct.std::_Rb_tree_node"* %__x, %"struct.std::_Rb_tree_node"** %__x.addr, align 8 - %this1 = load %"class.std::_Rb_tree"*, %"class.std::_Rb_tree"** %this.addr, align 8 - br label %while.cond - -while.cond: ; preds = %while.body, %entry - %0 = load %"struct.std::_Rb_tree_node"*, %"struct.std::_Rb_tree_node"** %__x.addr, align 8 - %cmp = icmp ne %"struct.std::_Rb_tree_node"* %0, null - br i1 %cmp, label %while.body, label %while.end - -while.body: ; preds = %while.cond - %1 = load %"struct.std::_Rb_tree_node"*, %"struct.std::_Rb_tree_node"** %__x.addr, align 8 - %2 = bitcast %"struct.std::_Rb_tree_node"* %1 to %"struct.std::_Rb_tree_node_base"* - %call = call %"struct.std::_Rb_tree_node"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE8_S_rightEPSt18_Rb_tree_node_base(%"struct.std::_Rb_tree_node_base"* %2) - call void @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE8_M_eraseEPSt13_Rb_tree_nodeIS5_E(%"class.std::_Rb_tree"* %this1, %"struct.std::_Rb_tree_node"* %call) - %3 = load %"struct.std::_Rb_tree_node"*, %"struct.std::_Rb_tree_node"** %__x.addr, align 8 - %4 = bitcast %"struct.std::_Rb_tree_node"* %3 to %"struct.std::_Rb_tree_node_base"* - %call2 = call %"struct.std::_Rb_tree_node"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE7_S_leftEPSt18_Rb_tree_node_base(%"struct.std::_Rb_tree_node_base"* %4) - store %"struct.std::_Rb_tree_node"* %call2, %"struct.std::_Rb_tree_node"** %__y, align 8 - %5 = load %"struct.std::_Rb_tree_node"*, %"struct.std::_Rb_tree_node"** %__x.addr, align 8 - call void @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE12_M_drop_nodeEPSt13_Rb_tree_nodeIS5_E(%"class.std::_Rb_tree"* %this1, %"struct.std::_Rb_tree_node"* %5) - %6 = load %"struct.std::_Rb_tree_node"*, %"struct.std::_Rb_tree_node"** %__y, align 8 - store %"struct.std::_Rb_tree_node"* %6, %"struct.std::_Rb_tree_node"** %__x.addr, align 8 - br label %while.cond - -while.end: ; preds = %while.cond - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local %"struct.std::_Rb_tree_node"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE8_M_beginEv(%"class.std::_Rb_tree"* %this) #6 comdat align 2 { -entry: - %this.addr = alloca %"class.std::_Rb_tree"*, align 8 - store %"class.std::_Rb_tree"* %this, %"class.std::_Rb_tree"** %this.addr, align 8 - %this1 = load %"class.std::_Rb_tree"*, %"class.std::_Rb_tree"** %this.addr, align 8 - %_M_impl = getelementptr inbounds %"class.std::_Rb_tree", %"class.std::_Rb_tree"* %this1, i32 0, i32 0 - %0 = bitcast %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl"* %_M_impl to i8* - %add.ptr = getelementptr inbounds i8, i8* %0, i64 8 - %1 = bitcast i8* %add.ptr to %"struct.std::_Rb_tree_header"* - %_M_header = getelementptr inbounds %"struct.std::_Rb_tree_header", %"struct.std::_Rb_tree_header"* %1, i32 0, i32 0 - %_M_parent = getelementptr inbounds %"struct.std::_Rb_tree_node_base", %"struct.std::_Rb_tree_node_base"* %_M_header, i32 0, i32 1 - %2 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %_M_parent, align 8 - %3 = bitcast %"struct.std::_Rb_tree_node_base"* %2 to %"struct.std::_Rb_tree_node"* - ret %"struct.std::_Rb_tree_node"* %3 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE13_Rb_tree_implIS9_Lb0EED2Ev(%"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl"* %this) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl"*, align 8 - store %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl"* %this, %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl"** %this.addr, align 8 - %this1 = load %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl"*, %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl"** %this.addr, align 8 - %0 = bitcast %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl"* %this1 to %"class.std::allocator.4"* - call void @_ZNSaISt13_Rb_tree_nodeISt4pairIKhSt6vectorIbSaIbEEEEED2Ev(%"class.std::allocator.4"* %0) #3 - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local %"struct.std::_Rb_tree_node"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE8_S_rightEPSt18_Rb_tree_node_base(%"struct.std::_Rb_tree_node_base"* %__x) #6 comdat align 2 { -entry: - %__x.addr = alloca %"struct.std::_Rb_tree_node_base"*, align 8 - store %"struct.std::_Rb_tree_node_base"* %__x, %"struct.std::_Rb_tree_node_base"** %__x.addr, align 8 - %0 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %__x.addr, align 8 - %_M_right = getelementptr inbounds %"struct.std::_Rb_tree_node_base", %"struct.std::_Rb_tree_node_base"* %0, i32 0, i32 3 - %1 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %_M_right, align 8 - %2 = bitcast %"struct.std::_Rb_tree_node_base"* %1 to %"struct.std::_Rb_tree_node"* - ret %"struct.std::_Rb_tree_node"* %2 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local %"struct.std::_Rb_tree_node"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE7_S_leftEPSt18_Rb_tree_node_base(%"struct.std::_Rb_tree_node_base"* %__x) #6 comdat align 2 { -entry: - %__x.addr = alloca %"struct.std::_Rb_tree_node_base"*, align 8 - store %"struct.std::_Rb_tree_node_base"* %__x, %"struct.std::_Rb_tree_node_base"** %__x.addr, align 8 - %0 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %__x.addr, align 8 - %_M_left = getelementptr inbounds %"struct.std::_Rb_tree_node_base", %"struct.std::_Rb_tree_node_base"* %0, i32 0, i32 2 - %1 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %_M_left, align 8 - %2 = bitcast %"struct.std::_Rb_tree_node_base"* %1 to %"struct.std::_Rb_tree_node"* - ret %"struct.std::_Rb_tree_node"* %2 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE12_M_drop_nodeEPSt13_Rb_tree_nodeIS5_E(%"class.std::_Rb_tree"* %this, %"struct.std::_Rb_tree_node"* %__p) #0 comdat align 2 { -entry: - %this.addr = alloca %"class.std::_Rb_tree"*, align 8 - %__p.addr = alloca %"struct.std::_Rb_tree_node"*, align 8 - store %"class.std::_Rb_tree"* %this, %"class.std::_Rb_tree"** %this.addr, align 8 - store %"struct.std::_Rb_tree_node"* %__p, %"struct.std::_Rb_tree_node"** %__p.addr, align 8 - %this1 = load %"class.std::_Rb_tree"*, %"class.std::_Rb_tree"** %this.addr, align 8 - %0 = load %"struct.std::_Rb_tree_node"*, %"struct.std::_Rb_tree_node"** %__p.addr, align 8 - call void @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE15_M_destroy_nodeEPSt13_Rb_tree_nodeIS5_E(%"class.std::_Rb_tree"* %this1, %"struct.std::_Rb_tree_node"* %0) - %1 = load %"struct.std::_Rb_tree_node"*, %"struct.std::_Rb_tree_node"** %__p.addr, align 8 - call void @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE11_M_put_nodeEPSt13_Rb_tree_nodeIS5_E(%"class.std::_Rb_tree"* %this1, %"struct.std::_Rb_tree_node"* %1) - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE15_M_destroy_nodeEPSt13_Rb_tree_nodeIS5_E(%"class.std::_Rb_tree"* %this, %"struct.std::_Rb_tree_node"* %__p) #0 comdat align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -entry: - %this.addr = alloca %"class.std::_Rb_tree"*, align 8 - %__p.addr = alloca %"struct.std::_Rb_tree_node"*, align 8 - %ref.tmp = alloca %"class.std::allocator.7", align 1 - %exn.slot = alloca i8* - %ehselector.slot = alloca i32 - store %"class.std::_Rb_tree"* %this, %"class.std::_Rb_tree"** %this.addr, align 8 - store %"struct.std::_Rb_tree_node"* %__p, %"struct.std::_Rb_tree_node"** %__p.addr, align 8 - %this1 = load %"class.std::_Rb_tree"*, %"class.std::_Rb_tree"** %this.addr, align 8 - call void @_ZNKSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE13get_allocatorEv(%"class.std::allocator.7"* sret %ref.tmp, %"class.std::_Rb_tree"* %this1) - %0 = bitcast %"class.std::allocator.7"* %ref.tmp to %"class.__gnu_cxx::new_allocator.8"* - %1 = load %"struct.std::_Rb_tree_node"*, %"struct.std::_Rb_tree_node"** %__p.addr, align 8 - %call = invoke %"struct.std::pair"* @_ZNSt13_Rb_tree_nodeISt4pairIKhSt6vectorIbSaIbEEEE9_M_valptrEv(%"struct.std::_Rb_tree_node"* %1) - to label %invoke.cont unwind label %lpad - -invoke.cont: ; preds = %entry - invoke void @_ZN9__gnu_cxx13new_allocatorISt4pairIKhSt6vectorIbSaIbEEEE7destroyEPS6_(%"class.__gnu_cxx::new_allocator.8"* %0, %"struct.std::pair"* %call) - to label %invoke.cont2 unwind label %lpad - -invoke.cont2: ; preds = %invoke.cont - call void @_ZNSaISt4pairIKhSt6vectorIbSaIbEEEED2Ev(%"class.std::allocator.7"* %ref.tmp) #3 - ret void - -lpad: ; preds = %invoke.cont, %entry - %2 = landingpad { i8*, i32 } - cleanup - %3 = extractvalue { i8*, i32 } %2, 0 - store i8* %3, i8** %exn.slot, align 8 - %4 = extractvalue { i8*, i32 } %2, 1 - store i32 %4, i32* %ehselector.slot, align 4 - call void @_ZNSaISt4pairIKhSt6vectorIbSaIbEEEED2Ev(%"class.std::allocator.7"* %ref.tmp) #3 - br label %eh.resume - -eh.resume: ; preds = %lpad - %exn = load i8*, i8** %exn.slot, align 8 - %sel = load i32, i32* %ehselector.slot, align 4 - %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0 - %lpad.val3 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1 - resume { i8*, i32 } %lpad.val3 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE11_M_put_nodeEPSt13_Rb_tree_nodeIS5_E(%"class.std::_Rb_tree"* %this, %"struct.std::_Rb_tree_node"* %__p) #0 comdat align 2 { -entry: - %this.addr = alloca %"class.std::_Rb_tree"*, align 8 - %__p.addr = alloca %"struct.std::_Rb_tree_node"*, align 8 - store %"class.std::_Rb_tree"* %this, %"class.std::_Rb_tree"** %this.addr, align 8 - store %"struct.std::_Rb_tree_node"* %__p, %"struct.std::_Rb_tree_node"** %__p.addr, align 8 - %this1 = load %"class.std::_Rb_tree"*, %"class.std::_Rb_tree"** %this.addr, align 8 - %call = call dereferenceable(1) %"class.std::allocator.4"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE21_M_get_Node_allocatorEv(%"class.std::_Rb_tree"* %this1) - %0 = load %"struct.std::_Rb_tree_node"*, %"struct.std::_Rb_tree_node"** %__p.addr, align 8 - call void @_ZN9__gnu_cxx14__alloc_traitsISaISt13_Rb_tree_nodeISt4pairIKhSt6vectorIbSaIbEEEEEE10deallocateERS9_PS8_m(%"class.std::allocator.4"* dereferenceable(1) %call, %"struct.std::_Rb_tree_node"* %0, i64 1) - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNKSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE13get_allocatorEv(%"class.std::allocator.7"* noalias sret %agg.result, %"class.std::_Rb_tree"* %this) #0 comdat align 2 { -entry: - %result.ptr = alloca i8*, align 8 - %this.addr = alloca %"class.std::_Rb_tree"*, align 8 - %0 = bitcast %"class.std::allocator.7"* %agg.result to i8* - store i8* %0, i8** %result.ptr, align 8 - store %"class.std::_Rb_tree"* %this, %"class.std::_Rb_tree"** %this.addr, align 8 - %this1 = load %"class.std::_Rb_tree"*, %"class.std::_Rb_tree"** %this.addr, align 8 - %call = call dereferenceable(1) %"class.std::allocator.4"* @_ZNKSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE21_M_get_Node_allocatorEv(%"class.std::_Rb_tree"* %this1) - call void @_ZNSaISt4pairIKhSt6vectorIbSaIbEEEEC2ISt13_Rb_tree_nodeIS4_EEERKSaIT_E(%"class.std::allocator.7"* %agg.result, %"class.std::allocator.4"* dereferenceable(1) %call) #3 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx13new_allocatorISt4pairIKhSt6vectorIbSaIbEEEE7destroyEPS6_(%"class.__gnu_cxx::new_allocator.8"* %this, %"struct.std::pair"* %__p) #0 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::new_allocator.8"*, align 8 - %__p.addr = alloca %"struct.std::pair"*, align 8 - store %"class.__gnu_cxx::new_allocator.8"* %this, %"class.__gnu_cxx::new_allocator.8"** %this.addr, align 8 - store %"struct.std::pair"* %__p, %"struct.std::pair"** %__p.addr, align 8 - %this1 = load %"class.__gnu_cxx::new_allocator.8"*, %"class.__gnu_cxx::new_allocator.8"** %this.addr, align 8 - %0 = load %"struct.std::pair"*, %"struct.std::pair"** %__p.addr, align 8 - call void @_ZNSt4pairIKhSt6vectorIbSaIbEEED2Ev(%"struct.std::pair"* %0) - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %"struct.std::pair"* @_ZNSt13_Rb_tree_nodeISt4pairIKhSt6vectorIbSaIbEEEE9_M_valptrEv(%"struct.std::_Rb_tree_node"* %this) #0 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Rb_tree_node"*, align 8 - store %"struct.std::_Rb_tree_node"* %this, %"struct.std::_Rb_tree_node"** %this.addr, align 8 - %this1 = load %"struct.std::_Rb_tree_node"*, %"struct.std::_Rb_tree_node"** %this.addr, align 8 - %_M_value_field = getelementptr inbounds %"struct.std::_Rb_tree_node", %"struct.std::_Rb_tree_node"* %this1, i32 0, i32 1 - %call = call %"struct.std::pair"* @_ZSt11__addressofISt4pairIKhSt6vectorIbSaIbEEEEPT_RS6_(%"struct.std::pair"* dereferenceable(48) %_M_value_field) - ret %"struct.std::pair"* %call -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSaISt4pairIKhSt6vectorIbSaIbEEEED2Ev(%"class.std::allocator.7"* %this) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"class.std::allocator.7"*, align 8 - store %"class.std::allocator.7"* %this, %"class.std::allocator.7"** %this.addr, align 8 - %this1 = load %"class.std::allocator.7"*, %"class.std::allocator.7"** %this.addr, align 8 - %0 = bitcast %"class.std::allocator.7"* %this1 to %"class.__gnu_cxx::new_allocator.8"* - call void @_ZN9__gnu_cxx13new_allocatorISt4pairIKhSt6vectorIbSaIbEEEED2Ev(%"class.__gnu_cxx::new_allocator.8"* %0) #3 - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local dereferenceable(1) %"class.std::allocator.4"* @_ZNKSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE21_M_get_Node_allocatorEv(%"class.std::_Rb_tree"* %this) #6 comdat align 2 { -entry: - %this.addr = alloca %"class.std::_Rb_tree"*, align 8 - store %"class.std::_Rb_tree"* %this, %"class.std::_Rb_tree"** %this.addr, align 8 - %this1 = load %"class.std::_Rb_tree"*, %"class.std::_Rb_tree"** %this.addr, align 8 - %_M_impl = getelementptr inbounds %"class.std::_Rb_tree", %"class.std::_Rb_tree"* %this1, i32 0, i32 0 - %0 = bitcast %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl"* %_M_impl to %"class.std::allocator.4"* - ret %"class.std::allocator.4"* %0 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSaISt4pairIKhSt6vectorIbSaIbEEEEC2ISt13_Rb_tree_nodeIS4_EEERKSaIT_E(%"class.std::allocator.7"* %this, %"class.std::allocator.4"* dereferenceable(1) %0) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"class.std::allocator.7"*, align 8 - %.addr = alloca %"class.std::allocator.4"*, align 8 - store %"class.std::allocator.7"* %this, %"class.std::allocator.7"** %this.addr, align 8 - store %"class.std::allocator.4"* %0, %"class.std::allocator.4"** %.addr, align 8 - %this1 = load %"class.std::allocator.7"*, %"class.std::allocator.7"** %this.addr, align 8 - %1 = bitcast %"class.std::allocator.7"* %this1 to %"class.__gnu_cxx::new_allocator.8"* - call void @_ZN9__gnu_cxx13new_allocatorISt4pairIKhSt6vectorIbSaIbEEEEC2Ev(%"class.__gnu_cxx::new_allocator.8"* %1) #3 - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx13new_allocatorISt4pairIKhSt6vectorIbSaIbEEEEC2Ev(%"class.__gnu_cxx::new_allocator.8"* %this) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::new_allocator.8"*, align 8 - store %"class.__gnu_cxx::new_allocator.8"* %this, %"class.__gnu_cxx::new_allocator.8"** %this.addr, align 8 - %this1 = load %"class.__gnu_cxx::new_allocator.8"*, %"class.__gnu_cxx::new_allocator.8"** %this.addr, align 8 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt4pairIKhSt6vectorIbSaIbEEED2Ev(%"struct.std::pair"* %this) unnamed_addr #0 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::pair"*, align 8 - store %"struct.std::pair"* %this, %"struct.std::pair"** %this.addr, align 8 - %this1 = load %"struct.std::pair"*, %"struct.std::pair"** %this.addr, align 8 - %second = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %this1, i32 0, i32 1 - call void @_ZNSt6vectorIbSaIbEED2Ev(%"class.std::vector.0"* %second) - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local %"struct.std::pair"* @_ZSt11__addressofISt4pairIKhSt6vectorIbSaIbEEEEPT_RS6_(%"struct.std::pair"* dereferenceable(48) %__r) #6 comdat { -entry: - %__r.addr = alloca %"struct.std::pair"*, align 8 - store %"struct.std::pair"* %__r, %"struct.std::pair"** %__r.addr, align 8 - %0 = load %"struct.std::pair"*, %"struct.std::pair"** %__r.addr, align 8 - ret %"struct.std::pair"* %0 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx13new_allocatorISt4pairIKhSt6vectorIbSaIbEEEED2Ev(%"class.__gnu_cxx::new_allocator.8"* %this) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::new_allocator.8"*, align 8 - store %"class.__gnu_cxx::new_allocator.8"* %this, %"class.__gnu_cxx::new_allocator.8"** %this.addr, align 8 - %this1 = load %"class.__gnu_cxx::new_allocator.8"*, %"class.__gnu_cxx::new_allocator.8"** %this.addr, align 8 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx14__alloc_traitsISaISt13_Rb_tree_nodeISt4pairIKhSt6vectorIbSaIbEEEEEE10deallocateERS9_PS8_m(%"class.std::allocator.4"* dereferenceable(1) %__a, %"struct.std::_Rb_tree_node"* %__p, i64 %__n) #0 comdat align 2 { -entry: - %__a.addr = alloca %"class.std::allocator.4"*, align 8 - %__p.addr = alloca %"struct.std::_Rb_tree_node"*, align 8 - %__n.addr = alloca i64, align 8 - store %"class.std::allocator.4"* %__a, %"class.std::allocator.4"** %__a.addr, align 8 - store %"struct.std::_Rb_tree_node"* %__p, %"struct.std::_Rb_tree_node"** %__p.addr, align 8 - store i64 %__n, i64* %__n.addr, align 8 - %0 = load %"class.std::allocator.4"*, %"class.std::allocator.4"** %__a.addr, align 8 - %1 = bitcast %"class.std::allocator.4"* %0 to %"class.__gnu_cxx::new_allocator.5"* - %2 = load %"struct.std::_Rb_tree_node"*, %"struct.std::_Rb_tree_node"** %__p.addr, align 8 - %3 = load i64, i64* %__n.addr, align 8 - call void @_ZN9__gnu_cxx13new_allocatorISt13_Rb_tree_nodeISt4pairIKhSt6vectorIbSaIbEEEEE10deallocateEPS8_m(%"class.__gnu_cxx::new_allocator.5"* %1, %"struct.std::_Rb_tree_node"* %2, i64 %3) - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local dereferenceable(1) %"class.std::allocator.4"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE21_M_get_Node_allocatorEv(%"class.std::_Rb_tree"* %this) #6 comdat align 2 { -entry: - %this.addr = alloca %"class.std::_Rb_tree"*, align 8 - store %"class.std::_Rb_tree"* %this, %"class.std::_Rb_tree"** %this.addr, align 8 - %this1 = load %"class.std::_Rb_tree"*, %"class.std::_Rb_tree"** %this.addr, align 8 - %_M_impl = getelementptr inbounds %"class.std::_Rb_tree", %"class.std::_Rb_tree"* %this1, i32 0, i32 0 - %0 = bitcast %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl"* %_M_impl to %"class.std::allocator.4"* - ret %"class.std::allocator.4"* %0 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx13new_allocatorISt13_Rb_tree_nodeISt4pairIKhSt6vectorIbSaIbEEEEE10deallocateEPS8_m(%"class.__gnu_cxx::new_allocator.5"* %this, %"struct.std::_Rb_tree_node"* %__p, i64 %0) #6 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::new_allocator.5"*, align 8 - %__p.addr = alloca %"struct.std::_Rb_tree_node"*, align 8 - %.addr = alloca i64, align 8 - store %"class.__gnu_cxx::new_allocator.5"* %this, %"class.__gnu_cxx::new_allocator.5"** %this.addr, align 8 - store %"struct.std::_Rb_tree_node"* %__p, %"struct.std::_Rb_tree_node"** %__p.addr, align 8 - store i64 %0, i64* %.addr, align 8 - %this1 = load %"class.__gnu_cxx::new_allocator.5"*, %"class.__gnu_cxx::new_allocator.5"** %this.addr, align 8 - %1 = load %"struct.std::_Rb_tree_node"*, %"struct.std::_Rb_tree_node"** %__p.addr, align 8 - %2 = bitcast %"struct.std::_Rb_tree_node"* %1 to i8* - call void @_ZdlPv(i8* %2) #3 - ret void -} - -; Function Attrs: noreturn nounwind -declare dso_local void @__assert_fail(i8*, i8*, i32, i8*) #5 - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local float @_ZSt4ceilf(float %__x) #6 comdat { -entry: - %__x.addr = alloca float, align 4 - store float %__x, float* %__x.addr, align 4 - %0 = load float, float* %__x.addr, align 4 - %1 = call float @llvm.ceil.f32(float %0) - ret float %1 -} - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.ceil.f32(float) #13 - -; Function Attrs: noinline optnone uwtable -define internal void @_ZL21prescanArrayRecursivePjPKjii(i32* %outArray, i32* %inArray, i32 %numElements, i32 %level) #0 { -entry: - %outArray.addr = alloca i32*, align 8 - %inArray.addr = alloca i32*, align 8 - %numElements.addr = alloca i32, align 4 - %level.addr = alloca i32, align 4 - %blockSize = alloca i32, align 4 - %numBlocks = alloca i32, align 4 - %numThreads = alloca i32, align 4 - %numEltsPerBlock = alloca i32, align 4 - %numEltsLastBlock = alloca i32, align 4 - %numThreadsLastBlock = alloca i32, align 4 - %np2LastBlock = alloca i32, align 4 - %sharedMemLastBlock = alloca i32, align 4 - %extraSpace = alloca i32, align 4 - %extraSpace39 = alloca i32, align 4 - %sharedMemSize = alloca i32, align 4 - %grid = alloca %struct.dim3, align 4 - %threads = alloca %struct.dim3, align 4 - %err = alloca i32, align 4 - %agg.tmp = alloca %struct.dim3, align 4 - %agg.tmp60 = alloca %struct.dim3, align 4 - %agg.tmp.coerce = alloca { i64, i32 }, align 4 - %agg.tmp60.coerce = alloca { i64, i32 }, align 4 - %err64 = alloca i32, align 4 - %agg.tmp73 = alloca %struct.dim3, align 4 - %agg.tmp74 = alloca %struct.dim3, align 4 - %agg.tmp73.coerce = alloca { i64, i32 }, align 4 - %agg.tmp74.coerce = alloca { i64, i32 }, align 4 - %err84 = alloca i32, align 4 - %agg.tmp97 = alloca %struct.dim3, align 4 - %agg.tmp98 = alloca %struct.dim3, align 4 - %agg.tmp97.coerce = alloca { i64, i32 }, align 4 - %agg.tmp98.coerce = alloca { i64, i32 }, align 4 - %err107 = alloca i32, align 4 - %agg.tmp116 = alloca %struct.dim3, align 4 - %agg.tmp117 = alloca %struct.dim3, align 4 - %agg.tmp116.coerce = alloca { i64, i32 }, align 4 - %agg.tmp117.coerce = alloca { i64, i32 }, align 4 - %err127 = alloca i32, align 4 - %agg.tmp138 = alloca %struct.dim3, align 4 - %agg.tmp139 = alloca %struct.dim3, align 4 - %agg.tmp138.coerce = alloca { i64, i32 }, align 4 - %agg.tmp139.coerce = alloca { i64, i32 }, align 4 - %err146 = alloca i32, align 4 - %agg.tmp154 = alloca %struct.dim3, align 4 - %agg.tmp155 = alloca %struct.dim3, align 4 - %agg.tmp154.coerce = alloca { i64, i32 }, align 4 - %agg.tmp155.coerce = alloca { i64, i32 }, align 4 - %err161 = alloca i32, align 4 - store i32* %outArray, i32** %outArray.addr, align 8 - store i32* %inArray, i32** %inArray.addr, align 8 - store i32 %numElements, i32* %numElements.addr, align 4 - store i32 %level, i32* %level.addr, align 4 - store i32 256, i32* %blockSize, align 4 - %0 = load i32, i32* %numElements.addr, align 4 - %conv = sitofp i32 %0 to float - %1 = load i32, i32* %blockSize, align 4 - %conv1 = uitofp i32 %1 to float - %mul = fmul contract float 2.000000e+00, %conv1 - %div = fdiv float %conv, %mul - %call = call float @_ZSt4ceilf(float %div) - %conv2 = fptosi float %call to i32 - %cmp = icmp sgt i32 1, %conv2 - br i1 %cmp, label %cond.true, label %cond.false - -cond.true: ; preds = %entry - br label %cond.end - -cond.false: ; preds = %entry - %2 = load i32, i32* %numElements.addr, align 4 - %conv3 = sitofp i32 %2 to float - %3 = load i32, i32* %blockSize, align 4 - %conv4 = uitofp i32 %3 to float - %mul5 = fmul contract float 2.000000e+00, %conv4 - %div6 = fdiv float %conv3, %mul5 - %call7 = call float @_ZSt4ceilf(float %div6) - %conv8 = fptosi float %call7 to i32 - br label %cond.end - -cond.end: ; preds = %cond.false, %cond.true - %cond = phi i32 [ 1, %cond.true ], [ %conv8, %cond.false ] - store i32 %cond, i32* %numBlocks, align 4 - %4 = load i32, i32* %numBlocks, align 4 - %cmp9 = icmp ugt i32 %4, 1 - br i1 %cmp9, label %if.then, label %if.else - -if.then: ; preds = %cond.end - %5 = load i32, i32* %blockSize, align 4 - store i32 %5, i32* %numThreads, align 4 - br label %if.end15 - -if.else: ; preds = %cond.end - %6 = load i32, i32* %numElements.addr, align 4 - %call10 = call zeroext i1 @_Z12isPowerOfTwoi(i32 %6) - br i1 %call10, label %if.then11, label %if.else13 - -if.then11: ; preds = %if.else - %7 = load i32, i32* %numElements.addr, align 4 - %div12 = sdiv i32 %7, 2 - store i32 %div12, i32* %numThreads, align 4 - br label %if.end - -if.else13: ; preds = %if.else - %8 = load i32, i32* %numElements.addr, align 4 - %call14 = call i32 @_Z9floorPow2i(i32 %8) - store i32 %call14, i32* %numThreads, align 4 - br label %if.end - -if.end: ; preds = %if.else13, %if.then11 - br label %if.end15 - -if.end15: ; preds = %if.end, %if.then - %9 = load i32, i32* %numThreads, align 4 - %mul16 = mul i32 %9, 2 - store i32 %mul16, i32* %numEltsPerBlock, align 4 - %10 = load i32, i32* %numElements.addr, align 4 - %11 = load i32, i32* %numBlocks, align 4 - %sub = sub i32 %11, 1 - %12 = load i32, i32* %numEltsPerBlock, align 4 - %mul17 = mul i32 %sub, %12 - %sub18 = sub i32 %10, %mul17 - store i32 %sub18, i32* %numEltsLastBlock, align 4 - %13 = load i32, i32* %numEltsLastBlock, align 4 - %div19 = udiv i32 %13, 2 - %cmp20 = icmp ugt i32 1, %div19 - br i1 %cmp20, label %cond.true21, label %cond.false22 - -cond.true21: ; preds = %if.end15 - br label %cond.end24 - -cond.false22: ; preds = %if.end15 - %14 = load i32, i32* %numEltsLastBlock, align 4 - %div23 = udiv i32 %14, 2 - br label %cond.end24 - -cond.end24: ; preds = %cond.false22, %cond.true21 - %cond25 = phi i32 [ 1, %cond.true21 ], [ %div23, %cond.false22 ] - store i32 %cond25, i32* %numThreadsLastBlock, align 4 - store i32 0, i32* %np2LastBlock, align 4 - store i32 0, i32* %sharedMemLastBlock, align 4 - %15 = load i32, i32* %numEltsLastBlock, align 4 - %16 = load i32, i32* %numEltsPerBlock, align 4 - %cmp26 = icmp ne i32 %15, %16 - br i1 %cmp26, label %if.then27, label %if.end38 - -if.then27: ; preds = %cond.end24 - store i32 1, i32* %np2LastBlock, align 4 - %17 = load i32, i32* %numEltsLastBlock, align 4 - %call28 = call zeroext i1 @_Z12isPowerOfTwoi(i32 %17) - br i1 %call28, label %if.end31, label %if.then29 - -if.then29: ; preds = %if.then27 - %18 = load i32, i32* %numEltsLastBlock, align 4 - %call30 = call i32 @_Z9floorPow2i(i32 %18) - store i32 %call30, i32* %numThreadsLastBlock, align 4 - br label %if.end31 - -if.end31: ; preds = %if.then29, %if.then27 - %19 = load i32, i32* %numThreadsLastBlock, align 4 - %mul32 = mul i32 2, %19 - %div33 = udiv i32 %mul32, 16 - store i32 %div33, i32* %extraSpace, align 4 - %20 = load i32, i32* %numThreadsLastBlock, align 4 - %mul34 = mul i32 2, %20 - %21 = load i32, i32* %extraSpace, align 4 - %add = add i32 %mul34, %21 - %conv35 = zext i32 %add to i64 - %mul36 = mul i64 4, %conv35 - %conv37 = trunc i64 %mul36 to i32 - store i32 %conv37, i32* %sharedMemLastBlock, align 4 - br label %if.end38 - -if.end38: ; preds = %if.end31, %cond.end24 - %22 = load i32, i32* %numEltsPerBlock, align 4 - %div40 = udiv i32 %22, 16 - store i32 %div40, i32* %extraSpace39, align 4 - %23 = load i32, i32* %numEltsPerBlock, align 4 - %24 = load i32, i32* %extraSpace39, align 4 - %add41 = add i32 %23, %24 - %conv42 = zext i32 %add41 to i64 - %mul43 = mul i64 4, %conv42 - %conv44 = trunc i64 %mul43 to i32 - store i32 %conv44, i32* %sharedMemSize, align 4 - %25 = load i32, i32* %numBlocks, align 4 - %26 = load i32, i32* %np2LastBlock, align 4 - %sub45 = sub i32 %25, %26 - %cmp46 = icmp ugt i32 1, %sub45 - br i1 %cmp46, label %cond.true47, label %cond.false48 - -cond.true47: ; preds = %if.end38 - br label %cond.end50 - -cond.false48: ; preds = %if.end38 - %27 = load i32, i32* %numBlocks, align 4 - %28 = load i32, i32* %np2LastBlock, align 4 - %sub49 = sub i32 %27, %28 - br label %cond.end50 - -cond.end50: ; preds = %cond.false48, %cond.true47 - %cond51 = phi i32 [ 1, %cond.true47 ], [ %sub49, %cond.false48 ] - call void @_ZN4dim3C2Ejjj(%struct.dim3* %grid, i32 %cond51, i32 1, i32 1) - %29 = load i32, i32* %numThreads, align 4 - call void @_ZN4dim3C2Ejjj(%struct.dim3* %threads, i32 %29, i32 1, i32 1) - %call52 = call i32 @cudaGetLastError() - store i32 %call52, i32* %err, align 4 - %30 = load i32, i32* %err, align 4 - %cmp53 = icmp ne i32 0, %30 - br i1 %cmp53, label %if.then54, label %if.end57 - -if.then54: ; preds = %cond.end50 - %31 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %32 = load i32, i32* %err, align 4 - %call55 = call i8* @cudaGetErrorString(i32 %32) - %call56 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %31, i8* getelementptr inbounds ([46 x i8], [46 x i8]* @.str.11, i64 0, i64 0), i8* getelementptr inbounds ([37 x i8], [37 x i8]* @.str.19, i64 0, i64 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.17, i64 0, i64 0), i32 163, i8* %call55) - call void @exit(i32 1) #16 - unreachable - -if.end57: ; preds = %cond.end50 - %33 = load i32, i32* %numBlocks, align 4 - %cmp58 = icmp ugt i32 %33, 1 - br i1 %cmp58, label %if.then59, label %if.else135 - -if.then59: ; preds = %if.end57 - %34 = bitcast %struct.dim3* %agg.tmp to i8* - %35 = bitcast %struct.dim3* %grid to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %34, i8* align 4 %35, i64 12, i1 false) - %36 = bitcast %struct.dim3* %agg.tmp60 to i8* - %37 = bitcast %struct.dim3* %threads to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %36, i8* align 4 %37, i64 12, i1 false) - %38 = bitcast { i64, i32 }* %agg.tmp.coerce to i8* - %39 = bitcast %struct.dim3* %agg.tmp to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %38, i8* align 4 %39, i64 12, i1 false) - %40 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 0 - %41 = load i64, i64* %40, align 4 - %42 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 1 - %43 = load i32, i32* %42, align 4 - %44 = bitcast { i64, i32 }* %agg.tmp60.coerce to i8* - %45 = bitcast %struct.dim3* %agg.tmp60 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %44, i8* align 4 %45, i64 12, i1 false) - %46 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp60.coerce, i32 0, i32 0 - %47 = load i64, i64* %46, align 4 - %48 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp60.coerce, i32 0, i32 1 - %49 = load i32, i32* %48, align 4 - %call61 = call i32 @__cudaPushCallConfiguration(i64 %41, i32 %43, i64 %47, i32 %49, i64 0, i8* null) - %tobool = icmp ne i32 %call61, 0 - br i1 %tobool, label %kcall.end, label %kcall.configok - -kcall.configok: ; preds = %if.then59 - %50 = load i32*, i32** %outArray.addr, align 8 - %51 = load i32*, i32** %inArray.addr, align 8 - %52 = load i32**, i32*** @_ZL15g_scanBlockSums, align 8 - %53 = load i32, i32* %level.addr, align 4 - %idxprom = sext i32 %53 to i64 - %arrayidx = getelementptr inbounds i32*, i32** %52, i64 %idxprom - %54 = load i32*, i32** %arrayidx, align 8 - %55 = load i32, i32* %numThreads, align 4 - %mul62 = mul i32 %55, 2 - call void @_ZL7prescanILb1ELb0EEvPjPKjS0_iii(i32* %50, i32* %51, i32* %54, i32 %mul62, i32 0, i32 0) - br label %kcall.end - -kcall.end: ; preds = %kcall.configok, %if.then59 - %call63 = call i32 @cudaThreadSynchronize() - %call65 = call i32 @cudaGetLastError() - store i32 %call65, i32* %err64, align 4 - %56 = load i32, i32* %err64, align 4 - %cmp66 = icmp ne i32 0, %56 - br i1 %cmp66, label %if.then67, label %if.end70 - -if.then67: ; preds = %kcall.end - %57 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %58 = load i32, i32* %err64, align 4 - %call68 = call i8* @cudaGetErrorString(i32 %58) - %call69 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %57, i8* getelementptr inbounds ([46 x i8], [46 x i8]* @.str.11, i64 0, i64 0), i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.20, i64 0, i64 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.17, i64 0, i64 0), i32 170, i8* %call68) - call void @exit(i32 1) #16 - unreachable - -if.end70: ; preds = %kcall.end - %59 = load i32, i32* %np2LastBlock, align 4 - %tobool71 = icmp ne i32 %59, 0 - br i1 %tobool71, label %if.then72, label %if.end91 - -if.then72: ; preds = %if.end70 - call void @_ZN4dim3C2Ejjj(%struct.dim3* %agg.tmp73, i32 1, i32 1, i32 1) - %60 = load i32, i32* %numThreadsLastBlock, align 4 - call void @_ZN4dim3C2Ejjj(%struct.dim3* %agg.tmp74, i32 %60, i32 1, i32 1) - %61 = bitcast { i64, i32 }* %agg.tmp73.coerce to i8* - %62 = bitcast %struct.dim3* %agg.tmp73 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %61, i8* align 4 %62, i64 12, i1 false) - %63 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp73.coerce, i32 0, i32 0 - %64 = load i64, i64* %63, align 4 - %65 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp73.coerce, i32 0, i32 1 - %66 = load i32, i32* %65, align 4 - %67 = bitcast { i64, i32 }* %agg.tmp74.coerce to i8* - %68 = bitcast %struct.dim3* %agg.tmp74 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %67, i8* align 4 %68, i64 12, i1 false) - %69 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp74.coerce, i32 0, i32 0 - %70 = load i64, i64* %69, align 4 - %71 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp74.coerce, i32 0, i32 1 - %72 = load i32, i32* %71, align 4 - %call75 = call i32 @__cudaPushCallConfiguration(i64 %64, i32 %66, i64 %70, i32 %72, i64 0, i8* null) - %tobool76 = icmp ne i32 %call75, 0 - br i1 %tobool76, label %kcall.end82, label %kcall.configok77 - -kcall.configok77: ; preds = %if.then72 - %73 = load i32*, i32** %outArray.addr, align 8 - %74 = load i32*, i32** %inArray.addr, align 8 - %75 = load i32**, i32*** @_ZL15g_scanBlockSums, align 8 - %76 = load i32, i32* %level.addr, align 4 - %idxprom78 = sext i32 %76 to i64 - %arrayidx79 = getelementptr inbounds i32*, i32** %75, i64 %idxprom78 - %77 = load i32*, i32** %arrayidx79, align 8 - %78 = load i32, i32* %numEltsLastBlock, align 4 - %79 = load i32, i32* %numBlocks, align 4 - %sub80 = sub i32 %79, 1 - %80 = load i32, i32* %numElements.addr, align 4 - %81 = load i32, i32* %numEltsLastBlock, align 4 - %sub81 = sub i32 %80, %81 - call void @_ZL7prescanILb1ELb1EEvPjPKjS0_iii(i32* %73, i32* %74, i32* %77, i32 %78, i32 %sub80, i32 %sub81) - br label %kcall.end82 - -kcall.end82: ; preds = %kcall.configok77, %if.then72 - %call83 = call i32 @cudaThreadSynchronize() - %call85 = call i32 @cudaGetLastError() - store i32 %call85, i32* %err84, align 4 - %82 = load i32, i32* %err84, align 4 - %cmp86 = icmp ne i32 0, %82 - br i1 %cmp86, label %if.then87, label %if.end90 - -if.then87: ; preds = %kcall.end82 - %83 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %84 = load i32, i32* %err84, align 4 - %call88 = call i8* @cudaGetErrorString(i32 %84) - %call89 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %83, i8* getelementptr inbounds ([46 x i8], [46 x i8]* @.str.11, i64 0, i64 0), i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.21, i64 0, i64 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.17, i64 0, i64 0), i32 176, i8* %call88) - call void @exit(i32 1) #16 - unreachable - -if.end90: ; preds = %kcall.end82 - br label %if.end91 - -if.end91: ; preds = %if.end90, %if.end70 - %85 = load i32**, i32*** @_ZL15g_scanBlockSums, align 8 - %86 = load i32, i32* %level.addr, align 4 - %idxprom92 = sext i32 %86 to i64 - %arrayidx93 = getelementptr inbounds i32*, i32** %85, i64 %idxprom92 - %87 = load i32*, i32** %arrayidx93, align 8 - %88 = load i32**, i32*** @_ZL15g_scanBlockSums, align 8 - %89 = load i32, i32* %level.addr, align 4 - %idxprom94 = sext i32 %89 to i64 - %arrayidx95 = getelementptr inbounds i32*, i32** %88, i64 %idxprom94 - %90 = load i32*, i32** %arrayidx95, align 8 - %91 = load i32, i32* %numBlocks, align 4 - %92 = load i32, i32* %level.addr, align 4 - %add96 = add nsw i32 %92, 1 - call void @_ZL21prescanArrayRecursivePjPKjii(i32* %87, i32* %90, i32 %91, i32 %add96) - %93 = bitcast %struct.dim3* %agg.tmp97 to i8* - %94 = bitcast %struct.dim3* %grid to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %93, i8* align 4 %94, i64 12, i1 false) - %95 = bitcast %struct.dim3* %agg.tmp98 to i8* - %96 = bitcast %struct.dim3* %threads to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %95, i8* align 4 %96, i64 12, i1 false) - %97 = bitcast { i64, i32 }* %agg.tmp97.coerce to i8* - %98 = bitcast %struct.dim3* %agg.tmp97 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %97, i8* align 4 %98, i64 12, i1 false) - %99 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp97.coerce, i32 0, i32 0 - %100 = load i64, i64* %99, align 4 - %101 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp97.coerce, i32 0, i32 1 - %102 = load i32, i32* %101, align 4 - %103 = bitcast { i64, i32 }* %agg.tmp98.coerce to i8* - %104 = bitcast %struct.dim3* %agg.tmp98 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %103, i8* align 4 %104, i64 12, i1 false) - %105 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp98.coerce, i32 0, i32 0 - %106 = load i64, i64* %105, align 4 - %107 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp98.coerce, i32 0, i32 1 - %108 = load i32, i32* %107, align 4 - %call99 = call i32 @__cudaPushCallConfiguration(i64 %100, i32 %102, i64 %106, i32 %108, i64 0, i8* null) - %tobool100 = icmp ne i32 %call99, 0 - br i1 %tobool100, label %kcall.end105, label %kcall.configok101 - -kcall.configok101: ; preds = %if.end91 - %109 = load i32*, i32** %outArray.addr, align 8 - %110 = load i32**, i32*** @_ZL15g_scanBlockSums, align 8 - %111 = load i32, i32* %level.addr, align 4 - %idxprom102 = sext i32 %111 to i64 - %arrayidx103 = getelementptr inbounds i32*, i32** %110, i64 %idxprom102 - %112 = load i32*, i32** %arrayidx103, align 8 - %113 = load i32, i32* %numElements.addr, align 4 - %114 = load i32, i32* %numEltsLastBlock, align 4 - %sub104 = sub i32 %113, %114 - call void @_ZL10uniformAddPjS_iii(i32* %109, i32* %112, i32 %sub104, i32 0, i32 0) - br label %kcall.end105 - -kcall.end105: ; preds = %kcall.configok101, %if.end91 - %call106 = call i32 @cudaThreadSynchronize() - %call108 = call i32 @cudaGetLastError() - store i32 %call108, i32* %err107, align 4 - %115 = load i32, i32* %err107, align 4 - %cmp109 = icmp ne i32 0, %115 - br i1 %cmp109, label %if.then110, label %if.end113 - -if.then110: ; preds = %kcall.end105 - %116 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %117 = load i32, i32* %err107, align 4 - %call111 = call i8* @cudaGetErrorString(i32 %117) - %call112 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %116, i8* getelementptr inbounds ([46 x i8], [46 x i8]* @.str.11, i64 0, i64 0), i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.22, i64 0, i64 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.17, i64 0, i64 0), i32 190, i8* %call111) - call void @exit(i32 1) #16 - unreachable - -if.end113: ; preds = %kcall.end105 - %118 = load i32, i32* %np2LastBlock, align 4 - %tobool114 = icmp ne i32 %118, 0 - br i1 %tobool114, label %if.then115, label %if.end134 - -if.then115: ; preds = %if.end113 - call void @_ZN4dim3C2Ejjj(%struct.dim3* %agg.tmp116, i32 1, i32 1, i32 1) - %119 = load i32, i32* %numThreadsLastBlock, align 4 - call void @_ZN4dim3C2Ejjj(%struct.dim3* %agg.tmp117, i32 %119, i32 1, i32 1) - %120 = bitcast { i64, i32 }* %agg.tmp116.coerce to i8* - %121 = bitcast %struct.dim3* %agg.tmp116 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %120, i8* align 4 %121, i64 12, i1 false) - %122 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp116.coerce, i32 0, i32 0 - %123 = load i64, i64* %122, align 4 - %124 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp116.coerce, i32 0, i32 1 - %125 = load i32, i32* %124, align 4 - %126 = bitcast { i64, i32 }* %agg.tmp117.coerce to i8* - %127 = bitcast %struct.dim3* %agg.tmp117 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %126, i8* align 4 %127, i64 12, i1 false) - %128 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp117.coerce, i32 0, i32 0 - %129 = load i64, i64* %128, align 4 - %130 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp117.coerce, i32 0, i32 1 - %131 = load i32, i32* %130, align 4 - %call118 = call i32 @__cudaPushCallConfiguration(i64 %123, i32 %125, i64 %129, i32 %131, i64 0, i8* null) - %tobool119 = icmp ne i32 %call118, 0 - br i1 %tobool119, label %kcall.end125, label %kcall.configok120 - -kcall.configok120: ; preds = %if.then115 - %132 = load i32*, i32** %outArray.addr, align 8 - %133 = load i32**, i32*** @_ZL15g_scanBlockSums, align 8 - %134 = load i32, i32* %level.addr, align 4 - %idxprom121 = sext i32 %134 to i64 - %arrayidx122 = getelementptr inbounds i32*, i32** %133, i64 %idxprom121 - %135 = load i32*, i32** %arrayidx122, align 8 - %136 = load i32, i32* %numEltsLastBlock, align 4 - %137 = load i32, i32* %numBlocks, align 4 - %sub123 = sub i32 %137, 1 - %138 = load i32, i32* %numElements.addr, align 4 - %139 = load i32, i32* %numEltsLastBlock, align 4 - %sub124 = sub i32 %138, %139 - call void @_ZL10uniformAddPjS_iii(i32* %132, i32* %135, i32 %136, i32 %sub123, i32 %sub124) - br label %kcall.end125 - -kcall.end125: ; preds = %kcall.configok120, %if.then115 - %call126 = call i32 @cudaThreadSynchronize() - %call128 = call i32 @cudaGetLastError() - store i32 %call128, i32* %err127, align 4 - %140 = load i32, i32* %err127, align 4 - %cmp129 = icmp ne i32 0, %140 - br i1 %cmp129, label %if.then130, label %if.end133 - -if.then130: ; preds = %kcall.end125 - %141 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %142 = load i32, i32* %err127, align 4 - %call131 = call i8* @cudaGetErrorString(i32 %142) - %call132 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %141, i8* getelementptr inbounds ([46 x i8], [46 x i8]* @.str.11, i64 0, i64 0), i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.22, i64 0, i64 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.17, i64 0, i64 0), i32 196, i8* %call131) - call void @exit(i32 1) #16 - unreachable - -if.end133: ; preds = %kcall.end125 - br label %if.end134 - -if.end134: ; preds = %if.end133, %if.end113 - br label %if.end169 - -if.else135: ; preds = %if.end57 - %143 = load i32, i32* %numElements.addr, align 4 - %call136 = call zeroext i1 @_Z12isPowerOfTwoi(i32 %143) - br i1 %call136, label %if.then137, label %if.else153 - -if.then137: ; preds = %if.else135 - %144 = bitcast %struct.dim3* %agg.tmp138 to i8* - %145 = bitcast %struct.dim3* %grid to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %144, i8* align 4 %145, i64 12, i1 false) - %146 = bitcast %struct.dim3* %agg.tmp139 to i8* - %147 = bitcast %struct.dim3* %threads to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %146, i8* align 4 %147, i64 12, i1 false) - %148 = bitcast { i64, i32 }* %agg.tmp138.coerce to i8* - %149 = bitcast %struct.dim3* %agg.tmp138 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %148, i8* align 4 %149, i64 12, i1 false) - %150 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp138.coerce, i32 0, i32 0 - %151 = load i64, i64* %150, align 4 - %152 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp138.coerce, i32 0, i32 1 - %153 = load i32, i32* %152, align 4 - %154 = bitcast { i64, i32 }* %agg.tmp139.coerce to i8* - %155 = bitcast %struct.dim3* %agg.tmp139 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %154, i8* align 4 %155, i64 12, i1 false) - %156 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp139.coerce, i32 0, i32 0 - %157 = load i64, i64* %156, align 4 - %158 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp139.coerce, i32 0, i32 1 - %159 = load i32, i32* %158, align 4 - %call140 = call i32 @__cudaPushCallConfiguration(i64 %151, i32 %153, i64 %157, i32 %159, i64 0, i8* null) - %tobool141 = icmp ne i32 %call140, 0 - br i1 %tobool141, label %kcall.end144, label %kcall.configok142 - -kcall.configok142: ; preds = %if.then137 - %160 = load i32*, i32** %outArray.addr, align 8 - %161 = load i32*, i32** %inArray.addr, align 8 - %162 = load i32, i32* %numThreads, align 4 - %mul143 = mul i32 %162, 2 - call void @_ZL7prescanILb0ELb0EEvPjPKjS0_iii(i32* %160, i32* %161, i32* null, i32 %mul143, i32 0, i32 0) - br label %kcall.end144 - -kcall.end144: ; preds = %kcall.configok142, %if.then137 - %call145 = call i32 @cudaThreadSynchronize() - %call147 = call i32 @cudaGetLastError() - store i32 %call147, i32* %err146, align 4 - %163 = load i32, i32* %err146, align 4 - %cmp148 = icmp ne i32 0, %163 - br i1 %cmp148, label %if.then149, label %if.end152 - -if.then149: ; preds = %kcall.end144 - %164 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %165 = load i32, i32* %err146, align 4 - %call150 = call i8* @cudaGetErrorString(i32 %165) - %call151 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %164, i8* getelementptr inbounds ([46 x i8], [46 x i8]* @.str.11, i64 0, i64 0), i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str.23, i64 0, i64 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.17, i64 0, i64 0), i32 202, i8* %call150) - call void @exit(i32 1) #16 - unreachable - -if.end152: ; preds = %kcall.end144 - br label %if.end168 - -if.else153: ; preds = %if.else135 - %166 = bitcast %struct.dim3* %agg.tmp154 to i8* - %167 = bitcast %struct.dim3* %grid to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %166, i8* align 4 %167, i64 12, i1 false) - %168 = bitcast %struct.dim3* %agg.tmp155 to i8* - %169 = bitcast %struct.dim3* %threads to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %168, i8* align 4 %169, i64 12, i1 false) - %170 = bitcast { i64, i32 }* %agg.tmp154.coerce to i8* - %171 = bitcast %struct.dim3* %agg.tmp154 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %170, i8* align 4 %171, i64 12, i1 false) - %172 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp154.coerce, i32 0, i32 0 - %173 = load i64, i64* %172, align 4 - %174 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp154.coerce, i32 0, i32 1 - %175 = load i32, i32* %174, align 4 - %176 = bitcast { i64, i32 }* %agg.tmp155.coerce to i8* - %177 = bitcast %struct.dim3* %agg.tmp155 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %176, i8* align 4 %177, i64 12, i1 false) - %178 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp155.coerce, i32 0, i32 0 - %179 = load i64, i64* %178, align 4 - %180 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp155.coerce, i32 0, i32 1 - %181 = load i32, i32* %180, align 4 - %call156 = call i32 @__cudaPushCallConfiguration(i64 %173, i32 %175, i64 %179, i32 %181, i64 0, i8* null) - %tobool157 = icmp ne i32 %call156, 0 - br i1 %tobool157, label %kcall.end159, label %kcall.configok158 - -kcall.configok158: ; preds = %if.else153 - %182 = load i32*, i32** %outArray.addr, align 8 - %183 = load i32*, i32** %inArray.addr, align 8 - %184 = load i32, i32* %numElements.addr, align 4 - call void @_ZL7prescanILb0ELb1EEvPjPKjS0_iii(i32* %182, i32* %183, i32* null, i32 %184, i32 0, i32 0) - br label %kcall.end159 - -kcall.end159: ; preds = %kcall.configok158, %if.else153 - %call160 = call i32 @cudaThreadSynchronize() - %call162 = call i32 @cudaGetLastError() - store i32 %call162, i32* %err161, align 4 - %185 = load i32, i32* %err161, align 4 - %cmp163 = icmp ne i32 0, %185 - br i1 %cmp163, label %if.then164, label %if.end167 - -if.then164: ; preds = %kcall.end159 - %186 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %187 = load i32, i32* %err161, align 4 - %call165 = call i8* @cudaGetErrorString(i32 %187) - %call166 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %186, i8* getelementptr inbounds ([46 x i8], [46 x i8]* @.str.11, i64 0, i64 0), i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.24, i64 0, i64 0), i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.17, i64 0, i64 0), i32 207, i8* %call165) - call void @exit(i32 1) #16 - unreachable - -if.end167: ; preds = %kcall.end159 - br label %if.end168 - -if.end168: ; preds = %if.end167, %if.end152 - br label %if.end169 - -if.end169: ; preds = %if.end168, %if.end134 - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local zeroext i1 @_Z12isPowerOfTwoi(i32 %n) #6 comdat { -entry: - %n.addr = alloca i32, align 4 - store i32 %n, i32* %n.addr, align 4 - %0 = load i32, i32* %n.addr, align 4 - %1 = load i32, i32* %n.addr, align 4 - %sub = sub nsw i32 %1, 1 - %and = and i32 %0, %sub - %cmp = icmp eq i32 %and, 0 - ret i1 %cmp -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local i32 @_Z9floorPow2i(i32 %n) #0 comdat { -entry: - %n.addr = alloca i32, align 4 - %exp = alloca i32, align 4 - store i32 %n, i32* %n.addr, align 4 - %0 = load i32, i32* %n.addr, align 4 - %conv = sitofp i32 %0 to float - %call = call float @_ZSt5frexpfPi(float %conv, i32* %exp) - %1 = load i32, i32* %exp, align 4 - %sub = sub nsw i32 %1, 1 - %shl = shl i32 1, %sub - ret i32 %shl -} - -; Function Attrs: noinline optnone uwtable -define internal void @_ZL7prescanILb1ELb0EEvPjPKjS0_iii(i32* %g_odata, i32* %g_idata, i32* %g_blockSums, i32 %n, i32 %blockIndex, i32 %baseIndex) #0 { -entry: - %g_odata.addr = alloca i32*, align 8 - %g_idata.addr = alloca i32*, align 8 - %g_blockSums.addr = alloca i32*, align 8 - %n.addr = alloca i32, align 4 - %blockIndex.addr = alloca i32, align 4 - %baseIndex.addr = alloca i32, align 4 - %grid_dim = alloca %struct.dim3, align 8 - %block_dim = alloca %struct.dim3, align 8 - %shmem_size = alloca i64, align 8 - %stream = alloca i8*, align 8 - %grid_dim.coerce = alloca { i64, i32 }, align 8 - %block_dim.coerce = alloca { i64, i32 }, align 8 - store i32* %g_odata, i32** %g_odata.addr, align 8 - store i32* %g_idata, i32** %g_idata.addr, align 8 - store i32* %g_blockSums, i32** %g_blockSums.addr, align 8 - store i32 %n, i32* %n.addr, align 4 - store i32 %blockIndex, i32* %blockIndex.addr, align 4 - store i32 %baseIndex, i32* %baseIndex.addr, align 4 - %kernel_args = alloca i8*, i64 6, align 16 - %0 = bitcast i32** %g_odata.addr to i8* - %1 = getelementptr i8*, i8** %kernel_args, i32 0 - store i8* %0, i8** %1 - %2 = bitcast i32** %g_idata.addr to i8* - %3 = getelementptr i8*, i8** %kernel_args, i32 1 - store i8* %2, i8** %3 - %4 = bitcast i32** %g_blockSums.addr to i8* - %5 = getelementptr i8*, i8** %kernel_args, i32 2 - store i8* %4, i8** %5 - %6 = bitcast i32* %n.addr to i8* - %7 = getelementptr i8*, i8** %kernel_args, i32 3 - store i8* %6, i8** %7 - %8 = bitcast i32* %blockIndex.addr to i8* - %9 = getelementptr i8*, i8** %kernel_args, i32 4 - store i8* %8, i8** %9 - %10 = bitcast i32* %baseIndex.addr to i8* - %11 = getelementptr i8*, i8** %kernel_args, i32 5 - store i8* %10, i8** %11 - %12 = call i32 @__cudaPopCallConfiguration(%struct.dim3* %grid_dim, %struct.dim3* %block_dim, i64* %shmem_size, i8** %stream) - %13 = load i64, i64* %shmem_size, align 8 - %14 = load i8*, i8** %stream, align 8 - %15 = bitcast { i64, i32 }* %grid_dim.coerce to i8* - %16 = bitcast %struct.dim3* %grid_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %15, i8* align 8 %16, i64 12, i1 false) - %17 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 0 - %18 = load i64, i64* %17, align 8 - %19 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 1 - %20 = load i32, i32* %19, align 8 - %21 = bitcast { i64, i32 }* %block_dim.coerce to i8* - %22 = bitcast %struct.dim3* %block_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %21, i8* align 8 %22, i64 12, i1 false) - %23 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 0 - %24 = load i64, i64* %23, align 8 - %25 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 1 - %26 = load i32, i32* %25, align 8 - %27 = bitcast i8* %14 to %struct.CUstream_st* - %call = call i32 @cudaLaunchKernel(i8* bitcast (void (i32*, i32*, i32*, i32, i32, i32)* @_ZL7prescanILb1ELb0EEvPjPKjS0_iii to i8*), i64 %18, i32 %20, i64 %24, i32 %26, i8** %kernel_args, i64 %13, %struct.CUstream_st* %27) - br label %setup.end - -setup.end: ; preds = %entry - ret void -} - -; Function Attrs: noinline optnone uwtable -define internal void @_ZL7prescanILb1ELb1EEvPjPKjS0_iii(i32* %g_odata, i32* %g_idata, i32* %g_blockSums, i32 %n, i32 %blockIndex, i32 %baseIndex) #0 { -entry: - %g_odata.addr = alloca i32*, align 8 - %g_idata.addr = alloca i32*, align 8 - %g_blockSums.addr = alloca i32*, align 8 - %n.addr = alloca i32, align 4 - %blockIndex.addr = alloca i32, align 4 - %baseIndex.addr = alloca i32, align 4 - %grid_dim = alloca %struct.dim3, align 8 - %block_dim = alloca %struct.dim3, align 8 - %shmem_size = alloca i64, align 8 - %stream = alloca i8*, align 8 - %grid_dim.coerce = alloca { i64, i32 }, align 8 - %block_dim.coerce = alloca { i64, i32 }, align 8 - store i32* %g_odata, i32** %g_odata.addr, align 8 - store i32* %g_idata, i32** %g_idata.addr, align 8 - store i32* %g_blockSums, i32** %g_blockSums.addr, align 8 - store i32 %n, i32* %n.addr, align 4 - store i32 %blockIndex, i32* %blockIndex.addr, align 4 - store i32 %baseIndex, i32* %baseIndex.addr, align 4 - %kernel_args = alloca i8*, i64 6, align 16 - %0 = bitcast i32** %g_odata.addr to i8* - %1 = getelementptr i8*, i8** %kernel_args, i32 0 - store i8* %0, i8** %1 - %2 = bitcast i32** %g_idata.addr to i8* - %3 = getelementptr i8*, i8** %kernel_args, i32 1 - store i8* %2, i8** %3 - %4 = bitcast i32** %g_blockSums.addr to i8* - %5 = getelementptr i8*, i8** %kernel_args, i32 2 - store i8* %4, i8** %5 - %6 = bitcast i32* %n.addr to i8* - %7 = getelementptr i8*, i8** %kernel_args, i32 3 - store i8* %6, i8** %7 - %8 = bitcast i32* %blockIndex.addr to i8* - %9 = getelementptr i8*, i8** %kernel_args, i32 4 - store i8* %8, i8** %9 - %10 = bitcast i32* %baseIndex.addr to i8* - %11 = getelementptr i8*, i8** %kernel_args, i32 5 - store i8* %10, i8** %11 - %12 = call i32 @__cudaPopCallConfiguration(%struct.dim3* %grid_dim, %struct.dim3* %block_dim, i64* %shmem_size, i8** %stream) - %13 = load i64, i64* %shmem_size, align 8 - %14 = load i8*, i8** %stream, align 8 - %15 = bitcast { i64, i32 }* %grid_dim.coerce to i8* - %16 = bitcast %struct.dim3* %grid_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %15, i8* align 8 %16, i64 12, i1 false) - %17 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 0 - %18 = load i64, i64* %17, align 8 - %19 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 1 - %20 = load i32, i32* %19, align 8 - %21 = bitcast { i64, i32 }* %block_dim.coerce to i8* - %22 = bitcast %struct.dim3* %block_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %21, i8* align 8 %22, i64 12, i1 false) - %23 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 0 - %24 = load i64, i64* %23, align 8 - %25 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 1 - %26 = load i32, i32* %25, align 8 - %27 = bitcast i8* %14 to %struct.CUstream_st* - %call = call i32 @cudaLaunchKernel(i8* bitcast (void (i32*, i32*, i32*, i32, i32, i32)* @_ZL7prescanILb1ELb1EEvPjPKjS0_iii to i8*), i64 %18, i32 %20, i64 %24, i32 %26, i8** %kernel_args, i64 %13, %struct.CUstream_st* %27) - br label %setup.end - -setup.end: ; preds = %entry - ret void -} - -; Function Attrs: noinline optnone uwtable -define internal void @_ZL10uniformAddPjS_iii(i32* %g_data, i32* %uniforms, i32 %n, i32 %blockOffset, i32 %baseIndex) #0 { -entry: - %g_data.addr = alloca i32*, align 8 - %uniforms.addr = alloca i32*, align 8 - %n.addr = alloca i32, align 4 - %blockOffset.addr = alloca i32, align 4 - %baseIndex.addr = alloca i32, align 4 - %grid_dim = alloca %struct.dim3, align 8 - %block_dim = alloca %struct.dim3, align 8 - %shmem_size = alloca i64, align 8 - %stream = alloca i8*, align 8 - %grid_dim.coerce = alloca { i64, i32 }, align 8 - %block_dim.coerce = alloca { i64, i32 }, align 8 - store i32* %g_data, i32** %g_data.addr, align 8 - store i32* %uniforms, i32** %uniforms.addr, align 8 - store i32 %n, i32* %n.addr, align 4 - store i32 %blockOffset, i32* %blockOffset.addr, align 4 - store i32 %baseIndex, i32* %baseIndex.addr, align 4 - %kernel_args = alloca i8*, i64 5, align 16 - %0 = bitcast i32** %g_data.addr to i8* - %1 = getelementptr i8*, i8** %kernel_args, i32 0 - store i8* %0, i8** %1 - %2 = bitcast i32** %uniforms.addr to i8* - %3 = getelementptr i8*, i8** %kernel_args, i32 1 - store i8* %2, i8** %3 - %4 = bitcast i32* %n.addr to i8* - %5 = getelementptr i8*, i8** %kernel_args, i32 2 - store i8* %4, i8** %5 - %6 = bitcast i32* %blockOffset.addr to i8* - %7 = getelementptr i8*, i8** %kernel_args, i32 3 - store i8* %6, i8** %7 - %8 = bitcast i32* %baseIndex.addr to i8* - %9 = getelementptr i8*, i8** %kernel_args, i32 4 - store i8* %8, i8** %9 - %10 = call i32 @__cudaPopCallConfiguration(%struct.dim3* %grid_dim, %struct.dim3* %block_dim, i64* %shmem_size, i8** %stream) - %11 = load i64, i64* %shmem_size, align 8 - %12 = load i8*, i8** %stream, align 8 - %13 = bitcast { i64, i32 }* %grid_dim.coerce to i8* - %14 = bitcast %struct.dim3* %grid_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %13, i8* align 8 %14, i64 12, i1 false) - %15 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 0 - %16 = load i64, i64* %15, align 8 - %17 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 1 - %18 = load i32, i32* %17, align 8 - %19 = bitcast { i64, i32 }* %block_dim.coerce to i8* - %20 = bitcast %struct.dim3* %block_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %19, i8* align 8 %20, i64 12, i1 false) - %21 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 0 - %22 = load i64, i64* %21, align 8 - %23 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 1 - %24 = load i32, i32* %23, align 8 - %25 = bitcast i8* %12 to %struct.CUstream_st* - %call = call i32 @cudaLaunchKernel(i8* bitcast (void (i32*, i32*, i32, i32, i32)* @_ZL10uniformAddPjS_iii to i8*), i64 %16, i32 %18, i64 %22, i32 %24, i8** %kernel_args, i64 %11, %struct.CUstream_st* %25) - br label %setup.end - -setup.end: ; preds = %entry - ret void -} - -; Function Attrs: noinline optnone uwtable -define internal void @_ZL7prescanILb0ELb0EEvPjPKjS0_iii(i32* %g_odata, i32* %g_idata, i32* %g_blockSums, i32 %n, i32 %blockIndex, i32 %baseIndex) #0 { -entry: - %g_odata.addr = alloca i32*, align 8 - %g_idata.addr = alloca i32*, align 8 - %g_blockSums.addr = alloca i32*, align 8 - %n.addr = alloca i32, align 4 - %blockIndex.addr = alloca i32, align 4 - %baseIndex.addr = alloca i32, align 4 - %grid_dim = alloca %struct.dim3, align 8 - %block_dim = alloca %struct.dim3, align 8 - %shmem_size = alloca i64, align 8 - %stream = alloca i8*, align 8 - %grid_dim.coerce = alloca { i64, i32 }, align 8 - %block_dim.coerce = alloca { i64, i32 }, align 8 - store i32* %g_odata, i32** %g_odata.addr, align 8 - store i32* %g_idata, i32** %g_idata.addr, align 8 - store i32* %g_blockSums, i32** %g_blockSums.addr, align 8 - store i32 %n, i32* %n.addr, align 4 - store i32 %blockIndex, i32* %blockIndex.addr, align 4 - store i32 %baseIndex, i32* %baseIndex.addr, align 4 - %kernel_args = alloca i8*, i64 6, align 16 - %0 = bitcast i32** %g_odata.addr to i8* - %1 = getelementptr i8*, i8** %kernel_args, i32 0 - store i8* %0, i8** %1 - %2 = bitcast i32** %g_idata.addr to i8* - %3 = getelementptr i8*, i8** %kernel_args, i32 1 - store i8* %2, i8** %3 - %4 = bitcast i32** %g_blockSums.addr to i8* - %5 = getelementptr i8*, i8** %kernel_args, i32 2 - store i8* %4, i8** %5 - %6 = bitcast i32* %n.addr to i8* - %7 = getelementptr i8*, i8** %kernel_args, i32 3 - store i8* %6, i8** %7 - %8 = bitcast i32* %blockIndex.addr to i8* - %9 = getelementptr i8*, i8** %kernel_args, i32 4 - store i8* %8, i8** %9 - %10 = bitcast i32* %baseIndex.addr to i8* - %11 = getelementptr i8*, i8** %kernel_args, i32 5 - store i8* %10, i8** %11 - %12 = call i32 @__cudaPopCallConfiguration(%struct.dim3* %grid_dim, %struct.dim3* %block_dim, i64* %shmem_size, i8** %stream) - %13 = load i64, i64* %shmem_size, align 8 - %14 = load i8*, i8** %stream, align 8 - %15 = bitcast { i64, i32 }* %grid_dim.coerce to i8* - %16 = bitcast %struct.dim3* %grid_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %15, i8* align 8 %16, i64 12, i1 false) - %17 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 0 - %18 = load i64, i64* %17, align 8 - %19 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 1 - %20 = load i32, i32* %19, align 8 - %21 = bitcast { i64, i32 }* %block_dim.coerce to i8* - %22 = bitcast %struct.dim3* %block_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %21, i8* align 8 %22, i64 12, i1 false) - %23 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 0 - %24 = load i64, i64* %23, align 8 - %25 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 1 - %26 = load i32, i32* %25, align 8 - %27 = bitcast i8* %14 to %struct.CUstream_st* - %call = call i32 @cudaLaunchKernel(i8* bitcast (void (i32*, i32*, i32*, i32, i32, i32)* @_ZL7prescanILb0ELb0EEvPjPKjS0_iii to i8*), i64 %18, i32 %20, i64 %24, i32 %26, i8** %kernel_args, i64 %13, %struct.CUstream_st* %27) - br label %setup.end - -setup.end: ; preds = %entry - ret void -} - -; Function Attrs: noinline optnone uwtable -define internal void @_ZL7prescanILb0ELb1EEvPjPKjS0_iii(i32* %g_odata, i32* %g_idata, i32* %g_blockSums, i32 %n, i32 %blockIndex, i32 %baseIndex) #0 { -entry: - %g_odata.addr = alloca i32*, align 8 - %g_idata.addr = alloca i32*, align 8 - %g_blockSums.addr = alloca i32*, align 8 - %n.addr = alloca i32, align 4 - %blockIndex.addr = alloca i32, align 4 - %baseIndex.addr = alloca i32, align 4 - %grid_dim = alloca %struct.dim3, align 8 - %block_dim = alloca %struct.dim3, align 8 - %shmem_size = alloca i64, align 8 - %stream = alloca i8*, align 8 - %grid_dim.coerce = alloca { i64, i32 }, align 8 - %block_dim.coerce = alloca { i64, i32 }, align 8 - store i32* %g_odata, i32** %g_odata.addr, align 8 - store i32* %g_idata, i32** %g_idata.addr, align 8 - store i32* %g_blockSums, i32** %g_blockSums.addr, align 8 - store i32 %n, i32* %n.addr, align 4 - store i32 %blockIndex, i32* %blockIndex.addr, align 4 - store i32 %baseIndex, i32* %baseIndex.addr, align 4 - %kernel_args = alloca i8*, i64 6, align 16 - %0 = bitcast i32** %g_odata.addr to i8* - %1 = getelementptr i8*, i8** %kernel_args, i32 0 - store i8* %0, i8** %1 - %2 = bitcast i32** %g_idata.addr to i8* - %3 = getelementptr i8*, i8** %kernel_args, i32 1 - store i8* %2, i8** %3 - %4 = bitcast i32** %g_blockSums.addr to i8* - %5 = getelementptr i8*, i8** %kernel_args, i32 2 - store i8* %4, i8** %5 - %6 = bitcast i32* %n.addr to i8* - %7 = getelementptr i8*, i8** %kernel_args, i32 3 - store i8* %6, i8** %7 - %8 = bitcast i32* %blockIndex.addr to i8* - %9 = getelementptr i8*, i8** %kernel_args, i32 4 - store i8* %8, i8** %9 - %10 = bitcast i32* %baseIndex.addr to i8* - %11 = getelementptr i8*, i8** %kernel_args, i32 5 - store i8* %10, i8** %11 - %12 = call i32 @__cudaPopCallConfiguration(%struct.dim3* %grid_dim, %struct.dim3* %block_dim, i64* %shmem_size, i8** %stream) - %13 = load i64, i64* %shmem_size, align 8 - %14 = load i8*, i8** %stream, align 8 - %15 = bitcast { i64, i32 }* %grid_dim.coerce to i8* - %16 = bitcast %struct.dim3* %grid_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %15, i8* align 8 %16, i64 12, i1 false) - %17 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 0 - %18 = load i64, i64* %17, align 8 - %19 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 1 - %20 = load i32, i32* %19, align 8 - %21 = bitcast { i64, i32 }* %block_dim.coerce to i8* - %22 = bitcast %struct.dim3* %block_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %21, i8* align 8 %22, i64 12, i1 false) - %23 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 0 - %24 = load i64, i64* %23, align 8 - %25 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 1 - %26 = load i32, i32* %25, align 8 - %27 = bitcast i8* %14 to %struct.CUstream_st* - %call = call i32 @cudaLaunchKernel(i8* bitcast (void (i32*, i32*, i32*, i32, i32, i32)* @_ZL7prescanILb0ELb1EEvPjPKjS0_iii to i8*), i64 %18, i32 %20, i64 %24, i32 %26, i8** %kernel_args, i64 %13, %struct.CUstream_st* %27) - br label %setup.end - -setup.end: ; preds = %entry - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local float @_ZSt5frexpfPi(float %__x, i32* %__exp) #6 comdat { -entry: - %__x.addr = alloca float, align 4 - %__exp.addr = alloca i32*, align 8 - store float %__x, float* %__x.addr, align 4 - store i32* %__exp, i32** %__exp.addr, align 8 - %0 = load float, float* %__x.addr, align 4 - %1 = load i32*, i32** %__exp.addr, align 8 - %call = call float @frexpf(float %0, i32* %1) #3 - ret float %call -} - -; Function Attrs: nounwind -declare dso_local float @frexpf(float, i32*) #11 - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt12_Vector_baseIP5INodeSaIS1_EEC2Ev(%"struct.std::_Vector_base"* %this) unnamed_addr #0 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Vector_base"*, align 8 - store %"struct.std::_Vector_base"* %this, %"struct.std::_Vector_base"** %this.addr, align 8 - %this1 = load %"struct.std::_Vector_base"*, %"struct.std::_Vector_base"** %this.addr, align 8 - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %this1, i32 0, i32 0 - call void @_ZNSt12_Vector_baseIP5INodeSaIS1_EE12_Vector_implC2Ev(%"struct.std::_Vector_base >::_Vector_impl"* %_M_impl) - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSt12_Vector_baseIP5INodeSaIS1_EE12_Vector_implC2Ev(%"struct.std::_Vector_base >::_Vector_impl"* %this) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Vector_base >::_Vector_impl"*, align 8 - store %"struct.std::_Vector_base >::_Vector_impl"* %this, %"struct.std::_Vector_base >::_Vector_impl"** %this.addr, align 8 - %this1 = load %"struct.std::_Vector_base >::_Vector_impl"*, %"struct.std::_Vector_base >::_Vector_impl"** %this.addr, align 8 - %0 = bitcast %"struct.std::_Vector_base >::_Vector_impl"* %this1 to %"class.std::allocator"* - call void @_ZNSaIP5INodeEC2Ev(%"class.std::allocator"* %0) #3 - %_M_start = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %this1, i32 0, i32 0 - store %class.INode** null, %class.INode*** %_M_start, align 8 - %_M_finish = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %this1, i32 0, i32 1 - store %class.INode** null, %class.INode*** %_M_finish, align 8 - %_M_end_of_storage = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %this1, i32 0, i32 2 - store %class.INode** null, %class.INode*** %_M_end_of_storage, align 8 - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSaIP5INodeEC2Ev(%"class.std::allocator"* %this) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"class.std::allocator"*, align 8 - store %"class.std::allocator"* %this, %"class.std::allocator"** %this.addr, align 8 - %this1 = load %"class.std::allocator"*, %"class.std::allocator"** %this.addr, align 8 - %0 = bitcast %"class.std::allocator"* %this1 to %"class.__gnu_cxx::new_allocator"* - call void @_ZN9__gnu_cxx13new_allocatorIP5INodeEC2Ev(%"class.__gnu_cxx::new_allocator"* %0) #3 - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx13new_allocatorIP5INodeEC2Ev(%"class.__gnu_cxx::new_allocator"* %this) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::new_allocator"*, align 8 - store %"class.__gnu_cxx::new_allocator"* %this, %"class.__gnu_cxx::new_allocator"** %this.addr, align 8 - %this1 = load %"class.__gnu_cxx::new_allocator"*, %"class.__gnu_cxx::new_allocator"** %this.addr, align 8 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZSt8_DestroyIPP5INodeS1_EvT_S3_RSaIT0_E(%class.INode** %__first, %class.INode** %__last, %"class.std::allocator"* dereferenceable(1) %0) #0 comdat { -entry: - %__first.addr = alloca %class.INode**, align 8 - %__last.addr = alloca %class.INode**, align 8 - %.addr = alloca %"class.std::allocator"*, align 8 - store %class.INode** %__first, %class.INode*** %__first.addr, align 8 - store %class.INode** %__last, %class.INode*** %__last.addr, align 8 - store %"class.std::allocator"* %0, %"class.std::allocator"** %.addr, align 8 - %1 = load %class.INode**, %class.INode*** %__first.addr, align 8 - %2 = load %class.INode**, %class.INode*** %__last.addr, align 8 - call void @_ZSt8_DestroyIPP5INodeEvT_S3_(%class.INode** %1, %class.INode** %2) - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local dereferenceable(1) %"class.std::allocator"* @_ZNSt12_Vector_baseIP5INodeSaIS1_EE19_M_get_Tp_allocatorEv(%"struct.std::_Vector_base"* %this) #6 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Vector_base"*, align 8 - store %"struct.std::_Vector_base"* %this, %"struct.std::_Vector_base"** %this.addr, align 8 - %this1 = load %"struct.std::_Vector_base"*, %"struct.std::_Vector_base"** %this.addr, align 8 - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %this1, i32 0, i32 0 - %0 = bitcast %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl to %"class.std::allocator"* - ret %"class.std::allocator"* %0 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt12_Vector_baseIP5INodeSaIS1_EED2Ev(%"struct.std::_Vector_base"* %this) unnamed_addr #0 comdat align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -entry: - %this.addr = alloca %"struct.std::_Vector_base"*, align 8 - %exn.slot = alloca i8* - %ehselector.slot = alloca i32 - store %"struct.std::_Vector_base"* %this, %"struct.std::_Vector_base"** %this.addr, align 8 - %this1 = load %"struct.std::_Vector_base"*, %"struct.std::_Vector_base"** %this.addr, align 8 - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %this1, i32 0, i32 0 - %_M_start = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl, i32 0, i32 0 - %0 = load %class.INode**, %class.INode*** %_M_start, align 8 - %_M_impl2 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %this1, i32 0, i32 0 - %_M_end_of_storage = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl2, i32 0, i32 2 - %1 = load %class.INode**, %class.INode*** %_M_end_of_storage, align 8 - %_M_impl3 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %this1, i32 0, i32 0 - %_M_start4 = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl3, i32 0, i32 0 - %2 = load %class.INode**, %class.INode*** %_M_start4, align 8 - %sub.ptr.lhs.cast = ptrtoint %class.INode** %1 to i64 - %sub.ptr.rhs.cast = ptrtoint %class.INode** %2 to i64 - %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast - %sub.ptr.div = sdiv exact i64 %sub.ptr.sub, 8 - invoke void @_ZNSt12_Vector_baseIP5INodeSaIS1_EE13_M_deallocateEPS1_m(%"struct.std::_Vector_base"* %this1, %class.INode** %0, i64 %sub.ptr.div) - to label %invoke.cont unwind label %lpad - -invoke.cont: ; preds = %entry - %_M_impl5 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %this1, i32 0, i32 0 - call void @_ZNSt12_Vector_baseIP5INodeSaIS1_EE12_Vector_implD2Ev(%"struct.std::_Vector_base >::_Vector_impl"* %_M_impl5) #3 - ret void - -lpad: ; preds = %entry - %3 = landingpad { i8*, i32 } - cleanup - %4 = extractvalue { i8*, i32 } %3, 0 - store i8* %4, i8** %exn.slot, align 8 - %5 = extractvalue { i8*, i32 } %3, 1 - store i32 %5, i32* %ehselector.slot, align 4 - %_M_impl6 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %this1, i32 0, i32 0 - call void @_ZNSt12_Vector_baseIP5INodeSaIS1_EE12_Vector_implD2Ev(%"struct.std::_Vector_base >::_Vector_impl"* %_M_impl6) #3 - br label %eh.resume - -eh.resume: ; preds = %lpad - %exn = load i8*, i8** %exn.slot, align 8 - %sel = load i32, i32* %ehselector.slot, align 4 - %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0 - %lpad.val7 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1 - resume { i8*, i32 } %lpad.val7 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZSt8_DestroyIPP5INodeEvT_S3_(%class.INode** %__first, %class.INode** %__last) #0 comdat { -entry: - %__first.addr = alloca %class.INode**, align 8 - %__last.addr = alloca %class.INode**, align 8 - store %class.INode** %__first, %class.INode*** %__first.addr, align 8 - store %class.INode** %__last, %class.INode*** %__last.addr, align 8 - %0 = load %class.INode**, %class.INode*** %__first.addr, align 8 - %1 = load %class.INode**, %class.INode*** %__last.addr, align 8 - call void @_ZNSt12_Destroy_auxILb1EE9__destroyIPP5INodeEEvT_S5_(%class.INode** %0, %class.INode** %1) - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSt12_Destroy_auxILb1EE9__destroyIPP5INodeEEvT_S5_(%class.INode** %0, %class.INode** %1) #6 comdat align 2 { -entry: - %.addr = alloca %class.INode**, align 8 - %.addr1 = alloca %class.INode**, align 8 - store %class.INode** %0, %class.INode*** %.addr, align 8 - store %class.INode** %1, %class.INode*** %.addr1, align 8 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt12_Vector_baseIP5INodeSaIS1_EE13_M_deallocateEPS1_m(%"struct.std::_Vector_base"* %this, %class.INode** %__p, i64 %__n) #0 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Vector_base"*, align 8 - %__p.addr = alloca %class.INode**, align 8 - %__n.addr = alloca i64, align 8 - store %"struct.std::_Vector_base"* %this, %"struct.std::_Vector_base"** %this.addr, align 8 - store %class.INode** %__p, %class.INode*** %__p.addr, align 8 - store i64 %__n, i64* %__n.addr, align 8 - %this1 = load %"struct.std::_Vector_base"*, %"struct.std::_Vector_base"** %this.addr, align 8 - %0 = load %class.INode**, %class.INode*** %__p.addr, align 8 - %tobool = icmp ne %class.INode** %0, null - br i1 %tobool, label %if.then, label %if.end - -if.then: ; preds = %entry - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %this1, i32 0, i32 0 - %1 = bitcast %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl to %"class.std::allocator"* - %2 = load %class.INode**, %class.INode*** %__p.addr, align 8 - %3 = load i64, i64* %__n.addr, align 8 - call void @_ZN9__gnu_cxx14__alloc_traitsISaIP5INodeEE10deallocateERS3_PS2_m(%"class.std::allocator"* dereferenceable(1) %1, %class.INode** %2, i64 %3) - br label %if.end - -if.end: ; preds = %if.then, %entry - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSt12_Vector_baseIP5INodeSaIS1_EE12_Vector_implD2Ev(%"struct.std::_Vector_base >::_Vector_impl"* %this) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Vector_base >::_Vector_impl"*, align 8 - store %"struct.std::_Vector_base >::_Vector_impl"* %this, %"struct.std::_Vector_base >::_Vector_impl"** %this.addr, align 8 - %this1 = load %"struct.std::_Vector_base >::_Vector_impl"*, %"struct.std::_Vector_base >::_Vector_impl"** %this.addr, align 8 - %0 = bitcast %"struct.std::_Vector_base >::_Vector_impl"* %this1 to %"class.std::allocator"* - call void @_ZNSaIP5INodeED2Ev(%"class.std::allocator"* %0) #3 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx14__alloc_traitsISaIP5INodeEE10deallocateERS3_PS2_m(%"class.std::allocator"* dereferenceable(1) %__a, %class.INode** %__p, i64 %__n) #0 comdat align 2 { -entry: - %__a.addr = alloca %"class.std::allocator"*, align 8 - %__p.addr = alloca %class.INode**, align 8 - %__n.addr = alloca i64, align 8 - store %"class.std::allocator"* %__a, %"class.std::allocator"** %__a.addr, align 8 - store %class.INode** %__p, %class.INode*** %__p.addr, align 8 - store i64 %__n, i64* %__n.addr, align 8 - %0 = load %"class.std::allocator"*, %"class.std::allocator"** %__a.addr, align 8 - %1 = bitcast %"class.std::allocator"* %0 to %"class.__gnu_cxx::new_allocator"* - %2 = load %class.INode**, %class.INode*** %__p.addr, align 8 - %3 = load i64, i64* %__n.addr, align 8 - call void @_ZN9__gnu_cxx13new_allocatorIP5INodeE10deallocateEPS2_m(%"class.__gnu_cxx::new_allocator"* %1, %class.INode** %2, i64 %3) - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx13new_allocatorIP5INodeE10deallocateEPS2_m(%"class.__gnu_cxx::new_allocator"* %this, %class.INode** %__p, i64 %0) #6 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::new_allocator"*, align 8 - %__p.addr = alloca %class.INode**, align 8 - %.addr = alloca i64, align 8 - store %"class.__gnu_cxx::new_allocator"* %this, %"class.__gnu_cxx::new_allocator"** %this.addr, align 8 - store %class.INode** %__p, %class.INode*** %__p.addr, align 8 - store i64 %0, i64* %.addr, align 8 - %this1 = load %"class.__gnu_cxx::new_allocator"*, %"class.__gnu_cxx::new_allocator"** %this.addr, align 8 - %1 = load %class.INode**, %class.INode*** %__p.addr, align 8 - %2 = bitcast %class.INode** %1 to i8* - call void @_ZdlPv(i8* %2) #3 - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSaIP5INodeED2Ev(%"class.std::allocator"* %this) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"class.std::allocator"*, align 8 - store %"class.std::allocator"* %this, %"class.std::allocator"** %this.addr, align 8 - %this1 = load %"class.std::allocator"*, %"class.std::allocator"** %this.addr, align 8 - %0 = bitcast %"class.std::allocator"* %this1 to %"class.__gnu_cxx::new_allocator"* - call void @_ZN9__gnu_cxx13new_allocatorIP5INodeED2Ev(%"class.__gnu_cxx::new_allocator"* %0) #3 - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx13new_allocatorIP5INodeED2Ev(%"class.__gnu_cxx::new_allocator"* %this) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::new_allocator"*, align 8 - store %"class.__gnu_cxx::new_allocator"* %this, %"class.__gnu_cxx::new_allocator"** %this.addr, align 8 - %this1 = load %"class.__gnu_cxx::new_allocator"*, %"class.__gnu_cxx::new_allocator"** %this.addr, align 8 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt6vectorIP5INodeSaIS1_EEC2ERKS3_(%"class.std::vector"* %this, %"class.std::vector"* dereferenceable(24) %__x) unnamed_addr #0 comdat align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -entry: - %this.addr = alloca %"class.std::vector"*, align 8 - %__x.addr = alloca %"class.std::vector"*, align 8 - %agg.tmp = alloca %"class.__gnu_cxx::__normal_iterator.10", align 8 - %exn.slot = alloca i8* - %ehselector.slot = alloca i32 - %agg.tmp5 = alloca %"class.__gnu_cxx::__normal_iterator.10", align 8 - store %"class.std::vector"* %this, %"class.std::vector"** %this.addr, align 8 - store %"class.std::vector"* %__x, %"class.std::vector"** %__x.addr, align 8 - %this1 = load %"class.std::vector"*, %"class.std::vector"** %this.addr, align 8 - %0 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %1 = load %"class.std::vector"*, %"class.std::vector"** %__x.addr, align 8 - %call = call i64 @_ZNKSt6vectorIP5INodeSaIS1_EE4sizeEv(%"class.std::vector"* %1) - %2 = load %"class.std::vector"*, %"class.std::vector"** %__x.addr, align 8 - %3 = bitcast %"class.std::vector"* %2 to %"struct.std::_Vector_base"* - %call2 = call dereferenceable(1) %"class.std::allocator"* @_ZNKSt12_Vector_baseIP5INodeSaIS1_EE19_M_get_Tp_allocatorEv(%"struct.std::_Vector_base"* %3) - %call3 = call dereferenceable(1) %"class.std::allocator"* @_ZN9__gnu_cxx14__alloc_traitsISaIP5INodeEE17_S_select_on_copyERKS3_(%"class.std::allocator"* dereferenceable(1) %call2) - call void @_ZNSt12_Vector_baseIP5INodeSaIS1_EEC2EmRKS2_(%"struct.std::_Vector_base"* %0, i64 %call, %"class.std::allocator"* dereferenceable(1) %call3) - %4 = load %"class.std::vector"*, %"class.std::vector"** %__x.addr, align 8 - %call4 = invoke %class.INode** @_ZNKSt6vectorIP5INodeSaIS1_EE5beginEv(%"class.std::vector"* %4) - to label %invoke.cont unwind label %lpad - -invoke.cont: ; preds = %entry - %coerce.dive = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.10", %"class.__gnu_cxx::__normal_iterator.10"* %agg.tmp, i32 0, i32 0 - store %class.INode** %call4, %class.INode*** %coerce.dive, align 8 - %5 = load %"class.std::vector"*, %"class.std::vector"** %__x.addr, align 8 - %call7 = invoke %class.INode** @_ZNKSt6vectorIP5INodeSaIS1_EE3endEv(%"class.std::vector"* %5) - to label %invoke.cont6 unwind label %lpad - -invoke.cont6: ; preds = %invoke.cont - %coerce.dive8 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.10", %"class.__gnu_cxx::__normal_iterator.10"* %agg.tmp5, i32 0, i32 0 - store %class.INode** %call7, %class.INode*** %coerce.dive8, align 8 - %6 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %6, i32 0, i32 0 - %_M_start = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl, i32 0, i32 0 - %7 = load %class.INode**, %class.INode*** %_M_start, align 8 - %8 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %call10 = invoke dereferenceable(1) %"class.std::allocator"* @_ZNSt12_Vector_baseIP5INodeSaIS1_EE19_M_get_Tp_allocatorEv(%"struct.std::_Vector_base"* %8) - to label %invoke.cont9 unwind label %lpad - -invoke.cont9: ; preds = %invoke.cont6 - %coerce.dive11 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.10", %"class.__gnu_cxx::__normal_iterator.10"* %agg.tmp, i32 0, i32 0 - %9 = load %class.INode**, %class.INode*** %coerce.dive11, align 8 - %coerce.dive12 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.10", %"class.__gnu_cxx::__normal_iterator.10"* %agg.tmp5, i32 0, i32 0 - %10 = load %class.INode**, %class.INode*** %coerce.dive12, align 8 - %call14 = invoke %class.INode** @_ZSt22__uninitialized_copy_aIN9__gnu_cxx17__normal_iteratorIPKP5INodeSt6vectorIS3_SaIS3_EEEEPS3_S3_ET0_T_SC_SB_RSaIT1_E(%class.INode** %9, %class.INode** %10, %class.INode** %7, %"class.std::allocator"* dereferenceable(1) %call10) - to label %invoke.cont13 unwind label %lpad - -invoke.cont13: ; preds = %invoke.cont9 - %11 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl15 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %11, i32 0, i32 0 - %_M_finish = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl15, i32 0, i32 1 - store %class.INode** %call14, %class.INode*** %_M_finish, align 8 - ret void - -lpad: ; preds = %invoke.cont9, %invoke.cont6, %invoke.cont, %entry - %12 = landingpad { i8*, i32 } - cleanup - %13 = extractvalue { i8*, i32 } %12, 0 - store i8* %13, i8** %exn.slot, align 8 - %14 = extractvalue { i8*, i32 } %12, 1 - store i32 %14, i32* %ehselector.slot, align 4 - %15 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - invoke void @_ZNSt12_Vector_baseIP5INodeSaIS1_EED2Ev(%"struct.std::_Vector_base"* %15) - to label %invoke.cont16 unwind label %terminate.lpad - -invoke.cont16: ; preds = %lpad - br label %eh.resume - -eh.resume: ; preds = %invoke.cont16 - %exn = load i8*, i8** %exn.slot, align 8 - %sel = load i32, i32* %ehselector.slot, align 4 - %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0 - %lpad.val17 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1 - resume { i8*, i32 } %lpad.val17 - -terminate.lpad: ; preds = %lpad - %16 = landingpad { i8*, i32 } - catch i8* null - %17 = extractvalue { i8*, i32 } %16, 0 - call void @__clang_call_terminate(i8* %17) #16 - unreachable -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZSt9make_heapIN9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS3_SaIS3_EEEE7NodeCmpEvT_SA_T0_(%class.INode** %__first.coerce, %class.INode** %__last.coerce) #0 comdat { -entry: - %__first = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %__last = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %__comp = alloca %struct.NodeCmp, align 1 - %__cmp = alloca %"struct.__gnu_cxx::__ops::_Iter_comp_iter", align 1 - %agg.tmp = alloca %struct.NodeCmp, align 1 - %agg.tmp2 = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %agg.tmp3 = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %coerce.dive = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %__first, i32 0, i32 0 - store %class.INode** %__first.coerce, %class.INode*** %coerce.dive, align 8 - %coerce.dive1 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %__last, i32 0, i32 0 - store %class.INode** %__last.coerce, %class.INode*** %coerce.dive1, align 8 - call void @_ZN9__gnu_cxx5__ops15_Iter_comp_iterI7NodeCmpEC2ES2_(%"struct.__gnu_cxx::__ops::_Iter_comp_iter"* %__cmp) - %0 = bitcast %"class.__gnu_cxx::__normal_iterator"* %agg.tmp2 to i8* - %1 = bitcast %"class.__gnu_cxx::__normal_iterator"* %__first to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %0, i8* align 8 %1, i64 8, i1 false) - %2 = bitcast %"class.__gnu_cxx::__normal_iterator"* %agg.tmp3 to i8* - %3 = bitcast %"class.__gnu_cxx::__normal_iterator"* %__last to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %2, i8* align 8 %3, i64 8, i1 false) - %coerce.dive4 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %agg.tmp2, i32 0, i32 0 - %4 = load %class.INode**, %class.INode*** %coerce.dive4, align 8 - %coerce.dive5 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %agg.tmp3, i32 0, i32 0 - %5 = load %class.INode**, %class.INode*** %coerce.dive5, align 8 - call void @_ZSt11__make_heapIN9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS3_SaIS3_EEEENS0_5__ops15_Iter_comp_iterI7NodeCmpEEEvT_SD_RT0_(%class.INode** %4, %class.INode** %5, %"struct.__gnu_cxx::__ops::_Iter_comp_iter"* dereferenceable(1) %__cmp) - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %class.INode** @_ZNSt6vectorIP5INodeSaIS1_EE5beginEv(%"class.std::vector"* %this) #0 comdat align 2 { -entry: - %retval = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %this.addr = alloca %"class.std::vector"*, align 8 - store %"class.std::vector"* %this, %"class.std::vector"** %this.addr, align 8 - %this1 = load %"class.std::vector"*, %"class.std::vector"** %this.addr, align 8 - %0 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %0, i32 0, i32 0 - %_M_start = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl, i32 0, i32 0 - call void @_ZN9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEC2ERKS3_(%"class.__gnu_cxx::__normal_iterator"* %retval, %class.INode*** dereferenceable(8) %_M_start) - %coerce.dive = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %retval, i32 0, i32 0 - %1 = load %class.INode**, %class.INode*** %coerce.dive, align 8 - ret %class.INode** %1 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %class.INode** @_ZNSt6vectorIP5INodeSaIS1_EE3endEv(%"class.std::vector"* %this) #0 comdat align 2 { -entry: - %retval = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %this.addr = alloca %"class.std::vector"*, align 8 - store %"class.std::vector"* %this, %"class.std::vector"** %this.addr, align 8 - %this1 = load %"class.std::vector"*, %"class.std::vector"** %this.addr, align 8 - %0 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %0, i32 0, i32 0 - %_M_finish = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl, i32 0, i32 1 - call void @_ZN9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEC2ERKS3_(%"class.__gnu_cxx::__normal_iterator"* %retval, %class.INode*** dereferenceable(8) %_M_finish) - %coerce.dive = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %retval, i32 0, i32 0 - %1 = load %class.INode**, %class.INode*** %coerce.dive, align 8 - ret %class.INode** %1 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local i64 @_ZNKSt6vectorIP5INodeSaIS1_EE4sizeEv(%"class.std::vector"* %this) #6 comdat align 2 { -entry: - %this.addr = alloca %"class.std::vector"*, align 8 - store %"class.std::vector"* %this, %"class.std::vector"** %this.addr, align 8 - %this1 = load %"class.std::vector"*, %"class.std::vector"** %this.addr, align 8 - %0 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %0, i32 0, i32 0 - %_M_finish = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl, i32 0, i32 1 - %1 = load %class.INode**, %class.INode*** %_M_finish, align 8 - %2 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl2 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %2, i32 0, i32 0 - %_M_start = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl2, i32 0, i32 0 - %3 = load %class.INode**, %class.INode*** %_M_start, align 8 - %sub.ptr.lhs.cast = ptrtoint %class.INode** %1 to i64 - %sub.ptr.rhs.cast = ptrtoint %class.INode** %3 to i64 - %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast - %sub.ptr.div = sdiv exact i64 %sub.ptr.sub, 8 - ret i64 %sub.ptr.div -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local dereferenceable(1) %"class.std::allocator"* @_ZN9__gnu_cxx14__alloc_traitsISaIP5INodeEE17_S_select_on_copyERKS3_(%"class.std::allocator"* dereferenceable(1) %__a) #6 comdat align 2 { -entry: - %__a.addr = alloca %"class.std::allocator"*, align 8 - store %"class.std::allocator"* %__a, %"class.std::allocator"** %__a.addr, align 8 - %0 = load %"class.std::allocator"*, %"class.std::allocator"** %__a.addr, align 8 - ret %"class.std::allocator"* %0 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local dereferenceable(1) %"class.std::allocator"* @_ZNKSt12_Vector_baseIP5INodeSaIS1_EE19_M_get_Tp_allocatorEv(%"struct.std::_Vector_base"* %this) #6 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Vector_base"*, align 8 - store %"struct.std::_Vector_base"* %this, %"struct.std::_Vector_base"** %this.addr, align 8 - %this1 = load %"struct.std::_Vector_base"*, %"struct.std::_Vector_base"** %this.addr, align 8 - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %this1, i32 0, i32 0 - %0 = bitcast %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl to %"class.std::allocator"* - ret %"class.std::allocator"* %0 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt12_Vector_baseIP5INodeSaIS1_EEC2EmRKS2_(%"struct.std::_Vector_base"* %this, i64 %__n, %"class.std::allocator"* dereferenceable(1) %__a) unnamed_addr #0 comdat align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -entry: - %this.addr = alloca %"struct.std::_Vector_base"*, align 8 - %__n.addr = alloca i64, align 8 - %__a.addr = alloca %"class.std::allocator"*, align 8 - %exn.slot = alloca i8* - %ehselector.slot = alloca i32 - store %"struct.std::_Vector_base"* %this, %"struct.std::_Vector_base"** %this.addr, align 8 - store i64 %__n, i64* %__n.addr, align 8 - store %"class.std::allocator"* %__a, %"class.std::allocator"** %__a.addr, align 8 - %this1 = load %"struct.std::_Vector_base"*, %"struct.std::_Vector_base"** %this.addr, align 8 - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %this1, i32 0, i32 0 - %0 = load %"class.std::allocator"*, %"class.std::allocator"** %__a.addr, align 8 - call void @_ZNSt12_Vector_baseIP5INodeSaIS1_EE12_Vector_implC2ERKS2_(%"struct.std::_Vector_base >::_Vector_impl"* %_M_impl, %"class.std::allocator"* dereferenceable(1) %0) - %1 = load i64, i64* %__n.addr, align 8 - invoke void @_ZNSt12_Vector_baseIP5INodeSaIS1_EE17_M_create_storageEm(%"struct.std::_Vector_base"* %this1, i64 %1) - to label %invoke.cont unwind label %lpad - -invoke.cont: ; preds = %entry - ret void - -lpad: ; preds = %entry - %2 = landingpad { i8*, i32 } - cleanup - %3 = extractvalue { i8*, i32 } %2, 0 - store i8* %3, i8** %exn.slot, align 8 - %4 = extractvalue { i8*, i32 } %2, 1 - store i32 %4, i32* %ehselector.slot, align 4 - call void @_ZNSt12_Vector_baseIP5INodeSaIS1_EE12_Vector_implD2Ev(%"struct.std::_Vector_base >::_Vector_impl"* %_M_impl) #3 - br label %eh.resume - -eh.resume: ; preds = %lpad - %exn = load i8*, i8** %exn.slot, align 8 - %sel = load i32, i32* %ehselector.slot, align 4 - %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0 - %lpad.val2 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1 - resume { i8*, i32 } %lpad.val2 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %class.INode** @_ZSt22__uninitialized_copy_aIN9__gnu_cxx17__normal_iteratorIPKP5INodeSt6vectorIS3_SaIS3_EEEEPS3_S3_ET0_T_SC_SB_RSaIT1_E(%class.INode** %__first.coerce, %class.INode** %__last.coerce, %class.INode** %__result, %"class.std::allocator"* dereferenceable(1) %0) #0 comdat { -entry: - %__first = alloca %"class.__gnu_cxx::__normal_iterator.10", align 8 - %__last = alloca %"class.__gnu_cxx::__normal_iterator.10", align 8 - %__result.addr = alloca %class.INode**, align 8 - %.addr = alloca %"class.std::allocator"*, align 8 - %agg.tmp = alloca %"class.__gnu_cxx::__normal_iterator.10", align 8 - %agg.tmp2 = alloca %"class.__gnu_cxx::__normal_iterator.10", align 8 - %coerce.dive = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.10", %"class.__gnu_cxx::__normal_iterator.10"* %__first, i32 0, i32 0 - store %class.INode** %__first.coerce, %class.INode*** %coerce.dive, align 8 - %coerce.dive1 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.10", %"class.__gnu_cxx::__normal_iterator.10"* %__last, i32 0, i32 0 - store %class.INode** %__last.coerce, %class.INode*** %coerce.dive1, align 8 - store %class.INode** %__result, %class.INode*** %__result.addr, align 8 - store %"class.std::allocator"* %0, %"class.std::allocator"** %.addr, align 8 - %1 = bitcast %"class.__gnu_cxx::__normal_iterator.10"* %agg.tmp to i8* - %2 = bitcast %"class.__gnu_cxx::__normal_iterator.10"* %__first to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %1, i8* align 8 %2, i64 8, i1 false) - %3 = bitcast %"class.__gnu_cxx::__normal_iterator.10"* %agg.tmp2 to i8* - %4 = bitcast %"class.__gnu_cxx::__normal_iterator.10"* %__last to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %3, i8* align 8 %4, i64 8, i1 false) - %5 = load %class.INode**, %class.INode*** %__result.addr, align 8 - %coerce.dive3 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.10", %"class.__gnu_cxx::__normal_iterator.10"* %agg.tmp, i32 0, i32 0 - %6 = load %class.INode**, %class.INode*** %coerce.dive3, align 8 - %coerce.dive4 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.10", %"class.__gnu_cxx::__normal_iterator.10"* %agg.tmp2, i32 0, i32 0 - %7 = load %class.INode**, %class.INode*** %coerce.dive4, align 8 - %call = call %class.INode** @_ZSt18uninitialized_copyIN9__gnu_cxx17__normal_iteratorIPKP5INodeSt6vectorIS3_SaIS3_EEEEPS3_ET0_T_SC_SB_(%class.INode** %6, %class.INode** %7, %class.INode** %5) - ret %class.INode** %call -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %class.INode** @_ZNKSt6vectorIP5INodeSaIS1_EE5beginEv(%"class.std::vector"* %this) #0 comdat align 2 { -entry: - %retval = alloca %"class.__gnu_cxx::__normal_iterator.10", align 8 - %this.addr = alloca %"class.std::vector"*, align 8 - store %"class.std::vector"* %this, %"class.std::vector"** %this.addr, align 8 - %this1 = load %"class.std::vector"*, %"class.std::vector"** %this.addr, align 8 - %0 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %0, i32 0, i32 0 - %_M_start = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl, i32 0, i32 0 - call void @_ZN9__gnu_cxx17__normal_iteratorIPKP5INodeSt6vectorIS2_SaIS2_EEEC2ERKS4_(%"class.__gnu_cxx::__normal_iterator.10"* %retval, %class.INode*** dereferenceable(8) %_M_start) - %coerce.dive = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.10", %"class.__gnu_cxx::__normal_iterator.10"* %retval, i32 0, i32 0 - %1 = load %class.INode**, %class.INode*** %coerce.dive, align 8 - ret %class.INode** %1 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %class.INode** @_ZNKSt6vectorIP5INodeSaIS1_EE3endEv(%"class.std::vector"* %this) #0 comdat align 2 { -entry: - %retval = alloca %"class.__gnu_cxx::__normal_iterator.10", align 8 - %this.addr = alloca %"class.std::vector"*, align 8 - store %"class.std::vector"* %this, %"class.std::vector"** %this.addr, align 8 - %this1 = load %"class.std::vector"*, %"class.std::vector"** %this.addr, align 8 - %0 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %0, i32 0, i32 0 - %_M_finish = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl, i32 0, i32 1 - call void @_ZN9__gnu_cxx17__normal_iteratorIPKP5INodeSt6vectorIS2_SaIS2_EEEC2ERKS4_(%"class.__gnu_cxx::__normal_iterator.10"* %retval, %class.INode*** dereferenceable(8) %_M_finish) - %coerce.dive = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.10", %"class.__gnu_cxx::__normal_iterator.10"* %retval, i32 0, i32 0 - %1 = load %class.INode**, %class.INode*** %coerce.dive, align 8 - ret %class.INode** %1 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSt12_Vector_baseIP5INodeSaIS1_EE12_Vector_implC2ERKS2_(%"struct.std::_Vector_base >::_Vector_impl"* %this, %"class.std::allocator"* dereferenceable(1) %__a) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Vector_base >::_Vector_impl"*, align 8 - %__a.addr = alloca %"class.std::allocator"*, align 8 - store %"struct.std::_Vector_base >::_Vector_impl"* %this, %"struct.std::_Vector_base >::_Vector_impl"** %this.addr, align 8 - store %"class.std::allocator"* %__a, %"class.std::allocator"** %__a.addr, align 8 - %this1 = load %"struct.std::_Vector_base >::_Vector_impl"*, %"struct.std::_Vector_base >::_Vector_impl"** %this.addr, align 8 - %0 = bitcast %"struct.std::_Vector_base >::_Vector_impl"* %this1 to %"class.std::allocator"* - %1 = load %"class.std::allocator"*, %"class.std::allocator"** %__a.addr, align 8 - call void @_ZNSaIP5INodeEC2ERKS1_(%"class.std::allocator"* %0, %"class.std::allocator"* dereferenceable(1) %1) #3 - %_M_start = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %this1, i32 0, i32 0 - store %class.INode** null, %class.INode*** %_M_start, align 8 - %_M_finish = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %this1, i32 0, i32 1 - store %class.INode** null, %class.INode*** %_M_finish, align 8 - %_M_end_of_storage = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %this1, i32 0, i32 2 - store %class.INode** null, %class.INode*** %_M_end_of_storage, align 8 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt12_Vector_baseIP5INodeSaIS1_EE17_M_create_storageEm(%"struct.std::_Vector_base"* %this, i64 %__n) #0 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Vector_base"*, align 8 - %__n.addr = alloca i64, align 8 - store %"struct.std::_Vector_base"* %this, %"struct.std::_Vector_base"** %this.addr, align 8 - store i64 %__n, i64* %__n.addr, align 8 - %this1 = load %"struct.std::_Vector_base"*, %"struct.std::_Vector_base"** %this.addr, align 8 - %0 = load i64, i64* %__n.addr, align 8 - %call = call %class.INode** @_ZNSt12_Vector_baseIP5INodeSaIS1_EE11_M_allocateEm(%"struct.std::_Vector_base"* %this1, i64 %0) - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %this1, i32 0, i32 0 - %_M_start = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl, i32 0, i32 0 - store %class.INode** %call, %class.INode*** %_M_start, align 8 - %_M_impl2 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %this1, i32 0, i32 0 - %_M_start3 = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl2, i32 0, i32 0 - %1 = load %class.INode**, %class.INode*** %_M_start3, align 8 - %_M_impl4 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %this1, i32 0, i32 0 - %_M_finish = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl4, i32 0, i32 1 - store %class.INode** %1, %class.INode*** %_M_finish, align 8 - %_M_impl5 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %this1, i32 0, i32 0 - %_M_start6 = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl5, i32 0, i32 0 - %2 = load %class.INode**, %class.INode*** %_M_start6, align 8 - %3 = load i64, i64* %__n.addr, align 8 - %add.ptr = getelementptr inbounds %class.INode*, %class.INode** %2, i64 %3 - %_M_impl7 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %this1, i32 0, i32 0 - %_M_end_of_storage = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl7, i32 0, i32 2 - store %class.INode** %add.ptr, %class.INode*** %_M_end_of_storage, align 8 - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSaIP5INodeEC2ERKS1_(%"class.std::allocator"* %this, %"class.std::allocator"* dereferenceable(1) %__a) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"class.std::allocator"*, align 8 - %__a.addr = alloca %"class.std::allocator"*, align 8 - store %"class.std::allocator"* %this, %"class.std::allocator"** %this.addr, align 8 - store %"class.std::allocator"* %__a, %"class.std::allocator"** %__a.addr, align 8 - %this1 = load %"class.std::allocator"*, %"class.std::allocator"** %this.addr, align 8 - %0 = bitcast %"class.std::allocator"* %this1 to %"class.__gnu_cxx::new_allocator"* - %1 = load %"class.std::allocator"*, %"class.std::allocator"** %__a.addr, align 8 - %2 = bitcast %"class.std::allocator"* %1 to %"class.__gnu_cxx::new_allocator"* - call void @_ZN9__gnu_cxx13new_allocatorIP5INodeEC2ERKS3_(%"class.__gnu_cxx::new_allocator"* %0, %"class.__gnu_cxx::new_allocator"* dereferenceable(1) %2) #3 - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx13new_allocatorIP5INodeEC2ERKS3_(%"class.__gnu_cxx::new_allocator"* %this, %"class.__gnu_cxx::new_allocator"* dereferenceable(1) %0) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::new_allocator"*, align 8 - %.addr = alloca %"class.__gnu_cxx::new_allocator"*, align 8 - store %"class.__gnu_cxx::new_allocator"* %this, %"class.__gnu_cxx::new_allocator"** %this.addr, align 8 - store %"class.__gnu_cxx::new_allocator"* %0, %"class.__gnu_cxx::new_allocator"** %.addr, align 8 - %this1 = load %"class.__gnu_cxx::new_allocator"*, %"class.__gnu_cxx::new_allocator"** %this.addr, align 8 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %class.INode** @_ZNSt12_Vector_baseIP5INodeSaIS1_EE11_M_allocateEm(%"struct.std::_Vector_base"* %this, i64 %__n) #0 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Vector_base"*, align 8 - %__n.addr = alloca i64, align 8 - store %"struct.std::_Vector_base"* %this, %"struct.std::_Vector_base"** %this.addr, align 8 - store i64 %__n, i64* %__n.addr, align 8 - %this1 = load %"struct.std::_Vector_base"*, %"struct.std::_Vector_base"** %this.addr, align 8 - %0 = load i64, i64* %__n.addr, align 8 - %cmp = icmp ne i64 %0, 0 - br i1 %cmp, label %cond.true, label %cond.false - -cond.true: ; preds = %entry - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %this1, i32 0, i32 0 - %1 = bitcast %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl to %"class.std::allocator"* - %2 = load i64, i64* %__n.addr, align 8 - %call = call %class.INode** @_ZN9__gnu_cxx14__alloc_traitsISaIP5INodeEE8allocateERS3_m(%"class.std::allocator"* dereferenceable(1) %1, i64 %2) - br label %cond.end - -cond.false: ; preds = %entry - br label %cond.end - -cond.end: ; preds = %cond.false, %cond.true - %cond = phi %class.INode** [ %call, %cond.true ], [ null, %cond.false ] - ret %class.INode** %cond -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %class.INode** @_ZN9__gnu_cxx14__alloc_traitsISaIP5INodeEE8allocateERS3_m(%"class.std::allocator"* dereferenceable(1) %__a, i64 %__n) #0 comdat align 2 { -entry: - %__a.addr = alloca %"class.std::allocator"*, align 8 - %__n.addr = alloca i64, align 8 - store %"class.std::allocator"* %__a, %"class.std::allocator"** %__a.addr, align 8 - store i64 %__n, i64* %__n.addr, align 8 - %0 = load %"class.std::allocator"*, %"class.std::allocator"** %__a.addr, align 8 - %1 = bitcast %"class.std::allocator"* %0 to %"class.__gnu_cxx::new_allocator"* - %2 = load i64, i64* %__n.addr, align 8 - %call = call %class.INode** @_ZN9__gnu_cxx13new_allocatorIP5INodeE8allocateEmPKv(%"class.__gnu_cxx::new_allocator"* %1, i64 %2, i8* null) - ret %class.INode** %call -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %class.INode** @_ZN9__gnu_cxx13new_allocatorIP5INodeE8allocateEmPKv(%"class.__gnu_cxx::new_allocator"* %this, i64 %__n, i8* %0) #0 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::new_allocator"*, align 8 - %__n.addr = alloca i64, align 8 - %.addr = alloca i8*, align 8 - store %"class.__gnu_cxx::new_allocator"* %this, %"class.__gnu_cxx::new_allocator"** %this.addr, align 8 - store i64 %__n, i64* %__n.addr, align 8 - store i8* %0, i8** %.addr, align 8 - %this1 = load %"class.__gnu_cxx::new_allocator"*, %"class.__gnu_cxx::new_allocator"** %this.addr, align 8 - %1 = load i64, i64* %__n.addr, align 8 - %call = call i64 @_ZNK9__gnu_cxx13new_allocatorIP5INodeE8max_sizeEv(%"class.__gnu_cxx::new_allocator"* %this1) #3 - %cmp = icmp ugt i64 %1, %call - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - call void @_ZSt17__throw_bad_allocv() #19 - unreachable - -if.end: ; preds = %entry - %2 = load i64, i64* %__n.addr, align 8 - %mul = mul i64 %2, 8 - %call2 = call i8* @_Znwm(i64 %mul) - %3 = bitcast i8* %call2 to %class.INode** - ret %class.INode** %3 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local i64 @_ZNK9__gnu_cxx13new_allocatorIP5INodeE8max_sizeEv(%"class.__gnu_cxx::new_allocator"* %this) #6 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::new_allocator"*, align 8 - store %"class.__gnu_cxx::new_allocator"* %this, %"class.__gnu_cxx::new_allocator"** %this.addr, align 8 - %this1 = load %"class.__gnu_cxx::new_allocator"*, %"class.__gnu_cxx::new_allocator"** %this.addr, align 8 - ret i64 2305843009213693951 -} - -; Function Attrs: noreturn -declare dso_local void @_ZSt17__throw_bad_allocv() #15 - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %class.INode** @_ZSt18uninitialized_copyIN9__gnu_cxx17__normal_iteratorIPKP5INodeSt6vectorIS3_SaIS3_EEEEPS3_ET0_T_SC_SB_(%class.INode** %__first.coerce, %class.INode** %__last.coerce, %class.INode** %__result) #0 comdat { -entry: - %__first = alloca %"class.__gnu_cxx::__normal_iterator.10", align 8 - %__last = alloca %"class.__gnu_cxx::__normal_iterator.10", align 8 - %__result.addr = alloca %class.INode**, align 8 - %__assignable = alloca i8, align 1 - %agg.tmp = alloca %"class.__gnu_cxx::__normal_iterator.10", align 8 - %agg.tmp2 = alloca %"class.__gnu_cxx::__normal_iterator.10", align 8 - %coerce.dive = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.10", %"class.__gnu_cxx::__normal_iterator.10"* %__first, i32 0, i32 0 - store %class.INode** %__first.coerce, %class.INode*** %coerce.dive, align 8 - %coerce.dive1 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.10", %"class.__gnu_cxx::__normal_iterator.10"* %__last, i32 0, i32 0 - store %class.INode** %__last.coerce, %class.INode*** %coerce.dive1, align 8 - store %class.INode** %__result, %class.INode*** %__result.addr, align 8 - store i8 1, i8* %__assignable, align 1 - %0 = bitcast %"class.__gnu_cxx::__normal_iterator.10"* %agg.tmp to i8* - %1 = bitcast %"class.__gnu_cxx::__normal_iterator.10"* %__first to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %0, i8* align 8 %1, i64 8, i1 false) - %2 = bitcast %"class.__gnu_cxx::__normal_iterator.10"* %agg.tmp2 to i8* - %3 = bitcast %"class.__gnu_cxx::__normal_iterator.10"* %__last to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %2, i8* align 8 %3, i64 8, i1 false) - %4 = load %class.INode**, %class.INode*** %__result.addr, align 8 - %coerce.dive3 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.10", %"class.__gnu_cxx::__normal_iterator.10"* %agg.tmp, i32 0, i32 0 - %5 = load %class.INode**, %class.INode*** %coerce.dive3, align 8 - %coerce.dive4 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.10", %"class.__gnu_cxx::__normal_iterator.10"* %agg.tmp2, i32 0, i32 0 - %6 = load %class.INode**, %class.INode*** %coerce.dive4, align 8 - %call = call %class.INode** @_ZNSt20__uninitialized_copyILb1EE13__uninit_copyIN9__gnu_cxx17__normal_iteratorIPKP5INodeSt6vectorIS5_SaIS5_EEEEPS5_EET0_T_SE_SD_(%class.INode** %5, %class.INode** %6, %class.INode** %4) - ret %class.INode** %call -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %class.INode** @_ZNSt20__uninitialized_copyILb1EE13__uninit_copyIN9__gnu_cxx17__normal_iteratorIPKP5INodeSt6vectorIS5_SaIS5_EEEEPS5_EET0_T_SE_SD_(%class.INode** %__first.coerce, %class.INode** %__last.coerce, %class.INode** %__result) #0 comdat align 2 { -entry: - %__first = alloca %"class.__gnu_cxx::__normal_iterator.10", align 8 - %__last = alloca %"class.__gnu_cxx::__normal_iterator.10", align 8 - %__result.addr = alloca %class.INode**, align 8 - %agg.tmp = alloca %"class.__gnu_cxx::__normal_iterator.10", align 8 - %agg.tmp2 = alloca %"class.__gnu_cxx::__normal_iterator.10", align 8 - %coerce.dive = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.10", %"class.__gnu_cxx::__normal_iterator.10"* %__first, i32 0, i32 0 - store %class.INode** %__first.coerce, %class.INode*** %coerce.dive, align 8 - %coerce.dive1 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.10", %"class.__gnu_cxx::__normal_iterator.10"* %__last, i32 0, i32 0 - store %class.INode** %__last.coerce, %class.INode*** %coerce.dive1, align 8 - store %class.INode** %__result, %class.INode*** %__result.addr, align 8 - %0 = bitcast %"class.__gnu_cxx::__normal_iterator.10"* %agg.tmp to i8* - %1 = bitcast %"class.__gnu_cxx::__normal_iterator.10"* %__first to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %0, i8* align 8 %1, i64 8, i1 false) - %2 = bitcast %"class.__gnu_cxx::__normal_iterator.10"* %agg.tmp2 to i8* - %3 = bitcast %"class.__gnu_cxx::__normal_iterator.10"* %__last to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %2, i8* align 8 %3, i64 8, i1 false) - %4 = load %class.INode**, %class.INode*** %__result.addr, align 8 - %coerce.dive3 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.10", %"class.__gnu_cxx::__normal_iterator.10"* %agg.tmp, i32 0, i32 0 - %5 = load %class.INode**, %class.INode*** %coerce.dive3, align 8 - %coerce.dive4 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.10", %"class.__gnu_cxx::__normal_iterator.10"* %agg.tmp2, i32 0, i32 0 - %6 = load %class.INode**, %class.INode*** %coerce.dive4, align 8 - %call = call %class.INode** @_ZSt4copyIN9__gnu_cxx17__normal_iteratorIPKP5INodeSt6vectorIS3_SaIS3_EEEEPS3_ET0_T_SC_SB_(%class.INode** %5, %class.INode** %6, %class.INode** %4) - ret %class.INode** %call -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %class.INode** @_ZSt4copyIN9__gnu_cxx17__normal_iteratorIPKP5INodeSt6vectorIS3_SaIS3_EEEEPS3_ET0_T_SC_SB_(%class.INode** %__first.coerce, %class.INode** %__last.coerce, %class.INode** %__result) #0 comdat { -entry: - %__first = alloca %"class.__gnu_cxx::__normal_iterator.10", align 8 - %__last = alloca %"class.__gnu_cxx::__normal_iterator.10", align 8 - %__result.addr = alloca %class.INode**, align 8 - %agg.tmp = alloca %"class.__gnu_cxx::__normal_iterator.10", align 8 - %agg.tmp2 = alloca %"class.__gnu_cxx::__normal_iterator.10", align 8 - %agg.tmp5 = alloca %"class.__gnu_cxx::__normal_iterator.10", align 8 - %agg.tmp6 = alloca %"class.__gnu_cxx::__normal_iterator.10", align 8 - %coerce.dive = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.10", %"class.__gnu_cxx::__normal_iterator.10"* %__first, i32 0, i32 0 - store %class.INode** %__first.coerce, %class.INode*** %coerce.dive, align 8 - %coerce.dive1 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.10", %"class.__gnu_cxx::__normal_iterator.10"* %__last, i32 0, i32 0 - store %class.INode** %__last.coerce, %class.INode*** %coerce.dive1, align 8 - store %class.INode** %__result, %class.INode*** %__result.addr, align 8 - %0 = bitcast %"class.__gnu_cxx::__normal_iterator.10"* %agg.tmp2 to i8* - %1 = bitcast %"class.__gnu_cxx::__normal_iterator.10"* %__first to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %0, i8* align 8 %1, i64 8, i1 false) - %coerce.dive3 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.10", %"class.__gnu_cxx::__normal_iterator.10"* %agg.tmp2, i32 0, i32 0 - %2 = load %class.INode**, %class.INode*** %coerce.dive3, align 8 - %call = call %class.INode** @_ZSt12__miter_baseIN9__gnu_cxx17__normal_iteratorIPKP5INodeSt6vectorIS3_SaIS3_EEEEET_SA_(%class.INode** %2) - %coerce.dive4 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.10", %"class.__gnu_cxx::__normal_iterator.10"* %agg.tmp, i32 0, i32 0 - store %class.INode** %call, %class.INode*** %coerce.dive4, align 8 - %3 = bitcast %"class.__gnu_cxx::__normal_iterator.10"* %agg.tmp6 to i8* - %4 = bitcast %"class.__gnu_cxx::__normal_iterator.10"* %__last to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %3, i8* align 8 %4, i64 8, i1 false) - %coerce.dive7 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.10", %"class.__gnu_cxx::__normal_iterator.10"* %agg.tmp6, i32 0, i32 0 - %5 = load %class.INode**, %class.INode*** %coerce.dive7, align 8 - %call8 = call %class.INode** @_ZSt12__miter_baseIN9__gnu_cxx17__normal_iteratorIPKP5INodeSt6vectorIS3_SaIS3_EEEEET_SA_(%class.INode** %5) - %coerce.dive9 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.10", %"class.__gnu_cxx::__normal_iterator.10"* %agg.tmp5, i32 0, i32 0 - store %class.INode** %call8, %class.INode*** %coerce.dive9, align 8 - %6 = load %class.INode**, %class.INode*** %__result.addr, align 8 - %coerce.dive10 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.10", %"class.__gnu_cxx::__normal_iterator.10"* %agg.tmp, i32 0, i32 0 - %7 = load %class.INode**, %class.INode*** %coerce.dive10, align 8 - %coerce.dive11 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.10", %"class.__gnu_cxx::__normal_iterator.10"* %agg.tmp5, i32 0, i32 0 - %8 = load %class.INode**, %class.INode*** %coerce.dive11, align 8 - %call12 = call %class.INode** @_ZSt14__copy_move_a2ILb0EN9__gnu_cxx17__normal_iteratorIPKP5INodeSt6vectorIS3_SaIS3_EEEEPS3_ET1_T0_SC_SB_(%class.INode** %7, %class.INode** %8, %class.INode** %6) - ret %class.INode** %call12 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %class.INode** @_ZSt14__copy_move_a2ILb0EN9__gnu_cxx17__normal_iteratorIPKP5INodeSt6vectorIS3_SaIS3_EEEEPS3_ET1_T0_SC_SB_(%class.INode** %__first.coerce, %class.INode** %__last.coerce, %class.INode** %__result) #0 comdat { -entry: - %__first = alloca %"class.__gnu_cxx::__normal_iterator.10", align 8 - %__last = alloca %"class.__gnu_cxx::__normal_iterator.10", align 8 - %__result.addr = alloca %class.INode**, align 8 - %agg.tmp = alloca %"class.__gnu_cxx::__normal_iterator.10", align 8 - %agg.tmp3 = alloca %"class.__gnu_cxx::__normal_iterator.10", align 8 - %coerce.dive = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.10", %"class.__gnu_cxx::__normal_iterator.10"* %__first, i32 0, i32 0 - store %class.INode** %__first.coerce, %class.INode*** %coerce.dive, align 8 - %coerce.dive1 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.10", %"class.__gnu_cxx::__normal_iterator.10"* %__last, i32 0, i32 0 - store %class.INode** %__last.coerce, %class.INode*** %coerce.dive1, align 8 - store %class.INode** %__result, %class.INode*** %__result.addr, align 8 - %0 = bitcast %"class.__gnu_cxx::__normal_iterator.10"* %agg.tmp to i8* - %1 = bitcast %"class.__gnu_cxx::__normal_iterator.10"* %__first to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %0, i8* align 8 %1, i64 8, i1 false) - %coerce.dive2 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.10", %"class.__gnu_cxx::__normal_iterator.10"* %agg.tmp, i32 0, i32 0 - %2 = load %class.INode**, %class.INode*** %coerce.dive2, align 8 - %call = call %class.INode** @_ZSt12__niter_baseIPKP5INodeSt6vectorIS1_SaIS1_EEET_N9__gnu_cxx17__normal_iteratorIS7_T0_EE(%class.INode** %2) - %3 = bitcast %"class.__gnu_cxx::__normal_iterator.10"* %agg.tmp3 to i8* - %4 = bitcast %"class.__gnu_cxx::__normal_iterator.10"* %__last to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %3, i8* align 8 %4, i64 8, i1 false) - %coerce.dive4 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.10", %"class.__gnu_cxx::__normal_iterator.10"* %agg.tmp3, i32 0, i32 0 - %5 = load %class.INode**, %class.INode*** %coerce.dive4, align 8 - %call5 = call %class.INode** @_ZSt12__niter_baseIPKP5INodeSt6vectorIS1_SaIS1_EEET_N9__gnu_cxx17__normal_iteratorIS7_T0_EE(%class.INode** %5) - %6 = load %class.INode**, %class.INode*** %__result.addr, align 8 - %call6 = call %class.INode** @_ZSt12__niter_baseIPP5INodeET_S3_(%class.INode** %6) - %call7 = call %class.INode** @_ZSt13__copy_move_aILb0EPKP5INodePS1_ET1_T0_S6_S5_(%class.INode** %call, %class.INode** %call5, %class.INode** %call6) - ret %class.INode** %call7 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local %class.INode** @_ZSt12__miter_baseIN9__gnu_cxx17__normal_iteratorIPKP5INodeSt6vectorIS3_SaIS3_EEEEET_SA_(%class.INode** %__it.coerce) #6 comdat { -entry: - %retval = alloca %"class.__gnu_cxx::__normal_iterator.10", align 8 - %__it = alloca %"class.__gnu_cxx::__normal_iterator.10", align 8 - %coerce.dive = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.10", %"class.__gnu_cxx::__normal_iterator.10"* %__it, i32 0, i32 0 - store %class.INode** %__it.coerce, %class.INode*** %coerce.dive, align 8 - %0 = bitcast %"class.__gnu_cxx::__normal_iterator.10"* %retval to i8* - %1 = bitcast %"class.__gnu_cxx::__normal_iterator.10"* %__it to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %0, i8* align 8 %1, i64 8, i1 false) - %coerce.dive1 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.10", %"class.__gnu_cxx::__normal_iterator.10"* %retval, i32 0, i32 0 - %2 = load %class.INode**, %class.INode*** %coerce.dive1, align 8 - ret %class.INode** %2 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %class.INode** @_ZSt13__copy_move_aILb0EPKP5INodePS1_ET1_T0_S6_S5_(%class.INode** %__first, %class.INode** %__last, %class.INode** %__result) #0 comdat { -entry: - %__first.addr = alloca %class.INode**, align 8 - %__last.addr = alloca %class.INode**, align 8 - %__result.addr = alloca %class.INode**, align 8 - %__simple = alloca i8, align 1 - store %class.INode** %__first, %class.INode*** %__first.addr, align 8 - store %class.INode** %__last, %class.INode*** %__last.addr, align 8 - store %class.INode** %__result, %class.INode*** %__result.addr, align 8 - store i8 1, i8* %__simple, align 1 - %0 = load %class.INode**, %class.INode*** %__first.addr, align 8 - %1 = load %class.INode**, %class.INode*** %__last.addr, align 8 - %2 = load %class.INode**, %class.INode*** %__result.addr, align 8 - %call = call %class.INode** @_ZNSt11__copy_moveILb0ELb1ESt26random_access_iterator_tagE8__copy_mIP5INodeEEPT_PKS5_S8_S6_(%class.INode** %0, %class.INode** %1, %class.INode** %2) - ret %class.INode** %call -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %class.INode** @_ZSt12__niter_baseIPKP5INodeSt6vectorIS1_SaIS1_EEET_N9__gnu_cxx17__normal_iteratorIS7_T0_EE(%class.INode** %__it.coerce) #0 comdat { -entry: - %__it = alloca %"class.__gnu_cxx::__normal_iterator.10", align 8 - %coerce.dive = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.10", %"class.__gnu_cxx::__normal_iterator.10"* %__it, i32 0, i32 0 - store %class.INode** %__it.coerce, %class.INode*** %coerce.dive, align 8 - %call = call dereferenceable(8) %class.INode*** @_ZNK9__gnu_cxx17__normal_iteratorIPKP5INodeSt6vectorIS2_SaIS2_EEE4baseEv(%"class.__gnu_cxx::__normal_iterator.10"* %__it) - %0 = load %class.INode**, %class.INode*** %call, align 8 - ret %class.INode** %0 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local %class.INode** @_ZSt12__niter_baseIPP5INodeET_S3_(%class.INode** %__it) #6 comdat { -entry: - %__it.addr = alloca %class.INode**, align 8 - store %class.INode** %__it, %class.INode*** %__it.addr, align 8 - %0 = load %class.INode**, %class.INode*** %__it.addr, align 8 - ret %class.INode** %0 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local %class.INode** @_ZNSt11__copy_moveILb0ELb1ESt26random_access_iterator_tagE8__copy_mIP5INodeEEPT_PKS5_S8_S6_(%class.INode** %__first, %class.INode** %__last, %class.INode** %__result) #6 comdat align 2 { -entry: - %__first.addr = alloca %class.INode**, align 8 - %__last.addr = alloca %class.INode**, align 8 - %__result.addr = alloca %class.INode**, align 8 - %_Num = alloca i64, align 8 - store %class.INode** %__first, %class.INode*** %__first.addr, align 8 - store %class.INode** %__last, %class.INode*** %__last.addr, align 8 - store %class.INode** %__result, %class.INode*** %__result.addr, align 8 - %0 = load %class.INode**, %class.INode*** %__last.addr, align 8 - %1 = load %class.INode**, %class.INode*** %__first.addr, align 8 - %sub.ptr.lhs.cast = ptrtoint %class.INode** %0 to i64 - %sub.ptr.rhs.cast = ptrtoint %class.INode** %1 to i64 - %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast - %sub.ptr.div = sdiv exact i64 %sub.ptr.sub, 8 - store i64 %sub.ptr.div, i64* %_Num, align 8 - %2 = load i64, i64* %_Num, align 8 - %tobool = icmp ne i64 %2, 0 - br i1 %tobool, label %if.then, label %if.end - -if.then: ; preds = %entry - %3 = load %class.INode**, %class.INode*** %__result.addr, align 8 - %4 = bitcast %class.INode** %3 to i8* - %5 = load %class.INode**, %class.INode*** %__first.addr, align 8 - %6 = bitcast %class.INode** %5 to i8* - %7 = load i64, i64* %_Num, align 8 - %mul = mul i64 8, %7 - call void @llvm.memmove.p0i8.p0i8.i64(i8* align 8 %4, i8* align 8 %6, i64 %mul, i1 false) - br label %if.end - -if.end: ; preds = %if.then, %entry - %8 = load %class.INode**, %class.INode*** %__result.addr, align 8 - %9 = load i64, i64* %_Num, align 8 - %add.ptr = getelementptr inbounds %class.INode*, %class.INode** %8, i64 %9 - ret %class.INode** %add.ptr -} - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1 immarg) #4 - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local dereferenceable(8) %class.INode*** @_ZNK9__gnu_cxx17__normal_iteratorIPKP5INodeSt6vectorIS2_SaIS2_EEE4baseEv(%"class.__gnu_cxx::__normal_iterator.10"* %this) #6 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::__normal_iterator.10"*, align 8 - store %"class.__gnu_cxx::__normal_iterator.10"* %this, %"class.__gnu_cxx::__normal_iterator.10"** %this.addr, align 8 - %this1 = load %"class.__gnu_cxx::__normal_iterator.10"*, %"class.__gnu_cxx::__normal_iterator.10"** %this.addr, align 8 - %_M_current = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.10", %"class.__gnu_cxx::__normal_iterator.10"* %this1, i32 0, i32 0 - ret %class.INode*** %_M_current -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx17__normal_iteratorIPKP5INodeSt6vectorIS2_SaIS2_EEEC2ERKS4_(%"class.__gnu_cxx::__normal_iterator.10"* %this, %class.INode*** dereferenceable(8) %__i) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::__normal_iterator.10"*, align 8 - %__i.addr = alloca %class.INode***, align 8 - store %"class.__gnu_cxx::__normal_iterator.10"* %this, %"class.__gnu_cxx::__normal_iterator.10"** %this.addr, align 8 - store %class.INode*** %__i, %class.INode**** %__i.addr, align 8 - %this1 = load %"class.__gnu_cxx::__normal_iterator.10"*, %"class.__gnu_cxx::__normal_iterator.10"** %this.addr, align 8 - %_M_current = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.10", %"class.__gnu_cxx::__normal_iterator.10"* %this1, i32 0, i32 0 - %0 = load %class.INode***, %class.INode**** %__i.addr, align 8 - %1 = load %class.INode**, %class.INode*** %0, align 8 - store %class.INode** %1, %class.INode*** %_M_current, align 8 - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx5__ops15_Iter_comp_iterI7NodeCmpEC2ES2_(%"struct.__gnu_cxx::__ops::_Iter_comp_iter"* %this) unnamed_addr #6 comdat align 2 { -entry: - %__comp = alloca %struct.NodeCmp, align 1 - %this.addr = alloca %"struct.__gnu_cxx::__ops::_Iter_comp_iter"*, align 8 - store %"struct.__gnu_cxx::__ops::_Iter_comp_iter"* %this, %"struct.__gnu_cxx::__ops::_Iter_comp_iter"** %this.addr, align 8 - %this1 = load %"struct.__gnu_cxx::__ops::_Iter_comp_iter"*, %"struct.__gnu_cxx::__ops::_Iter_comp_iter"** %this.addr, align 8 - %_M_comp = getelementptr inbounds %"struct.__gnu_cxx::__ops::_Iter_comp_iter", %"struct.__gnu_cxx::__ops::_Iter_comp_iter"* %this1, i32 0, i32 0 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZSt11__make_heapIN9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS3_SaIS3_EEEENS0_5__ops15_Iter_comp_iterI7NodeCmpEEEvT_SD_RT0_(%class.INode** %__first.coerce, %class.INode** %__last.coerce, %"struct.__gnu_cxx::__ops::_Iter_comp_iter"* dereferenceable(1) %__comp) #0 comdat { -entry: - %__first = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %__last = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %__comp.addr = alloca %"struct.__gnu_cxx::__ops::_Iter_comp_iter"*, align 8 - %__len = alloca i64, align 8 - %__parent = alloca i64, align 8 - %__value = alloca %class.INode*, align 8 - %ref.tmp = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %agg.tmp = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %agg.tmp6 = alloca %"struct.__gnu_cxx::__ops::_Iter_comp_iter", align 1 - %coerce.dive = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %__first, i32 0, i32 0 - store %class.INode** %__first.coerce, %class.INode*** %coerce.dive, align 8 - %coerce.dive1 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %__last, i32 0, i32 0 - store %class.INode** %__last.coerce, %class.INode*** %coerce.dive1, align 8 - store %"struct.__gnu_cxx::__ops::_Iter_comp_iter"* %__comp, %"struct.__gnu_cxx::__ops::_Iter_comp_iter"** %__comp.addr, align 8 - %call = call i64 @_ZN9__gnu_cxxmiIPP5INodeSt6vectorIS2_SaIS2_EEEENS_17__normal_iteratorIT_T0_E15difference_typeERKSA_SD_(%"class.__gnu_cxx::__normal_iterator"* dereferenceable(8) %__last, %"class.__gnu_cxx::__normal_iterator"* dereferenceable(8) %__first) - %cmp = icmp slt i64 %call, 2 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - br label %return - -if.end: ; preds = %entry - %call2 = call i64 @_ZN9__gnu_cxxmiIPP5INodeSt6vectorIS2_SaIS2_EEEENS_17__normal_iteratorIT_T0_E15difference_typeERKSA_SD_(%"class.__gnu_cxx::__normal_iterator"* dereferenceable(8) %__last, %"class.__gnu_cxx::__normal_iterator"* dereferenceable(8) %__first) - store i64 %call2, i64* %__len, align 8 - %0 = load i64, i64* %__len, align 8 - %sub = sub nsw i64 %0, 2 - %div = sdiv i64 %sub, 2 - store i64 %div, i64* %__parent, align 8 - br label %while.body - -while.body: ; preds = %if.end, %if.end10 - %1 = load i64, i64* %__parent, align 8 - %call3 = call %class.INode** @_ZNK9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEplEl(%"class.__gnu_cxx::__normal_iterator"* %__first, i64 %1) - %coerce.dive4 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %ref.tmp, i32 0, i32 0 - store %class.INode** %call3, %class.INode*** %coerce.dive4, align 8 - %call5 = call dereferenceable(8) %class.INode** @_ZNK9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEdeEv(%"class.__gnu_cxx::__normal_iterator"* %ref.tmp) - %2 = load %class.INode*, %class.INode** %call5, align 8 - store %class.INode* %2, %class.INode** %__value, align 8 - %3 = bitcast %"class.__gnu_cxx::__normal_iterator"* %agg.tmp to i8* - %4 = bitcast %"class.__gnu_cxx::__normal_iterator"* %__first to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %3, i8* align 8 %4, i64 8, i1 false) - %5 = load i64, i64* %__parent, align 8 - %6 = load i64, i64* %__len, align 8 - %7 = load %class.INode*, %class.INode** %__value, align 8 - %8 = load %"struct.__gnu_cxx::__ops::_Iter_comp_iter"*, %"struct.__gnu_cxx::__ops::_Iter_comp_iter"** %__comp.addr, align 8 - %9 = bitcast %"struct.__gnu_cxx::__ops::_Iter_comp_iter"* %agg.tmp6 to i8* - %10 = bitcast %"struct.__gnu_cxx::__ops::_Iter_comp_iter"* %8 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %9, i8* align 1 %10, i64 1, i1 false) - %coerce.dive7 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %agg.tmp, i32 0, i32 0 - %11 = load %class.INode**, %class.INode*** %coerce.dive7, align 8 - call void @_ZSt13__adjust_heapIN9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS3_SaIS3_EEEElS3_NS0_5__ops15_Iter_comp_iterI7NodeCmpEEEvT_T0_SE_T1_T2_(%class.INode** %11, i64 %5, i64 %6, %class.INode* %7) - %12 = load i64, i64* %__parent, align 8 - %cmp8 = icmp eq i64 %12, 0 - br i1 %cmp8, label %if.then9, label %if.end10 - -if.then9: ; preds = %while.body - br label %return - -if.end10: ; preds = %while.body - %13 = load i64, i64* %__parent, align 8 - %dec = add nsw i64 %13, -1 - store i64 %dec, i64* %__parent, align 8 - br label %while.body - -return: ; preds = %if.then9, %if.then - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local i64 @_ZN9__gnu_cxxmiIPP5INodeSt6vectorIS2_SaIS2_EEEENS_17__normal_iteratorIT_T0_E15difference_typeERKSA_SD_(%"class.__gnu_cxx::__normal_iterator"* dereferenceable(8) %__lhs, %"class.__gnu_cxx::__normal_iterator"* dereferenceable(8) %__rhs) #0 comdat { -entry: - %__lhs.addr = alloca %"class.__gnu_cxx::__normal_iterator"*, align 8 - %__rhs.addr = alloca %"class.__gnu_cxx::__normal_iterator"*, align 8 - store %"class.__gnu_cxx::__normal_iterator"* %__lhs, %"class.__gnu_cxx::__normal_iterator"** %__lhs.addr, align 8 - store %"class.__gnu_cxx::__normal_iterator"* %__rhs, %"class.__gnu_cxx::__normal_iterator"** %__rhs.addr, align 8 - %0 = load %"class.__gnu_cxx::__normal_iterator"*, %"class.__gnu_cxx::__normal_iterator"** %__lhs.addr, align 8 - %call = call dereferenceable(8) %class.INode*** @_ZNK9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEE4baseEv(%"class.__gnu_cxx::__normal_iterator"* %0) - %1 = load %class.INode**, %class.INode*** %call, align 8 - %2 = load %"class.__gnu_cxx::__normal_iterator"*, %"class.__gnu_cxx::__normal_iterator"** %__rhs.addr, align 8 - %call1 = call dereferenceable(8) %class.INode*** @_ZNK9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEE4baseEv(%"class.__gnu_cxx::__normal_iterator"* %2) - %3 = load %class.INode**, %class.INode*** %call1, align 8 - %sub.ptr.lhs.cast = ptrtoint %class.INode** %1 to i64 - %sub.ptr.rhs.cast = ptrtoint %class.INode** %3 to i64 - %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast - %sub.ptr.div = sdiv exact i64 %sub.ptr.sub, 8 - ret i64 %sub.ptr.div -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %class.INode** @_ZNK9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEplEl(%"class.__gnu_cxx::__normal_iterator"* %this, i64 %__n) #0 comdat align 2 { -entry: - %retval = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %this.addr = alloca %"class.__gnu_cxx::__normal_iterator"*, align 8 - %__n.addr = alloca i64, align 8 - %ref.tmp = alloca %class.INode**, align 8 - store %"class.__gnu_cxx::__normal_iterator"* %this, %"class.__gnu_cxx::__normal_iterator"** %this.addr, align 8 - store i64 %__n, i64* %__n.addr, align 8 - %this1 = load %"class.__gnu_cxx::__normal_iterator"*, %"class.__gnu_cxx::__normal_iterator"** %this.addr, align 8 - %_M_current = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %this1, i32 0, i32 0 - %0 = load %class.INode**, %class.INode*** %_M_current, align 8 - %1 = load i64, i64* %__n.addr, align 8 - %add.ptr = getelementptr inbounds %class.INode*, %class.INode** %0, i64 %1 - store %class.INode** %add.ptr, %class.INode*** %ref.tmp, align 8 - call void @_ZN9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEC2ERKS3_(%"class.__gnu_cxx::__normal_iterator"* %retval, %class.INode*** dereferenceable(8) %ref.tmp) - %coerce.dive = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %retval, i32 0, i32 0 - %2 = load %class.INode**, %class.INode*** %coerce.dive, align 8 - ret %class.INode** %2 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local dereferenceable(8) %class.INode** @_ZNK9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEdeEv(%"class.__gnu_cxx::__normal_iterator"* %this) #6 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::__normal_iterator"*, align 8 - store %"class.__gnu_cxx::__normal_iterator"* %this, %"class.__gnu_cxx::__normal_iterator"** %this.addr, align 8 - %this1 = load %"class.__gnu_cxx::__normal_iterator"*, %"class.__gnu_cxx::__normal_iterator"** %this.addr, align 8 - %_M_current = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %this1, i32 0, i32 0 - %0 = load %class.INode**, %class.INode*** %_M_current, align 8 - ret %class.INode** %0 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZSt13__adjust_heapIN9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS3_SaIS3_EEEElS3_NS0_5__ops15_Iter_comp_iterI7NodeCmpEEEvT_T0_SE_T1_T2_(%class.INode** %__first.coerce, i64 %__holeIndex, i64 %__len, %class.INode* %__value) #0 comdat { -entry: - %__first = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %__comp = alloca %"struct.__gnu_cxx::__ops::_Iter_comp_iter", align 1 - %__holeIndex.addr = alloca i64, align 8 - %__len.addr = alloca i64, align 8 - %__value.addr = alloca %class.INode*, align 8 - %__topIndex = alloca i64, align 8 - %__secondChild = alloca i64, align 8 - %agg.tmp = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %agg.tmp2 = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %ref.tmp = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %ref.tmp12 = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %ref.tmp23 = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %ref.tmp28 = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %__cmp = alloca %"struct.__gnu_cxx::__ops::_Iter_comp_val", align 1 - %agg.tmp34 = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %coerce.dive = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %__first, i32 0, i32 0 - store %class.INode** %__first.coerce, %class.INode*** %coerce.dive, align 8 - store i64 %__holeIndex, i64* %__holeIndex.addr, align 8 - store i64 %__len, i64* %__len.addr, align 8 - store %class.INode* %__value, %class.INode** %__value.addr, align 8 - %0 = load i64, i64* %__holeIndex.addr, align 8 - store i64 %0, i64* %__topIndex, align 8 - %1 = load i64, i64* %__holeIndex.addr, align 8 - store i64 %1, i64* %__secondChild, align 8 - br label %while.cond - -while.cond: ; preds = %if.end, %entry - %2 = load i64, i64* %__secondChild, align 8 - %3 = load i64, i64* %__len.addr, align 8 - %sub = sub nsw i64 %3, 1 - %div = sdiv i64 %sub, 2 - %cmp = icmp slt i64 %2, %div - br i1 %cmp, label %while.body, label %while.end - -while.body: ; preds = %while.cond - %4 = load i64, i64* %__secondChild, align 8 - %add = add nsw i64 %4, 1 - %mul = mul nsw i64 2, %add - store i64 %mul, i64* %__secondChild, align 8 - %5 = load i64, i64* %__secondChild, align 8 - %call = call %class.INode** @_ZNK9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEplEl(%"class.__gnu_cxx::__normal_iterator"* %__first, i64 %5) - %coerce.dive1 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %agg.tmp, i32 0, i32 0 - store %class.INode** %call, %class.INode*** %coerce.dive1, align 8 - %6 = load i64, i64* %__secondChild, align 8 - %sub3 = sub nsw i64 %6, 1 - %call4 = call %class.INode** @_ZNK9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEplEl(%"class.__gnu_cxx::__normal_iterator"* %__first, i64 %sub3) - %coerce.dive5 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %agg.tmp2, i32 0, i32 0 - store %class.INode** %call4, %class.INode*** %coerce.dive5, align 8 - %coerce.dive6 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %agg.tmp, i32 0, i32 0 - %7 = load %class.INode**, %class.INode*** %coerce.dive6, align 8 - %coerce.dive7 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %agg.tmp2, i32 0, i32 0 - %8 = load %class.INode**, %class.INode*** %coerce.dive7, align 8 - %call8 = call zeroext i1 @_ZN9__gnu_cxx5__ops15_Iter_comp_iterI7NodeCmpEclINS_17__normal_iteratorIPP5INodeSt6vectorIS7_SaIS7_EEEESC_EEbT_T0_(%"struct.__gnu_cxx::__ops::_Iter_comp_iter"* %__comp, %class.INode** %7, %class.INode** %8) - br i1 %call8, label %if.then, label %if.end - -if.then: ; preds = %while.body - %9 = load i64, i64* %__secondChild, align 8 - %dec = add nsw i64 %9, -1 - store i64 %dec, i64* %__secondChild, align 8 - br label %if.end - -if.end: ; preds = %if.then, %while.body - %10 = load i64, i64* %__secondChild, align 8 - %call9 = call %class.INode** @_ZNK9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEplEl(%"class.__gnu_cxx::__normal_iterator"* %__first, i64 %10) - %coerce.dive10 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %ref.tmp, i32 0, i32 0 - store %class.INode** %call9, %class.INode*** %coerce.dive10, align 8 - %call11 = call dereferenceable(8) %class.INode** @_ZNK9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEdeEv(%"class.__gnu_cxx::__normal_iterator"* %ref.tmp) - %11 = load %class.INode*, %class.INode** %call11, align 8 - %12 = load i64, i64* %__holeIndex.addr, align 8 - %call13 = call %class.INode** @_ZNK9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEplEl(%"class.__gnu_cxx::__normal_iterator"* %__first, i64 %12) - %coerce.dive14 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %ref.tmp12, i32 0, i32 0 - store %class.INode** %call13, %class.INode*** %coerce.dive14, align 8 - %call15 = call dereferenceable(8) %class.INode** @_ZNK9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEdeEv(%"class.__gnu_cxx::__normal_iterator"* %ref.tmp12) - store %class.INode* %11, %class.INode** %call15, align 8 - %13 = load i64, i64* %__secondChild, align 8 - store i64 %13, i64* %__holeIndex.addr, align 8 - br label %while.cond - -while.end: ; preds = %while.cond - %14 = load i64, i64* %__len.addr, align 8 - %and = and i64 %14, 1 - %cmp16 = icmp eq i64 %and, 0 - br i1 %cmp16, label %land.lhs.true, label %if.end33 - -land.lhs.true: ; preds = %while.end - %15 = load i64, i64* %__secondChild, align 8 - %16 = load i64, i64* %__len.addr, align 8 - %sub17 = sub nsw i64 %16, 2 - %div18 = sdiv i64 %sub17, 2 - %cmp19 = icmp eq i64 %15, %div18 - br i1 %cmp19, label %if.then20, label %if.end33 - -if.then20: ; preds = %land.lhs.true - %17 = load i64, i64* %__secondChild, align 8 - %add21 = add nsw i64 %17, 1 - %mul22 = mul nsw i64 2, %add21 - store i64 %mul22, i64* %__secondChild, align 8 - %18 = load i64, i64* %__secondChild, align 8 - %sub24 = sub nsw i64 %18, 1 - %call25 = call %class.INode** @_ZNK9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEplEl(%"class.__gnu_cxx::__normal_iterator"* %__first, i64 %sub24) - %coerce.dive26 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %ref.tmp23, i32 0, i32 0 - store %class.INode** %call25, %class.INode*** %coerce.dive26, align 8 - %call27 = call dereferenceable(8) %class.INode** @_ZNK9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEdeEv(%"class.__gnu_cxx::__normal_iterator"* %ref.tmp23) - %19 = load %class.INode*, %class.INode** %call27, align 8 - %20 = load i64, i64* %__holeIndex.addr, align 8 - %call29 = call %class.INode** @_ZNK9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEplEl(%"class.__gnu_cxx::__normal_iterator"* %__first, i64 %20) - %coerce.dive30 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %ref.tmp28, i32 0, i32 0 - store %class.INode** %call29, %class.INode*** %coerce.dive30, align 8 - %call31 = call dereferenceable(8) %class.INode** @_ZNK9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEdeEv(%"class.__gnu_cxx::__normal_iterator"* %ref.tmp28) - store %class.INode* %19, %class.INode** %call31, align 8 - %21 = load i64, i64* %__secondChild, align 8 - %sub32 = sub nsw i64 %21, 1 - store i64 %sub32, i64* %__holeIndex.addr, align 8 - br label %if.end33 - -if.end33: ; preds = %if.then20, %land.lhs.true, %while.end - call void @_ZN9__gnu_cxx5__ops14_Iter_comp_valI7NodeCmpEC2ERKNS0_15_Iter_comp_iterIS2_EE(%"struct.__gnu_cxx::__ops::_Iter_comp_val"* %__cmp, %"struct.__gnu_cxx::__ops::_Iter_comp_iter"* dereferenceable(1) %__comp) - %22 = bitcast %"class.__gnu_cxx::__normal_iterator"* %agg.tmp34 to i8* - %23 = bitcast %"class.__gnu_cxx::__normal_iterator"* %__first to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %22, i8* align 8 %23, i64 8, i1 false) - %24 = load i64, i64* %__holeIndex.addr, align 8 - %25 = load i64, i64* %__topIndex, align 8 - %26 = load %class.INode*, %class.INode** %__value.addr, align 8 - %coerce.dive35 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %agg.tmp34, i32 0, i32 0 - %27 = load %class.INode**, %class.INode*** %coerce.dive35, align 8 - call void @_ZSt11__push_heapIN9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS3_SaIS3_EEEElS3_NS0_5__ops14_Iter_comp_valI7NodeCmpEEEvT_T0_SE_T1_RT2_(%class.INode** %27, i64 %24, i64 %25, %class.INode* %26, %"struct.__gnu_cxx::__ops::_Iter_comp_val"* dereferenceable(1) %__cmp) - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local dereferenceable(8) %class.INode*** @_ZNK9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEE4baseEv(%"class.__gnu_cxx::__normal_iterator"* %this) #6 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::__normal_iterator"*, align 8 - store %"class.__gnu_cxx::__normal_iterator"* %this, %"class.__gnu_cxx::__normal_iterator"** %this.addr, align 8 - %this1 = load %"class.__gnu_cxx::__normal_iterator"*, %"class.__gnu_cxx::__normal_iterator"** %this.addr, align 8 - %_M_current = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %this1, i32 0, i32 0 - ret %class.INode*** %_M_current -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEC2ERKS3_(%"class.__gnu_cxx::__normal_iterator"* %this, %class.INode*** dereferenceable(8) %__i) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::__normal_iterator"*, align 8 - %__i.addr = alloca %class.INode***, align 8 - store %"class.__gnu_cxx::__normal_iterator"* %this, %"class.__gnu_cxx::__normal_iterator"** %this.addr, align 8 - store %class.INode*** %__i, %class.INode**** %__i.addr, align 8 - %this1 = load %"class.__gnu_cxx::__normal_iterator"*, %"class.__gnu_cxx::__normal_iterator"** %this.addr, align 8 - %_M_current = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %this1, i32 0, i32 0 - %0 = load %class.INode***, %class.INode**** %__i.addr, align 8 - %1 = load %class.INode**, %class.INode*** %0, align 8 - store %class.INode** %1, %class.INode*** %_M_current, align 8 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local zeroext i1 @_ZN9__gnu_cxx5__ops15_Iter_comp_iterI7NodeCmpEclINS_17__normal_iteratorIPP5INodeSt6vectorIS7_SaIS7_EEEESC_EEbT_T0_(%"struct.__gnu_cxx::__ops::_Iter_comp_iter"* %this, %class.INode** %__it1.coerce, %class.INode** %__it2.coerce) #0 comdat align 2 { -entry: - %__it1 = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %__it2 = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %this.addr = alloca %"struct.__gnu_cxx::__ops::_Iter_comp_iter"*, align 8 - %coerce.dive = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %__it1, i32 0, i32 0 - store %class.INode** %__it1.coerce, %class.INode*** %coerce.dive, align 8 - %coerce.dive1 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %__it2, i32 0, i32 0 - store %class.INode** %__it2.coerce, %class.INode*** %coerce.dive1, align 8 - store %"struct.__gnu_cxx::__ops::_Iter_comp_iter"* %this, %"struct.__gnu_cxx::__ops::_Iter_comp_iter"** %this.addr, align 8 - %this2 = load %"struct.__gnu_cxx::__ops::_Iter_comp_iter"*, %"struct.__gnu_cxx::__ops::_Iter_comp_iter"** %this.addr, align 8 - %_M_comp = getelementptr inbounds %"struct.__gnu_cxx::__ops::_Iter_comp_iter", %"struct.__gnu_cxx::__ops::_Iter_comp_iter"* %this2, i32 0, i32 0 - %call = call dereferenceable(8) %class.INode** @_ZNK9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEdeEv(%"class.__gnu_cxx::__normal_iterator"* %__it1) - %0 = load %class.INode*, %class.INode** %call, align 8 - %call3 = call dereferenceable(8) %class.INode** @_ZNK9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEdeEv(%"class.__gnu_cxx::__normal_iterator"* %__it2) - %1 = load %class.INode*, %class.INode** %call3, align 8 - %call4 = call zeroext i1 @_ZNK7NodeCmpclEPK5INodeS2_(%struct.NodeCmp* %_M_comp, %class.INode* %0, %class.INode* %1) - ret i1 %call4 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx5__ops14_Iter_comp_valI7NodeCmpEC2ERKNS0_15_Iter_comp_iterIS2_EE(%"struct.__gnu_cxx::__ops::_Iter_comp_val"* %this, %"struct.__gnu_cxx::__ops::_Iter_comp_iter"* dereferenceable(1) %__comp) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"struct.__gnu_cxx::__ops::_Iter_comp_val"*, align 8 - %__comp.addr = alloca %"struct.__gnu_cxx::__ops::_Iter_comp_iter"*, align 8 - store %"struct.__gnu_cxx::__ops::_Iter_comp_val"* %this, %"struct.__gnu_cxx::__ops::_Iter_comp_val"** %this.addr, align 8 - store %"struct.__gnu_cxx::__ops::_Iter_comp_iter"* %__comp, %"struct.__gnu_cxx::__ops::_Iter_comp_iter"** %__comp.addr, align 8 - %this1 = load %"struct.__gnu_cxx::__ops::_Iter_comp_val"*, %"struct.__gnu_cxx::__ops::_Iter_comp_val"** %this.addr, align 8 - %_M_comp = getelementptr inbounds %"struct.__gnu_cxx::__ops::_Iter_comp_val", %"struct.__gnu_cxx::__ops::_Iter_comp_val"* %this1, i32 0, i32 0 - %0 = load %"struct.__gnu_cxx::__ops::_Iter_comp_iter"*, %"struct.__gnu_cxx::__ops::_Iter_comp_iter"** %__comp.addr, align 8 - %_M_comp2 = getelementptr inbounds %"struct.__gnu_cxx::__ops::_Iter_comp_iter", %"struct.__gnu_cxx::__ops::_Iter_comp_iter"* %0, i32 0, i32 0 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZSt11__push_heapIN9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS3_SaIS3_EEEElS3_NS0_5__ops14_Iter_comp_valI7NodeCmpEEEvT_T0_SE_T1_RT2_(%class.INode** %__first.coerce, i64 %__holeIndex, i64 %__topIndex, %class.INode* %__value, %"struct.__gnu_cxx::__ops::_Iter_comp_val"* dereferenceable(1) %__comp) #0 comdat { -entry: - %__first = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %__holeIndex.addr = alloca i64, align 8 - %__topIndex.addr = alloca i64, align 8 - %__value.addr = alloca %class.INode*, align 8 - %__comp.addr = alloca %"struct.__gnu_cxx::__ops::_Iter_comp_val"*, align 8 - %__parent = alloca i64, align 8 - %agg.tmp = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %ref.tmp = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %ref.tmp7 = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %ref.tmp13 = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %coerce.dive = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %__first, i32 0, i32 0 - store %class.INode** %__first.coerce, %class.INode*** %coerce.dive, align 8 - store i64 %__holeIndex, i64* %__holeIndex.addr, align 8 - store i64 %__topIndex, i64* %__topIndex.addr, align 8 - store %class.INode* %__value, %class.INode** %__value.addr, align 8 - store %"struct.__gnu_cxx::__ops::_Iter_comp_val"* %__comp, %"struct.__gnu_cxx::__ops::_Iter_comp_val"** %__comp.addr, align 8 - %0 = load i64, i64* %__holeIndex.addr, align 8 - %sub = sub nsw i64 %0, 1 - %div = sdiv i64 %sub, 2 - store i64 %div, i64* %__parent, align 8 - br label %while.cond - -while.cond: ; preds = %while.body, %entry - %1 = load i64, i64* %__holeIndex.addr, align 8 - %2 = load i64, i64* %__topIndex.addr, align 8 - %cmp = icmp sgt i64 %1, %2 - br i1 %cmp, label %land.rhs, label %land.end - -land.rhs: ; preds = %while.cond - %3 = load %"struct.__gnu_cxx::__ops::_Iter_comp_val"*, %"struct.__gnu_cxx::__ops::_Iter_comp_val"** %__comp.addr, align 8 - %4 = load i64, i64* %__parent, align 8 - %call = call %class.INode** @_ZNK9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEplEl(%"class.__gnu_cxx::__normal_iterator"* %__first, i64 %4) - %coerce.dive1 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %agg.tmp, i32 0, i32 0 - store %class.INode** %call, %class.INode*** %coerce.dive1, align 8 - %coerce.dive2 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %agg.tmp, i32 0, i32 0 - %5 = load %class.INode**, %class.INode*** %coerce.dive2, align 8 - %call3 = call zeroext i1 @_ZN9__gnu_cxx5__ops14_Iter_comp_valI7NodeCmpEclINS_17__normal_iteratorIPP5INodeSt6vectorIS7_SaIS7_EEEES7_EEbT_RT0_(%"struct.__gnu_cxx::__ops::_Iter_comp_val"* %3, %class.INode** %5, %class.INode** dereferenceable(8) %__value.addr) - br label %land.end - -land.end: ; preds = %land.rhs, %while.cond - %6 = phi i1 [ false, %while.cond ], [ %call3, %land.rhs ] - br i1 %6, label %while.body, label %while.end - -while.body: ; preds = %land.end - %7 = load i64, i64* %__parent, align 8 - %call4 = call %class.INode** @_ZNK9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEplEl(%"class.__gnu_cxx::__normal_iterator"* %__first, i64 %7) - %coerce.dive5 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %ref.tmp, i32 0, i32 0 - store %class.INode** %call4, %class.INode*** %coerce.dive5, align 8 - %call6 = call dereferenceable(8) %class.INode** @_ZNK9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEdeEv(%"class.__gnu_cxx::__normal_iterator"* %ref.tmp) - %8 = load %class.INode*, %class.INode** %call6, align 8 - %9 = load i64, i64* %__holeIndex.addr, align 8 - %call8 = call %class.INode** @_ZNK9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEplEl(%"class.__gnu_cxx::__normal_iterator"* %__first, i64 %9) - %coerce.dive9 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %ref.tmp7, i32 0, i32 0 - store %class.INode** %call8, %class.INode*** %coerce.dive9, align 8 - %call10 = call dereferenceable(8) %class.INode** @_ZNK9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEdeEv(%"class.__gnu_cxx::__normal_iterator"* %ref.tmp7) - store %class.INode* %8, %class.INode** %call10, align 8 - %10 = load i64, i64* %__parent, align 8 - store i64 %10, i64* %__holeIndex.addr, align 8 - %11 = load i64, i64* %__holeIndex.addr, align 8 - %sub11 = sub nsw i64 %11, 1 - %div12 = sdiv i64 %sub11, 2 - store i64 %div12, i64* %__parent, align 8 - br label %while.cond - -while.end: ; preds = %land.end - %12 = load %class.INode*, %class.INode** %__value.addr, align 8 - %13 = load i64, i64* %__holeIndex.addr, align 8 - %call14 = call %class.INode** @_ZNK9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEplEl(%"class.__gnu_cxx::__normal_iterator"* %__first, i64 %13) - %coerce.dive15 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %ref.tmp13, i32 0, i32 0 - store %class.INode** %call14, %class.INode*** %coerce.dive15, align 8 - %call16 = call dereferenceable(8) %class.INode** @_ZNK9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEdeEv(%"class.__gnu_cxx::__normal_iterator"* %ref.tmp13) - store %class.INode* %12, %class.INode** %call16, align 8 - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local zeroext i1 @_ZNK7NodeCmpclEPK5INodeS2_(%struct.NodeCmp* %this, %class.INode* %lhs, %class.INode* %rhs) #6 comdat align 2 { -entry: - %this.addr = alloca %struct.NodeCmp*, align 8 - %lhs.addr = alloca %class.INode*, align 8 - %rhs.addr = alloca %class.INode*, align 8 - store %struct.NodeCmp* %this, %struct.NodeCmp** %this.addr, align 8 - store %class.INode* %lhs, %class.INode** %lhs.addr, align 8 - store %class.INode* %rhs, %class.INode** %rhs.addr, align 8 - %this1 = load %struct.NodeCmp*, %struct.NodeCmp** %this.addr, align 8 - %0 = load %class.INode*, %class.INode** %lhs.addr, align 8 - %f = getelementptr inbounds %class.INode, %class.INode* %0, i32 0, i32 1 - %1 = load i32, i32* %f, align 8 - %2 = load %class.INode*, %class.INode** %rhs.addr, align 8 - %f2 = getelementptr inbounds %class.INode, %class.INode* %2, i32 0, i32 1 - %3 = load i32, i32* %f2, align 8 - %cmp = icmp sgt i32 %1, %3 - ret i1 %cmp -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local zeroext i1 @_ZN9__gnu_cxx5__ops14_Iter_comp_valI7NodeCmpEclINS_17__normal_iteratorIPP5INodeSt6vectorIS7_SaIS7_EEEES7_EEbT_RT0_(%"struct.__gnu_cxx::__ops::_Iter_comp_val"* %this, %class.INode** %__it.coerce, %class.INode** dereferenceable(8) %__val) #6 comdat align 2 { -entry: - %__it = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %this.addr = alloca %"struct.__gnu_cxx::__ops::_Iter_comp_val"*, align 8 - %__val.addr = alloca %class.INode**, align 8 - %coerce.dive = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %__it, i32 0, i32 0 - store %class.INode** %__it.coerce, %class.INode*** %coerce.dive, align 8 - store %"struct.__gnu_cxx::__ops::_Iter_comp_val"* %this, %"struct.__gnu_cxx::__ops::_Iter_comp_val"** %this.addr, align 8 - store %class.INode** %__val, %class.INode*** %__val.addr, align 8 - %this1 = load %"struct.__gnu_cxx::__ops::_Iter_comp_val"*, %"struct.__gnu_cxx::__ops::_Iter_comp_val"** %this.addr, align 8 - %_M_comp = getelementptr inbounds %"struct.__gnu_cxx::__ops::_Iter_comp_val", %"struct.__gnu_cxx::__ops::_Iter_comp_val"* %this1, i32 0, i32 0 - %call = call dereferenceable(8) %class.INode** @_ZNK9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEdeEv(%"class.__gnu_cxx::__normal_iterator"* %__it) - %0 = load %class.INode*, %class.INode** %call, align 8 - %1 = load %class.INode**, %class.INode*** %__val.addr, align 8 - %2 = load %class.INode*, %class.INode** %1, align 8 - %call2 = call zeroext i1 @_ZNK7NodeCmpclEPK5INodeS2_(%struct.NodeCmp* %_M_comp, %class.INode* %0, %class.INode* %2) - ret i1 %call2 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt6vectorIP5INodeSaIS1_EE9push_backERKS1_(%"class.std::vector"* %this, %class.INode** dereferenceable(8) %__x) #0 comdat align 2 { -entry: - %this.addr = alloca %"class.std::vector"*, align 8 - %__x.addr = alloca %class.INode**, align 8 - %agg.tmp = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - store %"class.std::vector"* %this, %"class.std::vector"** %this.addr, align 8 - store %class.INode** %__x, %class.INode*** %__x.addr, align 8 - %this1 = load %"class.std::vector"*, %"class.std::vector"** %this.addr, align 8 - %0 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %0, i32 0, i32 0 - %_M_finish = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl, i32 0, i32 1 - %1 = load %class.INode**, %class.INode*** %_M_finish, align 8 - %2 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl2 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %2, i32 0, i32 0 - %_M_end_of_storage = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl2, i32 0, i32 2 - %3 = load %class.INode**, %class.INode*** %_M_end_of_storage, align 8 - %cmp = icmp ne %class.INode** %1, %3 - br i1 %cmp, label %if.then, label %if.else - -if.then: ; preds = %entry - %4 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl3 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %4, i32 0, i32 0 - %5 = bitcast %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl3 to %"class.std::allocator"* - %6 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl4 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %6, i32 0, i32 0 - %_M_finish5 = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl4, i32 0, i32 1 - %7 = load %class.INode**, %class.INode*** %_M_finish5, align 8 - %8 = load %class.INode**, %class.INode*** %__x.addr, align 8 - call void @_ZN9__gnu_cxx14__alloc_traitsISaIP5INodeEE9constructIS2_EEvRS3_PS2_RKT_(%"class.std::allocator"* dereferenceable(1) %5, %class.INode** %7, %class.INode** dereferenceable(8) %8) - %9 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl6 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %9, i32 0, i32 0 - %_M_finish7 = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl6, i32 0, i32 1 - %10 = load %class.INode**, %class.INode*** %_M_finish7, align 8 - %incdec.ptr = getelementptr inbounds %class.INode*, %class.INode** %10, i32 1 - store %class.INode** %incdec.ptr, %class.INode*** %_M_finish7, align 8 - br label %if.end - -if.else: ; preds = %entry - %call = call %class.INode** @_ZNSt6vectorIP5INodeSaIS1_EE3endEv(%"class.std::vector"* %this1) - %coerce.dive = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %agg.tmp, i32 0, i32 0 - store %class.INode** %call, %class.INode*** %coerce.dive, align 8 - %11 = load %class.INode**, %class.INode*** %__x.addr, align 8 - %coerce.dive8 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %agg.tmp, i32 0, i32 0 - %12 = load %class.INode**, %class.INode*** %coerce.dive8, align 8 - call void @_ZNSt6vectorIP5INodeSaIS1_EE17_M_realloc_insertEN9__gnu_cxx17__normal_iteratorIPS1_S3_EERKS1_(%"class.std::vector"* %this1, %class.INode** %12, %class.INode** dereferenceable(8) %11) - br label %if.end - -if.end: ; preds = %if.else, %if.then - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZSt9push_heapIN9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS3_SaIS3_EEEE7NodeCmpEvT_SA_T0_(%class.INode** %__first.coerce, %class.INode** %__last.coerce) #0 comdat { -entry: - %__first = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %__last = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %__comp = alloca %struct.NodeCmp, align 1 - %__cmp = alloca %"struct.__gnu_cxx::__ops::_Iter_comp_val", align 1 - %agg.tmp = alloca %struct.NodeCmp, align 1 - %__value = alloca %class.INode*, align 8 - %ref.tmp = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %agg.tmp4 = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %coerce.dive = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %__first, i32 0, i32 0 - store %class.INode** %__first.coerce, %class.INode*** %coerce.dive, align 8 - %coerce.dive1 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %__last, i32 0, i32 0 - store %class.INode** %__last.coerce, %class.INode*** %coerce.dive1, align 8 - call void @_ZN9__gnu_cxx5__ops14_Iter_comp_valI7NodeCmpEC2ES2_(%"struct.__gnu_cxx::__ops::_Iter_comp_val"* %__cmp) - %call = call %class.INode** @_ZNK9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEmiEl(%"class.__gnu_cxx::__normal_iterator"* %__last, i64 1) - %coerce.dive2 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %ref.tmp, i32 0, i32 0 - store %class.INode** %call, %class.INode*** %coerce.dive2, align 8 - %call3 = call dereferenceable(8) %class.INode** @_ZNK9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEdeEv(%"class.__gnu_cxx::__normal_iterator"* %ref.tmp) - %0 = load %class.INode*, %class.INode** %call3, align 8 - store %class.INode* %0, %class.INode** %__value, align 8 - %1 = bitcast %"class.__gnu_cxx::__normal_iterator"* %agg.tmp4 to i8* - %2 = bitcast %"class.__gnu_cxx::__normal_iterator"* %__first to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %1, i8* align 8 %2, i64 8, i1 false) - %call5 = call i64 @_ZN9__gnu_cxxmiIPP5INodeSt6vectorIS2_SaIS2_EEEENS_17__normal_iteratorIT_T0_E15difference_typeERKSA_SD_(%"class.__gnu_cxx::__normal_iterator"* dereferenceable(8) %__last, %"class.__gnu_cxx::__normal_iterator"* dereferenceable(8) %__first) - %sub = sub nsw i64 %call5, 1 - %3 = load %class.INode*, %class.INode** %__value, align 8 - %coerce.dive6 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %agg.tmp4, i32 0, i32 0 - %4 = load %class.INode**, %class.INode*** %coerce.dive6, align 8 - call void @_ZSt11__push_heapIN9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS3_SaIS3_EEEElS3_NS0_5__ops14_Iter_comp_valI7NodeCmpEEEvT_T0_SE_T1_RT2_(%class.INode** %4, i64 %sub, i64 0, %class.INode* %3, %"struct.__gnu_cxx::__ops::_Iter_comp_val"* dereferenceable(1) %__cmp) - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx14__alloc_traitsISaIP5INodeEE9constructIS2_EEvRS3_PS2_RKT_(%"class.std::allocator"* dereferenceable(1) %__a, %class.INode** %__p, %class.INode** dereferenceable(8) %__arg) #0 comdat align 2 { -entry: - %__a.addr = alloca %"class.std::allocator"*, align 8 - %__p.addr = alloca %class.INode**, align 8 - %__arg.addr = alloca %class.INode**, align 8 - store %"class.std::allocator"* %__a, %"class.std::allocator"** %__a.addr, align 8 - store %class.INode** %__p, %class.INode*** %__p.addr, align 8 - store %class.INode** %__arg, %class.INode*** %__arg.addr, align 8 - %0 = load %"class.std::allocator"*, %"class.std::allocator"** %__a.addr, align 8 - %1 = bitcast %"class.std::allocator"* %0 to %"class.__gnu_cxx::new_allocator"* - %2 = load %class.INode**, %class.INode*** %__p.addr, align 8 - %3 = load %class.INode**, %class.INode*** %__arg.addr, align 8 - call void @_ZN9__gnu_cxx13new_allocatorIP5INodeE9constructEPS2_RKS2_(%"class.__gnu_cxx::new_allocator"* %1, %class.INode** %2, %class.INode** dereferenceable(8) %3) - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt6vectorIP5INodeSaIS1_EE17_M_realloc_insertEN9__gnu_cxx17__normal_iteratorIPS1_S3_EERKS1_(%"class.std::vector"* %this, %class.INode** %__position.coerce, %class.INode** dereferenceable(8) %__x) #0 comdat align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -entry: - %__position = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %this.addr = alloca %"class.std::vector"*, align 8 - %__x.addr = alloca %class.INode**, align 8 - %__len = alloca i64, align 8 - %__elems_before = alloca i64, align 8 - %ref.tmp = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %__new_start = alloca %class.INode**, align 8 - %__new_finish = alloca %class.INode**, align 8 - %exn.slot = alloca i8* - %ehselector.slot = alloca i32 - %coerce.dive = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %__position, i32 0, i32 0 - store %class.INode** %__position.coerce, %class.INode*** %coerce.dive, align 8 - store %"class.std::vector"* %this, %"class.std::vector"** %this.addr, align 8 - store %class.INode** %__x, %class.INode*** %__x.addr, align 8 - %this1 = load %"class.std::vector"*, %"class.std::vector"** %this.addr, align 8 - %call = call i64 @_ZNKSt6vectorIP5INodeSaIS1_EE12_M_check_lenEmPKc(%"class.std::vector"* %this1, i64 1, i8* getelementptr inbounds ([26 x i8], [26 x i8]* @.str.26, i64 0, i64 0)) - store i64 %call, i64* %__len, align 8 - %call2 = call %class.INode** @_ZNSt6vectorIP5INodeSaIS1_EE5beginEv(%"class.std::vector"* %this1) - %coerce.dive3 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %ref.tmp, i32 0, i32 0 - store %class.INode** %call2, %class.INode*** %coerce.dive3, align 8 - %call4 = call i64 @_ZN9__gnu_cxxmiIPP5INodeSt6vectorIS2_SaIS2_EEEENS_17__normal_iteratorIT_T0_E15difference_typeERKSA_SD_(%"class.__gnu_cxx::__normal_iterator"* dereferenceable(8) %__position, %"class.__gnu_cxx::__normal_iterator"* dereferenceable(8) %ref.tmp) - store i64 %call4, i64* %__elems_before, align 8 - %0 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %1 = load i64, i64* %__len, align 8 - %call5 = call %class.INode** @_ZNSt12_Vector_baseIP5INodeSaIS1_EE11_M_allocateEm(%"struct.std::_Vector_base"* %0, i64 %1) - store %class.INode** %call5, %class.INode*** %__new_start, align 8 - %2 = load %class.INode**, %class.INode*** %__new_start, align 8 - store %class.INode** %2, %class.INode*** %__new_finish, align 8 - %3 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %3, i32 0, i32 0 - %4 = bitcast %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl to %"class.std::allocator"* - %5 = load %class.INode**, %class.INode*** %__new_start, align 8 - %6 = load i64, i64* %__elems_before, align 8 - %add.ptr = getelementptr inbounds %class.INode*, %class.INode** %5, i64 %6 - %7 = load %class.INode**, %class.INode*** %__x.addr, align 8 - invoke void @_ZN9__gnu_cxx14__alloc_traitsISaIP5INodeEE9constructIS2_EEvRS3_PS2_RKT_(%"class.std::allocator"* dereferenceable(1) %4, %class.INode** %add.ptr, %class.INode** dereferenceable(8) %7) - to label %invoke.cont unwind label %lpad - -invoke.cont: ; preds = %entry - store %class.INode** null, %class.INode*** %__new_finish, align 8 - %8 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl6 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %8, i32 0, i32 0 - %_M_start = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl6, i32 0, i32 0 - %9 = load %class.INode**, %class.INode*** %_M_start, align 8 - %call8 = invoke dereferenceable(8) %class.INode*** @_ZNK9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEE4baseEv(%"class.__gnu_cxx::__normal_iterator"* %__position) - to label %invoke.cont7 unwind label %lpad - -invoke.cont7: ; preds = %invoke.cont - %10 = load %class.INode**, %class.INode*** %call8, align 8 - %11 = load %class.INode**, %class.INode*** %__new_start, align 8 - %12 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %call10 = invoke dereferenceable(1) %"class.std::allocator"* @_ZNSt12_Vector_baseIP5INodeSaIS1_EE19_M_get_Tp_allocatorEv(%"struct.std::_Vector_base"* %12) - to label %invoke.cont9 unwind label %lpad - -invoke.cont9: ; preds = %invoke.cont7 - %call12 = invoke %class.INode** @_ZSt34__uninitialized_move_if_noexcept_aIPP5INodeS2_SaIS1_EET0_T_S5_S4_RT1_(%class.INode** %9, %class.INode** %10, %class.INode** %11, %"class.std::allocator"* dereferenceable(1) %call10) - to label %invoke.cont11 unwind label %lpad - -invoke.cont11: ; preds = %invoke.cont9 - store %class.INode** %call12, %class.INode*** %__new_finish, align 8 - %13 = load %class.INode**, %class.INode*** %__new_finish, align 8 - %incdec.ptr = getelementptr inbounds %class.INode*, %class.INode** %13, i32 1 - store %class.INode** %incdec.ptr, %class.INode*** %__new_finish, align 8 - %call14 = invoke dereferenceable(8) %class.INode*** @_ZNK9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEE4baseEv(%"class.__gnu_cxx::__normal_iterator"* %__position) - to label %invoke.cont13 unwind label %lpad - -invoke.cont13: ; preds = %invoke.cont11 - %14 = load %class.INode**, %class.INode*** %call14, align 8 - %15 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl15 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %15, i32 0, i32 0 - %_M_finish = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl15, i32 0, i32 1 - %16 = load %class.INode**, %class.INode*** %_M_finish, align 8 - %17 = load %class.INode**, %class.INode*** %__new_finish, align 8 - %18 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %call17 = invoke dereferenceable(1) %"class.std::allocator"* @_ZNSt12_Vector_baseIP5INodeSaIS1_EE19_M_get_Tp_allocatorEv(%"struct.std::_Vector_base"* %18) - to label %invoke.cont16 unwind label %lpad - -invoke.cont16: ; preds = %invoke.cont13 - %call19 = invoke %class.INode** @_ZSt34__uninitialized_move_if_noexcept_aIPP5INodeS2_SaIS1_EET0_T_S5_S4_RT1_(%class.INode** %14, %class.INode** %16, %class.INode** %17, %"class.std::allocator"* dereferenceable(1) %call17) - to label %invoke.cont18 unwind label %lpad - -invoke.cont18: ; preds = %invoke.cont16 - store %class.INode** %call19, %class.INode*** %__new_finish, align 8 - br label %try.cont - -lpad: ; preds = %invoke.cont16, %invoke.cont13, %invoke.cont11, %invoke.cont9, %invoke.cont7, %invoke.cont, %entry - %19 = landingpad { i8*, i32 } - catch i8* null - %20 = extractvalue { i8*, i32 } %19, 0 - store i8* %20, i8** %exn.slot, align 8 - %21 = extractvalue { i8*, i32 } %19, 1 - store i32 %21, i32* %ehselector.slot, align 4 - br label %catch - -catch: ; preds = %lpad - %exn = load i8*, i8** %exn.slot, align 8 - %22 = call i8* @__cxa_begin_catch(i8* %exn) #3 - %23 = load %class.INode**, %class.INode*** %__new_finish, align 8 - %tobool = icmp ne %class.INode** %23, null - br i1 %tobool, label %if.else, label %if.then - -if.then: ; preds = %catch - %24 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl20 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %24, i32 0, i32 0 - %25 = bitcast %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl20 to %"class.std::allocator"* - %26 = load %class.INode**, %class.INode*** %__new_start, align 8 - %27 = load i64, i64* %__elems_before, align 8 - %add.ptr21 = getelementptr inbounds %class.INode*, %class.INode** %26, i64 %27 - invoke void @_ZN9__gnu_cxx14__alloc_traitsISaIP5INodeEE7destroyERS3_PS2_(%"class.std::allocator"* dereferenceable(1) %25, %class.INode** %add.ptr21) - to label %invoke.cont23 unwind label %lpad22 - -invoke.cont23: ; preds = %if.then - br label %if.end - -lpad22: ; preds = %invoke.cont27, %if.end, %invoke.cont24, %if.else, %if.then - %28 = landingpad { i8*, i32 } - cleanup - %29 = extractvalue { i8*, i32 } %28, 0 - store i8* %29, i8** %exn.slot, align 8 - %30 = extractvalue { i8*, i32 } %28, 1 - store i32 %30, i32* %ehselector.slot, align 4 - invoke void @__cxa_end_catch() - to label %invoke.cont28 unwind label %terminate.lpad - -if.else: ; preds = %catch - %31 = load %class.INode**, %class.INode*** %__new_start, align 8 - %32 = load %class.INode**, %class.INode*** %__new_finish, align 8 - %33 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %call25 = invoke dereferenceable(1) %"class.std::allocator"* @_ZNSt12_Vector_baseIP5INodeSaIS1_EE19_M_get_Tp_allocatorEv(%"struct.std::_Vector_base"* %33) - to label %invoke.cont24 unwind label %lpad22 - -invoke.cont24: ; preds = %if.else - invoke void @_ZSt8_DestroyIPP5INodeS1_EvT_S3_RSaIT0_E(%class.INode** %31, %class.INode** %32, %"class.std::allocator"* dereferenceable(1) %call25) - to label %invoke.cont26 unwind label %lpad22 - -invoke.cont26: ; preds = %invoke.cont24 - br label %if.end - -if.end: ; preds = %invoke.cont26, %invoke.cont23 - %34 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %35 = load %class.INode**, %class.INode*** %__new_start, align 8 - %36 = load i64, i64* %__len, align 8 - invoke void @_ZNSt12_Vector_baseIP5INodeSaIS1_EE13_M_deallocateEPS1_m(%"struct.std::_Vector_base"* %34, %class.INode** %35, i64 %36) - to label %invoke.cont27 unwind label %lpad22 - -invoke.cont27: ; preds = %if.end - invoke void @__cxa_rethrow() #19 - to label %unreachable unwind label %lpad22 - -invoke.cont28: ; preds = %lpad22 - br label %eh.resume - -try.cont: ; preds = %invoke.cont18 - %37 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl29 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %37, i32 0, i32 0 - %_M_start30 = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl29, i32 0, i32 0 - %38 = load %class.INode**, %class.INode*** %_M_start30, align 8 - %39 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl31 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %39, i32 0, i32 0 - %_M_finish32 = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl31, i32 0, i32 1 - %40 = load %class.INode**, %class.INode*** %_M_finish32, align 8 - %41 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %call33 = call dereferenceable(1) %"class.std::allocator"* @_ZNSt12_Vector_baseIP5INodeSaIS1_EE19_M_get_Tp_allocatorEv(%"struct.std::_Vector_base"* %41) - call void @_ZSt8_DestroyIPP5INodeS1_EvT_S3_RSaIT0_E(%class.INode** %38, %class.INode** %40, %"class.std::allocator"* dereferenceable(1) %call33) - %42 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %43 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl34 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %43, i32 0, i32 0 - %_M_start35 = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl34, i32 0, i32 0 - %44 = load %class.INode**, %class.INode*** %_M_start35, align 8 - %45 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl36 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %45, i32 0, i32 0 - %_M_end_of_storage = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl36, i32 0, i32 2 - %46 = load %class.INode**, %class.INode*** %_M_end_of_storage, align 8 - %47 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl37 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %47, i32 0, i32 0 - %_M_start38 = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl37, i32 0, i32 0 - %48 = load %class.INode**, %class.INode*** %_M_start38, align 8 - %sub.ptr.lhs.cast = ptrtoint %class.INode** %46 to i64 - %sub.ptr.rhs.cast = ptrtoint %class.INode** %48 to i64 - %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast - %sub.ptr.div = sdiv exact i64 %sub.ptr.sub, 8 - call void @_ZNSt12_Vector_baseIP5INodeSaIS1_EE13_M_deallocateEPS1_m(%"struct.std::_Vector_base"* %42, %class.INode** %44, i64 %sub.ptr.div) - %49 = load %class.INode**, %class.INode*** %__new_start, align 8 - %50 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl39 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %50, i32 0, i32 0 - %_M_start40 = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl39, i32 0, i32 0 - store %class.INode** %49, %class.INode*** %_M_start40, align 8 - %51 = load %class.INode**, %class.INode*** %__new_finish, align 8 - %52 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl41 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %52, i32 0, i32 0 - %_M_finish42 = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl41, i32 0, i32 1 - store %class.INode** %51, %class.INode*** %_M_finish42, align 8 - %53 = load %class.INode**, %class.INode*** %__new_start, align 8 - %54 = load i64, i64* %__len, align 8 - %add.ptr43 = getelementptr inbounds %class.INode*, %class.INode** %53, i64 %54 - %55 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl44 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %55, i32 0, i32 0 - %_M_end_of_storage45 = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl44, i32 0, i32 2 - store %class.INode** %add.ptr43, %class.INode*** %_M_end_of_storage45, align 8 - ret void - -eh.resume: ; preds = %invoke.cont28 - %exn46 = load i8*, i8** %exn.slot, align 8 - %sel = load i32, i32* %ehselector.slot, align 4 - %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn46, 0 - %lpad.val47 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1 - resume { i8*, i32 } %lpad.val47 - -terminate.lpad: ; preds = %lpad22 - %56 = landingpad { i8*, i32 } - catch i8* null - %57 = extractvalue { i8*, i32 } %56, 0 - call void @__clang_call_terminate(i8* %57) #16 - unreachable - -unreachable: ; preds = %invoke.cont27 - unreachable -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx13new_allocatorIP5INodeE9constructEPS2_RKS2_(%"class.__gnu_cxx::new_allocator"* %this, %class.INode** %__p, %class.INode** dereferenceable(8) %__val) #6 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::new_allocator"*, align 8 - %__p.addr = alloca %class.INode**, align 8 - %__val.addr = alloca %class.INode**, align 8 - store %"class.__gnu_cxx::new_allocator"* %this, %"class.__gnu_cxx::new_allocator"** %this.addr, align 8 - store %class.INode** %__p, %class.INode*** %__p.addr, align 8 - store %class.INode** %__val, %class.INode*** %__val.addr, align 8 - %this1 = load %"class.__gnu_cxx::new_allocator"*, %"class.__gnu_cxx::new_allocator"** %this.addr, align 8 - %0 = load %class.INode**, %class.INode*** %__p.addr, align 8 - %1 = bitcast %class.INode** %0 to i8* - %2 = bitcast i8* %1 to %class.INode** - %3 = load %class.INode**, %class.INode*** %__val.addr, align 8 - %4 = load %class.INode*, %class.INode** %3, align 8 - store %class.INode* %4, %class.INode** %2, align 8 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local i64 @_ZNKSt6vectorIP5INodeSaIS1_EE12_M_check_lenEmPKc(%"class.std::vector"* %this, i64 %__n, i8* %__s) #0 comdat align 2 { -entry: - %this.addr = alloca %"class.std::vector"*, align 8 - %__n.addr = alloca i64, align 8 - %__s.addr = alloca i8*, align 8 - %__len = alloca i64, align 8 - %ref.tmp = alloca i64, align 8 - store %"class.std::vector"* %this, %"class.std::vector"** %this.addr, align 8 - store i64 %__n, i64* %__n.addr, align 8 - store i8* %__s, i8** %__s.addr, align 8 - %this1 = load %"class.std::vector"*, %"class.std::vector"** %this.addr, align 8 - %call = call i64 @_ZNKSt6vectorIP5INodeSaIS1_EE8max_sizeEv(%"class.std::vector"* %this1) - %call2 = call i64 @_ZNKSt6vectorIP5INodeSaIS1_EE4sizeEv(%"class.std::vector"* %this1) - %sub = sub i64 %call, %call2 - %0 = load i64, i64* %__n.addr, align 8 - %cmp = icmp ult i64 %sub, %0 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - %1 = load i8*, i8** %__s.addr, align 8 - call void @_ZSt20__throw_length_errorPKc(i8* %1) #19 - unreachable - -if.end: ; preds = %entry - %call3 = call i64 @_ZNKSt6vectorIP5INodeSaIS1_EE4sizeEv(%"class.std::vector"* %this1) - %call4 = call i64 @_ZNKSt6vectorIP5INodeSaIS1_EE4sizeEv(%"class.std::vector"* %this1) - store i64 %call4, i64* %ref.tmp, align 8 - %call5 = call dereferenceable(8) i64* @_ZSt3maxImERKT_S2_S2_(i64* dereferenceable(8) %ref.tmp, i64* dereferenceable(8) %__n.addr) - %2 = load i64, i64* %call5, align 8 - %add = add i64 %call3, %2 - store i64 %add, i64* %__len, align 8 - %3 = load i64, i64* %__len, align 8 - %call6 = call i64 @_ZNKSt6vectorIP5INodeSaIS1_EE4sizeEv(%"class.std::vector"* %this1) - %cmp7 = icmp ult i64 %3, %call6 - br i1 %cmp7, label %cond.true, label %lor.lhs.false - -lor.lhs.false: ; preds = %if.end - %4 = load i64, i64* %__len, align 8 - %call8 = call i64 @_ZNKSt6vectorIP5INodeSaIS1_EE8max_sizeEv(%"class.std::vector"* %this1) - %cmp9 = icmp ugt i64 %4, %call8 - br i1 %cmp9, label %cond.true, label %cond.false - -cond.true: ; preds = %lor.lhs.false, %if.end - %call10 = call i64 @_ZNKSt6vectorIP5INodeSaIS1_EE8max_sizeEv(%"class.std::vector"* %this1) - br label %cond.end - -cond.false: ; preds = %lor.lhs.false - %5 = load i64, i64* %__len, align 8 - br label %cond.end - -cond.end: ; preds = %cond.false, %cond.true - %cond = phi i64 [ %call10, %cond.true ], [ %5, %cond.false ] - ret i64 %cond -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %class.INode** @_ZSt34__uninitialized_move_if_noexcept_aIPP5INodeS2_SaIS1_EET0_T_S5_S4_RT1_(%class.INode** %__first, %class.INode** %__last, %class.INode** %__result, %"class.std::allocator"* dereferenceable(1) %__alloc) #0 comdat { -entry: - %__first.addr = alloca %class.INode**, align 8 - %__last.addr = alloca %class.INode**, align 8 - %__result.addr = alloca %class.INode**, align 8 - %__alloc.addr = alloca %"class.std::allocator"*, align 8 - store %class.INode** %__first, %class.INode*** %__first.addr, align 8 - store %class.INode** %__last, %class.INode*** %__last.addr, align 8 - store %class.INode** %__result, %class.INode*** %__result.addr, align 8 - store %"class.std::allocator"* %__alloc, %"class.std::allocator"** %__alloc.addr, align 8 - %0 = load %class.INode**, %class.INode*** %__first.addr, align 8 - %1 = load %class.INode**, %class.INode*** %__last.addr, align 8 - %2 = load %class.INode**, %class.INode*** %__result.addr, align 8 - %3 = load %"class.std::allocator"*, %"class.std::allocator"** %__alloc.addr, align 8 - %call = call %class.INode** @_ZSt22__uninitialized_copy_aIPP5INodeS2_S1_ET0_T_S4_S3_RSaIT1_E(%class.INode** %0, %class.INode** %1, %class.INode** %2, %"class.std::allocator"* dereferenceable(1) %3) - ret %class.INode** %call -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx14__alloc_traitsISaIP5INodeEE7destroyERS3_PS2_(%"class.std::allocator"* dereferenceable(1) %__a, %class.INode** %__p) #0 comdat align 2 { -entry: - %__a.addr = alloca %"class.std::allocator"*, align 8 - %__p.addr = alloca %class.INode**, align 8 - store %"class.std::allocator"* %__a, %"class.std::allocator"** %__a.addr, align 8 - store %class.INode** %__p, %class.INode*** %__p.addr, align 8 - %0 = load %"class.std::allocator"*, %"class.std::allocator"** %__a.addr, align 8 - %1 = bitcast %"class.std::allocator"* %0 to %"class.__gnu_cxx::new_allocator"* - %2 = load %class.INode**, %class.INode*** %__p.addr, align 8 - call void @_ZN9__gnu_cxx13new_allocatorIP5INodeE7destroyEPS2_(%"class.__gnu_cxx::new_allocator"* %1, %class.INode** %2) - ret void -} - -declare dso_local void @__cxa_rethrow() - -declare dso_local void @__cxa_end_catch() - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local i64 @_ZNKSt6vectorIP5INodeSaIS1_EE8max_sizeEv(%"class.std::vector"* %this) #0 comdat align 2 { -entry: - %this.addr = alloca %"class.std::vector"*, align 8 - store %"class.std::vector"* %this, %"class.std::vector"** %this.addr, align 8 - %this1 = load %"class.std::vector"*, %"class.std::vector"** %this.addr, align 8 - %0 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %call = call dereferenceable(1) %"class.std::allocator"* @_ZNKSt12_Vector_baseIP5INodeSaIS1_EE19_M_get_Tp_allocatorEv(%"struct.std::_Vector_base"* %0) - %call2 = call i64 @_ZN9__gnu_cxx14__alloc_traitsISaIP5INodeEE8max_sizeERKS3_(%"class.std::allocator"* dereferenceable(1) %call) - ret i64 %call2 -} - -; Function Attrs: noreturn -declare dso_local void @_ZSt20__throw_length_errorPKc(i8*) #15 - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local dereferenceable(8) i64* @_ZSt3maxImERKT_S2_S2_(i64* dereferenceable(8) %__a, i64* dereferenceable(8) %__b) #6 comdat { -entry: - %retval = alloca i64*, align 8 - %__a.addr = alloca i64*, align 8 - %__b.addr = alloca i64*, align 8 - store i64* %__a, i64** %__a.addr, align 8 - store i64* %__b, i64** %__b.addr, align 8 - %0 = load i64*, i64** %__a.addr, align 8 - %1 = load i64, i64* %0, align 8 - %2 = load i64*, i64** %__b.addr, align 8 - %3 = load i64, i64* %2, align 8 - %cmp = icmp ult i64 %1, %3 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - %4 = load i64*, i64** %__b.addr, align 8 - store i64* %4, i64** %retval, align 8 - br label %return - -if.end: ; preds = %entry - %5 = load i64*, i64** %__a.addr, align 8 - store i64* %5, i64** %retval, align 8 - br label %return - -return: ; preds = %if.end, %if.then - %6 = load i64*, i64** %retval, align 8 - ret i64* %6 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local i64 @_ZN9__gnu_cxx14__alloc_traitsISaIP5INodeEE8max_sizeERKS3_(%"class.std::allocator"* dereferenceable(1) %__a) #6 comdat align 2 { -entry: - %__a.addr = alloca %"class.std::allocator"*, align 8 - store %"class.std::allocator"* %__a, %"class.std::allocator"** %__a.addr, align 8 - %0 = load %"class.std::allocator"*, %"class.std::allocator"** %__a.addr, align 8 - %1 = bitcast %"class.std::allocator"* %0 to %"class.__gnu_cxx::new_allocator"* - %call = call i64 @_ZNK9__gnu_cxx13new_allocatorIP5INodeE8max_sizeEv(%"class.__gnu_cxx::new_allocator"* %1) #3 - ret i64 %call -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %class.INode** @_ZSt22__uninitialized_copy_aIPP5INodeS2_S1_ET0_T_S4_S3_RSaIT1_E(%class.INode** %__first, %class.INode** %__last, %class.INode** %__result, %"class.std::allocator"* dereferenceable(1) %0) #0 comdat { -entry: - %__first.addr = alloca %class.INode**, align 8 - %__last.addr = alloca %class.INode**, align 8 - %__result.addr = alloca %class.INode**, align 8 - %.addr = alloca %"class.std::allocator"*, align 8 - store %class.INode** %__first, %class.INode*** %__first.addr, align 8 - store %class.INode** %__last, %class.INode*** %__last.addr, align 8 - store %class.INode** %__result, %class.INode*** %__result.addr, align 8 - store %"class.std::allocator"* %0, %"class.std::allocator"** %.addr, align 8 - %1 = load %class.INode**, %class.INode*** %__first.addr, align 8 - %2 = load %class.INode**, %class.INode*** %__last.addr, align 8 - %3 = load %class.INode**, %class.INode*** %__result.addr, align 8 - %call = call %class.INode** @_ZSt18uninitialized_copyIPP5INodeS2_ET0_T_S4_S3_(%class.INode** %1, %class.INode** %2, %class.INode** %3) - ret %class.INode** %call -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %class.INode** @_ZSt18uninitialized_copyIPP5INodeS2_ET0_T_S4_S3_(%class.INode** %__first, %class.INode** %__last, %class.INode** %__result) #0 comdat { -entry: - %__first.addr = alloca %class.INode**, align 8 - %__last.addr = alloca %class.INode**, align 8 - %__result.addr = alloca %class.INode**, align 8 - %__assignable = alloca i8, align 1 - store %class.INode** %__first, %class.INode*** %__first.addr, align 8 - store %class.INode** %__last, %class.INode*** %__last.addr, align 8 - store %class.INode** %__result, %class.INode*** %__result.addr, align 8 - store i8 1, i8* %__assignable, align 1 - %0 = load %class.INode**, %class.INode*** %__first.addr, align 8 - %1 = load %class.INode**, %class.INode*** %__last.addr, align 8 - %2 = load %class.INode**, %class.INode*** %__result.addr, align 8 - %call = call %class.INode** @_ZNSt20__uninitialized_copyILb1EE13__uninit_copyIPP5INodeS4_EET0_T_S6_S5_(%class.INode** %0, %class.INode** %1, %class.INode** %2) - ret %class.INode** %call -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %class.INode** @_ZNSt20__uninitialized_copyILb1EE13__uninit_copyIPP5INodeS4_EET0_T_S6_S5_(%class.INode** %__first, %class.INode** %__last, %class.INode** %__result) #0 comdat align 2 { -entry: - %__first.addr = alloca %class.INode**, align 8 - %__last.addr = alloca %class.INode**, align 8 - %__result.addr = alloca %class.INode**, align 8 - store %class.INode** %__first, %class.INode*** %__first.addr, align 8 - store %class.INode** %__last, %class.INode*** %__last.addr, align 8 - store %class.INode** %__result, %class.INode*** %__result.addr, align 8 - %0 = load %class.INode**, %class.INode*** %__first.addr, align 8 - %1 = load %class.INode**, %class.INode*** %__last.addr, align 8 - %2 = load %class.INode**, %class.INode*** %__result.addr, align 8 - %call = call %class.INode** @_ZSt4copyIPP5INodeS2_ET0_T_S4_S3_(%class.INode** %0, %class.INode** %1, %class.INode** %2) - ret %class.INode** %call -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %class.INode** @_ZSt4copyIPP5INodeS2_ET0_T_S4_S3_(%class.INode** %__first, %class.INode** %__last, %class.INode** %__result) #0 comdat { -entry: - %__first.addr = alloca %class.INode**, align 8 - %__last.addr = alloca %class.INode**, align 8 - %__result.addr = alloca %class.INode**, align 8 - store %class.INode** %__first, %class.INode*** %__first.addr, align 8 - store %class.INode** %__last, %class.INode*** %__last.addr, align 8 - store %class.INode** %__result, %class.INode*** %__result.addr, align 8 - %0 = load %class.INode**, %class.INode*** %__first.addr, align 8 - %call = call %class.INode** @_ZSt12__miter_baseIPP5INodeET_S3_(%class.INode** %0) - %1 = load %class.INode**, %class.INode*** %__last.addr, align 8 - %call1 = call %class.INode** @_ZSt12__miter_baseIPP5INodeET_S3_(%class.INode** %1) - %2 = load %class.INode**, %class.INode*** %__result.addr, align 8 - %call2 = call %class.INode** @_ZSt14__copy_move_a2ILb0EPP5INodeS2_ET1_T0_S4_S3_(%class.INode** %call, %class.INode** %call1, %class.INode** %2) - ret %class.INode** %call2 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %class.INode** @_ZSt14__copy_move_a2ILb0EPP5INodeS2_ET1_T0_S4_S3_(%class.INode** %__first, %class.INode** %__last, %class.INode** %__result) #0 comdat { -entry: - %__first.addr = alloca %class.INode**, align 8 - %__last.addr = alloca %class.INode**, align 8 - %__result.addr = alloca %class.INode**, align 8 - store %class.INode** %__first, %class.INode*** %__first.addr, align 8 - store %class.INode** %__last, %class.INode*** %__last.addr, align 8 - store %class.INode** %__result, %class.INode*** %__result.addr, align 8 - %0 = load %class.INode**, %class.INode*** %__first.addr, align 8 - %call = call %class.INode** @_ZSt12__niter_baseIPP5INodeET_S3_(%class.INode** %0) - %1 = load %class.INode**, %class.INode*** %__last.addr, align 8 - %call1 = call %class.INode** @_ZSt12__niter_baseIPP5INodeET_S3_(%class.INode** %1) - %2 = load %class.INode**, %class.INode*** %__result.addr, align 8 - %call2 = call %class.INode** @_ZSt12__niter_baseIPP5INodeET_S3_(%class.INode** %2) - %call3 = call %class.INode** @_ZSt13__copy_move_aILb0EPP5INodeS2_ET1_T0_S4_S3_(%class.INode** %call, %class.INode** %call1, %class.INode** %call2) - ret %class.INode** %call3 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local %class.INode** @_ZSt12__miter_baseIPP5INodeET_S3_(%class.INode** %__it) #6 comdat { -entry: - %__it.addr = alloca %class.INode**, align 8 - store %class.INode** %__it, %class.INode*** %__it.addr, align 8 - %0 = load %class.INode**, %class.INode*** %__it.addr, align 8 - ret %class.INode** %0 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local %class.INode** @_ZSt13__copy_move_aILb0EPP5INodeS2_ET1_T0_S4_S3_(%class.INode** %__first, %class.INode** %__last, %class.INode** %__result) #6 comdat { -entry: - %__first.addr = alloca %class.INode**, align 8 - %__last.addr = alloca %class.INode**, align 8 - %__result.addr = alloca %class.INode**, align 8 - %__simple = alloca i8, align 1 - store %class.INode** %__first, %class.INode*** %__first.addr, align 8 - store %class.INode** %__last, %class.INode*** %__last.addr, align 8 - store %class.INode** %__result, %class.INode*** %__result.addr, align 8 - store i8 1, i8* %__simple, align 1 - %0 = load %class.INode**, %class.INode*** %__first.addr, align 8 - %1 = load %class.INode**, %class.INode*** %__last.addr, align 8 - %2 = load %class.INode**, %class.INode*** %__result.addr, align 8 - %call = call %class.INode** @_ZNSt11__copy_moveILb0ELb1ESt26random_access_iterator_tagE8__copy_mIP5INodeEEPT_PKS5_S8_S6_(%class.INode** %0, %class.INode** %1, %class.INode** %2) - ret %class.INode** %call -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx13new_allocatorIP5INodeE7destroyEPS2_(%"class.__gnu_cxx::new_allocator"* %this, %class.INode** %__p) #6 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::new_allocator"*, align 8 - %__p.addr = alloca %class.INode**, align 8 - store %"class.__gnu_cxx::new_allocator"* %this, %"class.__gnu_cxx::new_allocator"** %this.addr, align 8 - store %class.INode** %__p, %class.INode*** %__p.addr, align 8 - %this1 = load %"class.__gnu_cxx::new_allocator"*, %"class.__gnu_cxx::new_allocator"** %this.addr, align 8 - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx5__ops14_Iter_comp_valI7NodeCmpEC2ES2_(%"struct.__gnu_cxx::__ops::_Iter_comp_val"* %this) unnamed_addr #6 comdat align 2 { -entry: - %__comp = alloca %struct.NodeCmp, align 1 - %this.addr = alloca %"struct.__gnu_cxx::__ops::_Iter_comp_val"*, align 8 - store %"struct.__gnu_cxx::__ops::_Iter_comp_val"* %this, %"struct.__gnu_cxx::__ops::_Iter_comp_val"** %this.addr, align 8 - %this1 = load %"struct.__gnu_cxx::__ops::_Iter_comp_val"*, %"struct.__gnu_cxx::__ops::_Iter_comp_val"** %this.addr, align 8 - %_M_comp = getelementptr inbounds %"struct.__gnu_cxx::__ops::_Iter_comp_val", %"struct.__gnu_cxx::__ops::_Iter_comp_val"* %this1, i32 0, i32 0 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %class.INode** @_ZNK9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEmiEl(%"class.__gnu_cxx::__normal_iterator"* %this, i64 %__n) #0 comdat align 2 { -entry: - %retval = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %this.addr = alloca %"class.__gnu_cxx::__normal_iterator"*, align 8 - %__n.addr = alloca i64, align 8 - %ref.tmp = alloca %class.INode**, align 8 - store %"class.__gnu_cxx::__normal_iterator"* %this, %"class.__gnu_cxx::__normal_iterator"** %this.addr, align 8 - store i64 %__n, i64* %__n.addr, align 8 - %this1 = load %"class.__gnu_cxx::__normal_iterator"*, %"class.__gnu_cxx::__normal_iterator"** %this.addr, align 8 - %_M_current = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %this1, i32 0, i32 0 - %0 = load %class.INode**, %class.INode*** %_M_current, align 8 - %1 = load i64, i64* %__n.addr, align 8 - %idx.neg = sub i64 0, %1 - %add.ptr = getelementptr inbounds %class.INode*, %class.INode** %0, i64 %idx.neg - store %class.INode** %add.ptr, %class.INode*** %ref.tmp, align 8 - call void @_ZN9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEC2ERKS3_(%"class.__gnu_cxx::__normal_iterator"* %retval, %class.INode*** dereferenceable(8) %ref.tmp) - %coerce.dive = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %retval, i32 0, i32 0 - %2 = load %class.INode**, %class.INode*** %coerce.dive, align 8 - ret %class.INode** %2 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local dereferenceable(8) %class.INode** @_ZNKSt6vectorIP5INodeSaIS1_EE5frontEv(%"class.std::vector"* %this) #0 comdat align 2 { -entry: - %this.addr = alloca %"class.std::vector"*, align 8 - %ref.tmp = alloca %"class.__gnu_cxx::__normal_iterator.10", align 8 - store %"class.std::vector"* %this, %"class.std::vector"** %this.addr, align 8 - %this1 = load %"class.std::vector"*, %"class.std::vector"** %this.addr, align 8 - %call = call %class.INode** @_ZNKSt6vectorIP5INodeSaIS1_EE5beginEv(%"class.std::vector"* %this1) - %coerce.dive = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.10", %"class.__gnu_cxx::__normal_iterator.10"* %ref.tmp, i32 0, i32 0 - store %class.INode** %call, %class.INode*** %coerce.dive, align 8 - %call2 = call dereferenceable(8) %class.INode** @_ZNK9__gnu_cxx17__normal_iteratorIPKP5INodeSt6vectorIS2_SaIS2_EEEdeEv(%"class.__gnu_cxx::__normal_iterator.10"* %ref.tmp) - ret %class.INode** %call2 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local dereferenceable(8) %class.INode** @_ZNK9__gnu_cxx17__normal_iteratorIPKP5INodeSt6vectorIS2_SaIS2_EEEdeEv(%"class.__gnu_cxx::__normal_iterator.10"* %this) #6 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::__normal_iterator.10"*, align 8 - store %"class.__gnu_cxx::__normal_iterator.10"* %this, %"class.__gnu_cxx::__normal_iterator.10"** %this.addr, align 8 - %this1 = load %"class.__gnu_cxx::__normal_iterator.10"*, %"class.__gnu_cxx::__normal_iterator.10"** %this.addr, align 8 - %_M_current = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.10", %"class.__gnu_cxx::__normal_iterator.10"* %this1, i32 0, i32 0 - %0 = load %class.INode**, %class.INode*** %_M_current, align 8 - ret %class.INode** %0 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZSt8pop_heapIN9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS3_SaIS3_EEEE7NodeCmpEvT_SA_T0_(%class.INode** %__first.coerce, %class.INode** %__last.coerce) #0 comdat { -entry: - %__first = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %__last = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %__comp = alloca %struct.NodeCmp, align 1 - %__cmp = alloca %"struct.__gnu_cxx::__ops::_Iter_comp_iter", align 1 - %agg.tmp = alloca %struct.NodeCmp, align 1 - %agg.tmp3 = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %agg.tmp4 = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %agg.tmp5 = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %coerce.dive = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %__first, i32 0, i32 0 - store %class.INode** %__first.coerce, %class.INode*** %coerce.dive, align 8 - %coerce.dive1 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %__last, i32 0, i32 0 - store %class.INode** %__last.coerce, %class.INode*** %coerce.dive1, align 8 - %call = call i64 @_ZN9__gnu_cxxmiIPP5INodeSt6vectorIS2_SaIS2_EEEENS_17__normal_iteratorIT_T0_E15difference_typeERKSA_SD_(%"class.__gnu_cxx::__normal_iterator"* dereferenceable(8) %__last, %"class.__gnu_cxx::__normal_iterator"* dereferenceable(8) %__first) - %cmp = icmp sgt i64 %call, 1 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - call void @_ZN9__gnu_cxx5__ops15_Iter_comp_iterI7NodeCmpEC2ES2_(%"struct.__gnu_cxx::__ops::_Iter_comp_iter"* %__cmp) - %call2 = call dereferenceable(8) %"class.__gnu_cxx::__normal_iterator"* @_ZN9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEmmEv(%"class.__gnu_cxx::__normal_iterator"* %__last) - %0 = bitcast %"class.__gnu_cxx::__normal_iterator"* %agg.tmp3 to i8* - %1 = bitcast %"class.__gnu_cxx::__normal_iterator"* %__first to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %0, i8* align 8 %1, i64 8, i1 false) - %2 = bitcast %"class.__gnu_cxx::__normal_iterator"* %agg.tmp4 to i8* - %3 = bitcast %"class.__gnu_cxx::__normal_iterator"* %__last to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %2, i8* align 8 %3, i64 8, i1 false) - %4 = bitcast %"class.__gnu_cxx::__normal_iterator"* %agg.tmp5 to i8* - %5 = bitcast %"class.__gnu_cxx::__normal_iterator"* %__last to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %4, i8* align 8 %5, i64 8, i1 false) - %coerce.dive6 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %agg.tmp3, i32 0, i32 0 - %6 = load %class.INode**, %class.INode*** %coerce.dive6, align 8 - %coerce.dive7 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %agg.tmp4, i32 0, i32 0 - %7 = load %class.INode**, %class.INode*** %coerce.dive7, align 8 - %coerce.dive8 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %agg.tmp5, i32 0, i32 0 - %8 = load %class.INode**, %class.INode*** %coerce.dive8, align 8 - call void @_ZSt10__pop_heapIN9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS3_SaIS3_EEEENS0_5__ops15_Iter_comp_iterI7NodeCmpEEEvT_SD_SD_RT0_(%class.INode** %6, %class.INode** %7, %class.INode** %8, %"struct.__gnu_cxx::__ops::_Iter_comp_iter"* dereferenceable(1) %__cmp) - br label %if.end - -if.end: ; preds = %if.then, %entry - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt6vectorIP5INodeSaIS1_EE8pop_backEv(%"class.std::vector"* %this) #0 comdat align 2 { -entry: - %this.addr = alloca %"class.std::vector"*, align 8 - store %"class.std::vector"* %this, %"class.std::vector"** %this.addr, align 8 - %this1 = load %"class.std::vector"*, %"class.std::vector"** %this.addr, align 8 - %0 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %0, i32 0, i32 0 - %_M_finish = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl, i32 0, i32 1 - %1 = load %class.INode**, %class.INode*** %_M_finish, align 8 - %incdec.ptr = getelementptr inbounds %class.INode*, %class.INode** %1, i32 -1 - store %class.INode** %incdec.ptr, %class.INode*** %_M_finish, align 8 - %2 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl2 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %2, i32 0, i32 0 - %3 = bitcast %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl2 to %"class.std::allocator"* - %4 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl3 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %4, i32 0, i32 0 - %_M_finish4 = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl3, i32 0, i32 1 - %5 = load %class.INode**, %class.INode*** %_M_finish4, align 8 - call void @_ZN9__gnu_cxx14__alloc_traitsISaIP5INodeEE7destroyERS3_PS2_(%"class.std::allocator"* dereferenceable(1) %3, %class.INode** %5) - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local dereferenceable(8) %"class.__gnu_cxx::__normal_iterator"* @_ZN9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEmmEv(%"class.__gnu_cxx::__normal_iterator"* %this) #6 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::__normal_iterator"*, align 8 - store %"class.__gnu_cxx::__normal_iterator"* %this, %"class.__gnu_cxx::__normal_iterator"** %this.addr, align 8 - %this1 = load %"class.__gnu_cxx::__normal_iterator"*, %"class.__gnu_cxx::__normal_iterator"** %this.addr, align 8 - %_M_current = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %this1, i32 0, i32 0 - %0 = load %class.INode**, %class.INode*** %_M_current, align 8 - %incdec.ptr = getelementptr inbounds %class.INode*, %class.INode** %0, i32 -1 - store %class.INode** %incdec.ptr, %class.INode*** %_M_current, align 8 - ret %"class.__gnu_cxx::__normal_iterator"* %this1 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZSt10__pop_heapIN9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS3_SaIS3_EEEENS0_5__ops15_Iter_comp_iterI7NodeCmpEEEvT_SD_SD_RT0_(%class.INode** %__first.coerce, %class.INode** %__last.coerce, %class.INode** %__result.coerce, %"struct.__gnu_cxx::__ops::_Iter_comp_iter"* dereferenceable(1) %__comp) #0 comdat { -entry: - %__first = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %__last = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %__result = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %__comp.addr = alloca %"struct.__gnu_cxx::__ops::_Iter_comp_iter"*, align 8 - %__value = alloca %class.INode*, align 8 - %agg.tmp = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %agg.tmp6 = alloca %"struct.__gnu_cxx::__ops::_Iter_comp_iter", align 1 - %coerce.dive = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %__first, i32 0, i32 0 - store %class.INode** %__first.coerce, %class.INode*** %coerce.dive, align 8 - %coerce.dive1 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %__last, i32 0, i32 0 - store %class.INode** %__last.coerce, %class.INode*** %coerce.dive1, align 8 - %coerce.dive2 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %__result, i32 0, i32 0 - store %class.INode** %__result.coerce, %class.INode*** %coerce.dive2, align 8 - store %"struct.__gnu_cxx::__ops::_Iter_comp_iter"* %__comp, %"struct.__gnu_cxx::__ops::_Iter_comp_iter"** %__comp.addr, align 8 - %call = call dereferenceable(8) %class.INode** @_ZNK9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEdeEv(%"class.__gnu_cxx::__normal_iterator"* %__result) - %0 = load %class.INode*, %class.INode** %call, align 8 - store %class.INode* %0, %class.INode** %__value, align 8 - %call3 = call dereferenceable(8) %class.INode** @_ZNK9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEdeEv(%"class.__gnu_cxx::__normal_iterator"* %__first) - %1 = load %class.INode*, %class.INode** %call3, align 8 - %call4 = call dereferenceable(8) %class.INode** @_ZNK9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS2_SaIS2_EEEdeEv(%"class.__gnu_cxx::__normal_iterator"* %__result) - store %class.INode* %1, %class.INode** %call4, align 8 - %2 = bitcast %"class.__gnu_cxx::__normal_iterator"* %agg.tmp to i8* - %3 = bitcast %"class.__gnu_cxx::__normal_iterator"* %__first to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %2, i8* align 8 %3, i64 8, i1 false) - %call5 = call i64 @_ZN9__gnu_cxxmiIPP5INodeSt6vectorIS2_SaIS2_EEEENS_17__normal_iteratorIT_T0_E15difference_typeERKSA_SD_(%"class.__gnu_cxx::__normal_iterator"* dereferenceable(8) %__last, %"class.__gnu_cxx::__normal_iterator"* dereferenceable(8) %__first) - %4 = load %class.INode*, %class.INode** %__value, align 8 - %5 = load %"struct.__gnu_cxx::__ops::_Iter_comp_iter"*, %"struct.__gnu_cxx::__ops::_Iter_comp_iter"** %__comp.addr, align 8 - %6 = bitcast %"struct.__gnu_cxx::__ops::_Iter_comp_iter"* %agg.tmp6 to i8* - %7 = bitcast %"struct.__gnu_cxx::__ops::_Iter_comp_iter"* %5 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %6, i8* align 1 %7, i64 1, i1 false) - %coerce.dive7 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %agg.tmp, i32 0, i32 0 - %8 = load %class.INode**, %class.INode*** %coerce.dive7, align 8 - call void @_ZSt13__adjust_heapIN9__gnu_cxx17__normal_iteratorIPP5INodeSt6vectorIS3_SaIS3_EEEElS3_NS0_5__ops15_Iter_comp_iterI7NodeCmpEEEvT_T0_SE_T1_T2_(%class.INode** %8, i64 0, i64 %call5, %class.INode* %4) - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %"struct.std::_Rb_tree_node_base"* @_ZNSt3mapIhSt6vectorIbSaIbEESt4lessIhESaISt4pairIKhS2_EEE11lower_boundERS6_(%"class.std::map"* %this, i8* dereferenceable(1) %__x) #0 comdat align 2 { -entry: - %retval = alloca %"struct.std::_Rb_tree_iterator", align 8 - %this.addr = alloca %"class.std::map"*, align 8 - %__x.addr = alloca i8*, align 8 - store %"class.std::map"* %this, %"class.std::map"** %this.addr, align 8 - store i8* %__x, i8** %__x.addr, align 8 - %this1 = load %"class.std::map"*, %"class.std::map"** %this.addr, align 8 - %_M_t = getelementptr inbounds %"class.std::map", %"class.std::map"* %this1, i32 0, i32 0 - %0 = load i8*, i8** %__x.addr, align 8 - %call = call %"struct.std::_Rb_tree_node_base"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE11lower_boundERS1_(%"class.std::_Rb_tree"* %_M_t, i8* dereferenceable(1) %0) - %coerce.dive = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %retval, i32 0, i32 0 - store %"struct.std::_Rb_tree_node_base"* %call, %"struct.std::_Rb_tree_node_base"** %coerce.dive, align 8 - %coerce.dive2 = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %retval, i32 0, i32 0 - %1 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %coerce.dive2, align 8 - ret %"struct.std::_Rb_tree_node_base"* %1 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local zeroext i1 @_ZNKSt17_Rb_tree_iteratorISt4pairIKhSt6vectorIbSaIbEEEEeqERKS6_(%"struct.std::_Rb_tree_iterator"* %this, %"struct.std::_Rb_tree_iterator"* dereferenceable(8) %__x) #6 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Rb_tree_iterator"*, align 8 - %__x.addr = alloca %"struct.std::_Rb_tree_iterator"*, align 8 - store %"struct.std::_Rb_tree_iterator"* %this, %"struct.std::_Rb_tree_iterator"** %this.addr, align 8 - store %"struct.std::_Rb_tree_iterator"* %__x, %"struct.std::_Rb_tree_iterator"** %__x.addr, align 8 - %this1 = load %"struct.std::_Rb_tree_iterator"*, %"struct.std::_Rb_tree_iterator"** %this.addr, align 8 - %_M_node = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %this1, i32 0, i32 0 - %0 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %_M_node, align 8 - %1 = load %"struct.std::_Rb_tree_iterator"*, %"struct.std::_Rb_tree_iterator"** %__x.addr, align 8 - %_M_node2 = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %1, i32 0, i32 0 - %2 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %_M_node2, align 8 - %cmp = icmp eq %"struct.std::_Rb_tree_node_base"* %0, %2 - ret i1 %cmp -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNKSt3mapIhSt6vectorIbSaIbEESt4lessIhESaISt4pairIKhS2_EEE8key_compEv(%"class.std::map"* %this) #0 comdat align 2 { -entry: - %this.addr = alloca %"class.std::map"*, align 8 - %undef.agg.tmp = alloca %"struct.std::less", align 1 - store %"class.std::map"* %this, %"class.std::map"** %this.addr, align 8 - %this1 = load %"class.std::map"*, %"class.std::map"** %this.addr, align 8 - %_M_t = getelementptr inbounds %"class.std::map", %"class.std::map"* %this1, i32 0, i32 0 - call void @_ZNKSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE8key_compEv(%"class.std::_Rb_tree"* %_M_t) - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local zeroext i1 @_ZNKSt4lessIhEclERKhS2_(%"struct.std::less"* %this, i8* dereferenceable(1) %__x, i8* dereferenceable(1) %__y) #6 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::less"*, align 8 - %__x.addr = alloca i8*, align 8 - %__y.addr = alloca i8*, align 8 - store %"struct.std::less"* %this, %"struct.std::less"** %this.addr, align 8 - store i8* %__x, i8** %__x.addr, align 8 - store i8* %__y, i8** %__y.addr, align 8 - %this1 = load %"struct.std::less"*, %"struct.std::less"** %this.addr, align 8 - %0 = load i8*, i8** %__x.addr, align 8 - %1 = load i8, i8* %0, align 1 - %conv = zext i8 %1 to i32 - %2 = load i8*, i8** %__y.addr, align 8 - %3 = load i8, i8* %2, align 1 - %conv2 = zext i8 %3 to i32 - %cmp = icmp slt i32 %conv, %conv2 - ret i1 %cmp -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local dereferenceable(48) %"struct.std::pair"* @_ZNKSt17_Rb_tree_iteratorISt4pairIKhSt6vectorIbSaIbEEEEdeEv(%"struct.std::_Rb_tree_iterator"* %this) #0 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Rb_tree_iterator"*, align 8 - store %"struct.std::_Rb_tree_iterator"* %this, %"struct.std::_Rb_tree_iterator"** %this.addr, align 8 - %this1 = load %"struct.std::_Rb_tree_iterator"*, %"struct.std::_Rb_tree_iterator"** %this.addr, align 8 - %_M_node = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %this1, i32 0, i32 0 - %0 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %_M_node, align 8 - %1 = bitcast %"struct.std::_Rb_tree_node_base"* %0 to %"struct.std::_Rb_tree_node"* - %call = call %"struct.std::pair"* @_ZNSt13_Rb_tree_nodeISt4pairIKhSt6vectorIbSaIbEEEE9_M_valptrEv(%"struct.std::_Rb_tree_node"* %1) - ret %"struct.std::pair"* %call -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %"struct.std::_Rb_tree_node_base"* @_ZNSt3mapIhSt6vectorIbSaIbEESt4lessIhESaISt4pairIKhS2_EEE6insertESt17_Rb_tree_iteratorIS7_ERKS7_(%"class.std::map"* %this, %"struct.std::_Rb_tree_node_base"* %__position.coerce, %"struct.std::pair"* dereferenceable(48) %__x) #0 comdat align 2 { -entry: - %retval = alloca %"struct.std::_Rb_tree_iterator", align 8 - %__position = alloca %"struct.std::_Rb_tree_iterator", align 8 - %this.addr = alloca %"class.std::map"*, align 8 - %__x.addr = alloca %"struct.std::pair"*, align 8 - %agg.tmp = alloca %"struct.std::_Rb_tree_const_iterator", align 8 - %coerce.dive = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %__position, i32 0, i32 0 - store %"struct.std::_Rb_tree_node_base"* %__position.coerce, %"struct.std::_Rb_tree_node_base"** %coerce.dive, align 8 - store %"class.std::map"* %this, %"class.std::map"** %this.addr, align 8 - store %"struct.std::pair"* %__x, %"struct.std::pair"** %__x.addr, align 8 - %this1 = load %"class.std::map"*, %"class.std::map"** %this.addr, align 8 - %_M_t = getelementptr inbounds %"class.std::map", %"class.std::map"* %this1, i32 0, i32 0 - call void @_ZNSt23_Rb_tree_const_iteratorISt4pairIKhSt6vectorIbSaIbEEEEC2ERKSt17_Rb_tree_iteratorIS5_E(%"struct.std::_Rb_tree_const_iterator"* %agg.tmp, %"struct.std::_Rb_tree_iterator"* dereferenceable(8) %__position) - %0 = load %"struct.std::pair"*, %"struct.std::pair"** %__x.addr, align 8 - %coerce.dive2 = getelementptr inbounds %"struct.std::_Rb_tree_const_iterator", %"struct.std::_Rb_tree_const_iterator"* %agg.tmp, i32 0, i32 0 - %1 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %coerce.dive2, align 8 - %call = call %"struct.std::_Rb_tree_node_base"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE17_M_insert_unique_ESt23_Rb_tree_const_iteratorIS5_ERKS5_(%"class.std::_Rb_tree"* %_M_t, %"struct.std::_Rb_tree_node_base"* %1, %"struct.std::pair"* dereferenceable(48) %0) - %coerce.dive3 = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %retval, i32 0, i32 0 - store %"struct.std::_Rb_tree_node_base"* %call, %"struct.std::_Rb_tree_node_base"** %coerce.dive3, align 8 - %coerce.dive4 = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %retval, i32 0, i32 0 - %2 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %coerce.dive4, align 8 - ret %"struct.std::_Rb_tree_node_base"* %2 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt4pairIKhSt6vectorIbSaIbEEEC2ERS0_RKS3_(%"struct.std::pair"* %this, i8* dereferenceable(1) %__a, %"class.std::vector.0"* dereferenceable(40) %__b) unnamed_addr #0 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::pair"*, align 8 - %__a.addr = alloca i8*, align 8 - %__b.addr = alloca %"class.std::vector.0"*, align 8 - store %"struct.std::pair"* %this, %"struct.std::pair"** %this.addr, align 8 - store i8* %__a, i8** %__a.addr, align 8 - store %"class.std::vector.0"* %__b, %"class.std::vector.0"** %__b.addr, align 8 - %this1 = load %"struct.std::pair"*, %"struct.std::pair"** %this.addr, align 8 - %0 = bitcast %"struct.std::pair"* %this1 to %"class.std::__pair_base"* - %first = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %this1, i32 0, i32 0 - %1 = load i8*, i8** %__a.addr, align 8 - %2 = load i8, i8* %1, align 1 - store i8 %2, i8* %first, align 8 - %second = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %this1, i32 0, i32 1 - %3 = load %"class.std::vector.0"*, %"class.std::vector.0"** %__b.addr, align 8 - call void @_ZNSt6vectorIbSaIbEEC2ERKS1_(%"class.std::vector.0"* %second, %"class.std::vector.0"* dereferenceable(40) %3) - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %"struct.std::_Rb_tree_node_base"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE11lower_boundERS1_(%"class.std::_Rb_tree"* %this, i8* dereferenceable(1) %__k) #0 comdat align 2 { -entry: - %retval = alloca %"struct.std::_Rb_tree_iterator", align 8 - %this.addr = alloca %"class.std::_Rb_tree"*, align 8 - %__k.addr = alloca i8*, align 8 - store %"class.std::_Rb_tree"* %this, %"class.std::_Rb_tree"** %this.addr, align 8 - store i8* %__k, i8** %__k.addr, align 8 - %this1 = load %"class.std::_Rb_tree"*, %"class.std::_Rb_tree"** %this.addr, align 8 - %call = call %"struct.std::_Rb_tree_node"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE8_M_beginEv(%"class.std::_Rb_tree"* %this1) - %call2 = call %"struct.std::_Rb_tree_node_base"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE6_M_endEv(%"class.std::_Rb_tree"* %this1) - %0 = load i8*, i8** %__k.addr, align 8 - %call3 = call %"struct.std::_Rb_tree_node_base"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE14_M_lower_boundEPSt13_Rb_tree_nodeIS5_EPSt18_Rb_tree_node_baseRS1_(%"class.std::_Rb_tree"* %this1, %"struct.std::_Rb_tree_node"* %call, %"struct.std::_Rb_tree_node_base"* %call2, i8* dereferenceable(1) %0) - %coerce.dive = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %retval, i32 0, i32 0 - store %"struct.std::_Rb_tree_node_base"* %call3, %"struct.std::_Rb_tree_node_base"** %coerce.dive, align 8 - %coerce.dive4 = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %retval, i32 0, i32 0 - %1 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %coerce.dive4, align 8 - ret %"struct.std::_Rb_tree_node_base"* %1 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %"struct.std::_Rb_tree_node_base"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE14_M_lower_boundEPSt13_Rb_tree_nodeIS5_EPSt18_Rb_tree_node_baseRS1_(%"class.std::_Rb_tree"* %this, %"struct.std::_Rb_tree_node"* %__x, %"struct.std::_Rb_tree_node_base"* %__y, i8* dereferenceable(1) %__k) #0 comdat align 2 { -entry: - %retval = alloca %"struct.std::_Rb_tree_iterator", align 8 - %this.addr = alloca %"class.std::_Rb_tree"*, align 8 - %__x.addr = alloca %"struct.std::_Rb_tree_node"*, align 8 - %__y.addr = alloca %"struct.std::_Rb_tree_node_base"*, align 8 - %__k.addr = alloca i8*, align 8 - store %"class.std::_Rb_tree"* %this, %"class.std::_Rb_tree"** %this.addr, align 8 - store %"struct.std::_Rb_tree_node"* %__x, %"struct.std::_Rb_tree_node"** %__x.addr, align 8 - store %"struct.std::_Rb_tree_node_base"* %__y, %"struct.std::_Rb_tree_node_base"** %__y.addr, align 8 - store i8* %__k, i8** %__k.addr, align 8 - %this1 = load %"class.std::_Rb_tree"*, %"class.std::_Rb_tree"** %this.addr, align 8 - br label %while.cond - -while.cond: ; preds = %if.end, %entry - %0 = load %"struct.std::_Rb_tree_node"*, %"struct.std::_Rb_tree_node"** %__x.addr, align 8 - %cmp = icmp ne %"struct.std::_Rb_tree_node"* %0, null - br i1 %cmp, label %while.body, label %while.end - -while.body: ; preds = %while.cond - %_M_impl = getelementptr inbounds %"class.std::_Rb_tree", %"class.std::_Rb_tree"* %this1, i32 0, i32 0 - %1 = bitcast %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl"* %_M_impl to %"struct.std::_Rb_tree_key_compare"* - %_M_key_compare = getelementptr inbounds %"struct.std::_Rb_tree_key_compare", %"struct.std::_Rb_tree_key_compare"* %1, i32 0, i32 0 - %2 = load %"struct.std::_Rb_tree_node"*, %"struct.std::_Rb_tree_node"** %__x.addr, align 8 - %call = call dereferenceable(1) i8* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE6_S_keyEPKSt13_Rb_tree_nodeIS5_E(%"struct.std::_Rb_tree_node"* %2) - %3 = load i8*, i8** %__k.addr, align 8 - %call2 = call zeroext i1 @_ZNKSt4lessIhEclERKhS2_(%"struct.std::less"* %_M_key_compare, i8* dereferenceable(1) %call, i8* dereferenceable(1) %3) - br i1 %call2, label %if.else, label %if.then - -if.then: ; preds = %while.body - %4 = load %"struct.std::_Rb_tree_node"*, %"struct.std::_Rb_tree_node"** %__x.addr, align 8 - %5 = bitcast %"struct.std::_Rb_tree_node"* %4 to %"struct.std::_Rb_tree_node_base"* - store %"struct.std::_Rb_tree_node_base"* %5, %"struct.std::_Rb_tree_node_base"** %__y.addr, align 8 - %6 = load %"struct.std::_Rb_tree_node"*, %"struct.std::_Rb_tree_node"** %__x.addr, align 8 - %7 = bitcast %"struct.std::_Rb_tree_node"* %6 to %"struct.std::_Rb_tree_node_base"* - %call3 = call %"struct.std::_Rb_tree_node"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE7_S_leftEPSt18_Rb_tree_node_base(%"struct.std::_Rb_tree_node_base"* %7) - store %"struct.std::_Rb_tree_node"* %call3, %"struct.std::_Rb_tree_node"** %__x.addr, align 8 - br label %if.end - -if.else: ; preds = %while.body - %8 = load %"struct.std::_Rb_tree_node"*, %"struct.std::_Rb_tree_node"** %__x.addr, align 8 - %9 = bitcast %"struct.std::_Rb_tree_node"* %8 to %"struct.std::_Rb_tree_node_base"* - %call4 = call %"struct.std::_Rb_tree_node"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE8_S_rightEPSt18_Rb_tree_node_base(%"struct.std::_Rb_tree_node_base"* %9) - store %"struct.std::_Rb_tree_node"* %call4, %"struct.std::_Rb_tree_node"** %__x.addr, align 8 - br label %if.end - -if.end: ; preds = %if.else, %if.then - br label %while.cond - -while.end: ; preds = %while.cond - %10 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %__y.addr, align 8 - call void @_ZNSt17_Rb_tree_iteratorISt4pairIKhSt6vectorIbSaIbEEEEC2EPSt18_Rb_tree_node_base(%"struct.std::_Rb_tree_iterator"* %retval, %"struct.std::_Rb_tree_node_base"* %10) - %coerce.dive = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %retval, i32 0, i32 0 - %11 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %coerce.dive, align 8 - ret %"struct.std::_Rb_tree_node_base"* %11 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local %"struct.std::_Rb_tree_node_base"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE6_M_endEv(%"class.std::_Rb_tree"* %this) #6 comdat align 2 { -entry: - %this.addr = alloca %"class.std::_Rb_tree"*, align 8 - store %"class.std::_Rb_tree"* %this, %"class.std::_Rb_tree"** %this.addr, align 8 - %this1 = load %"class.std::_Rb_tree"*, %"class.std::_Rb_tree"** %this.addr, align 8 - %_M_impl = getelementptr inbounds %"class.std::_Rb_tree", %"class.std::_Rb_tree"* %this1, i32 0, i32 0 - %0 = bitcast %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl"* %_M_impl to i8* - %add.ptr = getelementptr inbounds i8, i8* %0, i64 8 - %1 = bitcast i8* %add.ptr to %"struct.std::_Rb_tree_header"* - %_M_header = getelementptr inbounds %"struct.std::_Rb_tree_header", %"struct.std::_Rb_tree_header"* %1, i32 0, i32 0 - ret %"struct.std::_Rb_tree_node_base"* %_M_header -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local dereferenceable(1) i8* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE6_S_keyEPKSt13_Rb_tree_nodeIS5_E(%"struct.std::_Rb_tree_node"* %__x) #0 comdat align 2 { -entry: - %__x.addr = alloca %"struct.std::_Rb_tree_node"*, align 8 - %ref.tmp = alloca %"struct.std::_Select1st", align 1 - store %"struct.std::_Rb_tree_node"* %__x, %"struct.std::_Rb_tree_node"** %__x.addr, align 8 - %0 = load %"struct.std::_Rb_tree_node"*, %"struct.std::_Rb_tree_node"** %__x.addr, align 8 - %call = call dereferenceable(48) %"struct.std::pair"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE8_S_valueEPKSt13_Rb_tree_nodeIS5_E(%"struct.std::_Rb_tree_node"* %0) - %call1 = call dereferenceable(1) i8* @_ZNKSt10_Select1stISt4pairIKhSt6vectorIbSaIbEEEEclERKS5_(%"struct.std::_Select1st"* %ref.tmp, %"struct.std::pair"* dereferenceable(48) %call) - ret i8* %call1 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local dereferenceable(1) i8* @_ZNKSt10_Select1stISt4pairIKhSt6vectorIbSaIbEEEEclERKS5_(%"struct.std::_Select1st"* %this, %"struct.std::pair"* dereferenceable(48) %__x) #6 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Select1st"*, align 8 - %__x.addr = alloca %"struct.std::pair"*, align 8 - store %"struct.std::_Select1st"* %this, %"struct.std::_Select1st"** %this.addr, align 8 - store %"struct.std::pair"* %__x, %"struct.std::pair"** %__x.addr, align 8 - %this1 = load %"struct.std::_Select1st"*, %"struct.std::_Select1st"** %this.addr, align 8 - %0 = load %"struct.std::pair"*, %"struct.std::pair"** %__x.addr, align 8 - %first = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %0, i32 0, i32 0 - ret i8* %first -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local dereferenceable(48) %"struct.std::pair"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE8_S_valueEPKSt13_Rb_tree_nodeIS5_E(%"struct.std::_Rb_tree_node"* %__x) #0 comdat align 2 { -entry: - %__x.addr = alloca %"struct.std::_Rb_tree_node"*, align 8 - store %"struct.std::_Rb_tree_node"* %__x, %"struct.std::_Rb_tree_node"** %__x.addr, align 8 - %0 = load %"struct.std::_Rb_tree_node"*, %"struct.std::_Rb_tree_node"** %__x.addr, align 8 - %call = call %"struct.std::pair"* @_ZNKSt13_Rb_tree_nodeISt4pairIKhSt6vectorIbSaIbEEEE9_M_valptrEv(%"struct.std::_Rb_tree_node"* %0) - ret %"struct.std::pair"* %call -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNKSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE8key_compEv(%"class.std::_Rb_tree"* %this) #6 comdat align 2 { -entry: - %this.addr = alloca %"class.std::_Rb_tree"*, align 8 - store %"class.std::_Rb_tree"* %this, %"class.std::_Rb_tree"** %this.addr, align 8 - %this1 = load %"class.std::_Rb_tree"*, %"class.std::_Rb_tree"** %this.addr, align 8 - %_M_impl = getelementptr inbounds %"class.std::_Rb_tree", %"class.std::_Rb_tree"* %this1, i32 0, i32 0 - %0 = bitcast %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl"* %_M_impl to %"struct.std::_Rb_tree_key_compare"* - %_M_key_compare = getelementptr inbounds %"struct.std::_Rb_tree_key_compare", %"struct.std::_Rb_tree_key_compare"* %0, i32 0, i32 0 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %"struct.std::_Rb_tree_node_base"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE17_M_insert_unique_ESt23_Rb_tree_const_iteratorIS5_ERKS5_(%"class.std::_Rb_tree"* %this, %"struct.std::_Rb_tree_node_base"* %__pos.coerce, %"struct.std::pair"* dereferenceable(48) %__x) #0 comdat align 2 { -entry: - %retval = alloca %"struct.std::_Rb_tree_iterator", align 8 - %__pos = alloca %"struct.std::_Rb_tree_const_iterator", align 8 - %this.addr = alloca %"class.std::_Rb_tree"*, align 8 - %__x.addr = alloca %"struct.std::pair"*, align 8 - %__an = alloca %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Alloc_node", align 8 - %agg.tmp = alloca %"struct.std::_Rb_tree_const_iterator", align 8 - %coerce.dive = getelementptr inbounds %"struct.std::_Rb_tree_const_iterator", %"struct.std::_Rb_tree_const_iterator"* %__pos, i32 0, i32 0 - store %"struct.std::_Rb_tree_node_base"* %__pos.coerce, %"struct.std::_Rb_tree_node_base"** %coerce.dive, align 8 - store %"class.std::_Rb_tree"* %this, %"class.std::_Rb_tree"** %this.addr, align 8 - store %"struct.std::pair"* %__x, %"struct.std::pair"** %__x.addr, align 8 - %this1 = load %"class.std::_Rb_tree"*, %"class.std::_Rb_tree"** %this.addr, align 8 - call void @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE11_Alloc_nodeC2ERSB_(%"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Alloc_node"* %__an, %"class.std::_Rb_tree"* dereferenceable(48) %this1) - %0 = bitcast %"struct.std::_Rb_tree_const_iterator"* %agg.tmp to i8* - %1 = bitcast %"struct.std::_Rb_tree_const_iterator"* %__pos to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %0, i8* align 8 %1, i64 8, i1 false) - %2 = load %"struct.std::pair"*, %"struct.std::pair"** %__x.addr, align 8 - %coerce.dive2 = getelementptr inbounds %"struct.std::_Rb_tree_const_iterator", %"struct.std::_Rb_tree_const_iterator"* %agg.tmp, i32 0, i32 0 - %3 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %coerce.dive2, align 8 - %call = call %"struct.std::_Rb_tree_node_base"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE17_M_insert_unique_INSB_11_Alloc_nodeEEESt17_Rb_tree_iteratorIS5_ESt23_Rb_tree_const_iteratorIS5_ERKS5_RT_(%"class.std::_Rb_tree"* %this1, %"struct.std::_Rb_tree_node_base"* %3, %"struct.std::pair"* dereferenceable(48) %2, %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Alloc_node"* dereferenceable(8) %__an) - %coerce.dive3 = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %retval, i32 0, i32 0 - store %"struct.std::_Rb_tree_node_base"* %call, %"struct.std::_Rb_tree_node_base"** %coerce.dive3, align 8 - %coerce.dive4 = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %retval, i32 0, i32 0 - %4 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %coerce.dive4, align 8 - ret %"struct.std::_Rb_tree_node_base"* %4 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE11_Alloc_nodeC2ERSB_(%"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Alloc_node"* %this, %"class.std::_Rb_tree"* dereferenceable(48) %__t) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Alloc_node"*, align 8 - %__t.addr = alloca %"class.std::_Rb_tree"*, align 8 - store %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Alloc_node"* %this, %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Alloc_node"** %this.addr, align 8 - store %"class.std::_Rb_tree"* %__t, %"class.std::_Rb_tree"** %__t.addr, align 8 - %this1 = load %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Alloc_node"*, %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Alloc_node"** %this.addr, align 8 - %_M_t = getelementptr inbounds %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Alloc_node", %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Alloc_node"* %this1, i32 0, i32 0 - %0 = load %"class.std::_Rb_tree"*, %"class.std::_Rb_tree"** %__t.addr, align 8 - store %"class.std::_Rb_tree"* %0, %"class.std::_Rb_tree"** %_M_t, align 8 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %"struct.std::_Rb_tree_node_base"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE17_M_insert_unique_INSB_11_Alloc_nodeEEESt17_Rb_tree_iteratorIS5_ESt23_Rb_tree_const_iteratorIS5_ERKS5_RT_(%"class.std::_Rb_tree"* %this, %"struct.std::_Rb_tree_node_base"* %__position.coerce, %"struct.std::pair"* dereferenceable(48) %__v, %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Alloc_node"* dereferenceable(8) %__node_gen) #0 comdat align 2 { -entry: - %retval = alloca %"struct.std::_Rb_tree_iterator", align 8 - %__position = alloca %"struct.std::_Rb_tree_const_iterator", align 8 - %this.addr = alloca %"class.std::_Rb_tree"*, align 8 - %__v.addr = alloca %"struct.std::pair"*, align 8 - %__node_gen.addr = alloca %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Alloc_node"*, align 8 - %__res = alloca %"struct.std::pair.11", align 8 - %agg.tmp = alloca %"struct.std::_Rb_tree_const_iterator", align 8 - %ref.tmp = alloca %"struct.std::_Select1st", align 1 - %coerce.dive = getelementptr inbounds %"struct.std::_Rb_tree_const_iterator", %"struct.std::_Rb_tree_const_iterator"* %__position, i32 0, i32 0 - store %"struct.std::_Rb_tree_node_base"* %__position.coerce, %"struct.std::_Rb_tree_node_base"** %coerce.dive, align 8 - store %"class.std::_Rb_tree"* %this, %"class.std::_Rb_tree"** %this.addr, align 8 - store %"struct.std::pair"* %__v, %"struct.std::pair"** %__v.addr, align 8 - store %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Alloc_node"* %__node_gen, %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Alloc_node"** %__node_gen.addr, align 8 - %this1 = load %"class.std::_Rb_tree"*, %"class.std::_Rb_tree"** %this.addr, align 8 - %0 = bitcast %"struct.std::_Rb_tree_const_iterator"* %agg.tmp to i8* - %1 = bitcast %"struct.std::_Rb_tree_const_iterator"* %__position to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %0, i8* align 8 %1, i64 8, i1 false) - %2 = load %"struct.std::pair"*, %"struct.std::pair"** %__v.addr, align 8 - %call = call dereferenceable(1) i8* @_ZNKSt10_Select1stISt4pairIKhSt6vectorIbSaIbEEEEclERKS5_(%"struct.std::_Select1st"* %ref.tmp, %"struct.std::pair"* dereferenceable(48) %2) - %coerce.dive2 = getelementptr inbounds %"struct.std::_Rb_tree_const_iterator", %"struct.std::_Rb_tree_const_iterator"* %agg.tmp, i32 0, i32 0 - %3 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %coerce.dive2, align 8 - %call3 = call { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* } @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE29_M_get_insert_hint_unique_posESt23_Rb_tree_const_iteratorIS5_ERS1_(%"class.std::_Rb_tree"* %this1, %"struct.std::_Rb_tree_node_base"* %3, i8* dereferenceable(1) %call) - %4 = bitcast %"struct.std::pair.11"* %__res to { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* }* - %5 = getelementptr inbounds { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* }, { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* }* %4, i32 0, i32 0 - %6 = extractvalue { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* } %call3, 0 - store %"struct.std::_Rb_tree_node_base"* %6, %"struct.std::_Rb_tree_node_base"** %5, align 8 - %7 = getelementptr inbounds { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* }, { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* }* %4, i32 0, i32 1 - %8 = extractvalue { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* } %call3, 1 - store %"struct.std::_Rb_tree_node_base"* %8, %"struct.std::_Rb_tree_node_base"** %7, align 8 - %second = getelementptr inbounds %"struct.std::pair.11", %"struct.std::pair.11"* %__res, i32 0, i32 1 - %9 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %second, align 8 - %tobool = icmp ne %"struct.std::_Rb_tree_node_base"* %9, null - br i1 %tobool, label %if.then, label %if.end - -if.then: ; preds = %entry - %first = getelementptr inbounds %"struct.std::pair.11", %"struct.std::pair.11"* %__res, i32 0, i32 0 - %10 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %first, align 8 - %second4 = getelementptr inbounds %"struct.std::pair.11", %"struct.std::pair.11"* %__res, i32 0, i32 1 - %11 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %second4, align 8 - %12 = load %"struct.std::pair"*, %"struct.std::pair"** %__v.addr, align 8 - %13 = load %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Alloc_node"*, %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Alloc_node"** %__node_gen.addr, align 8 - %call5 = call %"struct.std::_Rb_tree_node_base"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE10_M_insert_INSB_11_Alloc_nodeEEESt17_Rb_tree_iteratorIS5_EPSt18_Rb_tree_node_baseSH_RKS5_RT_(%"class.std::_Rb_tree"* %this1, %"struct.std::_Rb_tree_node_base"* %10, %"struct.std::_Rb_tree_node_base"* %11, %"struct.std::pair"* dereferenceable(48) %12, %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Alloc_node"* dereferenceable(8) %13) - %coerce.dive6 = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %retval, i32 0, i32 0 - store %"struct.std::_Rb_tree_node_base"* %call5, %"struct.std::_Rb_tree_node_base"** %coerce.dive6, align 8 - br label %return - -if.end: ; preds = %entry - %first7 = getelementptr inbounds %"struct.std::pair.11", %"struct.std::pair.11"* %__res, i32 0, i32 0 - %14 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %first7, align 8 - call void @_ZNSt17_Rb_tree_iteratorISt4pairIKhSt6vectorIbSaIbEEEEC2EPSt18_Rb_tree_node_base(%"struct.std::_Rb_tree_iterator"* %retval, %"struct.std::_Rb_tree_node_base"* %14) - br label %return - -return: ; preds = %if.end, %if.then - %coerce.dive8 = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %retval, i32 0, i32 0 - %15 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %coerce.dive8, align 8 - ret %"struct.std::_Rb_tree_node_base"* %15 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* } @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE29_M_get_insert_hint_unique_posESt23_Rb_tree_const_iteratorIS5_ERS1_(%"class.std::_Rb_tree"* %this, %"struct.std::_Rb_tree_node_base"* %__position.coerce, i8* dereferenceable(1) %__k) #0 comdat align 2 { -entry: - %retval = alloca %"struct.std::pair.11", align 8 - %__position = alloca %"struct.std::_Rb_tree_const_iterator", align 8 - %this.addr = alloca %"class.std::_Rb_tree"*, align 8 - %__k.addr = alloca i8*, align 8 - %__pos = alloca %"struct.std::_Rb_tree_iterator", align 8 - %ref.tmp = alloca %"struct.std::_Rb_tree_node_base"*, align 8 - %__before = alloca %"struct.std::_Rb_tree_iterator", align 8 - %ref.tmp37 = alloca %"struct.std::_Rb_tree_node_base"*, align 8 - %__after = alloca %"struct.std::_Rb_tree_iterator", align 8 - %ref.tmp55 = alloca %"struct.std::_Rb_tree_node_base"*, align 8 - %ref.tmp69 = alloca %"struct.std::_Rb_tree_node_base"*, align 8 - %ref.tmp78 = alloca %"struct.std::_Rb_tree_node_base"*, align 8 - %coerce.dive = getelementptr inbounds %"struct.std::_Rb_tree_const_iterator", %"struct.std::_Rb_tree_const_iterator"* %__position, i32 0, i32 0 - store %"struct.std::_Rb_tree_node_base"* %__position.coerce, %"struct.std::_Rb_tree_node_base"** %coerce.dive, align 8 - store %"class.std::_Rb_tree"* %this, %"class.std::_Rb_tree"** %this.addr, align 8 - store i8* %__k, i8** %__k.addr, align 8 - %this1 = load %"class.std::_Rb_tree"*, %"class.std::_Rb_tree"** %this.addr, align 8 - %call = call %"struct.std::_Rb_tree_node_base"* @_ZNKSt23_Rb_tree_const_iteratorISt4pairIKhSt6vectorIbSaIbEEEE13_M_const_castEv(%"struct.std::_Rb_tree_const_iterator"* %__position) - %coerce.dive2 = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %__pos, i32 0, i32 0 - store %"struct.std::_Rb_tree_node_base"* %call, %"struct.std::_Rb_tree_node_base"** %coerce.dive2, align 8 - %_M_node = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %__pos, i32 0, i32 0 - %0 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %_M_node, align 8 - %call3 = call %"struct.std::_Rb_tree_node_base"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE6_M_endEv(%"class.std::_Rb_tree"* %this1) - %cmp = icmp eq %"struct.std::_Rb_tree_node_base"* %0, %call3 - br i1 %cmp, label %if.then, label %if.else12 - -if.then: ; preds = %entry - %call4 = call i64 @_ZNKSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE4sizeEv(%"class.std::_Rb_tree"* %this1) - %cmp5 = icmp ugt i64 %call4, 0 - br i1 %cmp5, label %land.lhs.true, label %if.else - -land.lhs.true: ; preds = %if.then - %_M_impl = getelementptr inbounds %"class.std::_Rb_tree", %"class.std::_Rb_tree"* %this1, i32 0, i32 0 - %1 = bitcast %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl"* %_M_impl to %"struct.std::_Rb_tree_key_compare"* - %_M_key_compare = getelementptr inbounds %"struct.std::_Rb_tree_key_compare", %"struct.std::_Rb_tree_key_compare"* %1, i32 0, i32 0 - %call6 = call dereferenceable(8) %"struct.std::_Rb_tree_node_base"** @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE12_M_rightmostEv(%"class.std::_Rb_tree"* %this1) - %2 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %call6, align 8 - %call7 = call dereferenceable(1) i8* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE6_S_keyEPKSt18_Rb_tree_node_base(%"struct.std::_Rb_tree_node_base"* %2) - %3 = load i8*, i8** %__k.addr, align 8 - %call8 = call zeroext i1 @_ZNKSt4lessIhEclERKhS2_(%"struct.std::less"* %_M_key_compare, i8* dereferenceable(1) %call7, i8* dereferenceable(1) %3) - br i1 %call8, label %if.then9, label %if.else - -if.then9: ; preds = %land.lhs.true - store %"struct.std::_Rb_tree_node_base"* null, %"struct.std::_Rb_tree_node_base"** %ref.tmp, align 8 - %call10 = call dereferenceable(8) %"struct.std::_Rb_tree_node_base"** @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE12_M_rightmostEv(%"class.std::_Rb_tree"* %this1) - call void @_ZNSt4pairIPSt18_Rb_tree_node_baseS1_EC2ERKS1_S4_(%"struct.std::pair.11"* %retval, %"struct.std::_Rb_tree_node_base"** dereferenceable(8) %ref.tmp, %"struct.std::_Rb_tree_node_base"** dereferenceable(8) %call10) - br label %return - -if.else: ; preds = %land.lhs.true, %if.then - %4 = load i8*, i8** %__k.addr, align 8 - %call11 = call { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* } @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE24_M_get_insert_unique_posERS1_(%"class.std::_Rb_tree"* %this1, i8* dereferenceable(1) %4) - %5 = bitcast %"struct.std::pair.11"* %retval to { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* }* - %6 = getelementptr inbounds { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* }, { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* }* %5, i32 0, i32 0 - %7 = extractvalue { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* } %call11, 0 - store %"struct.std::_Rb_tree_node_base"* %7, %"struct.std::_Rb_tree_node_base"** %6, align 8 - %8 = getelementptr inbounds { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* }, { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* }* %5, i32 0, i32 1 - %9 = extractvalue { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* } %call11, 1 - store %"struct.std::_Rb_tree_node_base"* %9, %"struct.std::_Rb_tree_node_base"** %8, align 8 - br label %return - -if.else12: ; preds = %entry - %_M_impl13 = getelementptr inbounds %"class.std::_Rb_tree", %"class.std::_Rb_tree"* %this1, i32 0, i32 0 - %10 = bitcast %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl"* %_M_impl13 to %"struct.std::_Rb_tree_key_compare"* - %_M_key_compare14 = getelementptr inbounds %"struct.std::_Rb_tree_key_compare", %"struct.std::_Rb_tree_key_compare"* %10, i32 0, i32 0 - %11 = load i8*, i8** %__k.addr, align 8 - %_M_node15 = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %__pos, i32 0, i32 0 - %12 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %_M_node15, align 8 - %call16 = call dereferenceable(1) i8* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE6_S_keyEPKSt18_Rb_tree_node_base(%"struct.std::_Rb_tree_node_base"* %12) - %call17 = call zeroext i1 @_ZNKSt4lessIhEclERKhS2_(%"struct.std::less"* %_M_key_compare14, i8* dereferenceable(1) %11, i8* dereferenceable(1) %call16) - br i1 %call17, label %if.then18, label %if.else44 - -if.then18: ; preds = %if.else12 - %13 = bitcast %"struct.std::_Rb_tree_iterator"* %__before to i8* - %14 = bitcast %"struct.std::_Rb_tree_iterator"* %__pos to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %13, i8* align 8 %14, i64 8, i1 false) - %_M_node19 = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %__pos, i32 0, i32 0 - %15 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %_M_node19, align 8 - %call20 = call dereferenceable(8) %"struct.std::_Rb_tree_node_base"** @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE11_M_leftmostEv(%"class.std::_Rb_tree"* %this1) - %16 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %call20, align 8 - %cmp21 = icmp eq %"struct.std::_Rb_tree_node_base"* %15, %16 - br i1 %cmp21, label %if.then22, label %if.else25 - -if.then22: ; preds = %if.then18 - %call23 = call dereferenceable(8) %"struct.std::_Rb_tree_node_base"** @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE11_M_leftmostEv(%"class.std::_Rb_tree"* %this1) - %call24 = call dereferenceable(8) %"struct.std::_Rb_tree_node_base"** @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE11_M_leftmostEv(%"class.std::_Rb_tree"* %this1) - call void @_ZNSt4pairIPSt18_Rb_tree_node_baseS1_EC2ERKS1_S4_(%"struct.std::pair.11"* %retval, %"struct.std::_Rb_tree_node_base"** dereferenceable(8) %call23, %"struct.std::_Rb_tree_node_base"** dereferenceable(8) %call24) - br label %return - -if.else25: ; preds = %if.then18 - %_M_impl26 = getelementptr inbounds %"class.std::_Rb_tree", %"class.std::_Rb_tree"* %this1, i32 0, i32 0 - %17 = bitcast %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl"* %_M_impl26 to %"struct.std::_Rb_tree_key_compare"* - %_M_key_compare27 = getelementptr inbounds %"struct.std::_Rb_tree_key_compare", %"struct.std::_Rb_tree_key_compare"* %17, i32 0, i32 0 - %call28 = call dereferenceable(8) %"struct.std::_Rb_tree_iterator"* @_ZNSt17_Rb_tree_iteratorISt4pairIKhSt6vectorIbSaIbEEEEmmEv(%"struct.std::_Rb_tree_iterator"* %__before) - %_M_node29 = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %call28, i32 0, i32 0 - %18 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %_M_node29, align 8 - %call30 = call dereferenceable(1) i8* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE6_S_keyEPKSt18_Rb_tree_node_base(%"struct.std::_Rb_tree_node_base"* %18) - %19 = load i8*, i8** %__k.addr, align 8 - %call31 = call zeroext i1 @_ZNKSt4lessIhEclERKhS2_(%"struct.std::less"* %_M_key_compare27, i8* dereferenceable(1) %call30, i8* dereferenceable(1) %19) - br i1 %call31, label %if.then32, label %if.else42 - -if.then32: ; preds = %if.else25 - %_M_node33 = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %__before, i32 0, i32 0 - %20 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %_M_node33, align 8 - %call34 = call %"struct.std::_Rb_tree_node"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE8_S_rightEPSt18_Rb_tree_node_base(%"struct.std::_Rb_tree_node_base"* %20) - %cmp35 = icmp eq %"struct.std::_Rb_tree_node"* %call34, null - br i1 %cmp35, label %if.then36, label %if.else39 - -if.then36: ; preds = %if.then32 - store %"struct.std::_Rb_tree_node_base"* null, %"struct.std::_Rb_tree_node_base"** %ref.tmp37, align 8 - %_M_node38 = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %__before, i32 0, i32 0 - call void @_ZNSt4pairIPSt18_Rb_tree_node_baseS1_EC2ERKS1_S4_(%"struct.std::pair.11"* %retval, %"struct.std::_Rb_tree_node_base"** dereferenceable(8) %ref.tmp37, %"struct.std::_Rb_tree_node_base"** dereferenceable(8) %_M_node38) - br label %return - -if.else39: ; preds = %if.then32 - %_M_node40 = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %__pos, i32 0, i32 0 - %_M_node41 = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %__pos, i32 0, i32 0 - call void @_ZNSt4pairIPSt18_Rb_tree_node_baseS1_EC2ERKS1_S4_(%"struct.std::pair.11"* %retval, %"struct.std::_Rb_tree_node_base"** dereferenceable(8) %_M_node40, %"struct.std::_Rb_tree_node_base"** dereferenceable(8) %_M_node41) - br label %return - -if.else42: ; preds = %if.else25 - %21 = load i8*, i8** %__k.addr, align 8 - %call43 = call { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* } @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE24_M_get_insert_unique_posERS1_(%"class.std::_Rb_tree"* %this1, i8* dereferenceable(1) %21) - %22 = bitcast %"struct.std::pair.11"* %retval to { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* }* - %23 = getelementptr inbounds { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* }, { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* }* %22, i32 0, i32 0 - %24 = extractvalue { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* } %call43, 0 - store %"struct.std::_Rb_tree_node_base"* %24, %"struct.std::_Rb_tree_node_base"** %23, align 8 - %25 = getelementptr inbounds { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* }, { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* }* %22, i32 0, i32 1 - %26 = extractvalue { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* } %call43, 1 - store %"struct.std::_Rb_tree_node_base"* %26, %"struct.std::_Rb_tree_node_base"** %25, align 8 - br label %return - -if.else44: ; preds = %if.else12 - %_M_impl45 = getelementptr inbounds %"class.std::_Rb_tree", %"class.std::_Rb_tree"* %this1, i32 0, i32 0 - %27 = bitcast %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl"* %_M_impl45 to %"struct.std::_Rb_tree_key_compare"* - %_M_key_compare46 = getelementptr inbounds %"struct.std::_Rb_tree_key_compare", %"struct.std::_Rb_tree_key_compare"* %27, i32 0, i32 0 - %_M_node47 = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %__pos, i32 0, i32 0 - %28 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %_M_node47, align 8 - %call48 = call dereferenceable(1) i8* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE6_S_keyEPKSt18_Rb_tree_node_base(%"struct.std::_Rb_tree_node_base"* %28) - %29 = load i8*, i8** %__k.addr, align 8 - %call49 = call zeroext i1 @_ZNKSt4lessIhEclERKhS2_(%"struct.std::less"* %_M_key_compare46, i8* dereferenceable(1) %call48, i8* dereferenceable(1) %29) - br i1 %call49, label %if.then50, label %if.else76 - -if.then50: ; preds = %if.else44 - %30 = bitcast %"struct.std::_Rb_tree_iterator"* %__after to i8* - %31 = bitcast %"struct.std::_Rb_tree_iterator"* %__pos to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %30, i8* align 8 %31, i64 8, i1 false) - %_M_node51 = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %__pos, i32 0, i32 0 - %32 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %_M_node51, align 8 - %call52 = call dereferenceable(8) %"struct.std::_Rb_tree_node_base"** @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE12_M_rightmostEv(%"class.std::_Rb_tree"* %this1) - %33 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %call52, align 8 - %cmp53 = icmp eq %"struct.std::_Rb_tree_node_base"* %32, %33 - br i1 %cmp53, label %if.then54, label %if.else57 - -if.then54: ; preds = %if.then50 - store %"struct.std::_Rb_tree_node_base"* null, %"struct.std::_Rb_tree_node_base"** %ref.tmp55, align 8 - %call56 = call dereferenceable(8) %"struct.std::_Rb_tree_node_base"** @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE12_M_rightmostEv(%"class.std::_Rb_tree"* %this1) - call void @_ZNSt4pairIPSt18_Rb_tree_node_baseS1_EC2ERKS1_S4_(%"struct.std::pair.11"* %retval, %"struct.std::_Rb_tree_node_base"** dereferenceable(8) %ref.tmp55, %"struct.std::_Rb_tree_node_base"** dereferenceable(8) %call56) - br label %return - -if.else57: ; preds = %if.then50 - %_M_impl58 = getelementptr inbounds %"class.std::_Rb_tree", %"class.std::_Rb_tree"* %this1, i32 0, i32 0 - %34 = bitcast %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl"* %_M_impl58 to %"struct.std::_Rb_tree_key_compare"* - %_M_key_compare59 = getelementptr inbounds %"struct.std::_Rb_tree_key_compare", %"struct.std::_Rb_tree_key_compare"* %34, i32 0, i32 0 - %35 = load i8*, i8** %__k.addr, align 8 - %call60 = call dereferenceable(8) %"struct.std::_Rb_tree_iterator"* @_ZNSt17_Rb_tree_iteratorISt4pairIKhSt6vectorIbSaIbEEEEppEv(%"struct.std::_Rb_tree_iterator"* %__after) - %_M_node61 = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %call60, i32 0, i32 0 - %36 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %_M_node61, align 8 - %call62 = call dereferenceable(1) i8* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE6_S_keyEPKSt18_Rb_tree_node_base(%"struct.std::_Rb_tree_node_base"* %36) - %call63 = call zeroext i1 @_ZNKSt4lessIhEclERKhS2_(%"struct.std::less"* %_M_key_compare59, i8* dereferenceable(1) %35, i8* dereferenceable(1) %call62) - br i1 %call63, label %if.then64, label %if.else74 - -if.then64: ; preds = %if.else57 - %_M_node65 = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %__pos, i32 0, i32 0 - %37 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %_M_node65, align 8 - %call66 = call %"struct.std::_Rb_tree_node"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE8_S_rightEPSt18_Rb_tree_node_base(%"struct.std::_Rb_tree_node_base"* %37) - %cmp67 = icmp eq %"struct.std::_Rb_tree_node"* %call66, null - br i1 %cmp67, label %if.then68, label %if.else71 - -if.then68: ; preds = %if.then64 - store %"struct.std::_Rb_tree_node_base"* null, %"struct.std::_Rb_tree_node_base"** %ref.tmp69, align 8 - %_M_node70 = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %__pos, i32 0, i32 0 - call void @_ZNSt4pairIPSt18_Rb_tree_node_baseS1_EC2ERKS1_S4_(%"struct.std::pair.11"* %retval, %"struct.std::_Rb_tree_node_base"** dereferenceable(8) %ref.tmp69, %"struct.std::_Rb_tree_node_base"** dereferenceable(8) %_M_node70) - br label %return - -if.else71: ; preds = %if.then64 - %_M_node72 = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %__after, i32 0, i32 0 - %_M_node73 = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %__after, i32 0, i32 0 - call void @_ZNSt4pairIPSt18_Rb_tree_node_baseS1_EC2ERKS1_S4_(%"struct.std::pair.11"* %retval, %"struct.std::_Rb_tree_node_base"** dereferenceable(8) %_M_node72, %"struct.std::_Rb_tree_node_base"** dereferenceable(8) %_M_node73) - br label %return - -if.else74: ; preds = %if.else57 - %38 = load i8*, i8** %__k.addr, align 8 - %call75 = call { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* } @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE24_M_get_insert_unique_posERS1_(%"class.std::_Rb_tree"* %this1, i8* dereferenceable(1) %38) - %39 = bitcast %"struct.std::pair.11"* %retval to { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* }* - %40 = getelementptr inbounds { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* }, { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* }* %39, i32 0, i32 0 - %41 = extractvalue { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* } %call75, 0 - store %"struct.std::_Rb_tree_node_base"* %41, %"struct.std::_Rb_tree_node_base"** %40, align 8 - %42 = getelementptr inbounds { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* }, { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* }* %39, i32 0, i32 1 - %43 = extractvalue { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* } %call75, 1 - store %"struct.std::_Rb_tree_node_base"* %43, %"struct.std::_Rb_tree_node_base"** %42, align 8 - br label %return - -if.else76: ; preds = %if.else44 - %_M_node77 = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %__pos, i32 0, i32 0 - store %"struct.std::_Rb_tree_node_base"* null, %"struct.std::_Rb_tree_node_base"** %ref.tmp78, align 8 - call void @_ZNSt4pairIPSt18_Rb_tree_node_baseS1_EC2ERKS1_S4_(%"struct.std::pair.11"* %retval, %"struct.std::_Rb_tree_node_base"** dereferenceable(8) %_M_node77, %"struct.std::_Rb_tree_node_base"** dereferenceable(8) %ref.tmp78) - br label %return - -return: ; preds = %if.else76, %if.else74, %if.else71, %if.then68, %if.then54, %if.else42, %if.else39, %if.then36, %if.then22, %if.else, %if.then9 - %44 = bitcast %"struct.std::pair.11"* %retval to { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* }* - %45 = load { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* }, { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* }* %44, align 8 - ret { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* } %45 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %"struct.std::_Rb_tree_node_base"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE10_M_insert_INSB_11_Alloc_nodeEEESt17_Rb_tree_iteratorIS5_EPSt18_Rb_tree_node_baseSH_RKS5_RT_(%"class.std::_Rb_tree"* %this, %"struct.std::_Rb_tree_node_base"* %__x, %"struct.std::_Rb_tree_node_base"* %__p, %"struct.std::pair"* dereferenceable(48) %__v, %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Alloc_node"* dereferenceable(8) %__node_gen) #0 comdat align 2 { -entry: - %retval = alloca %"struct.std::_Rb_tree_iterator", align 8 - %this.addr = alloca %"class.std::_Rb_tree"*, align 8 - %__x.addr = alloca %"struct.std::_Rb_tree_node_base"*, align 8 - %__p.addr = alloca %"struct.std::_Rb_tree_node_base"*, align 8 - %__v.addr = alloca %"struct.std::pair"*, align 8 - %__node_gen.addr = alloca %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Alloc_node"*, align 8 - %__insert_left = alloca i8, align 1 - %ref.tmp = alloca %"struct.std::_Select1st", align 1 - %__z = alloca %"struct.std::_Rb_tree_node"*, align 8 - store %"class.std::_Rb_tree"* %this, %"class.std::_Rb_tree"** %this.addr, align 8 - store %"struct.std::_Rb_tree_node_base"* %__x, %"struct.std::_Rb_tree_node_base"** %__x.addr, align 8 - store %"struct.std::_Rb_tree_node_base"* %__p, %"struct.std::_Rb_tree_node_base"** %__p.addr, align 8 - store %"struct.std::pair"* %__v, %"struct.std::pair"** %__v.addr, align 8 - store %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Alloc_node"* %__node_gen, %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Alloc_node"** %__node_gen.addr, align 8 - %this1 = load %"class.std::_Rb_tree"*, %"class.std::_Rb_tree"** %this.addr, align 8 - %0 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %__x.addr, align 8 - %cmp = icmp ne %"struct.std::_Rb_tree_node_base"* %0, null - br i1 %cmp, label %lor.end, label %lor.lhs.false - -lor.lhs.false: ; preds = %entry - %1 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %__p.addr, align 8 - %call = call %"struct.std::_Rb_tree_node_base"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE6_M_endEv(%"class.std::_Rb_tree"* %this1) - %cmp2 = icmp eq %"struct.std::_Rb_tree_node_base"* %1, %call - br i1 %cmp2, label %lor.end, label %lor.rhs - -lor.rhs: ; preds = %lor.lhs.false - %_M_impl = getelementptr inbounds %"class.std::_Rb_tree", %"class.std::_Rb_tree"* %this1, i32 0, i32 0 - %2 = bitcast %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl"* %_M_impl to %"struct.std::_Rb_tree_key_compare"* - %_M_key_compare = getelementptr inbounds %"struct.std::_Rb_tree_key_compare", %"struct.std::_Rb_tree_key_compare"* %2, i32 0, i32 0 - %3 = load %"struct.std::pair"*, %"struct.std::pair"** %__v.addr, align 8 - %call3 = call dereferenceable(1) i8* @_ZNKSt10_Select1stISt4pairIKhSt6vectorIbSaIbEEEEclERKS5_(%"struct.std::_Select1st"* %ref.tmp, %"struct.std::pair"* dereferenceable(48) %3) - %4 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %__p.addr, align 8 - %call4 = call dereferenceable(1) i8* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE6_S_keyEPKSt18_Rb_tree_node_base(%"struct.std::_Rb_tree_node_base"* %4) - %call5 = call zeroext i1 @_ZNKSt4lessIhEclERKhS2_(%"struct.std::less"* %_M_key_compare, i8* dereferenceable(1) %call3, i8* dereferenceable(1) %call4) - br label %lor.end - -lor.end: ; preds = %lor.rhs, %lor.lhs.false, %entry - %5 = phi i1 [ true, %lor.lhs.false ], [ true, %entry ], [ %call5, %lor.rhs ] - %frombool = zext i1 %5 to i8 - store i8 %frombool, i8* %__insert_left, align 1 - %6 = load %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Alloc_node"*, %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Alloc_node"** %__node_gen.addr, align 8 - %7 = load %"struct.std::pair"*, %"struct.std::pair"** %__v.addr, align 8 - %call6 = call %"struct.std::_Rb_tree_node"* @_ZNKSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE11_Alloc_nodeclIS5_EEPSt13_Rb_tree_nodeIS5_ERKT_(%"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Alloc_node"* %6, %"struct.std::pair"* dereferenceable(48) %7) - store %"struct.std::_Rb_tree_node"* %call6, %"struct.std::_Rb_tree_node"** %__z, align 8 - %8 = load i8, i8* %__insert_left, align 1 - %tobool = trunc i8 %8 to i1 - %9 = load %"struct.std::_Rb_tree_node"*, %"struct.std::_Rb_tree_node"** %__z, align 8 - %10 = bitcast %"struct.std::_Rb_tree_node"* %9 to %"struct.std::_Rb_tree_node_base"* - %11 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %__p.addr, align 8 - %_M_impl7 = getelementptr inbounds %"class.std::_Rb_tree", %"class.std::_Rb_tree"* %this1, i32 0, i32 0 - %12 = bitcast %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl"* %_M_impl7 to i8* - %add.ptr = getelementptr inbounds i8, i8* %12, i64 8 - %13 = bitcast i8* %add.ptr to %"struct.std::_Rb_tree_header"* - %_M_header = getelementptr inbounds %"struct.std::_Rb_tree_header", %"struct.std::_Rb_tree_header"* %13, i32 0, i32 0 - call void @_ZSt29_Rb_tree_insert_and_rebalancebPSt18_Rb_tree_node_baseS0_RS_(i1 zeroext %tobool, %"struct.std::_Rb_tree_node_base"* %10, %"struct.std::_Rb_tree_node_base"* %11, %"struct.std::_Rb_tree_node_base"* dereferenceable(32) %_M_header) #3 - %_M_impl8 = getelementptr inbounds %"class.std::_Rb_tree", %"class.std::_Rb_tree"* %this1, i32 0, i32 0 - %14 = bitcast %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl"* %_M_impl8 to i8* - %add.ptr9 = getelementptr inbounds i8, i8* %14, i64 8 - %15 = bitcast i8* %add.ptr9 to %"struct.std::_Rb_tree_header"* - %_M_node_count = getelementptr inbounds %"struct.std::_Rb_tree_header", %"struct.std::_Rb_tree_header"* %15, i32 0, i32 1 - %16 = load i64, i64* %_M_node_count, align 8 - %inc = add i64 %16, 1 - store i64 %inc, i64* %_M_node_count, align 8 - %17 = load %"struct.std::_Rb_tree_node"*, %"struct.std::_Rb_tree_node"** %__z, align 8 - %18 = bitcast %"struct.std::_Rb_tree_node"* %17 to %"struct.std::_Rb_tree_node_base"* - call void @_ZNSt17_Rb_tree_iteratorISt4pairIKhSt6vectorIbSaIbEEEEC2EPSt18_Rb_tree_node_base(%"struct.std::_Rb_tree_iterator"* %retval, %"struct.std::_Rb_tree_node_base"* %18) - %coerce.dive = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %retval, i32 0, i32 0 - %19 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %coerce.dive, align 8 - ret %"struct.std::_Rb_tree_node_base"* %19 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %"struct.std::_Rb_tree_node_base"* @_ZNKSt23_Rb_tree_const_iteratorISt4pairIKhSt6vectorIbSaIbEEEE13_M_const_castEv(%"struct.std::_Rb_tree_const_iterator"* %this) #0 comdat align 2 { -entry: - %retval = alloca %"struct.std::_Rb_tree_iterator", align 8 - %this.addr = alloca %"struct.std::_Rb_tree_const_iterator"*, align 8 - store %"struct.std::_Rb_tree_const_iterator"* %this, %"struct.std::_Rb_tree_const_iterator"** %this.addr, align 8 - %this1 = load %"struct.std::_Rb_tree_const_iterator"*, %"struct.std::_Rb_tree_const_iterator"** %this.addr, align 8 - %_M_node = getelementptr inbounds %"struct.std::_Rb_tree_const_iterator", %"struct.std::_Rb_tree_const_iterator"* %this1, i32 0, i32 0 - %0 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %_M_node, align 8 - call void @_ZNSt17_Rb_tree_iteratorISt4pairIKhSt6vectorIbSaIbEEEEC2EPSt18_Rb_tree_node_base(%"struct.std::_Rb_tree_iterator"* %retval, %"struct.std::_Rb_tree_node_base"* %0) - %coerce.dive = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %retval, i32 0, i32 0 - %1 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %coerce.dive, align 8 - ret %"struct.std::_Rb_tree_node_base"* %1 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local i64 @_ZNKSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE4sizeEv(%"class.std::_Rb_tree"* %this) #6 comdat align 2 { -entry: - %this.addr = alloca %"class.std::_Rb_tree"*, align 8 - store %"class.std::_Rb_tree"* %this, %"class.std::_Rb_tree"** %this.addr, align 8 - %this1 = load %"class.std::_Rb_tree"*, %"class.std::_Rb_tree"** %this.addr, align 8 - %_M_impl = getelementptr inbounds %"class.std::_Rb_tree", %"class.std::_Rb_tree"* %this1, i32 0, i32 0 - %0 = bitcast %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl"* %_M_impl to i8* - %add.ptr = getelementptr inbounds i8, i8* %0, i64 8 - %1 = bitcast i8* %add.ptr to %"struct.std::_Rb_tree_header"* - %_M_node_count = getelementptr inbounds %"struct.std::_Rb_tree_header", %"struct.std::_Rb_tree_header"* %1, i32 0, i32 1 - %2 = load i64, i64* %_M_node_count, align 8 - ret i64 %2 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local dereferenceable(1) i8* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE6_S_keyEPKSt18_Rb_tree_node_base(%"struct.std::_Rb_tree_node_base"* %__x) #0 comdat align 2 { -entry: - %__x.addr = alloca %"struct.std::_Rb_tree_node_base"*, align 8 - %ref.tmp = alloca %"struct.std::_Select1st", align 1 - store %"struct.std::_Rb_tree_node_base"* %__x, %"struct.std::_Rb_tree_node_base"** %__x.addr, align 8 - %0 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %__x.addr, align 8 - %call = call dereferenceable(48) %"struct.std::pair"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE8_S_valueEPKSt18_Rb_tree_node_base(%"struct.std::_Rb_tree_node_base"* %0) - %call1 = call dereferenceable(1) i8* @_ZNKSt10_Select1stISt4pairIKhSt6vectorIbSaIbEEEEclERKS5_(%"struct.std::_Select1st"* %ref.tmp, %"struct.std::pair"* dereferenceable(48) %call) - ret i8* %call1 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local dereferenceable(8) %"struct.std::_Rb_tree_node_base"** @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE12_M_rightmostEv(%"class.std::_Rb_tree"* %this) #6 comdat align 2 { -entry: - %this.addr = alloca %"class.std::_Rb_tree"*, align 8 - store %"class.std::_Rb_tree"* %this, %"class.std::_Rb_tree"** %this.addr, align 8 - %this1 = load %"class.std::_Rb_tree"*, %"class.std::_Rb_tree"** %this.addr, align 8 - %_M_impl = getelementptr inbounds %"class.std::_Rb_tree", %"class.std::_Rb_tree"* %this1, i32 0, i32 0 - %0 = bitcast %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl"* %_M_impl to i8* - %add.ptr = getelementptr inbounds i8, i8* %0, i64 8 - %1 = bitcast i8* %add.ptr to %"struct.std::_Rb_tree_header"* - %_M_header = getelementptr inbounds %"struct.std::_Rb_tree_header", %"struct.std::_Rb_tree_header"* %1, i32 0, i32 0 - %_M_right = getelementptr inbounds %"struct.std::_Rb_tree_node_base", %"struct.std::_Rb_tree_node_base"* %_M_header, i32 0, i32 3 - ret %"struct.std::_Rb_tree_node_base"** %_M_right -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSt4pairIPSt18_Rb_tree_node_baseS1_EC2ERKS1_S4_(%"struct.std::pair.11"* %this, %"struct.std::_Rb_tree_node_base"** dereferenceable(8) %__a, %"struct.std::_Rb_tree_node_base"** dereferenceable(8) %__b) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::pair.11"*, align 8 - %__a.addr = alloca %"struct.std::_Rb_tree_node_base"**, align 8 - %__b.addr = alloca %"struct.std::_Rb_tree_node_base"**, align 8 - store %"struct.std::pair.11"* %this, %"struct.std::pair.11"** %this.addr, align 8 - store %"struct.std::_Rb_tree_node_base"** %__a, %"struct.std::_Rb_tree_node_base"*** %__a.addr, align 8 - store %"struct.std::_Rb_tree_node_base"** %__b, %"struct.std::_Rb_tree_node_base"*** %__b.addr, align 8 - %this1 = load %"struct.std::pair.11"*, %"struct.std::pair.11"** %this.addr, align 8 - %0 = bitcast %"struct.std::pair.11"* %this1 to %"class.std::__pair_base.12"* - %first = getelementptr inbounds %"struct.std::pair.11", %"struct.std::pair.11"* %this1, i32 0, i32 0 - %1 = load %"struct.std::_Rb_tree_node_base"**, %"struct.std::_Rb_tree_node_base"*** %__a.addr, align 8 - %2 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %1, align 8 - store %"struct.std::_Rb_tree_node_base"* %2, %"struct.std::_Rb_tree_node_base"** %first, align 8 - %second = getelementptr inbounds %"struct.std::pair.11", %"struct.std::pair.11"* %this1, i32 0, i32 1 - %3 = load %"struct.std::_Rb_tree_node_base"**, %"struct.std::_Rb_tree_node_base"*** %__b.addr, align 8 - %4 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %3, align 8 - store %"struct.std::_Rb_tree_node_base"* %4, %"struct.std::_Rb_tree_node_base"** %second, align 8 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* } @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE24_M_get_insert_unique_posERS1_(%"class.std::_Rb_tree"* %this, i8* dereferenceable(1) %__k) #0 comdat align 2 { -entry: - %retval = alloca %"struct.std::pair.11", align 8 - %this.addr = alloca %"class.std::_Rb_tree"*, align 8 - %__k.addr = alloca i8*, align 8 - %__x = alloca %"struct.std::_Rb_tree_node"*, align 8 - %__y = alloca %"struct.std::_Rb_tree_node_base"*, align 8 - %__comp = alloca i8, align 1 - %__j = alloca %"struct.std::_Rb_tree_iterator", align 8 - %ref.tmp = alloca %"struct.std::_Rb_tree_iterator", align 8 - %ref.tmp11 = alloca %"struct.std::_Rb_tree_node_base"*, align 8 - %ref.tmp19 = alloca %"struct.std::_Rb_tree_node_base"*, align 8 - %ref.tmp22 = alloca %"struct.std::_Rb_tree_node_base"*, align 8 - store %"class.std::_Rb_tree"* %this, %"class.std::_Rb_tree"** %this.addr, align 8 - store i8* %__k, i8** %__k.addr, align 8 - %this1 = load %"class.std::_Rb_tree"*, %"class.std::_Rb_tree"** %this.addr, align 8 - %call = call %"struct.std::_Rb_tree_node"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE8_M_beginEv(%"class.std::_Rb_tree"* %this1) - store %"struct.std::_Rb_tree_node"* %call, %"struct.std::_Rb_tree_node"** %__x, align 8 - %call2 = call %"struct.std::_Rb_tree_node_base"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE6_M_endEv(%"class.std::_Rb_tree"* %this1) - store %"struct.std::_Rb_tree_node_base"* %call2, %"struct.std::_Rb_tree_node_base"** %__y, align 8 - store i8 1, i8* %__comp, align 1 - br label %while.cond - -while.cond: ; preds = %cond.end, %entry - %0 = load %"struct.std::_Rb_tree_node"*, %"struct.std::_Rb_tree_node"** %__x, align 8 - %cmp = icmp ne %"struct.std::_Rb_tree_node"* %0, null - br i1 %cmp, label %while.body, label %while.end - -while.body: ; preds = %while.cond - %1 = load %"struct.std::_Rb_tree_node"*, %"struct.std::_Rb_tree_node"** %__x, align 8 - %2 = bitcast %"struct.std::_Rb_tree_node"* %1 to %"struct.std::_Rb_tree_node_base"* - store %"struct.std::_Rb_tree_node_base"* %2, %"struct.std::_Rb_tree_node_base"** %__y, align 8 - %_M_impl = getelementptr inbounds %"class.std::_Rb_tree", %"class.std::_Rb_tree"* %this1, i32 0, i32 0 - %3 = bitcast %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl"* %_M_impl to %"struct.std::_Rb_tree_key_compare"* - %_M_key_compare = getelementptr inbounds %"struct.std::_Rb_tree_key_compare", %"struct.std::_Rb_tree_key_compare"* %3, i32 0, i32 0 - %4 = load i8*, i8** %__k.addr, align 8 - %5 = load %"struct.std::_Rb_tree_node"*, %"struct.std::_Rb_tree_node"** %__x, align 8 - %call3 = call dereferenceable(1) i8* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE6_S_keyEPKSt13_Rb_tree_nodeIS5_E(%"struct.std::_Rb_tree_node"* %5) - %call4 = call zeroext i1 @_ZNKSt4lessIhEclERKhS2_(%"struct.std::less"* %_M_key_compare, i8* dereferenceable(1) %4, i8* dereferenceable(1) %call3) - %frombool = zext i1 %call4 to i8 - store i8 %frombool, i8* %__comp, align 1 - %6 = load i8, i8* %__comp, align 1 - %tobool = trunc i8 %6 to i1 - br i1 %tobool, label %cond.true, label %cond.false - -cond.true: ; preds = %while.body - %7 = load %"struct.std::_Rb_tree_node"*, %"struct.std::_Rb_tree_node"** %__x, align 8 - %8 = bitcast %"struct.std::_Rb_tree_node"* %7 to %"struct.std::_Rb_tree_node_base"* - %call5 = call %"struct.std::_Rb_tree_node"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE7_S_leftEPSt18_Rb_tree_node_base(%"struct.std::_Rb_tree_node_base"* %8) - br label %cond.end - -cond.false: ; preds = %while.body - %9 = load %"struct.std::_Rb_tree_node"*, %"struct.std::_Rb_tree_node"** %__x, align 8 - %10 = bitcast %"struct.std::_Rb_tree_node"* %9 to %"struct.std::_Rb_tree_node_base"* - %call6 = call %"struct.std::_Rb_tree_node"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE8_S_rightEPSt18_Rb_tree_node_base(%"struct.std::_Rb_tree_node_base"* %10) - br label %cond.end - -cond.end: ; preds = %cond.false, %cond.true - %cond = phi %"struct.std::_Rb_tree_node"* [ %call5, %cond.true ], [ %call6, %cond.false ] - store %"struct.std::_Rb_tree_node"* %cond, %"struct.std::_Rb_tree_node"** %__x, align 8 - br label %while.cond - -while.end: ; preds = %while.cond - %11 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %__y, align 8 - call void @_ZNSt17_Rb_tree_iteratorISt4pairIKhSt6vectorIbSaIbEEEEC2EPSt18_Rb_tree_node_base(%"struct.std::_Rb_tree_iterator"* %__j, %"struct.std::_Rb_tree_node_base"* %11) - %12 = load i8, i8* %__comp, align 1 - %tobool7 = trunc i8 %12 to i1 - br i1 %tobool7, label %if.then, label %if.end13 - -if.then: ; preds = %while.end - %call8 = call %"struct.std::_Rb_tree_node_base"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE5beginEv(%"class.std::_Rb_tree"* %this1) - %coerce.dive = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %ref.tmp, i32 0, i32 0 - store %"struct.std::_Rb_tree_node_base"* %call8, %"struct.std::_Rb_tree_node_base"** %coerce.dive, align 8 - %call9 = call zeroext i1 @_ZNKSt17_Rb_tree_iteratorISt4pairIKhSt6vectorIbSaIbEEEEeqERKS6_(%"struct.std::_Rb_tree_iterator"* %__j, %"struct.std::_Rb_tree_iterator"* dereferenceable(8) %ref.tmp) - br i1 %call9, label %if.then10, label %if.else - -if.then10: ; preds = %if.then - %13 = load %"struct.std::_Rb_tree_node"*, %"struct.std::_Rb_tree_node"** %__x, align 8 - %14 = bitcast %"struct.std::_Rb_tree_node"* %13 to %"struct.std::_Rb_tree_node_base"* - store %"struct.std::_Rb_tree_node_base"* %14, %"struct.std::_Rb_tree_node_base"** %ref.tmp11, align 8 - call void @_ZNSt4pairIPSt18_Rb_tree_node_baseS1_EC2ERKS1_S4_(%"struct.std::pair.11"* %retval, %"struct.std::_Rb_tree_node_base"** dereferenceable(8) %ref.tmp11, %"struct.std::_Rb_tree_node_base"** dereferenceable(8) %__y) - br label %return - -if.else: ; preds = %if.then - %call12 = call dereferenceable(8) %"struct.std::_Rb_tree_iterator"* @_ZNSt17_Rb_tree_iteratorISt4pairIKhSt6vectorIbSaIbEEEEmmEv(%"struct.std::_Rb_tree_iterator"* %__j) - br label %if.end - -if.end: ; preds = %if.else - br label %if.end13 - -if.end13: ; preds = %if.end, %while.end - %_M_impl14 = getelementptr inbounds %"class.std::_Rb_tree", %"class.std::_Rb_tree"* %this1, i32 0, i32 0 - %15 = bitcast %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl"* %_M_impl14 to %"struct.std::_Rb_tree_key_compare"* - %_M_key_compare15 = getelementptr inbounds %"struct.std::_Rb_tree_key_compare", %"struct.std::_Rb_tree_key_compare"* %15, i32 0, i32 0 - %_M_node = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %__j, i32 0, i32 0 - %16 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %_M_node, align 8 - %call16 = call dereferenceable(1) i8* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE6_S_keyEPKSt18_Rb_tree_node_base(%"struct.std::_Rb_tree_node_base"* %16) - %17 = load i8*, i8** %__k.addr, align 8 - %call17 = call zeroext i1 @_ZNKSt4lessIhEclERKhS2_(%"struct.std::less"* %_M_key_compare15, i8* dereferenceable(1) %call16, i8* dereferenceable(1) %17) - br i1 %call17, label %if.then18, label %if.end20 - -if.then18: ; preds = %if.end13 - %18 = load %"struct.std::_Rb_tree_node"*, %"struct.std::_Rb_tree_node"** %__x, align 8 - %19 = bitcast %"struct.std::_Rb_tree_node"* %18 to %"struct.std::_Rb_tree_node_base"* - store %"struct.std::_Rb_tree_node_base"* %19, %"struct.std::_Rb_tree_node_base"** %ref.tmp19, align 8 - call void @_ZNSt4pairIPSt18_Rb_tree_node_baseS1_EC2ERKS1_S4_(%"struct.std::pair.11"* %retval, %"struct.std::_Rb_tree_node_base"** dereferenceable(8) %ref.tmp19, %"struct.std::_Rb_tree_node_base"** dereferenceable(8) %__y) - br label %return - -if.end20: ; preds = %if.end13 - %_M_node21 = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %__j, i32 0, i32 0 - store %"struct.std::_Rb_tree_node_base"* null, %"struct.std::_Rb_tree_node_base"** %ref.tmp22, align 8 - call void @_ZNSt4pairIPSt18_Rb_tree_node_baseS1_EC2ERKS1_S4_(%"struct.std::pair.11"* %retval, %"struct.std::_Rb_tree_node_base"** dereferenceable(8) %_M_node21, %"struct.std::_Rb_tree_node_base"** dereferenceable(8) %ref.tmp22) - br label %return - -return: ; preds = %if.end20, %if.then18, %if.then10 - %20 = bitcast %"struct.std::pair.11"* %retval to { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* }* - %21 = load { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* }, { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* }* %20, align 8 - ret { %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* } %21 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local dereferenceable(8) %"struct.std::_Rb_tree_node_base"** @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE11_M_leftmostEv(%"class.std::_Rb_tree"* %this) #6 comdat align 2 { -entry: - %this.addr = alloca %"class.std::_Rb_tree"*, align 8 - store %"class.std::_Rb_tree"* %this, %"class.std::_Rb_tree"** %this.addr, align 8 - %this1 = load %"class.std::_Rb_tree"*, %"class.std::_Rb_tree"** %this.addr, align 8 - %_M_impl = getelementptr inbounds %"class.std::_Rb_tree", %"class.std::_Rb_tree"* %this1, i32 0, i32 0 - %0 = bitcast %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Rb_tree_impl"* %_M_impl to i8* - %add.ptr = getelementptr inbounds i8, i8* %0, i64 8 - %1 = bitcast i8* %add.ptr to %"struct.std::_Rb_tree_header"* - %_M_header = getelementptr inbounds %"struct.std::_Rb_tree_header", %"struct.std::_Rb_tree_header"* %1, i32 0, i32 0 - %_M_left = getelementptr inbounds %"struct.std::_Rb_tree_node_base", %"struct.std::_Rb_tree_node_base"* %_M_header, i32 0, i32 2 - ret %"struct.std::_Rb_tree_node_base"** %_M_left -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local dereferenceable(8) %"struct.std::_Rb_tree_iterator"* @_ZNSt17_Rb_tree_iteratorISt4pairIKhSt6vectorIbSaIbEEEEmmEv(%"struct.std::_Rb_tree_iterator"* %this) #6 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Rb_tree_iterator"*, align 8 - store %"struct.std::_Rb_tree_iterator"* %this, %"struct.std::_Rb_tree_iterator"** %this.addr, align 8 - %this1 = load %"struct.std::_Rb_tree_iterator"*, %"struct.std::_Rb_tree_iterator"** %this.addr, align 8 - %_M_node = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %this1, i32 0, i32 0 - %0 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %_M_node, align 8 - %call = call %"struct.std::_Rb_tree_node_base"* @_ZSt18_Rb_tree_decrementPSt18_Rb_tree_node_base(%"struct.std::_Rb_tree_node_base"* %0) #10 - %_M_node2 = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %this1, i32 0, i32 0 - store %"struct.std::_Rb_tree_node_base"* %call, %"struct.std::_Rb_tree_node_base"** %_M_node2, align 8 - ret %"struct.std::_Rb_tree_iterator"* %this1 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local dereferenceable(8) %"struct.std::_Rb_tree_iterator"* @_ZNSt17_Rb_tree_iteratorISt4pairIKhSt6vectorIbSaIbEEEEppEv(%"struct.std::_Rb_tree_iterator"* %this) #6 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Rb_tree_iterator"*, align 8 - store %"struct.std::_Rb_tree_iterator"* %this, %"struct.std::_Rb_tree_iterator"** %this.addr, align 8 - %this1 = load %"struct.std::_Rb_tree_iterator"*, %"struct.std::_Rb_tree_iterator"** %this.addr, align 8 - %_M_node = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %this1, i32 0, i32 0 - %0 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %_M_node, align 8 - %call = call %"struct.std::_Rb_tree_node_base"* @_ZSt18_Rb_tree_incrementPSt18_Rb_tree_node_base(%"struct.std::_Rb_tree_node_base"* %0) #10 - %_M_node2 = getelementptr inbounds %"struct.std::_Rb_tree_iterator", %"struct.std::_Rb_tree_iterator"* %this1, i32 0, i32 0 - store %"struct.std::_Rb_tree_node_base"* %call, %"struct.std::_Rb_tree_node_base"** %_M_node2, align 8 - ret %"struct.std::_Rb_tree_iterator"* %this1 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local dereferenceable(48) %"struct.std::pair"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE8_S_valueEPKSt18_Rb_tree_node_base(%"struct.std::_Rb_tree_node_base"* %__x) #0 comdat align 2 { -entry: - %__x.addr = alloca %"struct.std::_Rb_tree_node_base"*, align 8 - store %"struct.std::_Rb_tree_node_base"* %__x, %"struct.std::_Rb_tree_node_base"** %__x.addr, align 8 - %0 = load %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"** %__x.addr, align 8 - %1 = bitcast %"struct.std::_Rb_tree_node_base"* %0 to %"struct.std::_Rb_tree_node"* - %call = call %"struct.std::pair"* @_ZNKSt13_Rb_tree_nodeISt4pairIKhSt6vectorIbSaIbEEEE9_M_valptrEv(%"struct.std::_Rb_tree_node"* %1) - ret %"struct.std::pair"* %call -} - -; Function Attrs: nounwind readonly -declare dso_local %"struct.std::_Rb_tree_node_base"* @_ZSt18_Rb_tree_decrementPSt18_Rb_tree_node_base(%"struct.std::_Rb_tree_node_base"*) #14 - -; Function Attrs: nounwind readonly -declare dso_local %"struct.std::_Rb_tree_node_base"* @_ZSt18_Rb_tree_incrementPSt18_Rb_tree_node_base(%"struct.std::_Rb_tree_node_base"*) #14 - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %"struct.std::_Rb_tree_node"* @_ZNKSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE11_Alloc_nodeclIS5_EEPSt13_Rb_tree_nodeIS5_ERKT_(%"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Alloc_node"* %this, %"struct.std::pair"* dereferenceable(48) %__arg) #0 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Alloc_node"*, align 8 - %__arg.addr = alloca %"struct.std::pair"*, align 8 - store %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Alloc_node"* %this, %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Alloc_node"** %this.addr, align 8 - store %"struct.std::pair"* %__arg, %"struct.std::pair"** %__arg.addr, align 8 - %this1 = load %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Alloc_node"*, %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Alloc_node"** %this.addr, align 8 - %_M_t = getelementptr inbounds %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Alloc_node", %"struct.std::_Rb_tree > >, std::_Select1st > > >, std::less, std::allocator > > > >::_Alloc_node"* %this1, i32 0, i32 0 - %0 = load %"class.std::_Rb_tree"*, %"class.std::_Rb_tree"** %_M_t, align 8 - %1 = load %"struct.std::pair"*, %"struct.std::pair"** %__arg.addr, align 8 - %call = call %"struct.std::_Rb_tree_node"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE14_M_create_nodeERKS5_(%"class.std::_Rb_tree"* %0, %"struct.std::pair"* dereferenceable(48) %1) - ret %"struct.std::_Rb_tree_node"* %call -} - -; Function Attrs: nounwind -declare dso_local void @_ZSt29_Rb_tree_insert_and_rebalancebPSt18_Rb_tree_node_baseS0_RS_(i1 zeroext, %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* dereferenceable(32)) #11 - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %"struct.std::_Rb_tree_node"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE14_M_create_nodeERKS5_(%"class.std::_Rb_tree"* %this, %"struct.std::pair"* dereferenceable(48) %__x) #0 comdat align 2 { -entry: - %this.addr = alloca %"class.std::_Rb_tree"*, align 8 - %__x.addr = alloca %"struct.std::pair"*, align 8 - %__tmp = alloca %"struct.std::_Rb_tree_node"*, align 8 - store %"class.std::_Rb_tree"* %this, %"class.std::_Rb_tree"** %this.addr, align 8 - store %"struct.std::pair"* %__x, %"struct.std::pair"** %__x.addr, align 8 - %this1 = load %"class.std::_Rb_tree"*, %"class.std::_Rb_tree"** %this.addr, align 8 - %call = call %"struct.std::_Rb_tree_node"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE11_M_get_nodeEv(%"class.std::_Rb_tree"* %this1) - store %"struct.std::_Rb_tree_node"* %call, %"struct.std::_Rb_tree_node"** %__tmp, align 8 - %0 = load %"struct.std::_Rb_tree_node"*, %"struct.std::_Rb_tree_node"** %__tmp, align 8 - %1 = load %"struct.std::pair"*, %"struct.std::pair"** %__x.addr, align 8 - call void @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE17_M_construct_nodeEPSt13_Rb_tree_nodeIS5_ERKS5_(%"class.std::_Rb_tree"* %this1, %"struct.std::_Rb_tree_node"* %0, %"struct.std::pair"* dereferenceable(48) %1) - %2 = load %"struct.std::_Rb_tree_node"*, %"struct.std::_Rb_tree_node"** %__tmp, align 8 - ret %"struct.std::_Rb_tree_node"* %2 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %"struct.std::_Rb_tree_node"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE11_M_get_nodeEv(%"class.std::_Rb_tree"* %this) #0 comdat align 2 { -entry: - %this.addr = alloca %"class.std::_Rb_tree"*, align 8 - store %"class.std::_Rb_tree"* %this, %"class.std::_Rb_tree"** %this.addr, align 8 - %this1 = load %"class.std::_Rb_tree"*, %"class.std::_Rb_tree"** %this.addr, align 8 - %call = call dereferenceable(1) %"class.std::allocator.4"* @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE21_M_get_Node_allocatorEv(%"class.std::_Rb_tree"* %this1) - %call2 = call %"struct.std::_Rb_tree_node"* @_ZN9__gnu_cxx14__alloc_traitsISaISt13_Rb_tree_nodeISt4pairIKhSt6vectorIbSaIbEEEEEE8allocateERS9_m(%"class.std::allocator.4"* dereferenceable(1) %call, i64 1) - ret %"struct.std::_Rb_tree_node"* %call2 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE17_M_construct_nodeEPSt13_Rb_tree_nodeIS5_ERKS5_(%"class.std::_Rb_tree"* %this, %"struct.std::_Rb_tree_node"* %__node, %"struct.std::pair"* dereferenceable(48) %__x) #0 comdat align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -entry: - %this.addr = alloca %"class.std::_Rb_tree"*, align 8 - %__node.addr = alloca %"struct.std::_Rb_tree_node"*, align 8 - %__x.addr = alloca %"struct.std::pair"*, align 8 - %ref.tmp = alloca %"class.std::allocator.7", align 1 - %exn.slot = alloca i8* - %ehselector.slot = alloca i32 - store %"class.std::_Rb_tree"* %this, %"class.std::_Rb_tree"** %this.addr, align 8 - store %"struct.std::_Rb_tree_node"* %__node, %"struct.std::_Rb_tree_node"** %__node.addr, align 8 - store %"struct.std::pair"* %__x, %"struct.std::pair"** %__x.addr, align 8 - %this1 = load %"class.std::_Rb_tree"*, %"class.std::_Rb_tree"** %this.addr, align 8 - invoke void @_ZNKSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE13get_allocatorEv(%"class.std::allocator.7"* sret %ref.tmp, %"class.std::_Rb_tree"* %this1) - to label %invoke.cont unwind label %lpad - -invoke.cont: ; preds = %entry - %0 = bitcast %"class.std::allocator.7"* %ref.tmp to %"class.__gnu_cxx::new_allocator.8"* - %1 = load %"struct.std::_Rb_tree_node"*, %"struct.std::_Rb_tree_node"** %__node.addr, align 8 - %call = invoke %"struct.std::pair"* @_ZNSt13_Rb_tree_nodeISt4pairIKhSt6vectorIbSaIbEEEE9_M_valptrEv(%"struct.std::_Rb_tree_node"* %1) - to label %invoke.cont3 unwind label %lpad2 - -invoke.cont3: ; preds = %invoke.cont - %2 = load %"struct.std::pair"*, %"struct.std::pair"** %__x.addr, align 8 - invoke void @_ZN9__gnu_cxx13new_allocatorISt4pairIKhSt6vectorIbSaIbEEEE9constructEPS6_RKS6_(%"class.__gnu_cxx::new_allocator.8"* %0, %"struct.std::pair"* %call, %"struct.std::pair"* dereferenceable(48) %2) - to label %invoke.cont4 unwind label %lpad2 - -invoke.cont4: ; preds = %invoke.cont3 - call void @_ZNSaISt4pairIKhSt6vectorIbSaIbEEEED2Ev(%"class.std::allocator.7"* %ref.tmp) #3 - br label %try.cont - -lpad: ; preds = %entry - %3 = landingpad { i8*, i32 } - catch i8* null - %4 = extractvalue { i8*, i32 } %3, 0 - store i8* %4, i8** %exn.slot, align 8 - %5 = extractvalue { i8*, i32 } %3, 1 - store i32 %5, i32* %ehselector.slot, align 4 - br label %catch - -lpad2: ; preds = %invoke.cont3, %invoke.cont - %6 = landingpad { i8*, i32 } - catch i8* null - %7 = extractvalue { i8*, i32 } %6, 0 - store i8* %7, i8** %exn.slot, align 8 - %8 = extractvalue { i8*, i32 } %6, 1 - store i32 %8, i32* %ehselector.slot, align 4 - call void @_ZNSaISt4pairIKhSt6vectorIbSaIbEEEED2Ev(%"class.std::allocator.7"* %ref.tmp) #3 - br label %catch - -catch: ; preds = %lpad2, %lpad - %exn = load i8*, i8** %exn.slot, align 8 - %9 = call i8* @__cxa_begin_catch(i8* %exn) #3 - %10 = load %"struct.std::_Rb_tree_node"*, %"struct.std::_Rb_tree_node"** %__node.addr, align 8 - invoke void @_ZNSt8_Rb_treeIhSt4pairIKhSt6vectorIbSaIbEEESt10_Select1stIS5_ESt4lessIhESaIS5_EE11_M_put_nodeEPSt13_Rb_tree_nodeIS5_E(%"class.std::_Rb_tree"* %this1, %"struct.std::_Rb_tree_node"* %10) - to label %invoke.cont6 unwind label %lpad5 - -invoke.cont6: ; preds = %catch - invoke void @__cxa_rethrow() #19 - to label %unreachable unwind label %lpad5 - -lpad5: ; preds = %invoke.cont6, %catch - %11 = landingpad { i8*, i32 } - cleanup - %12 = extractvalue { i8*, i32 } %11, 0 - store i8* %12, i8** %exn.slot, align 8 - %13 = extractvalue { i8*, i32 } %11, 1 - store i32 %13, i32* %ehselector.slot, align 4 - invoke void @__cxa_end_catch() - to label %invoke.cont7 unwind label %terminate.lpad - -invoke.cont7: ; preds = %lpad5 - br label %eh.resume - -try.cont: ; preds = %invoke.cont4 - ret void - -eh.resume: ; preds = %invoke.cont7 - %exn8 = load i8*, i8** %exn.slot, align 8 - %sel = load i32, i32* %ehselector.slot, align 4 - %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn8, 0 - %lpad.val9 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1 - resume { i8*, i32 } %lpad.val9 - -terminate.lpad: ; preds = %lpad5 - %14 = landingpad { i8*, i32 } - catch i8* null - %15 = extractvalue { i8*, i32 } %14, 0 - call void @__clang_call_terminate(i8* %15) #16 - unreachable - -unreachable: ; preds = %invoke.cont6 - unreachable -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %"struct.std::_Rb_tree_node"* @_ZN9__gnu_cxx14__alloc_traitsISaISt13_Rb_tree_nodeISt4pairIKhSt6vectorIbSaIbEEEEEE8allocateERS9_m(%"class.std::allocator.4"* dereferenceable(1) %__a, i64 %__n) #0 comdat align 2 { -entry: - %__a.addr = alloca %"class.std::allocator.4"*, align 8 - %__n.addr = alloca i64, align 8 - store %"class.std::allocator.4"* %__a, %"class.std::allocator.4"** %__a.addr, align 8 - store i64 %__n, i64* %__n.addr, align 8 - %0 = load %"class.std::allocator.4"*, %"class.std::allocator.4"** %__a.addr, align 8 - %1 = bitcast %"class.std::allocator.4"* %0 to %"class.__gnu_cxx::new_allocator.5"* - %2 = load i64, i64* %__n.addr, align 8 - %call = call %"struct.std::_Rb_tree_node"* @_ZN9__gnu_cxx13new_allocatorISt13_Rb_tree_nodeISt4pairIKhSt6vectorIbSaIbEEEEE8allocateEmPKv(%"class.__gnu_cxx::new_allocator.5"* %1, i64 %2, i8* null) - ret %"struct.std::_Rb_tree_node"* %call -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %"struct.std::_Rb_tree_node"* @_ZN9__gnu_cxx13new_allocatorISt13_Rb_tree_nodeISt4pairIKhSt6vectorIbSaIbEEEEE8allocateEmPKv(%"class.__gnu_cxx::new_allocator.5"* %this, i64 %__n, i8* %0) #0 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::new_allocator.5"*, align 8 - %__n.addr = alloca i64, align 8 - %.addr = alloca i8*, align 8 - store %"class.__gnu_cxx::new_allocator.5"* %this, %"class.__gnu_cxx::new_allocator.5"** %this.addr, align 8 - store i64 %__n, i64* %__n.addr, align 8 - store i8* %0, i8** %.addr, align 8 - %this1 = load %"class.__gnu_cxx::new_allocator.5"*, %"class.__gnu_cxx::new_allocator.5"** %this.addr, align 8 - %1 = load i64, i64* %__n.addr, align 8 - %call = call i64 @_ZNK9__gnu_cxx13new_allocatorISt13_Rb_tree_nodeISt4pairIKhSt6vectorIbSaIbEEEEE8max_sizeEv(%"class.__gnu_cxx::new_allocator.5"* %this1) #3 - %cmp = icmp ugt i64 %1, %call - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - call void @_ZSt17__throw_bad_allocv() #19 - unreachable - -if.end: ; preds = %entry - %2 = load i64, i64* %__n.addr, align 8 - %mul = mul i64 %2, 80 - %call2 = call i8* @_Znwm(i64 %mul) - %3 = bitcast i8* %call2 to %"struct.std::_Rb_tree_node"* - ret %"struct.std::_Rb_tree_node"* %3 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local i64 @_ZNK9__gnu_cxx13new_allocatorISt13_Rb_tree_nodeISt4pairIKhSt6vectorIbSaIbEEEEE8max_sizeEv(%"class.__gnu_cxx::new_allocator.5"* %this) #6 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::new_allocator.5"*, align 8 - store %"class.__gnu_cxx::new_allocator.5"* %this, %"class.__gnu_cxx::new_allocator.5"** %this.addr, align 8 - %this1 = load %"class.__gnu_cxx::new_allocator.5"*, %"class.__gnu_cxx::new_allocator.5"** %this.addr, align 8 - ret i64 230584300921369395 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx13new_allocatorISt4pairIKhSt6vectorIbSaIbEEEE9constructEPS6_RKS6_(%"class.__gnu_cxx::new_allocator.8"* %this, %"struct.std::pair"* %__p, %"struct.std::pair"* dereferenceable(48) %__val) #0 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::new_allocator.8"*, align 8 - %__p.addr = alloca %"struct.std::pair"*, align 8 - %__val.addr = alloca %"struct.std::pair"*, align 8 - store %"class.__gnu_cxx::new_allocator.8"* %this, %"class.__gnu_cxx::new_allocator.8"** %this.addr, align 8 - store %"struct.std::pair"* %__p, %"struct.std::pair"** %__p.addr, align 8 - store %"struct.std::pair"* %__val, %"struct.std::pair"** %__val.addr, align 8 - %this1 = load %"class.__gnu_cxx::new_allocator.8"*, %"class.__gnu_cxx::new_allocator.8"** %this.addr, align 8 - %0 = load %"struct.std::pair"*, %"struct.std::pair"** %__p.addr, align 8 - %1 = bitcast %"struct.std::pair"* %0 to i8* - %2 = bitcast i8* %1 to %"struct.std::pair"* - %3 = load %"struct.std::pair"*, %"struct.std::pair"** %__val.addr, align 8 - call void @_ZNSt4pairIKhSt6vectorIbSaIbEEEC2ERKS4_(%"struct.std::pair"* %2, %"struct.std::pair"* dereferenceable(48) %3) - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt4pairIKhSt6vectorIbSaIbEEEC2ERKS4_(%"struct.std::pair"* %this, %"struct.std::pair"* dereferenceable(48) %0) unnamed_addr #0 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::pair"*, align 8 - %.addr = alloca %"struct.std::pair"*, align 8 - store %"struct.std::pair"* %this, %"struct.std::pair"** %this.addr, align 8 - store %"struct.std::pair"* %0, %"struct.std::pair"** %.addr, align 8 - %this1 = load %"struct.std::pair"*, %"struct.std::pair"** %this.addr, align 8 - %1 = bitcast %"struct.std::pair"* %this1 to %"class.std::__pair_base"* - %2 = load %"struct.std::pair"*, %"struct.std::pair"** %.addr, align 8 - %3 = bitcast %"struct.std::pair"* %2 to %"class.std::__pair_base"* - %first = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %this1, i32 0, i32 0 - %4 = load %"struct.std::pair"*, %"struct.std::pair"** %.addr, align 8 - %first2 = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %4, i32 0, i32 0 - %5 = load i8, i8* %first2, align 8 - store i8 %5, i8* %first, align 8 - %second = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %this1, i32 0, i32 1 - %6 = load %"struct.std::pair"*, %"struct.std::pair"** %.addr, align 8 - %second3 = getelementptr inbounds %"struct.std::pair", %"struct.std::pair"* %6, i32 0, i32 1 - call void @_ZNSt6vectorIbSaIbEEC2ERKS1_(%"class.std::vector.0"* %second, %"class.std::vector.0"* dereferenceable(40) %second3) - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local i64 @_ZNKSt6vectorIbSaIbEE4sizeEv(%"class.std::vector.0"* %this) #0 comdat align 2 { -entry: - %this.addr = alloca %"class.std::vector.0"*, align 8 - %ref.tmp = alloca %"struct.std::_Bit_const_iterator", align 8 - %ref.tmp2 = alloca %"struct.std::_Bit_const_iterator", align 8 - store %"class.std::vector.0"* %this, %"class.std::vector.0"** %this.addr, align 8 - %this1 = load %"class.std::vector.0"*, %"class.std::vector.0"** %this.addr, align 8 - %call = call { i64*, i32 } @_ZNKSt6vectorIbSaIbEE3endEv(%"class.std::vector.0"* %this1) - %0 = bitcast %"struct.std::_Bit_const_iterator"* %ref.tmp to { i64*, i32 }* - %1 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %0, i32 0, i32 0 - %2 = extractvalue { i64*, i32 } %call, 0 - store i64* %2, i64** %1, align 8 - %3 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %0, i32 0, i32 1 - %4 = extractvalue { i64*, i32 } %call, 1 - store i32 %4, i32* %3, align 8 - %5 = bitcast %"struct.std::_Bit_const_iterator"* %ref.tmp to %"struct.std::_Bit_iterator_base"* - %call3 = call { i64*, i32 } @_ZNKSt6vectorIbSaIbEE5beginEv(%"class.std::vector.0"* %this1) - %6 = bitcast %"struct.std::_Bit_const_iterator"* %ref.tmp2 to { i64*, i32 }* - %7 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %6, i32 0, i32 0 - %8 = extractvalue { i64*, i32 } %call3, 0 - store i64* %8, i64** %7, align 8 - %9 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %6, i32 0, i32 1 - %10 = extractvalue { i64*, i32 } %call3, 1 - store i32 %10, i32* %9, align 8 - %11 = bitcast %"struct.std::_Bit_const_iterator"* %ref.tmp2 to %"struct.std::_Bit_iterator_base"* - %call4 = call i64 @_ZStmiRKSt18_Bit_iterator_baseS1_(%"struct.std::_Bit_iterator_base"* dereferenceable(16) %5, %"struct.std::_Bit_iterator_base"* dereferenceable(16) %11) - ret i64 %call4 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local i64 @_ZNKSt6vectorIbSaIbEE8capacityEv(%"class.std::vector.0"* %this) #0 comdat align 2 { -entry: - %this.addr = alloca %"class.std::vector.0"*, align 8 - %ref.tmp = alloca %"struct.std::_Bit_const_iterator", align 8 - %ref.tmp2 = alloca %"struct.std::_Bit_const_iterator", align 8 - store %"class.std::vector.0"* %this, %"class.std::vector.0"** %this.addr, align 8 - %this1 = load %"class.std::vector.0"*, %"class.std::vector.0"** %this.addr, align 8 - %0 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Bvector_base"* - %_M_impl = getelementptr inbounds %"struct.std::_Bvector_base", %"struct.std::_Bvector_base"* %0, i32 0, i32 0 - %call = call i64* @_ZNKSt13_Bvector_baseISaIbEE13_Bvector_impl11_M_end_addrEv(%"struct.std::_Bvector_base >::_Bvector_impl"* %_M_impl) - call void @_ZNSt19_Bit_const_iteratorC2EPmj(%"struct.std::_Bit_const_iterator"* %ref.tmp, i64* %call, i32 0) - %1 = bitcast %"struct.std::_Bit_const_iterator"* %ref.tmp to %"struct.std::_Bit_iterator_base"* - %call3 = call { i64*, i32 } @_ZNKSt6vectorIbSaIbEE5beginEv(%"class.std::vector.0"* %this1) - %2 = bitcast %"struct.std::_Bit_const_iterator"* %ref.tmp2 to { i64*, i32 }* - %3 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %2, i32 0, i32 0 - %4 = extractvalue { i64*, i32 } %call3, 0 - store i64* %4, i64** %3, align 8 - %5 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %2, i32 0, i32 1 - %6 = extractvalue { i64*, i32 } %call3, 1 - store i32 %6, i32* %5, align 8 - %7 = bitcast %"struct.std::_Bit_const_iterator"* %ref.tmp2 to %"struct.std::_Bit_iterator_base"* - %call4 = call i64 @_ZStmiRKSt18_Bit_iterator_baseS1_(%"struct.std::_Bit_iterator_base"* dereferenceable(16) %1, %"struct.std::_Bit_iterator_base"* dereferenceable(16) %7) - ret i64 %call4 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt13_Bvector_baseISaIbEE13_M_deallocateEv(%"struct.std::_Bvector_base"* %this) #0 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Bvector_base"*, align 8 - %__n = alloca i64, align 8 - %ref.tmp = alloca %"struct.std::_Bit_iterator", align 8 - store %"struct.std::_Bvector_base"* %this, %"struct.std::_Bvector_base"** %this.addr, align 8 - %this1 = load %"struct.std::_Bvector_base"*, %"struct.std::_Bvector_base"** %this.addr, align 8 - %_M_impl = getelementptr inbounds %"struct.std::_Bvector_base", %"struct.std::_Bvector_base"* %this1, i32 0, i32 0 - %_M_start = getelementptr inbounds %"struct.std::_Bvector_base >::_Bvector_impl", %"struct.std::_Bvector_base >::_Bvector_impl"* %_M_impl, i32 0, i32 0 - %0 = bitcast %"struct.std::_Bit_iterator"* %_M_start to %"struct.std::_Bit_iterator_base"* - %_M_p = getelementptr inbounds %"struct.std::_Bit_iterator_base", %"struct.std::_Bit_iterator_base"* %0, i32 0, i32 0 - %1 = load i64*, i64** %_M_p, align 8 - %tobool = icmp ne i64* %1, null - br i1 %tobool, label %if.then, label %if.end - -if.then: ; preds = %entry - %_M_impl2 = getelementptr inbounds %"struct.std::_Bvector_base", %"struct.std::_Bvector_base"* %this1, i32 0, i32 0 - %call = call i64* @_ZNKSt13_Bvector_baseISaIbEE13_Bvector_impl11_M_end_addrEv(%"struct.std::_Bvector_base >::_Bvector_impl"* %_M_impl2) - %_M_impl3 = getelementptr inbounds %"struct.std::_Bvector_base", %"struct.std::_Bvector_base"* %this1, i32 0, i32 0 - %_M_start4 = getelementptr inbounds %"struct.std::_Bvector_base >::_Bvector_impl", %"struct.std::_Bvector_base >::_Bvector_impl"* %_M_impl3, i32 0, i32 0 - %2 = bitcast %"struct.std::_Bit_iterator"* %_M_start4 to %"struct.std::_Bit_iterator_base"* - %_M_p5 = getelementptr inbounds %"struct.std::_Bit_iterator_base", %"struct.std::_Bit_iterator_base"* %2, i32 0, i32 0 - %3 = load i64*, i64** %_M_p5, align 8 - %sub.ptr.lhs.cast = ptrtoint i64* %call to i64 - %sub.ptr.rhs.cast = ptrtoint i64* %3 to i64 - %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast - %sub.ptr.div = sdiv exact i64 %sub.ptr.sub, 8 - store i64 %sub.ptr.div, i64* %__n, align 8 - %_M_impl6 = getelementptr inbounds %"struct.std::_Bvector_base", %"struct.std::_Bvector_base"* %this1, i32 0, i32 0 - %4 = bitcast %"struct.std::_Bvector_base >::_Bvector_impl"* %_M_impl6 to %"class.std::allocator.1"* - %_M_impl7 = getelementptr inbounds %"struct.std::_Bvector_base", %"struct.std::_Bvector_base"* %this1, i32 0, i32 0 - %_M_end_of_storage = getelementptr inbounds %"struct.std::_Bvector_base >::_Bvector_impl", %"struct.std::_Bvector_base >::_Bvector_impl"* %_M_impl7, i32 0, i32 2 - %5 = load i64*, i64** %_M_end_of_storage, align 8 - %6 = load i64, i64* %__n, align 8 - %idx.neg = sub i64 0, %6 - %add.ptr = getelementptr inbounds i64, i64* %5, i64 %idx.neg - %7 = load i64, i64* %__n, align 8 - call void @_ZN9__gnu_cxx14__alloc_traitsISaImEE10deallocateERS1_Pmm(%"class.std::allocator.1"* dereferenceable(1) %4, i64* %add.ptr, i64 %7) - call void @_ZNSt13_Bit_iteratorC2Ev(%"struct.std::_Bit_iterator"* %ref.tmp) - %_M_impl8 = getelementptr inbounds %"struct.std::_Bvector_base", %"struct.std::_Bvector_base"* %this1, i32 0, i32 0 - %_M_finish = getelementptr inbounds %"struct.std::_Bvector_base >::_Bvector_impl", %"struct.std::_Bvector_base >::_Bvector_impl"* %_M_impl8, i32 0, i32 1 - %8 = bitcast %"struct.std::_Bit_iterator"* %_M_finish to i8* - %9 = bitcast %"struct.std::_Bit_iterator"* %ref.tmp to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %8, i8* align 8 %9, i64 12, i1 false) - %_M_impl9 = getelementptr inbounds %"struct.std::_Bvector_base", %"struct.std::_Bvector_base"* %this1, i32 0, i32 0 - %_M_start10 = getelementptr inbounds %"struct.std::_Bvector_base >::_Bvector_impl", %"struct.std::_Bvector_base >::_Bvector_impl"* %_M_impl9, i32 0, i32 0 - %10 = bitcast %"struct.std::_Bit_iterator"* %_M_start10 to i8* - %11 = bitcast %"struct.std::_Bit_iterator"* %_M_finish to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %10, i8* align 8 %11, i64 12, i1 false) - %_M_impl11 = getelementptr inbounds %"struct.std::_Bvector_base", %"struct.std::_Bvector_base"* %this1, i32 0, i32 0 - %_M_end_of_storage12 = getelementptr inbounds %"struct.std::_Bvector_base >::_Bvector_impl", %"struct.std::_Bvector_base >::_Bvector_impl"* %_M_impl11, i32 0, i32 2 - store i64* null, i64** %_M_end_of_storage12, align 8 - br label %if.end - -if.end: ; preds = %if.then, %entry - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt6vectorIbSaIbEE13_M_initializeEm(%"class.std::vector.0"* %this, i64 %__n) #0 comdat align 2 { -entry: - %this.addr = alloca %"class.std::vector.0"*, align 8 - %__n.addr = alloca i64, align 8 - %__q = alloca i64*, align 8 - %ref.tmp = alloca %"struct.std::_Bit_iterator", align 8 - %ref.tmp7 = alloca %"struct.std::_Bit_iterator", align 8 - %ref.tmp10 = alloca %"struct.std::_Bit_iterator", align 8 - store %"class.std::vector.0"* %this, %"class.std::vector.0"** %this.addr, align 8 - store i64 %__n, i64* %__n.addr, align 8 - %this1 = load %"class.std::vector.0"*, %"class.std::vector.0"** %this.addr, align 8 - %0 = load i64, i64* %__n.addr, align 8 - %tobool = icmp ne i64 %0, 0 - br i1 %tobool, label %if.then, label %if.else - -if.then: ; preds = %entry - %1 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Bvector_base"* - %2 = load i64, i64* %__n.addr, align 8 - %call = call i64* @_ZNSt13_Bvector_baseISaIbEE11_M_allocateEm(%"struct.std::_Bvector_base"* %1, i64 %2) - store i64* %call, i64** %__q, align 8 - %3 = load i64*, i64** %__q, align 8 - %4 = load i64, i64* %__n.addr, align 8 - %call2 = call i64 @_ZNSt13_Bvector_baseISaIbEE8_S_nwordEm(i64 %4) - %add.ptr = getelementptr inbounds i64, i64* %3, i64 %call2 - %5 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Bvector_base"* - %_M_impl = getelementptr inbounds %"struct.std::_Bvector_base", %"struct.std::_Bvector_base"* %5, i32 0, i32 0 - %_M_end_of_storage = getelementptr inbounds %"struct.std::_Bvector_base >::_Bvector_impl", %"struct.std::_Bvector_base >::_Bvector_impl"* %_M_impl, i32 0, i32 2 - store i64* %add.ptr, i64** %_M_end_of_storage, align 8 - %6 = load i64*, i64** %__q, align 8 - %call3 = call i64* @_ZSt11__addressofImEPT_RS0_(i64* dereferenceable(8) %6) - call void @_ZNSt13_Bit_iteratorC2EPmj(%"struct.std::_Bit_iterator"* %ref.tmp, i64* %call3, i32 0) - %7 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Bvector_base"* - %_M_impl4 = getelementptr inbounds %"struct.std::_Bvector_base", %"struct.std::_Bvector_base"* %7, i32 0, i32 0 - %_M_start = getelementptr inbounds %"struct.std::_Bvector_base >::_Bvector_impl", %"struct.std::_Bvector_base >::_Bvector_impl"* %_M_impl4, i32 0, i32 0 - %8 = bitcast %"struct.std::_Bit_iterator"* %_M_start to i8* - %9 = bitcast %"struct.std::_Bit_iterator"* %ref.tmp to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %8, i8* align 8 %9, i64 12, i1 false) - br label %if.end - -if.else: ; preds = %entry - %10 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Bvector_base"* - %_M_impl5 = getelementptr inbounds %"struct.std::_Bvector_base", %"struct.std::_Bvector_base"* %10, i32 0, i32 0 - %_M_end_of_storage6 = getelementptr inbounds %"struct.std::_Bvector_base >::_Bvector_impl", %"struct.std::_Bvector_base >::_Bvector_impl"* %_M_impl5, i32 0, i32 2 - store i64* null, i64** %_M_end_of_storage6, align 8 - call void @_ZNSt13_Bit_iteratorC2EPmj(%"struct.std::_Bit_iterator"* %ref.tmp7, i64* null, i32 0) - %11 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Bvector_base"* - %_M_impl8 = getelementptr inbounds %"struct.std::_Bvector_base", %"struct.std::_Bvector_base"* %11, i32 0, i32 0 - %_M_start9 = getelementptr inbounds %"struct.std::_Bvector_base >::_Bvector_impl", %"struct.std::_Bvector_base >::_Bvector_impl"* %_M_impl8, i32 0, i32 0 - %12 = bitcast %"struct.std::_Bit_iterator"* %_M_start9 to i8* - %13 = bitcast %"struct.std::_Bit_iterator"* %ref.tmp7 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %12, i8* align 8 %13, i64 12, i1 false) - br label %if.end - -if.end: ; preds = %if.else, %if.then - %14 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Bvector_base"* - %_M_impl11 = getelementptr inbounds %"struct.std::_Bvector_base", %"struct.std::_Bvector_base"* %14, i32 0, i32 0 - %_M_start12 = getelementptr inbounds %"struct.std::_Bvector_base >::_Bvector_impl", %"struct.std::_Bvector_base >::_Bvector_impl"* %_M_impl11, i32 0, i32 0 - %15 = load i64, i64* %__n.addr, align 8 - %call13 = call { i64*, i32 } @_ZNKSt13_Bit_iteratorplEl(%"struct.std::_Bit_iterator"* %_M_start12, i64 %15) - %16 = bitcast %"struct.std::_Bit_iterator"* %ref.tmp10 to { i64*, i32 }* - %17 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %16, i32 0, i32 0 - %18 = extractvalue { i64*, i32 } %call13, 0 - store i64* %18, i64** %17, align 8 - %19 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %16, i32 0, i32 1 - %20 = extractvalue { i64*, i32 } %call13, 1 - store i32 %20, i32* %19, align 8 - %21 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Bvector_base"* - %_M_impl14 = getelementptr inbounds %"struct.std::_Bvector_base", %"struct.std::_Bvector_base"* %21, i32 0, i32 0 - %_M_finish = getelementptr inbounds %"struct.std::_Bvector_base >::_Bvector_impl", %"struct.std::_Bvector_base >::_Bvector_impl"* %_M_impl14, i32 0, i32 1 - %22 = bitcast %"struct.std::_Bit_iterator"* %_M_finish to i8* - %23 = bitcast %"struct.std::_Bit_iterator"* %ref.tmp10 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %22, i8* align 8 %23, i64 12, i1 false) - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local { i64*, i32 } @_ZNSt6vectorIbSaIbEE15_M_copy_alignedESt19_Bit_const_iteratorS2_St13_Bit_iterator(%"class.std::vector.0"* %this, i64* %__first.coerce0, i32 %__first.coerce1, i64* %__last.coerce0, i32 %__last.coerce1, %"struct.std::_Bit_iterator"* byval(%"struct.std::_Bit_iterator") align 8 %__result) #0 comdat align 2 { -entry: - %retval = alloca %"struct.std::_Bit_iterator", align 8 - %__first = alloca %"struct.std::_Bit_const_iterator", align 8 - %__last = alloca %"struct.std::_Bit_const_iterator", align 8 - %this.addr = alloca %"class.std::vector.0"*, align 8 - %__q = alloca i64*, align 8 - %agg.tmp = alloca %"struct.std::_Bit_const_iterator", align 8 - %agg.tmp5 = alloca %"struct.std::_Bit_const_iterator", align 8 - %agg.tmp6 = alloca %"struct.std::_Bit_iterator", align 8 - %0 = bitcast %"struct.std::_Bit_const_iterator"* %__first to { i64*, i32 }* - %1 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %0, i32 0, i32 0 - store i64* %__first.coerce0, i64** %1, align 8 - %2 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %0, i32 0, i32 1 - store i32 %__first.coerce1, i32* %2, align 8 - %3 = bitcast %"struct.std::_Bit_const_iterator"* %__last to { i64*, i32 }* - %4 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %3, i32 0, i32 0 - store i64* %__last.coerce0, i64** %4, align 8 - %5 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %3, i32 0, i32 1 - store i32 %__last.coerce1, i32* %5, align 8 - store %"class.std::vector.0"* %this, %"class.std::vector.0"** %this.addr, align 8 - %this1 = load %"class.std::vector.0"*, %"class.std::vector.0"** %this.addr, align 8 - %6 = bitcast %"struct.std::_Bit_const_iterator"* %__first to %"struct.std::_Bit_iterator_base"* - %_M_p = getelementptr inbounds %"struct.std::_Bit_iterator_base", %"struct.std::_Bit_iterator_base"* %6, i32 0, i32 0 - %7 = load i64*, i64** %_M_p, align 8 - %8 = bitcast %"struct.std::_Bit_const_iterator"* %__last to %"struct.std::_Bit_iterator_base"* - %_M_p2 = getelementptr inbounds %"struct.std::_Bit_iterator_base", %"struct.std::_Bit_iterator_base"* %8, i32 0, i32 0 - %9 = load i64*, i64** %_M_p2, align 8 - %10 = bitcast %"struct.std::_Bit_iterator"* %__result to %"struct.std::_Bit_iterator_base"* - %_M_p3 = getelementptr inbounds %"struct.std::_Bit_iterator_base", %"struct.std::_Bit_iterator_base"* %10, i32 0, i32 0 - %11 = load i64*, i64** %_M_p3, align 8 - %call = call i64* @_ZSt4copyIPmS0_ET0_T_S2_S1_(i64* %7, i64* %9, i64* %11) - store i64* %call, i64** %__q, align 8 - %12 = bitcast %"struct.std::_Bit_const_iterator"* %__last to %"struct.std::_Bit_iterator_base"* - %_M_p4 = getelementptr inbounds %"struct.std::_Bit_iterator_base", %"struct.std::_Bit_iterator_base"* %12, i32 0, i32 0 - %13 = load i64*, i64** %_M_p4, align 8 - call void @_ZNSt19_Bit_const_iteratorC2EPmj(%"struct.std::_Bit_const_iterator"* %agg.tmp, i64* %13, i32 0) - %14 = bitcast %"struct.std::_Bit_const_iterator"* %agg.tmp5 to i8* - %15 = bitcast %"struct.std::_Bit_const_iterator"* %__last to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %14, i8* align 8 %15, i64 16, i1 false) - %16 = load i64*, i64** %__q, align 8 - call void @_ZNSt13_Bit_iteratorC2EPmj(%"struct.std::_Bit_iterator"* %agg.tmp6, i64* %16, i32 0) - %17 = bitcast %"struct.std::_Bit_const_iterator"* %agg.tmp to { i64*, i32 }* - %18 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %17, i32 0, i32 0 - %19 = load i64*, i64** %18, align 8 - %20 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %17, i32 0, i32 1 - %21 = load i32, i32* %20, align 8 - %22 = bitcast %"struct.std::_Bit_const_iterator"* %agg.tmp5 to { i64*, i32 }* - %23 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %22, i32 0, i32 0 - %24 = load i64*, i64** %23, align 8 - %25 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %22, i32 0, i32 1 - %26 = load i32, i32* %25, align 8 - %27 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp6 to { i64*, i32 }* - %28 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %27, i32 0, i32 0 - %29 = load i64*, i64** %28, align 8 - %30 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %27, i32 0, i32 1 - %31 = load i32, i32* %30, align 8 - %call7 = call { i64*, i32 } @_ZSt4copyISt19_Bit_const_iteratorSt13_Bit_iteratorET0_T_S3_S2_(i64* %19, i32 %21, i64* %24, i32 %26, i64* %29, i32 %31) - %32 = bitcast %"struct.std::_Bit_iterator"* %retval to { i64*, i32 }* - %33 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %32, i32 0, i32 0 - %34 = extractvalue { i64*, i32 } %call7, 0 - store i64* %34, i64** %33, align 8 - %35 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %32, i32 0, i32 1 - %36 = extractvalue { i64*, i32 } %call7, 1 - store i32 %36, i32* %35, align 8 - %37 = bitcast %"struct.std::_Bit_iterator"* %retval to { i64*, i32 }* - %38 = load { i64*, i32 }, { i64*, i32 }* %37, align 8 - ret { i64*, i32 } %38 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local { i64*, i32 } @_ZNSt6vectorIbSaIbEE5beginEv(%"class.std::vector.0"* %this) #6 comdat align 2 { -entry: - %retval = alloca %"struct.std::_Bit_iterator", align 8 - %this.addr = alloca %"class.std::vector.0"*, align 8 - store %"class.std::vector.0"* %this, %"class.std::vector.0"** %this.addr, align 8 - %this1 = load %"class.std::vector.0"*, %"class.std::vector.0"** %this.addr, align 8 - %0 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Bvector_base"* - %_M_impl = getelementptr inbounds %"struct.std::_Bvector_base", %"struct.std::_Bvector_base"* %0, i32 0, i32 0 - %_M_start = getelementptr inbounds %"struct.std::_Bvector_base >::_Bvector_impl", %"struct.std::_Bvector_base >::_Bvector_impl"* %_M_impl, i32 0, i32 0 - %1 = bitcast %"struct.std::_Bit_iterator"* %retval to i8* - %2 = bitcast %"struct.std::_Bit_iterator"* %_M_start to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %1, i8* align 8 %2, i64 16, i1 false) - %3 = bitcast %"struct.std::_Bit_iterator"* %retval to { i64*, i32 }* - %4 = load { i64*, i32 }, { i64*, i32 }* %3, align 8 - ret { i64*, i32 } %4 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local i64* @_ZNKSt13_Bvector_baseISaIbEE13_Bvector_impl11_M_end_addrEv(%"struct.std::_Bvector_base >::_Bvector_impl"* %this) #0 comdat align 2 { -entry: - %retval = alloca i64*, align 8 - %this.addr = alloca %"struct.std::_Bvector_base >::_Bvector_impl"*, align 8 - store %"struct.std::_Bvector_base >::_Bvector_impl"* %this, %"struct.std::_Bvector_base >::_Bvector_impl"** %this.addr, align 8 - %this1 = load %"struct.std::_Bvector_base >::_Bvector_impl"*, %"struct.std::_Bvector_base >::_Bvector_impl"** %this.addr, align 8 - %_M_end_of_storage = getelementptr inbounds %"struct.std::_Bvector_base >::_Bvector_impl", %"struct.std::_Bvector_base >::_Bvector_impl"* %this1, i32 0, i32 2 - %0 = load i64*, i64** %_M_end_of_storage, align 8 - %tobool = icmp ne i64* %0, null - br i1 %tobool, label %if.then, label %if.end - -if.then: ; preds = %entry - %_M_end_of_storage2 = getelementptr inbounds %"struct.std::_Bvector_base >::_Bvector_impl", %"struct.std::_Bvector_base >::_Bvector_impl"* %this1, i32 0, i32 2 - %1 = load i64*, i64** %_M_end_of_storage2, align 8 - %arrayidx = getelementptr inbounds i64, i64* %1, i64 -1 - %call = call i64* @_ZSt11__addressofImEPT_RS0_(i64* dereferenceable(8) %arrayidx) - %add.ptr = getelementptr inbounds i64, i64* %call, i64 1 - store i64* %add.ptr, i64** %retval, align 8 - br label %return - -if.end: ; preds = %entry - store i64* null, i64** %retval, align 8 - br label %return - -return: ; preds = %if.end, %if.then - %2 = load i64*, i64** %retval, align 8 - ret i64* %2 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local i64* @_ZSt11__addressofImEPT_RS0_(i64* dereferenceable(8) %__r) #6 comdat { -entry: - %__r.addr = alloca i64*, align 8 - store i64* %__r, i64** %__r.addr, align 8 - %0 = load i64*, i64** %__r.addr, align 8 - ret i64* %0 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx14__alloc_traitsISaImEE10deallocateERS1_Pmm(%"class.std::allocator.1"* dereferenceable(1) %__a, i64* %__p, i64 %__n) #0 comdat align 2 { -entry: - %__a.addr = alloca %"class.std::allocator.1"*, align 8 - %__p.addr = alloca i64*, align 8 - %__n.addr = alloca i64, align 8 - store %"class.std::allocator.1"* %__a, %"class.std::allocator.1"** %__a.addr, align 8 - store i64* %__p, i64** %__p.addr, align 8 - store i64 %__n, i64* %__n.addr, align 8 - %0 = load %"class.std::allocator.1"*, %"class.std::allocator.1"** %__a.addr, align 8 - %1 = bitcast %"class.std::allocator.1"* %0 to %"class.__gnu_cxx::new_allocator.2"* - %2 = load i64*, i64** %__p.addr, align 8 - %3 = load i64, i64* %__n.addr, align 8 - call void @_ZN9__gnu_cxx13new_allocatorImE10deallocateEPmm(%"class.__gnu_cxx::new_allocator.2"* %1, i64* %2, i64 %3) - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx13new_allocatorImE10deallocateEPmm(%"class.__gnu_cxx::new_allocator.2"* %this, i64* %__p, i64 %0) #6 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::new_allocator.2"*, align 8 - %__p.addr = alloca i64*, align 8 - %.addr = alloca i64, align 8 - store %"class.__gnu_cxx::new_allocator.2"* %this, %"class.__gnu_cxx::new_allocator.2"** %this.addr, align 8 - store i64* %__p, i64** %__p.addr, align 8 - store i64 %0, i64* %.addr, align 8 - %this1 = load %"class.__gnu_cxx::new_allocator.2"*, %"class.__gnu_cxx::new_allocator.2"** %this.addr, align 8 - %1 = load i64*, i64** %__p.addr, align 8 - %2 = bitcast i64* %1 to i8* - call void @_ZdlPv(i8* %2) #3 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local i64* @_ZNSt13_Bvector_baseISaIbEE11_M_allocateEm(%"struct.std::_Bvector_base"* %this, i64 %__n) #0 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Bvector_base"*, align 8 - %__n.addr = alloca i64, align 8 - store %"struct.std::_Bvector_base"* %this, %"struct.std::_Bvector_base"** %this.addr, align 8 - store i64 %__n, i64* %__n.addr, align 8 - %this1 = load %"struct.std::_Bvector_base"*, %"struct.std::_Bvector_base"** %this.addr, align 8 - %_M_impl = getelementptr inbounds %"struct.std::_Bvector_base", %"struct.std::_Bvector_base"* %this1, i32 0, i32 0 - %0 = bitcast %"struct.std::_Bvector_base >::_Bvector_impl"* %_M_impl to %"class.std::allocator.1"* - %1 = load i64, i64* %__n.addr, align 8 - %call = call i64 @_ZNSt13_Bvector_baseISaIbEE8_S_nwordEm(i64 %1) - %call2 = call i64* @_ZN9__gnu_cxx14__alloc_traitsISaImEE8allocateERS1_m(%"class.std::allocator.1"* dereferenceable(1) %0, i64 %call) - ret i64* %call2 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local i64 @_ZNSt13_Bvector_baseISaIbEE8_S_nwordEm(i64 %__n) #6 comdat align 2 { -entry: - %__n.addr = alloca i64, align 8 - store i64 %__n, i64* %__n.addr, align 8 - %0 = load i64, i64* %__n.addr, align 8 - %add = add i64 %0, 64 - %sub = sub i64 %add, 1 - %div = udiv i64 %sub, 64 - ret i64 %div -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSt13_Bit_iteratorC2EPmj(%"struct.std::_Bit_iterator"* %this, i64* %__x, i32 %__y) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Bit_iterator"*, align 8 - %__x.addr = alloca i64*, align 8 - %__y.addr = alloca i32, align 4 - store %"struct.std::_Bit_iterator"* %this, %"struct.std::_Bit_iterator"** %this.addr, align 8 - store i64* %__x, i64** %__x.addr, align 8 - store i32 %__y, i32* %__y.addr, align 4 - %this1 = load %"struct.std::_Bit_iterator"*, %"struct.std::_Bit_iterator"** %this.addr, align 8 - %0 = bitcast %"struct.std::_Bit_iterator"* %this1 to %"struct.std::_Bit_iterator_base"* - %1 = load i64*, i64** %__x.addr, align 8 - %2 = load i32, i32* %__y.addr, align 4 - call void @_ZNSt18_Bit_iterator_baseC2EPmj(%"struct.std::_Bit_iterator_base"* %0, i64* %1, i32 %2) - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local { i64*, i32 } @_ZNKSt13_Bit_iteratorplEl(%"struct.std::_Bit_iterator"* %this, i64 %__i) #0 comdat align 2 { -entry: - %retval = alloca %"struct.std::_Bit_iterator", align 8 - %this.addr = alloca %"struct.std::_Bit_iterator"*, align 8 - %__i.addr = alloca i64, align 8 - %__tmp = alloca %"struct.std::_Bit_iterator", align 8 - store %"struct.std::_Bit_iterator"* %this, %"struct.std::_Bit_iterator"** %this.addr, align 8 - store i64 %__i, i64* %__i.addr, align 8 - %this1 = load %"struct.std::_Bit_iterator"*, %"struct.std::_Bit_iterator"** %this.addr, align 8 - %0 = bitcast %"struct.std::_Bit_iterator"* %__tmp to i8* - %1 = bitcast %"struct.std::_Bit_iterator"* %this1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %0, i8* align 8 %1, i64 16, i1 false) - %2 = load i64, i64* %__i.addr, align 8 - %call = call dereferenceable(16) %"struct.std::_Bit_iterator"* @_ZNSt13_Bit_iteratorpLEl(%"struct.std::_Bit_iterator"* %__tmp, i64 %2) - %3 = bitcast %"struct.std::_Bit_iterator"* %retval to i8* - %4 = bitcast %"struct.std::_Bit_iterator"* %call to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %3, i8* align 8 %4, i64 16, i1 false) - %5 = bitcast %"struct.std::_Bit_iterator"* %retval to { i64*, i32 }* - %6 = load { i64*, i32 }, { i64*, i32 }* %5, align 8 - ret { i64*, i32 } %6 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local i64* @_ZN9__gnu_cxx14__alloc_traitsISaImEE8allocateERS1_m(%"class.std::allocator.1"* dereferenceable(1) %__a, i64 %__n) #0 comdat align 2 { -entry: - %__a.addr = alloca %"class.std::allocator.1"*, align 8 - %__n.addr = alloca i64, align 8 - store %"class.std::allocator.1"* %__a, %"class.std::allocator.1"** %__a.addr, align 8 - store i64 %__n, i64* %__n.addr, align 8 - %0 = load %"class.std::allocator.1"*, %"class.std::allocator.1"** %__a.addr, align 8 - %1 = bitcast %"class.std::allocator.1"* %0 to %"class.__gnu_cxx::new_allocator.2"* - %2 = load i64, i64* %__n.addr, align 8 - %call = call i64* @_ZN9__gnu_cxx13new_allocatorImE8allocateEmPKv(%"class.__gnu_cxx::new_allocator.2"* %1, i64 %2, i8* null) - ret i64* %call -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local i64* @_ZN9__gnu_cxx13new_allocatorImE8allocateEmPKv(%"class.__gnu_cxx::new_allocator.2"* %this, i64 %__n, i8* %0) #0 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::new_allocator.2"*, align 8 - %__n.addr = alloca i64, align 8 - %.addr = alloca i8*, align 8 - store %"class.__gnu_cxx::new_allocator.2"* %this, %"class.__gnu_cxx::new_allocator.2"** %this.addr, align 8 - store i64 %__n, i64* %__n.addr, align 8 - store i8* %0, i8** %.addr, align 8 - %this1 = load %"class.__gnu_cxx::new_allocator.2"*, %"class.__gnu_cxx::new_allocator.2"** %this.addr, align 8 - %1 = load i64, i64* %__n.addr, align 8 - %call = call i64 @_ZNK9__gnu_cxx13new_allocatorImE8max_sizeEv(%"class.__gnu_cxx::new_allocator.2"* %this1) #3 - %cmp = icmp ugt i64 %1, %call - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - call void @_ZSt17__throw_bad_allocv() #19 - unreachable - -if.end: ; preds = %entry - %2 = load i64, i64* %__n.addr, align 8 - %mul = mul i64 %2, 8 - %call2 = call i8* @_Znwm(i64 %mul) - %3 = bitcast i8* %call2 to i64* - ret i64* %3 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local i64 @_ZNK9__gnu_cxx13new_allocatorImE8max_sizeEv(%"class.__gnu_cxx::new_allocator.2"* %this) #6 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::new_allocator.2"*, align 8 - store %"class.__gnu_cxx::new_allocator.2"* %this, %"class.__gnu_cxx::new_allocator.2"** %this.addr, align 8 - %this1 = load %"class.__gnu_cxx::new_allocator.2"*, %"class.__gnu_cxx::new_allocator.2"** %this.addr, align 8 - ret i64 2305843009213693951 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local dereferenceable(16) %"struct.std::_Bit_iterator"* @_ZNSt13_Bit_iteratorpLEl(%"struct.std::_Bit_iterator"* %this, i64 %__i) #0 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Bit_iterator"*, align 8 - %__i.addr = alloca i64, align 8 - store %"struct.std::_Bit_iterator"* %this, %"struct.std::_Bit_iterator"** %this.addr, align 8 - store i64 %__i, i64* %__i.addr, align 8 - %this1 = load %"struct.std::_Bit_iterator"*, %"struct.std::_Bit_iterator"** %this.addr, align 8 - %0 = bitcast %"struct.std::_Bit_iterator"* %this1 to %"struct.std::_Bit_iterator_base"* - %1 = load i64, i64* %__i.addr, align 8 - call void @_ZNSt18_Bit_iterator_base7_M_incrEl(%"struct.std::_Bit_iterator_base"* %0, i64 %1) - ret %"struct.std::_Bit_iterator"* %this1 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSt18_Bit_iterator_base7_M_incrEl(%"struct.std::_Bit_iterator_base"* %this, i64 %__i) #6 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Bit_iterator_base"*, align 8 - %__i.addr = alloca i64, align 8 - %__n = alloca i64, align 8 - store %"struct.std::_Bit_iterator_base"* %this, %"struct.std::_Bit_iterator_base"** %this.addr, align 8 - store i64 %__i, i64* %__i.addr, align 8 - %this1 = load %"struct.std::_Bit_iterator_base"*, %"struct.std::_Bit_iterator_base"** %this.addr, align 8 - %0 = load i64, i64* %__i.addr, align 8 - %_M_offset = getelementptr inbounds %"struct.std::_Bit_iterator_base", %"struct.std::_Bit_iterator_base"* %this1, i32 0, i32 1 - %1 = load i32, i32* %_M_offset, align 8 - %conv = zext i32 %1 to i64 - %add = add nsw i64 %0, %conv - store i64 %add, i64* %__n, align 8 - %2 = load i64, i64* %__n, align 8 - %div = sdiv i64 %2, 64 - %_M_p = getelementptr inbounds %"struct.std::_Bit_iterator_base", %"struct.std::_Bit_iterator_base"* %this1, i32 0, i32 0 - %3 = load i64*, i64** %_M_p, align 8 - %add.ptr = getelementptr inbounds i64, i64* %3, i64 %div - store i64* %add.ptr, i64** %_M_p, align 8 - %4 = load i64, i64* %__n, align 8 - %rem = srem i64 %4, 64 - store i64 %rem, i64* %__n, align 8 - %5 = load i64, i64* %__n, align 8 - %cmp = icmp slt i64 %5, 0 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - %6 = load i64, i64* %__n, align 8 - %add2 = add nsw i64 %6, 64 - store i64 %add2, i64* %__n, align 8 - %_M_p3 = getelementptr inbounds %"struct.std::_Bit_iterator_base", %"struct.std::_Bit_iterator_base"* %this1, i32 0, i32 0 - %7 = load i64*, i64** %_M_p3, align 8 - %incdec.ptr = getelementptr inbounds i64, i64* %7, i32 -1 - store i64* %incdec.ptr, i64** %_M_p3, align 8 - br label %if.end - -if.end: ; preds = %if.then, %entry - %8 = load i64, i64* %__n, align 8 - %conv4 = trunc i64 %8 to i32 - %_M_offset5 = getelementptr inbounds %"struct.std::_Bit_iterator_base", %"struct.std::_Bit_iterator_base"* %this1, i32 0, i32 1 - store i32 %conv4, i32* %_M_offset5, align 8 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local i64* @_ZSt4copyIPmS0_ET0_T_S2_S1_(i64* %__first, i64* %__last, i64* %__result) #0 comdat { -entry: - %__first.addr = alloca i64*, align 8 - %__last.addr = alloca i64*, align 8 - %__result.addr = alloca i64*, align 8 - store i64* %__first, i64** %__first.addr, align 8 - store i64* %__last, i64** %__last.addr, align 8 - store i64* %__result, i64** %__result.addr, align 8 - %0 = load i64*, i64** %__first.addr, align 8 - %call = call i64* @_ZSt12__miter_baseIPmET_S1_(i64* %0) - %1 = load i64*, i64** %__last.addr, align 8 - %call1 = call i64* @_ZSt12__miter_baseIPmET_S1_(i64* %1) - %2 = load i64*, i64** %__result.addr, align 8 - %call2 = call i64* @_ZSt14__copy_move_a2ILb0EPmS0_ET1_T0_S2_S1_(i64* %call, i64* %call1, i64* %2) - ret i64* %call2 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local { i64*, i32 } @_ZSt4copyISt19_Bit_const_iteratorSt13_Bit_iteratorET0_T_S3_S2_(i64* %__first.coerce0, i32 %__first.coerce1, i64* %__last.coerce0, i32 %__last.coerce1, i64* %__result.coerce0, i32 %__result.coerce1) #0 comdat { -entry: - %retval = alloca %"struct.std::_Bit_iterator", align 8 - %__first = alloca %"struct.std::_Bit_const_iterator", align 8 - %__last = alloca %"struct.std::_Bit_const_iterator", align 8 - %__result = alloca %"struct.std::_Bit_iterator", align 8 - %agg.tmp = alloca %"struct.std::_Bit_const_iterator", align 8 - %agg.tmp1 = alloca %"struct.std::_Bit_const_iterator", align 8 - %agg.tmp2 = alloca %"struct.std::_Bit_const_iterator", align 8 - %agg.tmp3 = alloca %"struct.std::_Bit_const_iterator", align 8 - %agg.tmp5 = alloca %"struct.std::_Bit_iterator", align 8 - %0 = bitcast %"struct.std::_Bit_const_iterator"* %__first to { i64*, i32 }* - %1 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %0, i32 0, i32 0 - store i64* %__first.coerce0, i64** %1, align 8 - %2 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %0, i32 0, i32 1 - store i32 %__first.coerce1, i32* %2, align 8 - %3 = bitcast %"struct.std::_Bit_const_iterator"* %__last to { i64*, i32 }* - %4 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %3, i32 0, i32 0 - store i64* %__last.coerce0, i64** %4, align 8 - %5 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %3, i32 0, i32 1 - store i32 %__last.coerce1, i32* %5, align 8 - %6 = bitcast %"struct.std::_Bit_iterator"* %__result to { i64*, i32 }* - %7 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %6, i32 0, i32 0 - store i64* %__result.coerce0, i64** %7, align 8 - %8 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %6, i32 0, i32 1 - store i32 %__result.coerce1, i32* %8, align 8 - %9 = bitcast %"struct.std::_Bit_const_iterator"* %agg.tmp1 to i8* - %10 = bitcast %"struct.std::_Bit_const_iterator"* %__first to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %9, i8* align 8 %10, i64 16, i1 false) - %11 = bitcast %"struct.std::_Bit_const_iterator"* %agg.tmp1 to { i64*, i32 }* - %12 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %11, i32 0, i32 0 - %13 = load i64*, i64** %12, align 8 - %14 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %11, i32 0, i32 1 - %15 = load i32, i32* %14, align 8 - %call = call { i64*, i32 } @_ZSt12__miter_baseISt19_Bit_const_iteratorET_S1_(i64* %13, i32 %15) - %16 = bitcast %"struct.std::_Bit_const_iterator"* %agg.tmp to { i64*, i32 }* - %17 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %16, i32 0, i32 0 - %18 = extractvalue { i64*, i32 } %call, 0 - store i64* %18, i64** %17, align 8 - %19 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %16, i32 0, i32 1 - %20 = extractvalue { i64*, i32 } %call, 1 - store i32 %20, i32* %19, align 8 - %21 = bitcast %"struct.std::_Bit_const_iterator"* %agg.tmp3 to i8* - %22 = bitcast %"struct.std::_Bit_const_iterator"* %__last to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %21, i8* align 8 %22, i64 16, i1 false) - %23 = bitcast %"struct.std::_Bit_const_iterator"* %agg.tmp3 to { i64*, i32 }* - %24 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %23, i32 0, i32 0 - %25 = load i64*, i64** %24, align 8 - %26 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %23, i32 0, i32 1 - %27 = load i32, i32* %26, align 8 - %call4 = call { i64*, i32 } @_ZSt12__miter_baseISt19_Bit_const_iteratorET_S1_(i64* %25, i32 %27) - %28 = bitcast %"struct.std::_Bit_const_iterator"* %agg.tmp2 to { i64*, i32 }* - %29 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %28, i32 0, i32 0 - %30 = extractvalue { i64*, i32 } %call4, 0 - store i64* %30, i64** %29, align 8 - %31 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %28, i32 0, i32 1 - %32 = extractvalue { i64*, i32 } %call4, 1 - store i32 %32, i32* %31, align 8 - %33 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp5 to i8* - %34 = bitcast %"struct.std::_Bit_iterator"* %__result to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %33, i8* align 8 %34, i64 16, i1 false) - %35 = bitcast %"struct.std::_Bit_const_iterator"* %agg.tmp to { i64*, i32 }* - %36 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %35, i32 0, i32 0 - %37 = load i64*, i64** %36, align 8 - %38 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %35, i32 0, i32 1 - %39 = load i32, i32* %38, align 8 - %40 = bitcast %"struct.std::_Bit_const_iterator"* %agg.tmp2 to { i64*, i32 }* - %41 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %40, i32 0, i32 0 - %42 = load i64*, i64** %41, align 8 - %43 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %40, i32 0, i32 1 - %44 = load i32, i32* %43, align 8 - %45 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp5 to { i64*, i32 }* - %46 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %45, i32 0, i32 0 - %47 = load i64*, i64** %46, align 8 - %48 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %45, i32 0, i32 1 - %49 = load i32, i32* %48, align 8 - %call6 = call { i64*, i32 } @_ZSt14__copy_move_a2ILb0ESt19_Bit_const_iteratorSt13_Bit_iteratorET1_T0_S3_S2_(i64* %37, i32 %39, i64* %42, i32 %44, i64* %47, i32 %49) - %50 = bitcast %"struct.std::_Bit_iterator"* %retval to { i64*, i32 }* - %51 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %50, i32 0, i32 0 - %52 = extractvalue { i64*, i32 } %call6, 0 - store i64* %52, i64** %51, align 8 - %53 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %50, i32 0, i32 1 - %54 = extractvalue { i64*, i32 } %call6, 1 - store i32 %54, i32* %53, align 8 - %55 = bitcast %"struct.std::_Bit_iterator"* %retval to { i64*, i32 }* - %56 = load { i64*, i32 }, { i64*, i32 }* %55, align 8 - ret { i64*, i32 } %56 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local i64* @_ZSt14__copy_move_a2ILb0EPmS0_ET1_T0_S2_S1_(i64* %__first, i64* %__last, i64* %__result) #0 comdat { -entry: - %__first.addr = alloca i64*, align 8 - %__last.addr = alloca i64*, align 8 - %__result.addr = alloca i64*, align 8 - store i64* %__first, i64** %__first.addr, align 8 - store i64* %__last, i64** %__last.addr, align 8 - store i64* %__result, i64** %__result.addr, align 8 - %0 = load i64*, i64** %__first.addr, align 8 - %call = call i64* @_ZSt12__niter_baseIPmET_S1_(i64* %0) - %1 = load i64*, i64** %__last.addr, align 8 - %call1 = call i64* @_ZSt12__niter_baseIPmET_S1_(i64* %1) - %2 = load i64*, i64** %__result.addr, align 8 - %call2 = call i64* @_ZSt12__niter_baseIPmET_S1_(i64* %2) - %call3 = call i64* @_ZSt13__copy_move_aILb0EPmS0_ET1_T0_S2_S1_(i64* %call, i64* %call1, i64* %call2) - ret i64* %call3 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local i64* @_ZSt12__miter_baseIPmET_S1_(i64* %__it) #6 comdat { -entry: - %__it.addr = alloca i64*, align 8 - store i64* %__it, i64** %__it.addr, align 8 - %0 = load i64*, i64** %__it.addr, align 8 - ret i64* %0 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local i64* @_ZSt13__copy_move_aILb0EPmS0_ET1_T0_S2_S1_(i64* %__first, i64* %__last, i64* %__result) #0 comdat { -entry: - %__first.addr = alloca i64*, align 8 - %__last.addr = alloca i64*, align 8 - %__result.addr = alloca i64*, align 8 - %__simple = alloca i8, align 1 - store i64* %__first, i64** %__first.addr, align 8 - store i64* %__last, i64** %__last.addr, align 8 - store i64* %__result, i64** %__result.addr, align 8 - store i8 1, i8* %__simple, align 1 - %0 = load i64*, i64** %__first.addr, align 8 - %1 = load i64*, i64** %__last.addr, align 8 - %2 = load i64*, i64** %__result.addr, align 8 - %call = call i64* @_ZNSt11__copy_moveILb0ELb1ESt26random_access_iterator_tagE8__copy_mImEEPT_PKS3_S6_S4_(i64* %0, i64* %1, i64* %2) - ret i64* %call -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local i64* @_ZSt12__niter_baseIPmET_S1_(i64* %__it) #6 comdat { -entry: - %__it.addr = alloca i64*, align 8 - store i64* %__it, i64** %__it.addr, align 8 - %0 = load i64*, i64** %__it.addr, align 8 - ret i64* %0 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local i64* @_ZNSt11__copy_moveILb0ELb1ESt26random_access_iterator_tagE8__copy_mImEEPT_PKS3_S6_S4_(i64* %__first, i64* %__last, i64* %__result) #6 comdat align 2 { -entry: - %__first.addr = alloca i64*, align 8 - %__last.addr = alloca i64*, align 8 - %__result.addr = alloca i64*, align 8 - %_Num = alloca i64, align 8 - store i64* %__first, i64** %__first.addr, align 8 - store i64* %__last, i64** %__last.addr, align 8 - store i64* %__result, i64** %__result.addr, align 8 - %0 = load i64*, i64** %__last.addr, align 8 - %1 = load i64*, i64** %__first.addr, align 8 - %sub.ptr.lhs.cast = ptrtoint i64* %0 to i64 - %sub.ptr.rhs.cast = ptrtoint i64* %1 to i64 - %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast - %sub.ptr.div = sdiv exact i64 %sub.ptr.sub, 8 - store i64 %sub.ptr.div, i64* %_Num, align 8 - %2 = load i64, i64* %_Num, align 8 - %tobool = icmp ne i64 %2, 0 - br i1 %tobool, label %if.then, label %if.end - -if.then: ; preds = %entry - %3 = load i64*, i64** %__result.addr, align 8 - %4 = bitcast i64* %3 to i8* - %5 = load i64*, i64** %__first.addr, align 8 - %6 = bitcast i64* %5 to i8* - %7 = load i64, i64* %_Num, align 8 - %mul = mul i64 8, %7 - call void @llvm.memmove.p0i8.p0i8.i64(i8* align 8 %4, i8* align 8 %6, i64 %mul, i1 false) - br label %if.end - -if.end: ; preds = %if.then, %entry - %8 = load i64*, i64** %__result.addr, align 8 - %9 = load i64, i64* %_Num, align 8 - %add.ptr = getelementptr inbounds i64, i64* %8, i64 %9 - ret i64* %add.ptr -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local { i64*, i32 } @_ZSt14__copy_move_a2ILb0ESt19_Bit_const_iteratorSt13_Bit_iteratorET1_T0_S3_S2_(i64* %__first.coerce0, i32 %__first.coerce1, i64* %__last.coerce0, i32 %__last.coerce1, i64* %__result.coerce0, i32 %__result.coerce1) #0 comdat { -entry: - %retval = alloca %"struct.std::_Bit_iterator", align 8 - %__first = alloca %"struct.std::_Bit_const_iterator", align 8 - %__last = alloca %"struct.std::_Bit_const_iterator", align 8 - %__result = alloca %"struct.std::_Bit_iterator", align 8 - %agg.tmp = alloca %"struct.std::_Bit_const_iterator", align 8 - %agg.tmp1 = alloca %"struct.std::_Bit_const_iterator", align 8 - %agg.tmp2 = alloca %"struct.std::_Bit_const_iterator", align 8 - %agg.tmp3 = alloca %"struct.std::_Bit_const_iterator", align 8 - %agg.tmp5 = alloca %"struct.std::_Bit_iterator", align 8 - %agg.tmp6 = alloca %"struct.std::_Bit_iterator", align 8 - %0 = bitcast %"struct.std::_Bit_const_iterator"* %__first to { i64*, i32 }* - %1 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %0, i32 0, i32 0 - store i64* %__first.coerce0, i64** %1, align 8 - %2 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %0, i32 0, i32 1 - store i32 %__first.coerce1, i32* %2, align 8 - %3 = bitcast %"struct.std::_Bit_const_iterator"* %__last to { i64*, i32 }* - %4 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %3, i32 0, i32 0 - store i64* %__last.coerce0, i64** %4, align 8 - %5 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %3, i32 0, i32 1 - store i32 %__last.coerce1, i32* %5, align 8 - %6 = bitcast %"struct.std::_Bit_iterator"* %__result to { i64*, i32 }* - %7 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %6, i32 0, i32 0 - store i64* %__result.coerce0, i64** %7, align 8 - %8 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %6, i32 0, i32 1 - store i32 %__result.coerce1, i32* %8, align 8 - %9 = bitcast %"struct.std::_Bit_const_iterator"* %agg.tmp1 to i8* - %10 = bitcast %"struct.std::_Bit_const_iterator"* %__first to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %9, i8* align 8 %10, i64 16, i1 false) - %11 = bitcast %"struct.std::_Bit_const_iterator"* %agg.tmp1 to { i64*, i32 }* - %12 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %11, i32 0, i32 0 - %13 = load i64*, i64** %12, align 8 - %14 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %11, i32 0, i32 1 - %15 = load i32, i32* %14, align 8 - %call = call { i64*, i32 } @_ZSt12__niter_baseISt19_Bit_const_iteratorET_S1_(i64* %13, i32 %15) - %16 = bitcast %"struct.std::_Bit_const_iterator"* %agg.tmp to { i64*, i32 }* - %17 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %16, i32 0, i32 0 - %18 = extractvalue { i64*, i32 } %call, 0 - store i64* %18, i64** %17, align 8 - %19 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %16, i32 0, i32 1 - %20 = extractvalue { i64*, i32 } %call, 1 - store i32 %20, i32* %19, align 8 - %21 = bitcast %"struct.std::_Bit_const_iterator"* %agg.tmp3 to i8* - %22 = bitcast %"struct.std::_Bit_const_iterator"* %__last to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %21, i8* align 8 %22, i64 16, i1 false) - %23 = bitcast %"struct.std::_Bit_const_iterator"* %agg.tmp3 to { i64*, i32 }* - %24 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %23, i32 0, i32 0 - %25 = load i64*, i64** %24, align 8 - %26 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %23, i32 0, i32 1 - %27 = load i32, i32* %26, align 8 - %call4 = call { i64*, i32 } @_ZSt12__niter_baseISt19_Bit_const_iteratorET_S1_(i64* %25, i32 %27) - %28 = bitcast %"struct.std::_Bit_const_iterator"* %agg.tmp2 to { i64*, i32 }* - %29 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %28, i32 0, i32 0 - %30 = extractvalue { i64*, i32 } %call4, 0 - store i64* %30, i64** %29, align 8 - %31 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %28, i32 0, i32 1 - %32 = extractvalue { i64*, i32 } %call4, 1 - store i32 %32, i32* %31, align 8 - %33 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp6 to i8* - %34 = bitcast %"struct.std::_Bit_iterator"* %__result to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %33, i8* align 8 %34, i64 16, i1 false) - %35 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp6 to { i64*, i32 }* - %36 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %35, i32 0, i32 0 - %37 = load i64*, i64** %36, align 8 - %38 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %35, i32 0, i32 1 - %39 = load i32, i32* %38, align 8 - %call7 = call { i64*, i32 } @_ZSt12__niter_baseISt13_Bit_iteratorET_S1_(i64* %37, i32 %39) - %40 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp5 to { i64*, i32 }* - %41 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %40, i32 0, i32 0 - %42 = extractvalue { i64*, i32 } %call7, 0 - store i64* %42, i64** %41, align 8 - %43 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %40, i32 0, i32 1 - %44 = extractvalue { i64*, i32 } %call7, 1 - store i32 %44, i32* %43, align 8 - %45 = bitcast %"struct.std::_Bit_const_iterator"* %agg.tmp to { i64*, i32 }* - %46 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %45, i32 0, i32 0 - %47 = load i64*, i64** %46, align 8 - %48 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %45, i32 0, i32 1 - %49 = load i32, i32* %48, align 8 - %50 = bitcast %"struct.std::_Bit_const_iterator"* %agg.tmp2 to { i64*, i32 }* - %51 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %50, i32 0, i32 0 - %52 = load i64*, i64** %51, align 8 - %53 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %50, i32 0, i32 1 - %54 = load i32, i32* %53, align 8 - %55 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp5 to { i64*, i32 }* - %56 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %55, i32 0, i32 0 - %57 = load i64*, i64** %56, align 8 - %58 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %55, i32 0, i32 1 - %59 = load i32, i32* %58, align 8 - %call8 = call { i64*, i32 } @_ZSt13__copy_move_aILb0ESt19_Bit_const_iteratorSt13_Bit_iteratorET1_T0_S3_S2_(i64* %47, i32 %49, i64* %52, i32 %54, i64* %57, i32 %59) - %60 = bitcast %"struct.std::_Bit_iterator"* %retval to { i64*, i32 }* - %61 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %60, i32 0, i32 0 - %62 = extractvalue { i64*, i32 } %call8, 0 - store i64* %62, i64** %61, align 8 - %63 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %60, i32 0, i32 1 - %64 = extractvalue { i64*, i32 } %call8, 1 - store i32 %64, i32* %63, align 8 - %65 = bitcast %"struct.std::_Bit_iterator"* %retval to { i64*, i32 }* - %66 = load { i64*, i32 }, { i64*, i32 }* %65, align 8 - ret { i64*, i32 } %66 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local { i64*, i32 } @_ZSt12__miter_baseISt19_Bit_const_iteratorET_S1_(i64* %__it.coerce0, i32 %__it.coerce1) #6 comdat { -entry: - %retval = alloca %"struct.std::_Bit_const_iterator", align 8 - %__it = alloca %"struct.std::_Bit_const_iterator", align 8 - %0 = bitcast %"struct.std::_Bit_const_iterator"* %__it to { i64*, i32 }* - %1 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %0, i32 0, i32 0 - store i64* %__it.coerce0, i64** %1, align 8 - %2 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %0, i32 0, i32 1 - store i32 %__it.coerce1, i32* %2, align 8 - %3 = bitcast %"struct.std::_Bit_const_iterator"* %retval to i8* - %4 = bitcast %"struct.std::_Bit_const_iterator"* %__it to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %3, i8* align 8 %4, i64 16, i1 false) - %5 = bitcast %"struct.std::_Bit_const_iterator"* %retval to { i64*, i32 }* - %6 = load { i64*, i32 }, { i64*, i32 }* %5, align 8 - ret { i64*, i32 } %6 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local { i64*, i32 } @_ZSt13__copy_move_aILb0ESt19_Bit_const_iteratorSt13_Bit_iteratorET1_T0_S3_S2_(i64* %__first.coerce0, i32 %__first.coerce1, i64* %__last.coerce0, i32 %__last.coerce1, i64* %__result.coerce0, i32 %__result.coerce1) #0 comdat { -entry: - %retval = alloca %"struct.std::_Bit_iterator", align 8 - %__first = alloca %"struct.std::_Bit_const_iterator", align 8 - %__last = alloca %"struct.std::_Bit_const_iterator", align 8 - %__result = alloca %"struct.std::_Bit_iterator", align 8 - %__simple = alloca i8, align 1 - %agg.tmp = alloca %"struct.std::_Bit_const_iterator", align 8 - %agg.tmp1 = alloca %"struct.std::_Bit_const_iterator", align 8 - %agg.tmp2 = alloca %"struct.std::_Bit_iterator", align 8 - %0 = bitcast %"struct.std::_Bit_const_iterator"* %__first to { i64*, i32 }* - %1 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %0, i32 0, i32 0 - store i64* %__first.coerce0, i64** %1, align 8 - %2 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %0, i32 0, i32 1 - store i32 %__first.coerce1, i32* %2, align 8 - %3 = bitcast %"struct.std::_Bit_const_iterator"* %__last to { i64*, i32 }* - %4 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %3, i32 0, i32 0 - store i64* %__last.coerce0, i64** %4, align 8 - %5 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %3, i32 0, i32 1 - store i32 %__last.coerce1, i32* %5, align 8 - %6 = bitcast %"struct.std::_Bit_iterator"* %__result to { i64*, i32 }* - %7 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %6, i32 0, i32 0 - store i64* %__result.coerce0, i64** %7, align 8 - %8 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %6, i32 0, i32 1 - store i32 %__result.coerce1, i32* %8, align 8 - store i8 0, i8* %__simple, align 1 - %9 = bitcast %"struct.std::_Bit_const_iterator"* %agg.tmp to i8* - %10 = bitcast %"struct.std::_Bit_const_iterator"* %__first to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %9, i8* align 8 %10, i64 16, i1 false) - %11 = bitcast %"struct.std::_Bit_const_iterator"* %agg.tmp1 to i8* - %12 = bitcast %"struct.std::_Bit_const_iterator"* %__last to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %11, i8* align 8 %12, i64 16, i1 false) - %13 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp2 to i8* - %14 = bitcast %"struct.std::_Bit_iterator"* %__result to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %13, i8* align 8 %14, i64 16, i1 false) - %15 = bitcast %"struct.std::_Bit_const_iterator"* %agg.tmp to { i64*, i32 }* - %16 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %15, i32 0, i32 0 - %17 = load i64*, i64** %16, align 8 - %18 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %15, i32 0, i32 1 - %19 = load i32, i32* %18, align 8 - %20 = bitcast %"struct.std::_Bit_const_iterator"* %agg.tmp1 to { i64*, i32 }* - %21 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %20, i32 0, i32 0 - %22 = load i64*, i64** %21, align 8 - %23 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %20, i32 0, i32 1 - %24 = load i32, i32* %23, align 8 - %25 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp2 to { i64*, i32 }* - %26 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %25, i32 0, i32 0 - %27 = load i64*, i64** %26, align 8 - %28 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %25, i32 0, i32 1 - %29 = load i32, i32* %28, align 8 - %call = call { i64*, i32 } @_ZNSt11__copy_moveILb0ELb0ESt26random_access_iterator_tagE8__copy_mISt19_Bit_const_iteratorSt13_Bit_iteratorEET0_T_S6_S5_(i64* %17, i32 %19, i64* %22, i32 %24, i64* %27, i32 %29) - %30 = bitcast %"struct.std::_Bit_iterator"* %retval to { i64*, i32 }* - %31 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %30, i32 0, i32 0 - %32 = extractvalue { i64*, i32 } %call, 0 - store i64* %32, i64** %31, align 8 - %33 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %30, i32 0, i32 1 - %34 = extractvalue { i64*, i32 } %call, 1 - store i32 %34, i32* %33, align 8 - %35 = bitcast %"struct.std::_Bit_iterator"* %retval to { i64*, i32 }* - %36 = load { i64*, i32 }, { i64*, i32 }* %35, align 8 - ret { i64*, i32 } %36 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local { i64*, i32 } @_ZSt12__niter_baseISt19_Bit_const_iteratorET_S1_(i64* %__it.coerce0, i32 %__it.coerce1) #6 comdat { -entry: - %retval = alloca %"struct.std::_Bit_const_iterator", align 8 - %__it = alloca %"struct.std::_Bit_const_iterator", align 8 - %0 = bitcast %"struct.std::_Bit_const_iterator"* %__it to { i64*, i32 }* - %1 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %0, i32 0, i32 0 - store i64* %__it.coerce0, i64** %1, align 8 - %2 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %0, i32 0, i32 1 - store i32 %__it.coerce1, i32* %2, align 8 - %3 = bitcast %"struct.std::_Bit_const_iterator"* %retval to i8* - %4 = bitcast %"struct.std::_Bit_const_iterator"* %__it to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %3, i8* align 8 %4, i64 16, i1 false) - %5 = bitcast %"struct.std::_Bit_const_iterator"* %retval to { i64*, i32 }* - %6 = load { i64*, i32 }, { i64*, i32 }* %5, align 8 - ret { i64*, i32 } %6 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local { i64*, i32 } @_ZSt12__niter_baseISt13_Bit_iteratorET_S1_(i64* %__it.coerce0, i32 %__it.coerce1) #6 comdat { -entry: - %retval = alloca %"struct.std::_Bit_iterator", align 8 - %__it = alloca %"struct.std::_Bit_iterator", align 8 - %0 = bitcast %"struct.std::_Bit_iterator"* %__it to { i64*, i32 }* - %1 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %0, i32 0, i32 0 - store i64* %__it.coerce0, i64** %1, align 8 - %2 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %0, i32 0, i32 1 - store i32 %__it.coerce1, i32* %2, align 8 - %3 = bitcast %"struct.std::_Bit_iterator"* %retval to i8* - %4 = bitcast %"struct.std::_Bit_iterator"* %__it to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %3, i8* align 8 %4, i64 16, i1 false) - %5 = bitcast %"struct.std::_Bit_iterator"* %retval to { i64*, i32 }* - %6 = load { i64*, i32 }, { i64*, i32 }* %5, align 8 - ret { i64*, i32 } %6 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local { i64*, i32 } @_ZNSt11__copy_moveILb0ELb0ESt26random_access_iterator_tagE8__copy_mISt19_Bit_const_iteratorSt13_Bit_iteratorEET0_T_S6_S5_(i64* %__first.coerce0, i32 %__first.coerce1, i64* %__last.coerce0, i32 %__last.coerce1, i64* %__result.coerce0, i32 %__result.coerce1) #0 comdat align 2 { -entry: - %retval = alloca %"struct.std::_Bit_iterator", align 8 - %__first = alloca %"struct.std::_Bit_const_iterator", align 8 - %__last = alloca %"struct.std::_Bit_const_iterator", align 8 - %__result = alloca %"struct.std::_Bit_iterator", align 8 - %__n = alloca i64, align 8 - %ref.tmp = alloca %"struct.std::_Bit_reference", align 8 - %0 = bitcast %"struct.std::_Bit_const_iterator"* %__first to { i64*, i32 }* - %1 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %0, i32 0, i32 0 - store i64* %__first.coerce0, i64** %1, align 8 - %2 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %0, i32 0, i32 1 - store i32 %__first.coerce1, i32* %2, align 8 - %3 = bitcast %"struct.std::_Bit_const_iterator"* %__last to { i64*, i32 }* - %4 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %3, i32 0, i32 0 - store i64* %__last.coerce0, i64** %4, align 8 - %5 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %3, i32 0, i32 1 - store i32 %__last.coerce1, i32* %5, align 8 - %6 = bitcast %"struct.std::_Bit_iterator"* %__result to { i64*, i32 }* - %7 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %6, i32 0, i32 0 - store i64* %__result.coerce0, i64** %7, align 8 - %8 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %6, i32 0, i32 1 - store i32 %__result.coerce1, i32* %8, align 8 - %9 = bitcast %"struct.std::_Bit_const_iterator"* %__last to %"struct.std::_Bit_iterator_base"* - %10 = bitcast %"struct.std::_Bit_const_iterator"* %__first to %"struct.std::_Bit_iterator_base"* - %call = call i64 @_ZStmiRKSt18_Bit_iterator_baseS1_(%"struct.std::_Bit_iterator_base"* dereferenceable(16) %9, %"struct.std::_Bit_iterator_base"* dereferenceable(16) %10) - store i64 %call, i64* %__n, align 8 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %11 = load i64, i64* %__n, align 8 - %cmp = icmp sgt i64 %11, 0 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %call1 = call zeroext i1 @_ZNKSt19_Bit_const_iteratordeEv(%"struct.std::_Bit_const_iterator"* %__first) - %call2 = call { i64*, i64 } @_ZNKSt13_Bit_iteratordeEv(%"struct.std::_Bit_iterator"* %__result) - %12 = bitcast %"struct.std::_Bit_reference"* %ref.tmp to { i64*, i64 }* - %13 = getelementptr inbounds { i64*, i64 }, { i64*, i64 }* %12, i32 0, i32 0 - %14 = extractvalue { i64*, i64 } %call2, 0 - store i64* %14, i64** %13, align 8 - %15 = getelementptr inbounds { i64*, i64 }, { i64*, i64 }* %12, i32 0, i32 1 - %16 = extractvalue { i64*, i64 } %call2, 1 - store i64 %16, i64* %15, align 8 - %call3 = call dereferenceable(16) %"struct.std::_Bit_reference"* @_ZNSt14_Bit_referenceaSEb(%"struct.std::_Bit_reference"* %ref.tmp, i1 zeroext %call1) - %call4 = call dereferenceable(16) %"struct.std::_Bit_const_iterator"* @_ZNSt19_Bit_const_iteratorppEv(%"struct.std::_Bit_const_iterator"* %__first) - %call5 = call dereferenceable(16) %"struct.std::_Bit_iterator"* @_ZNSt13_Bit_iteratorppEv(%"struct.std::_Bit_iterator"* %__result) - br label %for.inc - -for.inc: ; preds = %for.body - %17 = load i64, i64* %__n, align 8 - %dec = add nsw i64 %17, -1 - store i64 %dec, i64* %__n, align 8 - br label %for.cond - -for.end: ; preds = %for.cond - %18 = bitcast %"struct.std::_Bit_iterator"* %retval to i8* - %19 = bitcast %"struct.std::_Bit_iterator"* %__result to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %18, i8* align 8 %19, i64 16, i1 false) - %20 = bitcast %"struct.std::_Bit_iterator"* %retval to { i64*, i32 }* - %21 = load { i64*, i32 }, { i64*, i32 }* %20, align 8 - ret { i64*, i32 } %21 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local { i64*, i64 } @_ZNKSt13_Bit_iteratordeEv(%"struct.std::_Bit_iterator"* %this) #0 comdat align 2 { -entry: - %retval = alloca %"struct.std::_Bit_reference", align 8 - %this.addr = alloca %"struct.std::_Bit_iterator"*, align 8 - store %"struct.std::_Bit_iterator"* %this, %"struct.std::_Bit_iterator"** %this.addr, align 8 - %this1 = load %"struct.std::_Bit_iterator"*, %"struct.std::_Bit_iterator"** %this.addr, align 8 - %0 = bitcast %"struct.std::_Bit_iterator"* %this1 to %"struct.std::_Bit_iterator_base"* - %_M_p = getelementptr inbounds %"struct.std::_Bit_iterator_base", %"struct.std::_Bit_iterator_base"* %0, i32 0, i32 0 - %1 = load i64*, i64** %_M_p, align 8 - %2 = bitcast %"struct.std::_Bit_iterator"* %this1 to %"struct.std::_Bit_iterator_base"* - %_M_offset = getelementptr inbounds %"struct.std::_Bit_iterator_base", %"struct.std::_Bit_iterator_base"* %2, i32 0, i32 1 - %3 = load i32, i32* %_M_offset, align 8 - %sh_prom = zext i32 %3 to i64 - %shl = shl i64 1, %sh_prom - call void @_ZNSt14_Bit_referenceC2EPmm(%"struct.std::_Bit_reference"* %retval, i64* %1, i64 %shl) - %4 = bitcast %"struct.std::_Bit_reference"* %retval to { i64*, i64 }* - %5 = load { i64*, i64 }, { i64*, i64 }* %4, align 8 - ret { i64*, i64 } %5 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local dereferenceable(16) %"struct.std::_Bit_reference"* @_ZNSt14_Bit_referenceaSEb(%"struct.std::_Bit_reference"* %this, i1 zeroext %__x) #6 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Bit_reference"*, align 8 - %__x.addr = alloca i8, align 1 - store %"struct.std::_Bit_reference"* %this, %"struct.std::_Bit_reference"** %this.addr, align 8 - %frombool = zext i1 %__x to i8 - store i8 %frombool, i8* %__x.addr, align 1 - %this1 = load %"struct.std::_Bit_reference"*, %"struct.std::_Bit_reference"** %this.addr, align 8 - %0 = load i8, i8* %__x.addr, align 1 - %tobool = trunc i8 %0 to i1 - br i1 %tobool, label %if.then, label %if.else - -if.then: ; preds = %entry - %_M_mask = getelementptr inbounds %"struct.std::_Bit_reference", %"struct.std::_Bit_reference"* %this1, i32 0, i32 1 - %1 = load i64, i64* %_M_mask, align 8 - %_M_p = getelementptr inbounds %"struct.std::_Bit_reference", %"struct.std::_Bit_reference"* %this1, i32 0, i32 0 - %2 = load i64*, i64** %_M_p, align 8 - %3 = load i64, i64* %2, align 8 - %or = or i64 %3, %1 - store i64 %or, i64* %2, align 8 - br label %if.end - -if.else: ; preds = %entry - %_M_mask2 = getelementptr inbounds %"struct.std::_Bit_reference", %"struct.std::_Bit_reference"* %this1, i32 0, i32 1 - %4 = load i64, i64* %_M_mask2, align 8 - %neg = xor i64 %4, -1 - %_M_p3 = getelementptr inbounds %"struct.std::_Bit_reference", %"struct.std::_Bit_reference"* %this1, i32 0, i32 0 - %5 = load i64*, i64** %_M_p3, align 8 - %6 = load i64, i64* %5, align 8 - %and = and i64 %6, %neg - store i64 %and, i64* %5, align 8 - br label %if.end - -if.end: ; preds = %if.else, %if.then - ret %"struct.std::_Bit_reference"* %this1 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local dereferenceable(16) %"struct.std::_Bit_const_iterator"* @_ZNSt19_Bit_const_iteratorppEv(%"struct.std::_Bit_const_iterator"* %this) #0 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Bit_const_iterator"*, align 8 - store %"struct.std::_Bit_const_iterator"* %this, %"struct.std::_Bit_const_iterator"** %this.addr, align 8 - %this1 = load %"struct.std::_Bit_const_iterator"*, %"struct.std::_Bit_const_iterator"** %this.addr, align 8 - %0 = bitcast %"struct.std::_Bit_const_iterator"* %this1 to %"struct.std::_Bit_iterator_base"* - call void @_ZNSt18_Bit_iterator_base10_M_bump_upEv(%"struct.std::_Bit_iterator_base"* %0) - ret %"struct.std::_Bit_const_iterator"* %this1 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local dereferenceable(16) %"struct.std::_Bit_iterator"* @_ZNSt13_Bit_iteratorppEv(%"struct.std::_Bit_iterator"* %this) #6 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Bit_iterator"*, align 8 - store %"struct.std::_Bit_iterator"* %this, %"struct.std::_Bit_iterator"** %this.addr, align 8 - %this1 = load %"struct.std::_Bit_iterator"*, %"struct.std::_Bit_iterator"** %this.addr, align 8 - %0 = bitcast %"struct.std::_Bit_iterator"* %this1 to %"struct.std::_Bit_iterator_base"* - call void @_ZNSt18_Bit_iterator_base10_M_bump_upEv(%"struct.std::_Bit_iterator_base"* %0) - ret %"struct.std::_Bit_iterator"* %this1 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSt18_Bit_iterator_base10_M_bump_upEv(%"struct.std::_Bit_iterator_base"* %this) #6 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Bit_iterator_base"*, align 8 - store %"struct.std::_Bit_iterator_base"* %this, %"struct.std::_Bit_iterator_base"** %this.addr, align 8 - %this1 = load %"struct.std::_Bit_iterator_base"*, %"struct.std::_Bit_iterator_base"** %this.addr, align 8 - %_M_offset = getelementptr inbounds %"struct.std::_Bit_iterator_base", %"struct.std::_Bit_iterator_base"* %this1, i32 0, i32 1 - %0 = load i32, i32* %_M_offset, align 8 - %inc = add i32 %0, 1 - store i32 %inc, i32* %_M_offset, align 8 - %cmp = icmp eq i32 %0, 63 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - %_M_offset2 = getelementptr inbounds %"struct.std::_Bit_iterator_base", %"struct.std::_Bit_iterator_base"* %this1, i32 0, i32 1 - store i32 0, i32* %_M_offset2, align 8 - %_M_p = getelementptr inbounds %"struct.std::_Bit_iterator_base", %"struct.std::_Bit_iterator_base"* %this1, i32 0, i32 0 - %1 = load i64*, i64** %_M_p, align 8 - %incdec.ptr = getelementptr inbounds i64, i64* %1, i32 1 - store i64* %incdec.ptr, i64** %_M_p, align 8 - br label %if.end - -if.end: ; preds = %if.then, %entry - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local dereferenceable(1) %"class.std::allocator.1"* @_ZN9__gnu_cxx14__alloc_traitsISaImEE17_S_select_on_copyERKS1_(%"class.std::allocator.1"* dereferenceable(1) %__a) #6 comdat align 2 { -entry: - %__a.addr = alloca %"class.std::allocator.1"*, align 8 - store %"class.std::allocator.1"* %__a, %"class.std::allocator.1"** %__a.addr, align 8 - %0 = load %"class.std::allocator.1"*, %"class.std::allocator.1"** %__a.addr, align 8 - ret %"class.std::allocator.1"* %0 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local dereferenceable(1) %"class.std::allocator.1"* @_ZNKSt13_Bvector_baseISaIbEE20_M_get_Bit_allocatorEv(%"struct.std::_Bvector_base"* %this) #6 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Bvector_base"*, align 8 - store %"struct.std::_Bvector_base"* %this, %"struct.std::_Bvector_base"** %this.addr, align 8 - %this1 = load %"struct.std::_Bvector_base"*, %"struct.std::_Bvector_base"** %this.addr, align 8 - %_M_impl = getelementptr inbounds %"struct.std::_Bvector_base", %"struct.std::_Bvector_base"* %this1, i32 0, i32 0 - %0 = bitcast %"struct.std::_Bvector_base >::_Bvector_impl"* %_M_impl to %"class.std::allocator.1"* - ret %"class.std::allocator.1"* %0 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSaIbEC2ImEERKSaIT_E(%"class.std::allocator.13"* %this, %"class.std::allocator.1"* dereferenceable(1) %0) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"class.std::allocator.13"*, align 8 - %.addr = alloca %"class.std::allocator.1"*, align 8 - store %"class.std::allocator.13"* %this, %"class.std::allocator.13"** %this.addr, align 8 - store %"class.std::allocator.1"* %0, %"class.std::allocator.1"** %.addr, align 8 - %this1 = load %"class.std::allocator.13"*, %"class.std::allocator.13"** %this.addr, align 8 - %1 = bitcast %"class.std::allocator.13"* %this1 to %"class.__gnu_cxx::new_allocator.14"* - call void @_ZN9__gnu_cxx13new_allocatorIbEC2Ev(%"class.__gnu_cxx::new_allocator.14"* %1) #3 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt13_Bvector_baseISaIbEEC2ERKS0_(%"struct.std::_Bvector_base"* %this, %"class.std::allocator.13"* dereferenceable(1) %__a) unnamed_addr #0 comdat align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -entry: - %this.addr = alloca %"struct.std::_Bvector_base"*, align 8 - %__a.addr = alloca %"class.std::allocator.13"*, align 8 - %ref.tmp = alloca %"class.std::allocator.1", align 1 - %exn.slot = alloca i8* - %ehselector.slot = alloca i32 - store %"struct.std::_Bvector_base"* %this, %"struct.std::_Bvector_base"** %this.addr, align 8 - store %"class.std::allocator.13"* %__a, %"class.std::allocator.13"** %__a.addr, align 8 - %this1 = load %"struct.std::_Bvector_base"*, %"struct.std::_Bvector_base"** %this.addr, align 8 - %_M_impl = getelementptr inbounds %"struct.std::_Bvector_base", %"struct.std::_Bvector_base"* %this1, i32 0, i32 0 - %0 = load %"class.std::allocator.13"*, %"class.std::allocator.13"** %__a.addr, align 8 - call void @_ZNSaImEC2IbEERKSaIT_E(%"class.std::allocator.1"* %ref.tmp, %"class.std::allocator.13"* dereferenceable(1) %0) #3 - invoke void @_ZNSt13_Bvector_baseISaIbEE13_Bvector_implC2ERKSaImE(%"struct.std::_Bvector_base >::_Bvector_impl"* %_M_impl, %"class.std::allocator.1"* dereferenceable(1) %ref.tmp) - to label %invoke.cont unwind label %lpad - -invoke.cont: ; preds = %entry - call void @_ZNSaImED2Ev(%"class.std::allocator.1"* %ref.tmp) #3 - ret void - -lpad: ; preds = %entry - %1 = landingpad { i8*, i32 } - cleanup - %2 = extractvalue { i8*, i32 } %1, 0 - store i8* %2, i8** %exn.slot, align 8 - %3 = extractvalue { i8*, i32 } %1, 1 - store i32 %3, i32* %ehselector.slot, align 4 - call void @_ZNSaImED2Ev(%"class.std::allocator.1"* %ref.tmp) #3 - br label %eh.resume - -eh.resume: ; preds = %lpad - %exn = load i8*, i8** %exn.slot, align 8 - %sel = load i32, i32* %ehselector.slot, align 4 - %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0 - %lpad.val2 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1 - resume { i8*, i32 } %lpad.val2 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSaIbED2Ev(%"class.std::allocator.13"* %this) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"class.std::allocator.13"*, align 8 - store %"class.std::allocator.13"* %this, %"class.std::allocator.13"** %this.addr, align 8 - %this1 = load %"class.std::allocator.13"*, %"class.std::allocator.13"** %this.addr, align 8 - %0 = bitcast %"class.std::allocator.13"* %this1 to %"class.__gnu_cxx::new_allocator.14"* - call void @_ZN9__gnu_cxx13new_allocatorIbED2Ev(%"class.__gnu_cxx::new_allocator.14"* %0) #3 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt13_Bvector_baseISaIbEED2Ev(%"struct.std::_Bvector_base"* %this) unnamed_addr #0 comdat align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -entry: - %this.addr = alloca %"struct.std::_Bvector_base"*, align 8 - %exn.slot = alloca i8* - %ehselector.slot = alloca i32 - store %"struct.std::_Bvector_base"* %this, %"struct.std::_Bvector_base"** %this.addr, align 8 - %this1 = load %"struct.std::_Bvector_base"*, %"struct.std::_Bvector_base"** %this.addr, align 8 - invoke void @_ZNSt13_Bvector_baseISaIbEE13_M_deallocateEv(%"struct.std::_Bvector_base"* %this1) - to label %invoke.cont unwind label %lpad - -invoke.cont: ; preds = %entry - %_M_impl = getelementptr inbounds %"struct.std::_Bvector_base", %"struct.std::_Bvector_base"* %this1, i32 0, i32 0 - call void @_ZNSt13_Bvector_baseISaIbEE13_Bvector_implD2Ev(%"struct.std::_Bvector_base >::_Bvector_impl"* %_M_impl) #3 - ret void - -lpad: ; preds = %entry - %0 = landingpad { i8*, i32 } - cleanup - %1 = extractvalue { i8*, i32 } %0, 0 - store i8* %1, i8** %exn.slot, align 8 - %2 = extractvalue { i8*, i32 } %0, 1 - store i32 %2, i32* %ehselector.slot, align 4 - %_M_impl2 = getelementptr inbounds %"struct.std::_Bvector_base", %"struct.std::_Bvector_base"* %this1, i32 0, i32 0 - call void @_ZNSt13_Bvector_baseISaIbEE13_Bvector_implD2Ev(%"struct.std::_Bvector_base >::_Bvector_impl"* %_M_impl2) #3 - br label %eh.resume - -eh.resume: ; preds = %lpad - %exn = load i8*, i8** %exn.slot, align 8 - %sel = load i32, i32* %ehselector.slot, align 4 - %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0 - %lpad.val3 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1 - resume { i8*, i32 } %lpad.val3 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx13new_allocatorIbEC2Ev(%"class.__gnu_cxx::new_allocator.14"* %this) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::new_allocator.14"*, align 8 - store %"class.__gnu_cxx::new_allocator.14"* %this, %"class.__gnu_cxx::new_allocator.14"** %this.addr, align 8 - %this1 = load %"class.__gnu_cxx::new_allocator.14"*, %"class.__gnu_cxx::new_allocator.14"** %this.addr, align 8 - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSaImEC2IbEERKSaIT_E(%"class.std::allocator.1"* %this, %"class.std::allocator.13"* dereferenceable(1) %0) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"class.std::allocator.1"*, align 8 - %.addr = alloca %"class.std::allocator.13"*, align 8 - store %"class.std::allocator.1"* %this, %"class.std::allocator.1"** %this.addr, align 8 - store %"class.std::allocator.13"* %0, %"class.std::allocator.13"** %.addr, align 8 - %this1 = load %"class.std::allocator.1"*, %"class.std::allocator.1"** %this.addr, align 8 - %1 = bitcast %"class.std::allocator.1"* %this1 to %"class.__gnu_cxx::new_allocator.2"* - call void @_ZN9__gnu_cxx13new_allocatorImEC2Ev(%"class.__gnu_cxx::new_allocator.2"* %1) #3 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt13_Bvector_baseISaIbEE13_Bvector_implC2ERKSaImE(%"struct.std::_Bvector_base >::_Bvector_impl"* %this, %"class.std::allocator.1"* dereferenceable(1) %__a) unnamed_addr #0 comdat align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -entry: - %this.addr = alloca %"struct.std::_Bvector_base >::_Bvector_impl"*, align 8 - %__a.addr = alloca %"class.std::allocator.1"*, align 8 - %exn.slot = alloca i8* - %ehselector.slot = alloca i32 - store %"struct.std::_Bvector_base >::_Bvector_impl"* %this, %"struct.std::_Bvector_base >::_Bvector_impl"** %this.addr, align 8 - store %"class.std::allocator.1"* %__a, %"class.std::allocator.1"** %__a.addr, align 8 - %this1 = load %"struct.std::_Bvector_base >::_Bvector_impl"*, %"struct.std::_Bvector_base >::_Bvector_impl"** %this.addr, align 8 - %0 = bitcast %"struct.std::_Bvector_base >::_Bvector_impl"* %this1 to %"class.std::allocator.1"* - %1 = load %"class.std::allocator.1"*, %"class.std::allocator.1"** %__a.addr, align 8 - call void @_ZNSaImEC2ERKS_(%"class.std::allocator.1"* %0, %"class.std::allocator.1"* dereferenceable(1) %1) #3 - %_M_start = getelementptr inbounds %"struct.std::_Bvector_base >::_Bvector_impl", %"struct.std::_Bvector_base >::_Bvector_impl"* %this1, i32 0, i32 0 - invoke void @_ZNSt13_Bit_iteratorC2Ev(%"struct.std::_Bit_iterator"* %_M_start) - to label %invoke.cont unwind label %lpad - -invoke.cont: ; preds = %entry - %_M_finish = getelementptr inbounds %"struct.std::_Bvector_base >::_Bvector_impl", %"struct.std::_Bvector_base >::_Bvector_impl"* %this1, i32 0, i32 1 - invoke void @_ZNSt13_Bit_iteratorC2Ev(%"struct.std::_Bit_iterator"* %_M_finish) - to label %invoke.cont2 unwind label %lpad - -invoke.cont2: ; preds = %invoke.cont - %_M_end_of_storage = getelementptr inbounds %"struct.std::_Bvector_base >::_Bvector_impl", %"struct.std::_Bvector_base >::_Bvector_impl"* %this1, i32 0, i32 2 - store i64* null, i64** %_M_end_of_storage, align 8 - ret void - -lpad: ; preds = %invoke.cont, %entry - %2 = landingpad { i8*, i32 } - cleanup - %3 = extractvalue { i8*, i32 } %2, 0 - store i8* %3, i8** %exn.slot, align 8 - %4 = extractvalue { i8*, i32 } %2, 1 - store i32 %4, i32* %ehselector.slot, align 4 - %5 = bitcast %"struct.std::_Bvector_base >::_Bvector_impl"* %this1 to %"class.std::allocator.1"* - call void @_ZNSaImED2Ev(%"class.std::allocator.1"* %5) #3 - br label %eh.resume - -eh.resume: ; preds = %lpad - %exn = load i8*, i8** %exn.slot, align 8 - %sel = load i32, i32* %ehselector.slot, align 4 - %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0 - %lpad.val3 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1 - resume { i8*, i32 } %lpad.val3 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSaImED2Ev(%"class.std::allocator.1"* %this) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"class.std::allocator.1"*, align 8 - store %"class.std::allocator.1"* %this, %"class.std::allocator.1"** %this.addr, align 8 - %this1 = load %"class.std::allocator.1"*, %"class.std::allocator.1"** %this.addr, align 8 - %0 = bitcast %"class.std::allocator.1"* %this1 to %"class.__gnu_cxx::new_allocator.2"* - call void @_ZN9__gnu_cxx13new_allocatorImED2Ev(%"class.__gnu_cxx::new_allocator.2"* %0) #3 - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSaImEC2ERKS_(%"class.std::allocator.1"* %this, %"class.std::allocator.1"* dereferenceable(1) %__a) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"class.std::allocator.1"*, align 8 - %__a.addr = alloca %"class.std::allocator.1"*, align 8 - store %"class.std::allocator.1"* %this, %"class.std::allocator.1"** %this.addr, align 8 - store %"class.std::allocator.1"* %__a, %"class.std::allocator.1"** %__a.addr, align 8 - %this1 = load %"class.std::allocator.1"*, %"class.std::allocator.1"** %this.addr, align 8 - %0 = bitcast %"class.std::allocator.1"* %this1 to %"class.__gnu_cxx::new_allocator.2"* - %1 = load %"class.std::allocator.1"*, %"class.std::allocator.1"** %__a.addr, align 8 - %2 = bitcast %"class.std::allocator.1"* %1 to %"class.__gnu_cxx::new_allocator.2"* - call void @_ZN9__gnu_cxx13new_allocatorImEC2ERKS1_(%"class.__gnu_cxx::new_allocator.2"* %0, %"class.__gnu_cxx::new_allocator.2"* dereferenceable(1) %2) #3 - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx13new_allocatorImEC2ERKS1_(%"class.__gnu_cxx::new_allocator.2"* %this, %"class.__gnu_cxx::new_allocator.2"* dereferenceable(1) %0) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::new_allocator.2"*, align 8 - %.addr = alloca %"class.__gnu_cxx::new_allocator.2"*, align 8 - store %"class.__gnu_cxx::new_allocator.2"* %this, %"class.__gnu_cxx::new_allocator.2"** %this.addr, align 8 - store %"class.__gnu_cxx::new_allocator.2"* %0, %"class.__gnu_cxx::new_allocator.2"** %.addr, align 8 - %this1 = load %"class.__gnu_cxx::new_allocator.2"*, %"class.__gnu_cxx::new_allocator.2"** %this.addr, align 8 - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx13new_allocatorIbED2Ev(%"class.__gnu_cxx::new_allocator.14"* %this) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::new_allocator.14"*, align 8 - store %"class.__gnu_cxx::new_allocator.14"* %this, %"class.__gnu_cxx::new_allocator.14"** %this.addr, align 8 - %this1 = load %"class.__gnu_cxx::new_allocator.14"*, %"class.__gnu_cxx::new_allocator.14"** %this.addr, align 8 - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSt13_Bvector_baseISaIbEE13_Bvector_implD2Ev(%"struct.std::_Bvector_base >::_Bvector_impl"* %this) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Bvector_base >::_Bvector_impl"*, align 8 - store %"struct.std::_Bvector_base >::_Bvector_impl"* %this, %"struct.std::_Bvector_base >::_Bvector_impl"** %this.addr, align 8 - %this1 = load %"struct.std::_Bvector_base >::_Bvector_impl"*, %"struct.std::_Bvector_base >::_Bvector_impl"** %this.addr, align 8 - %0 = bitcast %"struct.std::_Bvector_base >::_Bvector_impl"* %this1 to %"class.std::allocator.1"* - call void @_ZNSaImED2Ev(%"class.std::allocator.1"* %0) #3 - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local { i64*, i32 } @_ZNSt13_Bit_iteratorppEi(%"struct.std::_Bit_iterator"* %this, i32 %0) #6 comdat align 2 { -entry: - %retval = alloca %"struct.std::_Bit_iterator", align 8 - %this.addr = alloca %"struct.std::_Bit_iterator"*, align 8 - %.addr = alloca i32, align 4 - store %"struct.std::_Bit_iterator"* %this, %"struct.std::_Bit_iterator"** %this.addr, align 8 - store i32 %0, i32* %.addr, align 4 - %this1 = load %"struct.std::_Bit_iterator"*, %"struct.std::_Bit_iterator"** %this.addr, align 8 - %1 = bitcast %"struct.std::_Bit_iterator"* %retval to i8* - %2 = bitcast %"struct.std::_Bit_iterator"* %this1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %1, i8* align 8 %2, i64 16, i1 false) - %3 = bitcast %"struct.std::_Bit_iterator"* %this1 to %"struct.std::_Bit_iterator_base"* - call void @_ZNSt18_Bit_iterator_base10_M_bump_upEv(%"struct.std::_Bit_iterator_base"* %3) - %4 = bitcast %"struct.std::_Bit_iterator"* %retval to { i64*, i32 }* - %5 = load { i64*, i32 }, { i64*, i32 }* %4, align 8 - ret { i64*, i32 } %5 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt6vectorIbSaIbEE13_M_insert_auxESt13_Bit_iteratorb(%"class.std::vector.0"* %this, i64* %__position.coerce0, i32 %__position.coerce1, i1 zeroext %__x) #0 comdat align 2 { -entry: - %__position = alloca %"struct.std::_Bit_iterator", align 8 - %this.addr = alloca %"class.std::vector.0"*, align 8 - %__x.addr = alloca i8, align 1 - %agg.tmp = alloca %"struct.std::_Bit_iterator", align 8 - %agg.tmp3 = alloca %"struct.std::_Bit_iterator", align 8 - %agg.tmp6 = alloca %"struct.std::_Bit_iterator", align 8 - %coerce = alloca %"struct.std::_Bit_iterator", align 8 - %ref.tmp = alloca %"struct.std::_Bit_reference", align 8 - %__len = alloca i64, align 8 - %__q = alloca i64*, align 8 - %__start = alloca %"struct.std::_Bit_iterator", align 8 - %__i = alloca %"struct.std::_Bit_iterator", align 8 - %agg.tmp19 = alloca %"struct.std::_Bit_const_iterator", align 8 - %ref.tmp20 = alloca %"struct.std::_Bit_iterator", align 8 - %agg.tmp22 = alloca %"struct.std::_Bit_const_iterator", align 8 - %agg.tmp23 = alloca %"struct.std::_Bit_iterator", align 8 - %ref.tmp26 = alloca %"struct.std::_Bit_reference", align 8 - %ref.tmp27 = alloca %"struct.std::_Bit_iterator", align 8 - %__finish = alloca %"struct.std::_Bit_iterator", align 8 - %agg.tmp31 = alloca %"struct.std::_Bit_iterator", align 8 - %agg.tmp32 = alloca %"struct.std::_Bit_iterator", align 8 - %agg.tmp34 = alloca %"struct.std::_Bit_iterator", align 8 - %0 = bitcast %"struct.std::_Bit_iterator"* %__position to { i64*, i32 }* - %1 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %0, i32 0, i32 0 - store i64* %__position.coerce0, i64** %1, align 8 - %2 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %0, i32 0, i32 1 - store i32 %__position.coerce1, i32* %2, align 8 - store %"class.std::vector.0"* %this, %"class.std::vector.0"** %this.addr, align 8 - %frombool = zext i1 %__x to i8 - store i8 %frombool, i8* %__x.addr, align 1 - %this1 = load %"class.std::vector.0"*, %"class.std::vector.0"** %this.addr, align 8 - %3 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Bvector_base"* - %_M_impl = getelementptr inbounds %"struct.std::_Bvector_base", %"struct.std::_Bvector_base"* %3, i32 0, i32 0 - %_M_finish = getelementptr inbounds %"struct.std::_Bvector_base >::_Bvector_impl", %"struct.std::_Bvector_base >::_Bvector_impl"* %_M_impl, i32 0, i32 1 - %4 = bitcast %"struct.std::_Bit_iterator"* %_M_finish to %"struct.std::_Bit_iterator_base"* - %_M_p = getelementptr inbounds %"struct.std::_Bit_iterator_base", %"struct.std::_Bit_iterator_base"* %4, i32 0, i32 0 - %5 = load i64*, i64** %_M_p, align 8 - %6 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Bvector_base"* - %_M_impl2 = getelementptr inbounds %"struct.std::_Bvector_base", %"struct.std::_Bvector_base"* %6, i32 0, i32 0 - %call = call i64* @_ZNKSt13_Bvector_baseISaIbEE13_Bvector_impl11_M_end_addrEv(%"struct.std::_Bvector_base >::_Bvector_impl"* %_M_impl2) - %cmp = icmp ne i64* %5, %call - br i1 %cmp, label %if.then, label %if.else - -if.then: ; preds = %entry - %7 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp to i8* - %8 = bitcast %"struct.std::_Bit_iterator"* %__position to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %7, i8* align 8 %8, i64 16, i1 false) - %9 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Bvector_base"* - %_M_impl4 = getelementptr inbounds %"struct.std::_Bvector_base", %"struct.std::_Bvector_base"* %9, i32 0, i32 0 - %_M_finish5 = getelementptr inbounds %"struct.std::_Bvector_base >::_Bvector_impl", %"struct.std::_Bvector_base >::_Bvector_impl"* %_M_impl4, i32 0, i32 1 - %10 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp3 to i8* - %11 = bitcast %"struct.std::_Bit_iterator"* %_M_finish5 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %10, i8* align 8 %11, i64 16, i1 false) - %12 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Bvector_base"* - %_M_impl7 = getelementptr inbounds %"struct.std::_Bvector_base", %"struct.std::_Bvector_base"* %12, i32 0, i32 0 - %_M_finish8 = getelementptr inbounds %"struct.std::_Bvector_base >::_Bvector_impl", %"struct.std::_Bvector_base >::_Bvector_impl"* %_M_impl7, i32 0, i32 1 - %call9 = call { i64*, i32 } @_ZNKSt13_Bit_iteratorplEl(%"struct.std::_Bit_iterator"* %_M_finish8, i64 1) - %13 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp6 to { i64*, i32 }* - %14 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %13, i32 0, i32 0 - %15 = extractvalue { i64*, i32 } %call9, 0 - store i64* %15, i64** %14, align 8 - %16 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %13, i32 0, i32 1 - %17 = extractvalue { i64*, i32 } %call9, 1 - store i32 %17, i32* %16, align 8 - %18 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp to { i64*, i32 }* - %19 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %18, i32 0, i32 0 - %20 = load i64*, i64** %19, align 8 - %21 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %18, i32 0, i32 1 - %22 = load i32, i32* %21, align 8 - %23 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp3 to { i64*, i32 }* - %24 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %23, i32 0, i32 0 - %25 = load i64*, i64** %24, align 8 - %26 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %23, i32 0, i32 1 - %27 = load i32, i32* %26, align 8 - %28 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp6 to { i64*, i32 }* - %29 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %28, i32 0, i32 0 - %30 = load i64*, i64** %29, align 8 - %31 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %28, i32 0, i32 1 - %32 = load i32, i32* %31, align 8 - %call10 = call { i64*, i32 } @_ZSt13copy_backwardISt13_Bit_iteratorS0_ET0_T_S2_S1_(i64* %20, i32 %22, i64* %25, i32 %27, i64* %30, i32 %32) - %33 = bitcast %"struct.std::_Bit_iterator"* %coerce to { i64*, i32 }* - %34 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %33, i32 0, i32 0 - %35 = extractvalue { i64*, i32 } %call10, 0 - store i64* %35, i64** %34, align 8 - %36 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %33, i32 0, i32 1 - %37 = extractvalue { i64*, i32 } %call10, 1 - store i32 %37, i32* %36, align 8 - %38 = load i8, i8* %__x.addr, align 1 - %tobool = trunc i8 %38 to i1 - %call11 = call { i64*, i64 } @_ZNKSt13_Bit_iteratordeEv(%"struct.std::_Bit_iterator"* %__position) - %39 = bitcast %"struct.std::_Bit_reference"* %ref.tmp to { i64*, i64 }* - %40 = getelementptr inbounds { i64*, i64 }, { i64*, i64 }* %39, i32 0, i32 0 - %41 = extractvalue { i64*, i64 } %call11, 0 - store i64* %41, i64** %40, align 8 - %42 = getelementptr inbounds { i64*, i64 }, { i64*, i64 }* %39, i32 0, i32 1 - %43 = extractvalue { i64*, i64 } %call11, 1 - store i64 %43, i64* %42, align 8 - %call12 = call dereferenceable(16) %"struct.std::_Bit_reference"* @_ZNSt14_Bit_referenceaSEb(%"struct.std::_Bit_reference"* %ref.tmp, i1 zeroext %tobool) - %44 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Bvector_base"* - %_M_impl13 = getelementptr inbounds %"struct.std::_Bvector_base", %"struct.std::_Bvector_base"* %44, i32 0, i32 0 - %_M_finish14 = getelementptr inbounds %"struct.std::_Bvector_base >::_Bvector_impl", %"struct.std::_Bvector_base >::_Bvector_impl"* %_M_impl13, i32 0, i32 1 - %call15 = call dereferenceable(16) %"struct.std::_Bit_iterator"* @_ZNSt13_Bit_iteratorppEv(%"struct.std::_Bit_iterator"* %_M_finish14) - br label %if.end - -if.else: ; preds = %entry - %call16 = call i64 @_ZNKSt6vectorIbSaIbEE12_M_check_lenEmPKc(%"class.std::vector.0"* %this1, i64 1, i8* getelementptr inbounds ([28 x i8], [28 x i8]* @.str.27, i64 0, i64 0)) - store i64 %call16, i64* %__len, align 8 - %45 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Bvector_base"* - %46 = load i64, i64* %__len, align 8 - %call17 = call i64* @_ZNSt13_Bvector_baseISaIbEE11_M_allocateEm(%"struct.std::_Bvector_base"* %45, i64 %46) - store i64* %call17, i64** %__q, align 8 - %47 = load i64*, i64** %__q, align 8 - %call18 = call i64* @_ZSt11__addressofImEPT_RS0_(i64* dereferenceable(8) %47) - call void @_ZNSt13_Bit_iteratorC2EPmj(%"struct.std::_Bit_iterator"* %__start, i64* %call18, i32 0) - %call21 = call { i64*, i32 } @_ZNSt6vectorIbSaIbEE5beginEv(%"class.std::vector.0"* %this1) - %48 = bitcast %"struct.std::_Bit_iterator"* %ref.tmp20 to { i64*, i32 }* - %49 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %48, i32 0, i32 0 - %50 = extractvalue { i64*, i32 } %call21, 0 - store i64* %50, i64** %49, align 8 - %51 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %48, i32 0, i32 1 - %52 = extractvalue { i64*, i32 } %call21, 1 - store i32 %52, i32* %51, align 8 - call void @_ZNSt19_Bit_const_iteratorC2ERKSt13_Bit_iterator(%"struct.std::_Bit_const_iterator"* %agg.tmp19, %"struct.std::_Bit_iterator"* dereferenceable(16) %ref.tmp20) - call void @_ZNSt19_Bit_const_iteratorC2ERKSt13_Bit_iterator(%"struct.std::_Bit_const_iterator"* %agg.tmp22, %"struct.std::_Bit_iterator"* dereferenceable(16) %__position) - %53 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp23 to i8* - %54 = bitcast %"struct.std::_Bit_iterator"* %__start to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %53, i8* align 8 %54, i64 16, i1 false) - %55 = bitcast %"struct.std::_Bit_const_iterator"* %agg.tmp19 to { i64*, i32 }* - %56 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %55, i32 0, i32 0 - %57 = load i64*, i64** %56, align 8 - %58 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %55, i32 0, i32 1 - %59 = load i32, i32* %58, align 8 - %60 = bitcast %"struct.std::_Bit_const_iterator"* %agg.tmp22 to { i64*, i32 }* - %61 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %60, i32 0, i32 0 - %62 = load i64*, i64** %61, align 8 - %63 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %60, i32 0, i32 1 - %64 = load i32, i32* %63, align 8 - %call24 = call { i64*, i32 } @_ZNSt6vectorIbSaIbEE15_M_copy_alignedESt19_Bit_const_iteratorS2_St13_Bit_iterator(%"class.std::vector.0"* %this1, i64* %57, i32 %59, i64* %62, i32 %64, %"struct.std::_Bit_iterator"* byval(%"struct.std::_Bit_iterator") align 8 %agg.tmp23) - %65 = bitcast %"struct.std::_Bit_iterator"* %__i to { i64*, i32 }* - %66 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %65, i32 0, i32 0 - %67 = extractvalue { i64*, i32 } %call24, 0 - store i64* %67, i64** %66, align 8 - %68 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %65, i32 0, i32 1 - %69 = extractvalue { i64*, i32 } %call24, 1 - store i32 %69, i32* %68, align 8 - %70 = load i8, i8* %__x.addr, align 1 - %tobool25 = trunc i8 %70 to i1 - %call28 = call { i64*, i32 } @_ZNSt13_Bit_iteratorppEi(%"struct.std::_Bit_iterator"* %__i, i32 0) - %71 = bitcast %"struct.std::_Bit_iterator"* %ref.tmp27 to { i64*, i32 }* - %72 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %71, i32 0, i32 0 - %73 = extractvalue { i64*, i32 } %call28, 0 - store i64* %73, i64** %72, align 8 - %74 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %71, i32 0, i32 1 - %75 = extractvalue { i64*, i32 } %call28, 1 - store i32 %75, i32* %74, align 8 - %call29 = call { i64*, i64 } @_ZNKSt13_Bit_iteratordeEv(%"struct.std::_Bit_iterator"* %ref.tmp27) - %76 = bitcast %"struct.std::_Bit_reference"* %ref.tmp26 to { i64*, i64 }* - %77 = getelementptr inbounds { i64*, i64 }, { i64*, i64 }* %76, i32 0, i32 0 - %78 = extractvalue { i64*, i64 } %call29, 0 - store i64* %78, i64** %77, align 8 - %79 = getelementptr inbounds { i64*, i64 }, { i64*, i64 }* %76, i32 0, i32 1 - %80 = extractvalue { i64*, i64 } %call29, 1 - store i64 %80, i64* %79, align 8 - %call30 = call dereferenceable(16) %"struct.std::_Bit_reference"* @_ZNSt14_Bit_referenceaSEb(%"struct.std::_Bit_reference"* %ref.tmp26, i1 zeroext %tobool25) - %81 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp31 to i8* - %82 = bitcast %"struct.std::_Bit_iterator"* %__position to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %81, i8* align 8 %82, i64 16, i1 false) - %call33 = call { i64*, i32 } @_ZNSt6vectorIbSaIbEE3endEv(%"class.std::vector.0"* %this1) - %83 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp32 to { i64*, i32 }* - %84 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %83, i32 0, i32 0 - %85 = extractvalue { i64*, i32 } %call33, 0 - store i64* %85, i64** %84, align 8 - %86 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %83, i32 0, i32 1 - %87 = extractvalue { i64*, i32 } %call33, 1 - store i32 %87, i32* %86, align 8 - %88 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp34 to i8* - %89 = bitcast %"struct.std::_Bit_iterator"* %__i to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %88, i8* align 8 %89, i64 16, i1 false) - %90 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp31 to { i64*, i32 }* - %91 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %90, i32 0, i32 0 - %92 = load i64*, i64** %91, align 8 - %93 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %90, i32 0, i32 1 - %94 = load i32, i32* %93, align 8 - %95 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp32 to { i64*, i32 }* - %96 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %95, i32 0, i32 0 - %97 = load i64*, i64** %96, align 8 - %98 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %95, i32 0, i32 1 - %99 = load i32, i32* %98, align 8 - %100 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp34 to { i64*, i32 }* - %101 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %100, i32 0, i32 0 - %102 = load i64*, i64** %101, align 8 - %103 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %100, i32 0, i32 1 - %104 = load i32, i32* %103, align 8 - %call35 = call { i64*, i32 } @_ZSt4copyISt13_Bit_iteratorS0_ET0_T_S2_S1_(i64* %92, i32 %94, i64* %97, i32 %99, i64* %102, i32 %104) - %105 = bitcast %"struct.std::_Bit_iterator"* %__finish to { i64*, i32 }* - %106 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %105, i32 0, i32 0 - %107 = extractvalue { i64*, i32 } %call35, 0 - store i64* %107, i64** %106, align 8 - %108 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %105, i32 0, i32 1 - %109 = extractvalue { i64*, i32 } %call35, 1 - store i32 %109, i32* %108, align 8 - %110 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Bvector_base"* - call void @_ZNSt13_Bvector_baseISaIbEE13_M_deallocateEv(%"struct.std::_Bvector_base"* %110) - %111 = load i64*, i64** %__q, align 8 - %112 = load i64, i64* %__len, align 8 - %call36 = call i64 @_ZNSt13_Bvector_baseISaIbEE8_S_nwordEm(i64 %112) - %add.ptr = getelementptr inbounds i64, i64* %111, i64 %call36 - %113 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Bvector_base"* - %_M_impl37 = getelementptr inbounds %"struct.std::_Bvector_base", %"struct.std::_Bvector_base"* %113, i32 0, i32 0 - %_M_end_of_storage = getelementptr inbounds %"struct.std::_Bvector_base >::_Bvector_impl", %"struct.std::_Bvector_base >::_Bvector_impl"* %_M_impl37, i32 0, i32 2 - store i64* %add.ptr, i64** %_M_end_of_storage, align 8 - %114 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Bvector_base"* - %_M_impl38 = getelementptr inbounds %"struct.std::_Bvector_base", %"struct.std::_Bvector_base"* %114, i32 0, i32 0 - %_M_start = getelementptr inbounds %"struct.std::_Bvector_base >::_Bvector_impl", %"struct.std::_Bvector_base >::_Bvector_impl"* %_M_impl38, i32 0, i32 0 - %115 = bitcast %"struct.std::_Bit_iterator"* %_M_start to i8* - %116 = bitcast %"struct.std::_Bit_iterator"* %__start to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %115, i8* align 8 %116, i64 12, i1 false) - %117 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Bvector_base"* - %_M_impl39 = getelementptr inbounds %"struct.std::_Bvector_base", %"struct.std::_Bvector_base"* %117, i32 0, i32 0 - %_M_finish40 = getelementptr inbounds %"struct.std::_Bvector_base >::_Bvector_impl", %"struct.std::_Bvector_base >::_Bvector_impl"* %_M_impl39, i32 0, i32 1 - %118 = bitcast %"struct.std::_Bit_iterator"* %_M_finish40 to i8* - %119 = bitcast %"struct.std::_Bit_iterator"* %__finish to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %118, i8* align 8 %119, i64 12, i1 false) - br label %if.end - -if.end: ; preds = %if.else, %if.then - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local { i64*, i32 } @_ZNSt6vectorIbSaIbEE3endEv(%"class.std::vector.0"* %this) #6 comdat align 2 { -entry: - %retval = alloca %"struct.std::_Bit_iterator", align 8 - %this.addr = alloca %"class.std::vector.0"*, align 8 - store %"class.std::vector.0"* %this, %"class.std::vector.0"** %this.addr, align 8 - %this1 = load %"class.std::vector.0"*, %"class.std::vector.0"** %this.addr, align 8 - %0 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Bvector_base"* - %_M_impl = getelementptr inbounds %"struct.std::_Bvector_base", %"struct.std::_Bvector_base"* %0, i32 0, i32 0 - %_M_finish = getelementptr inbounds %"struct.std::_Bvector_base >::_Bvector_impl", %"struct.std::_Bvector_base >::_Bvector_impl"* %_M_impl, i32 0, i32 1 - %1 = bitcast %"struct.std::_Bit_iterator"* %retval to i8* - %2 = bitcast %"struct.std::_Bit_iterator"* %_M_finish to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %1, i8* align 8 %2, i64 16, i1 false) - %3 = bitcast %"struct.std::_Bit_iterator"* %retval to { i64*, i32 }* - %4 = load { i64*, i32 }, { i64*, i32 }* %3, align 8 - ret { i64*, i32 } %4 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local { i64*, i32 } @_ZSt13copy_backwardISt13_Bit_iteratorS0_ET0_T_S2_S1_(i64* %__first.coerce0, i32 %__first.coerce1, i64* %__last.coerce0, i32 %__last.coerce1, i64* %__result.coerce0, i32 %__result.coerce1) #0 comdat { -entry: - %retval = alloca %"struct.std::_Bit_iterator", align 8 - %__first = alloca %"struct.std::_Bit_iterator", align 8 - %__last = alloca %"struct.std::_Bit_iterator", align 8 - %__result = alloca %"struct.std::_Bit_iterator", align 8 - %agg.tmp = alloca %"struct.std::_Bit_iterator", align 8 - %agg.tmp1 = alloca %"struct.std::_Bit_iterator", align 8 - %agg.tmp2 = alloca %"struct.std::_Bit_iterator", align 8 - %agg.tmp3 = alloca %"struct.std::_Bit_iterator", align 8 - %agg.tmp5 = alloca %"struct.std::_Bit_iterator", align 8 - %0 = bitcast %"struct.std::_Bit_iterator"* %__first to { i64*, i32 }* - %1 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %0, i32 0, i32 0 - store i64* %__first.coerce0, i64** %1, align 8 - %2 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %0, i32 0, i32 1 - store i32 %__first.coerce1, i32* %2, align 8 - %3 = bitcast %"struct.std::_Bit_iterator"* %__last to { i64*, i32 }* - %4 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %3, i32 0, i32 0 - store i64* %__last.coerce0, i64** %4, align 8 - %5 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %3, i32 0, i32 1 - store i32 %__last.coerce1, i32* %5, align 8 - %6 = bitcast %"struct.std::_Bit_iterator"* %__result to { i64*, i32 }* - %7 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %6, i32 0, i32 0 - store i64* %__result.coerce0, i64** %7, align 8 - %8 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %6, i32 0, i32 1 - store i32 %__result.coerce1, i32* %8, align 8 - %9 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp1 to i8* - %10 = bitcast %"struct.std::_Bit_iterator"* %__first to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %9, i8* align 8 %10, i64 16, i1 false) - %11 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp1 to { i64*, i32 }* - %12 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %11, i32 0, i32 0 - %13 = load i64*, i64** %12, align 8 - %14 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %11, i32 0, i32 1 - %15 = load i32, i32* %14, align 8 - %call = call { i64*, i32 } @_ZSt12__miter_baseISt13_Bit_iteratorET_S1_(i64* %13, i32 %15) - %16 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp to { i64*, i32 }* - %17 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %16, i32 0, i32 0 - %18 = extractvalue { i64*, i32 } %call, 0 - store i64* %18, i64** %17, align 8 - %19 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %16, i32 0, i32 1 - %20 = extractvalue { i64*, i32 } %call, 1 - store i32 %20, i32* %19, align 8 - %21 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp3 to i8* - %22 = bitcast %"struct.std::_Bit_iterator"* %__last to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %21, i8* align 8 %22, i64 16, i1 false) - %23 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp3 to { i64*, i32 }* - %24 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %23, i32 0, i32 0 - %25 = load i64*, i64** %24, align 8 - %26 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %23, i32 0, i32 1 - %27 = load i32, i32* %26, align 8 - %call4 = call { i64*, i32 } @_ZSt12__miter_baseISt13_Bit_iteratorET_S1_(i64* %25, i32 %27) - %28 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp2 to { i64*, i32 }* - %29 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %28, i32 0, i32 0 - %30 = extractvalue { i64*, i32 } %call4, 0 - store i64* %30, i64** %29, align 8 - %31 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %28, i32 0, i32 1 - %32 = extractvalue { i64*, i32 } %call4, 1 - store i32 %32, i32* %31, align 8 - %33 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp5 to i8* - %34 = bitcast %"struct.std::_Bit_iterator"* %__result to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %33, i8* align 8 %34, i64 16, i1 false) - %35 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp to { i64*, i32 }* - %36 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %35, i32 0, i32 0 - %37 = load i64*, i64** %36, align 8 - %38 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %35, i32 0, i32 1 - %39 = load i32, i32* %38, align 8 - %40 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp2 to { i64*, i32 }* - %41 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %40, i32 0, i32 0 - %42 = load i64*, i64** %41, align 8 - %43 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %40, i32 0, i32 1 - %44 = load i32, i32* %43, align 8 - %45 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp5 to { i64*, i32 }* - %46 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %45, i32 0, i32 0 - %47 = load i64*, i64** %46, align 8 - %48 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %45, i32 0, i32 1 - %49 = load i32, i32* %48, align 8 - %call6 = call { i64*, i32 } @_ZSt23__copy_move_backward_a2ILb0ESt13_Bit_iteratorS0_ET1_T0_S2_S1_(i64* %37, i32 %39, i64* %42, i32 %44, i64* %47, i32 %49) - %50 = bitcast %"struct.std::_Bit_iterator"* %retval to { i64*, i32 }* - %51 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %50, i32 0, i32 0 - %52 = extractvalue { i64*, i32 } %call6, 0 - store i64* %52, i64** %51, align 8 - %53 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %50, i32 0, i32 1 - %54 = extractvalue { i64*, i32 } %call6, 1 - store i32 %54, i32* %53, align 8 - %55 = bitcast %"struct.std::_Bit_iterator"* %retval to { i64*, i32 }* - %56 = load { i64*, i32 }, { i64*, i32 }* %55, align 8 - ret { i64*, i32 } %56 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local i64 @_ZNKSt6vectorIbSaIbEE12_M_check_lenEmPKc(%"class.std::vector.0"* %this, i64 %__n, i8* %__s) #0 comdat align 2 { -entry: - %this.addr = alloca %"class.std::vector.0"*, align 8 - %__n.addr = alloca i64, align 8 - %__s.addr = alloca i8*, align 8 - %__len = alloca i64, align 8 - %ref.tmp = alloca i64, align 8 - store %"class.std::vector.0"* %this, %"class.std::vector.0"** %this.addr, align 8 - store i64 %__n, i64* %__n.addr, align 8 - store i8* %__s, i8** %__s.addr, align 8 - %this1 = load %"class.std::vector.0"*, %"class.std::vector.0"** %this.addr, align 8 - %call = call i64 @_ZNKSt6vectorIbSaIbEE8max_sizeEv(%"class.std::vector.0"* %this1) - %call2 = call i64 @_ZNKSt6vectorIbSaIbEE4sizeEv(%"class.std::vector.0"* %this1) - %sub = sub i64 %call, %call2 - %0 = load i64, i64* %__n.addr, align 8 - %cmp = icmp ult i64 %sub, %0 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - %1 = load i8*, i8** %__s.addr, align 8 - call void @_ZSt20__throw_length_errorPKc(i8* %1) #19 - unreachable - -if.end: ; preds = %entry - %call3 = call i64 @_ZNKSt6vectorIbSaIbEE4sizeEv(%"class.std::vector.0"* %this1) - %call4 = call i64 @_ZNKSt6vectorIbSaIbEE4sizeEv(%"class.std::vector.0"* %this1) - store i64 %call4, i64* %ref.tmp, align 8 - %call5 = call dereferenceable(8) i64* @_ZSt3maxImERKT_S2_S2_(i64* dereferenceable(8) %ref.tmp, i64* dereferenceable(8) %__n.addr) - %2 = load i64, i64* %call5, align 8 - %add = add i64 %call3, %2 - store i64 %add, i64* %__len, align 8 - %3 = load i64, i64* %__len, align 8 - %call6 = call i64 @_ZNKSt6vectorIbSaIbEE4sizeEv(%"class.std::vector.0"* %this1) - %cmp7 = icmp ult i64 %3, %call6 - br i1 %cmp7, label %cond.true, label %lor.lhs.false - -lor.lhs.false: ; preds = %if.end - %4 = load i64, i64* %__len, align 8 - %call8 = call i64 @_ZNKSt6vectorIbSaIbEE8max_sizeEv(%"class.std::vector.0"* %this1) - %cmp9 = icmp ugt i64 %4, %call8 - br i1 %cmp9, label %cond.true, label %cond.false - -cond.true: ; preds = %lor.lhs.false, %if.end - %call10 = call i64 @_ZNKSt6vectorIbSaIbEE8max_sizeEv(%"class.std::vector.0"* %this1) - br label %cond.end - -cond.false: ; preds = %lor.lhs.false - %5 = load i64, i64* %__len, align 8 - br label %cond.end - -cond.end: ; preds = %cond.false, %cond.true - %cond = phi i64 [ %call10, %cond.true ], [ %5, %cond.false ] - ret i64 %cond -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local { i64*, i32 } @_ZSt4copyISt13_Bit_iteratorS0_ET0_T_S2_S1_(i64* %__first.coerce0, i32 %__first.coerce1, i64* %__last.coerce0, i32 %__last.coerce1, i64* %__result.coerce0, i32 %__result.coerce1) #0 comdat { -entry: - %retval = alloca %"struct.std::_Bit_iterator", align 8 - %__first = alloca %"struct.std::_Bit_iterator", align 8 - %__last = alloca %"struct.std::_Bit_iterator", align 8 - %__result = alloca %"struct.std::_Bit_iterator", align 8 - %agg.tmp = alloca %"struct.std::_Bit_iterator", align 8 - %agg.tmp1 = alloca %"struct.std::_Bit_iterator", align 8 - %agg.tmp2 = alloca %"struct.std::_Bit_iterator", align 8 - %agg.tmp3 = alloca %"struct.std::_Bit_iterator", align 8 - %agg.tmp5 = alloca %"struct.std::_Bit_iterator", align 8 - %0 = bitcast %"struct.std::_Bit_iterator"* %__first to { i64*, i32 }* - %1 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %0, i32 0, i32 0 - store i64* %__first.coerce0, i64** %1, align 8 - %2 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %0, i32 0, i32 1 - store i32 %__first.coerce1, i32* %2, align 8 - %3 = bitcast %"struct.std::_Bit_iterator"* %__last to { i64*, i32 }* - %4 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %3, i32 0, i32 0 - store i64* %__last.coerce0, i64** %4, align 8 - %5 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %3, i32 0, i32 1 - store i32 %__last.coerce1, i32* %5, align 8 - %6 = bitcast %"struct.std::_Bit_iterator"* %__result to { i64*, i32 }* - %7 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %6, i32 0, i32 0 - store i64* %__result.coerce0, i64** %7, align 8 - %8 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %6, i32 0, i32 1 - store i32 %__result.coerce1, i32* %8, align 8 - %9 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp1 to i8* - %10 = bitcast %"struct.std::_Bit_iterator"* %__first to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %9, i8* align 8 %10, i64 16, i1 false) - %11 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp1 to { i64*, i32 }* - %12 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %11, i32 0, i32 0 - %13 = load i64*, i64** %12, align 8 - %14 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %11, i32 0, i32 1 - %15 = load i32, i32* %14, align 8 - %call = call { i64*, i32 } @_ZSt12__miter_baseISt13_Bit_iteratorET_S1_(i64* %13, i32 %15) - %16 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp to { i64*, i32 }* - %17 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %16, i32 0, i32 0 - %18 = extractvalue { i64*, i32 } %call, 0 - store i64* %18, i64** %17, align 8 - %19 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %16, i32 0, i32 1 - %20 = extractvalue { i64*, i32 } %call, 1 - store i32 %20, i32* %19, align 8 - %21 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp3 to i8* - %22 = bitcast %"struct.std::_Bit_iterator"* %__last to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %21, i8* align 8 %22, i64 16, i1 false) - %23 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp3 to { i64*, i32 }* - %24 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %23, i32 0, i32 0 - %25 = load i64*, i64** %24, align 8 - %26 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %23, i32 0, i32 1 - %27 = load i32, i32* %26, align 8 - %call4 = call { i64*, i32 } @_ZSt12__miter_baseISt13_Bit_iteratorET_S1_(i64* %25, i32 %27) - %28 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp2 to { i64*, i32 }* - %29 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %28, i32 0, i32 0 - %30 = extractvalue { i64*, i32 } %call4, 0 - store i64* %30, i64** %29, align 8 - %31 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %28, i32 0, i32 1 - %32 = extractvalue { i64*, i32 } %call4, 1 - store i32 %32, i32* %31, align 8 - %33 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp5 to i8* - %34 = bitcast %"struct.std::_Bit_iterator"* %__result to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %33, i8* align 8 %34, i64 16, i1 false) - %35 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp to { i64*, i32 }* - %36 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %35, i32 0, i32 0 - %37 = load i64*, i64** %36, align 8 - %38 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %35, i32 0, i32 1 - %39 = load i32, i32* %38, align 8 - %40 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp2 to { i64*, i32 }* - %41 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %40, i32 0, i32 0 - %42 = load i64*, i64** %41, align 8 - %43 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %40, i32 0, i32 1 - %44 = load i32, i32* %43, align 8 - %45 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp5 to { i64*, i32 }* - %46 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %45, i32 0, i32 0 - %47 = load i64*, i64** %46, align 8 - %48 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %45, i32 0, i32 1 - %49 = load i32, i32* %48, align 8 - %call6 = call { i64*, i32 } @_ZSt14__copy_move_a2ILb0ESt13_Bit_iteratorS0_ET1_T0_S2_S1_(i64* %37, i32 %39, i64* %42, i32 %44, i64* %47, i32 %49) - %50 = bitcast %"struct.std::_Bit_iterator"* %retval to { i64*, i32 }* - %51 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %50, i32 0, i32 0 - %52 = extractvalue { i64*, i32 } %call6, 0 - store i64* %52, i64** %51, align 8 - %53 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %50, i32 0, i32 1 - %54 = extractvalue { i64*, i32 } %call6, 1 - store i32 %54, i32* %53, align 8 - %55 = bitcast %"struct.std::_Bit_iterator"* %retval to { i64*, i32 }* - %56 = load { i64*, i32 }, { i64*, i32 }* %55, align 8 - ret { i64*, i32 } %56 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local { i64*, i32 } @_ZSt23__copy_move_backward_a2ILb0ESt13_Bit_iteratorS0_ET1_T0_S2_S1_(i64* %__first.coerce0, i32 %__first.coerce1, i64* %__last.coerce0, i32 %__last.coerce1, i64* %__result.coerce0, i32 %__result.coerce1) #0 comdat { -entry: - %retval = alloca %"struct.std::_Bit_iterator", align 8 - %__first = alloca %"struct.std::_Bit_iterator", align 8 - %__last = alloca %"struct.std::_Bit_iterator", align 8 - %__result = alloca %"struct.std::_Bit_iterator", align 8 - %agg.tmp = alloca %"struct.std::_Bit_iterator", align 8 - %agg.tmp1 = alloca %"struct.std::_Bit_iterator", align 8 - %agg.tmp2 = alloca %"struct.std::_Bit_iterator", align 8 - %agg.tmp3 = alloca %"struct.std::_Bit_iterator", align 8 - %agg.tmp5 = alloca %"struct.std::_Bit_iterator", align 8 - %agg.tmp6 = alloca %"struct.std::_Bit_iterator", align 8 - %0 = bitcast %"struct.std::_Bit_iterator"* %__first to { i64*, i32 }* - %1 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %0, i32 0, i32 0 - store i64* %__first.coerce0, i64** %1, align 8 - %2 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %0, i32 0, i32 1 - store i32 %__first.coerce1, i32* %2, align 8 - %3 = bitcast %"struct.std::_Bit_iterator"* %__last to { i64*, i32 }* - %4 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %3, i32 0, i32 0 - store i64* %__last.coerce0, i64** %4, align 8 - %5 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %3, i32 0, i32 1 - store i32 %__last.coerce1, i32* %5, align 8 - %6 = bitcast %"struct.std::_Bit_iterator"* %__result to { i64*, i32 }* - %7 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %6, i32 0, i32 0 - store i64* %__result.coerce0, i64** %7, align 8 - %8 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %6, i32 0, i32 1 - store i32 %__result.coerce1, i32* %8, align 8 - %9 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp1 to i8* - %10 = bitcast %"struct.std::_Bit_iterator"* %__first to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %9, i8* align 8 %10, i64 16, i1 false) - %11 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp1 to { i64*, i32 }* - %12 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %11, i32 0, i32 0 - %13 = load i64*, i64** %12, align 8 - %14 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %11, i32 0, i32 1 - %15 = load i32, i32* %14, align 8 - %call = call { i64*, i32 } @_ZSt12__niter_baseISt13_Bit_iteratorET_S1_(i64* %13, i32 %15) - %16 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp to { i64*, i32 }* - %17 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %16, i32 0, i32 0 - %18 = extractvalue { i64*, i32 } %call, 0 - store i64* %18, i64** %17, align 8 - %19 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %16, i32 0, i32 1 - %20 = extractvalue { i64*, i32 } %call, 1 - store i32 %20, i32* %19, align 8 - %21 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp3 to i8* - %22 = bitcast %"struct.std::_Bit_iterator"* %__last to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %21, i8* align 8 %22, i64 16, i1 false) - %23 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp3 to { i64*, i32 }* - %24 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %23, i32 0, i32 0 - %25 = load i64*, i64** %24, align 8 - %26 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %23, i32 0, i32 1 - %27 = load i32, i32* %26, align 8 - %call4 = call { i64*, i32 } @_ZSt12__niter_baseISt13_Bit_iteratorET_S1_(i64* %25, i32 %27) - %28 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp2 to { i64*, i32 }* - %29 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %28, i32 0, i32 0 - %30 = extractvalue { i64*, i32 } %call4, 0 - store i64* %30, i64** %29, align 8 - %31 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %28, i32 0, i32 1 - %32 = extractvalue { i64*, i32 } %call4, 1 - store i32 %32, i32* %31, align 8 - %33 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp6 to i8* - %34 = bitcast %"struct.std::_Bit_iterator"* %__result to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %33, i8* align 8 %34, i64 16, i1 false) - %35 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp6 to { i64*, i32 }* - %36 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %35, i32 0, i32 0 - %37 = load i64*, i64** %36, align 8 - %38 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %35, i32 0, i32 1 - %39 = load i32, i32* %38, align 8 - %call7 = call { i64*, i32 } @_ZSt12__niter_baseISt13_Bit_iteratorET_S1_(i64* %37, i32 %39) - %40 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp5 to { i64*, i32 }* - %41 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %40, i32 0, i32 0 - %42 = extractvalue { i64*, i32 } %call7, 0 - store i64* %42, i64** %41, align 8 - %43 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %40, i32 0, i32 1 - %44 = extractvalue { i64*, i32 } %call7, 1 - store i32 %44, i32* %43, align 8 - %45 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp to { i64*, i32 }* - %46 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %45, i32 0, i32 0 - %47 = load i64*, i64** %46, align 8 - %48 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %45, i32 0, i32 1 - %49 = load i32, i32* %48, align 8 - %50 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp2 to { i64*, i32 }* - %51 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %50, i32 0, i32 0 - %52 = load i64*, i64** %51, align 8 - %53 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %50, i32 0, i32 1 - %54 = load i32, i32* %53, align 8 - %55 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp5 to { i64*, i32 }* - %56 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %55, i32 0, i32 0 - %57 = load i64*, i64** %56, align 8 - %58 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %55, i32 0, i32 1 - %59 = load i32, i32* %58, align 8 - %call8 = call { i64*, i32 } @_ZSt22__copy_move_backward_aILb0ESt13_Bit_iteratorS0_ET1_T0_S2_S1_(i64* %47, i32 %49, i64* %52, i32 %54, i64* %57, i32 %59) - %60 = bitcast %"struct.std::_Bit_iterator"* %retval to { i64*, i32 }* - %61 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %60, i32 0, i32 0 - %62 = extractvalue { i64*, i32 } %call8, 0 - store i64* %62, i64** %61, align 8 - %63 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %60, i32 0, i32 1 - %64 = extractvalue { i64*, i32 } %call8, 1 - store i32 %64, i32* %63, align 8 - %65 = bitcast %"struct.std::_Bit_iterator"* %retval to { i64*, i32 }* - %66 = load { i64*, i32 }, { i64*, i32 }* %65, align 8 - ret { i64*, i32 } %66 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local { i64*, i32 } @_ZSt12__miter_baseISt13_Bit_iteratorET_S1_(i64* %__it.coerce0, i32 %__it.coerce1) #6 comdat { -entry: - %retval = alloca %"struct.std::_Bit_iterator", align 8 - %__it = alloca %"struct.std::_Bit_iterator", align 8 - %0 = bitcast %"struct.std::_Bit_iterator"* %__it to { i64*, i32 }* - %1 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %0, i32 0, i32 0 - store i64* %__it.coerce0, i64** %1, align 8 - %2 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %0, i32 0, i32 1 - store i32 %__it.coerce1, i32* %2, align 8 - %3 = bitcast %"struct.std::_Bit_iterator"* %retval to i8* - %4 = bitcast %"struct.std::_Bit_iterator"* %__it to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %3, i8* align 8 %4, i64 16, i1 false) - %5 = bitcast %"struct.std::_Bit_iterator"* %retval to { i64*, i32 }* - %6 = load { i64*, i32 }, { i64*, i32 }* %5, align 8 - ret { i64*, i32 } %6 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local { i64*, i32 } @_ZSt22__copy_move_backward_aILb0ESt13_Bit_iteratorS0_ET1_T0_S2_S1_(i64* %__first.coerce0, i32 %__first.coerce1, i64* %__last.coerce0, i32 %__last.coerce1, i64* %__result.coerce0, i32 %__result.coerce1) #0 comdat { -entry: - %retval = alloca %"struct.std::_Bit_iterator", align 8 - %__first = alloca %"struct.std::_Bit_iterator", align 8 - %__last = alloca %"struct.std::_Bit_iterator", align 8 - %__result = alloca %"struct.std::_Bit_iterator", align 8 - %__simple = alloca i8, align 1 - %agg.tmp = alloca %"struct.std::_Bit_iterator", align 8 - %agg.tmp1 = alloca %"struct.std::_Bit_iterator", align 8 - %agg.tmp2 = alloca %"struct.std::_Bit_iterator", align 8 - %0 = bitcast %"struct.std::_Bit_iterator"* %__first to { i64*, i32 }* - %1 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %0, i32 0, i32 0 - store i64* %__first.coerce0, i64** %1, align 8 - %2 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %0, i32 0, i32 1 - store i32 %__first.coerce1, i32* %2, align 8 - %3 = bitcast %"struct.std::_Bit_iterator"* %__last to { i64*, i32 }* - %4 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %3, i32 0, i32 0 - store i64* %__last.coerce0, i64** %4, align 8 - %5 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %3, i32 0, i32 1 - store i32 %__last.coerce1, i32* %5, align 8 - %6 = bitcast %"struct.std::_Bit_iterator"* %__result to { i64*, i32 }* - %7 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %6, i32 0, i32 0 - store i64* %__result.coerce0, i64** %7, align 8 - %8 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %6, i32 0, i32 1 - store i32 %__result.coerce1, i32* %8, align 8 - store i8 0, i8* %__simple, align 1 - %9 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp to i8* - %10 = bitcast %"struct.std::_Bit_iterator"* %__first to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %9, i8* align 8 %10, i64 16, i1 false) - %11 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp1 to i8* - %12 = bitcast %"struct.std::_Bit_iterator"* %__last to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %11, i8* align 8 %12, i64 16, i1 false) - %13 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp2 to i8* - %14 = bitcast %"struct.std::_Bit_iterator"* %__result to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %13, i8* align 8 %14, i64 16, i1 false) - %15 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp to { i64*, i32 }* - %16 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %15, i32 0, i32 0 - %17 = load i64*, i64** %16, align 8 - %18 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %15, i32 0, i32 1 - %19 = load i32, i32* %18, align 8 - %20 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp1 to { i64*, i32 }* - %21 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %20, i32 0, i32 0 - %22 = load i64*, i64** %21, align 8 - %23 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %20, i32 0, i32 1 - %24 = load i32, i32* %23, align 8 - %25 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp2 to { i64*, i32 }* - %26 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %25, i32 0, i32 0 - %27 = load i64*, i64** %26, align 8 - %28 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %25, i32 0, i32 1 - %29 = load i32, i32* %28, align 8 - %call = call { i64*, i32 } @_ZNSt20__copy_move_backwardILb0ELb0ESt26random_access_iterator_tagE13__copy_move_bISt13_Bit_iteratorS3_EET0_T_S5_S4_(i64* %17, i32 %19, i64* %22, i32 %24, i64* %27, i32 %29) - %30 = bitcast %"struct.std::_Bit_iterator"* %retval to { i64*, i32 }* - %31 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %30, i32 0, i32 0 - %32 = extractvalue { i64*, i32 } %call, 0 - store i64* %32, i64** %31, align 8 - %33 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %30, i32 0, i32 1 - %34 = extractvalue { i64*, i32 } %call, 1 - store i32 %34, i32* %33, align 8 - %35 = bitcast %"struct.std::_Bit_iterator"* %retval to { i64*, i32 }* - %36 = load { i64*, i32 }, { i64*, i32 }* %35, align 8 - ret { i64*, i32 } %36 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local { i64*, i32 } @_ZNSt20__copy_move_backwardILb0ELb0ESt26random_access_iterator_tagE13__copy_move_bISt13_Bit_iteratorS3_EET0_T_S5_S4_(i64* %__first.coerce0, i32 %__first.coerce1, i64* %__last.coerce0, i32 %__last.coerce1, i64* %__result.coerce0, i32 %__result.coerce1) #0 comdat align 2 { -entry: - %retval = alloca %"struct.std::_Bit_iterator", align 8 - %__first = alloca %"struct.std::_Bit_iterator", align 8 - %__last = alloca %"struct.std::_Bit_iterator", align 8 - %__result = alloca %"struct.std::_Bit_iterator", align 8 - %__n = alloca i64, align 8 - %ref.tmp = alloca %"struct.std::_Bit_reference", align 8 - %ref.tmp3 = alloca %"struct.std::_Bit_reference", align 8 - %0 = bitcast %"struct.std::_Bit_iterator"* %__first to { i64*, i32 }* - %1 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %0, i32 0, i32 0 - store i64* %__first.coerce0, i64** %1, align 8 - %2 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %0, i32 0, i32 1 - store i32 %__first.coerce1, i32* %2, align 8 - %3 = bitcast %"struct.std::_Bit_iterator"* %__last to { i64*, i32 }* - %4 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %3, i32 0, i32 0 - store i64* %__last.coerce0, i64** %4, align 8 - %5 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %3, i32 0, i32 1 - store i32 %__last.coerce1, i32* %5, align 8 - %6 = bitcast %"struct.std::_Bit_iterator"* %__result to { i64*, i32 }* - %7 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %6, i32 0, i32 0 - store i64* %__result.coerce0, i64** %7, align 8 - %8 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %6, i32 0, i32 1 - store i32 %__result.coerce1, i32* %8, align 8 - %9 = bitcast %"struct.std::_Bit_iterator"* %__last to %"struct.std::_Bit_iterator_base"* - %10 = bitcast %"struct.std::_Bit_iterator"* %__first to %"struct.std::_Bit_iterator_base"* - %call = call i64 @_ZStmiRKSt18_Bit_iterator_baseS1_(%"struct.std::_Bit_iterator_base"* dereferenceable(16) %9, %"struct.std::_Bit_iterator_base"* dereferenceable(16) %10) - store i64 %call, i64* %__n, align 8 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %11 = load i64, i64* %__n, align 8 - %cmp = icmp sgt i64 %11, 0 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %call1 = call dereferenceable(16) %"struct.std::_Bit_iterator"* @_ZNSt13_Bit_iteratormmEv(%"struct.std::_Bit_iterator"* %__last) - %call2 = call { i64*, i64 } @_ZNKSt13_Bit_iteratordeEv(%"struct.std::_Bit_iterator"* %call1) - %12 = bitcast %"struct.std::_Bit_reference"* %ref.tmp to { i64*, i64 }* - %13 = getelementptr inbounds { i64*, i64 }, { i64*, i64 }* %12, i32 0, i32 0 - %14 = extractvalue { i64*, i64 } %call2, 0 - store i64* %14, i64** %13, align 8 - %15 = getelementptr inbounds { i64*, i64 }, { i64*, i64 }* %12, i32 0, i32 1 - %16 = extractvalue { i64*, i64 } %call2, 1 - store i64 %16, i64* %15, align 8 - %call4 = call dereferenceable(16) %"struct.std::_Bit_iterator"* @_ZNSt13_Bit_iteratormmEv(%"struct.std::_Bit_iterator"* %__result) - %call5 = call { i64*, i64 } @_ZNKSt13_Bit_iteratordeEv(%"struct.std::_Bit_iterator"* %call4) - %17 = bitcast %"struct.std::_Bit_reference"* %ref.tmp3 to { i64*, i64 }* - %18 = getelementptr inbounds { i64*, i64 }, { i64*, i64 }* %17, i32 0, i32 0 - %19 = extractvalue { i64*, i64 } %call5, 0 - store i64* %19, i64** %18, align 8 - %20 = getelementptr inbounds { i64*, i64 }, { i64*, i64 }* %17, i32 0, i32 1 - %21 = extractvalue { i64*, i64 } %call5, 1 - store i64 %21, i64* %20, align 8 - %call6 = call dereferenceable(16) %"struct.std::_Bit_reference"* @_ZNSt14_Bit_referenceaSERKS_(%"struct.std::_Bit_reference"* %ref.tmp3, %"struct.std::_Bit_reference"* dereferenceable(16) %ref.tmp) - br label %for.inc - -for.inc: ; preds = %for.body - %22 = load i64, i64* %__n, align 8 - %dec = add nsw i64 %22, -1 - store i64 %dec, i64* %__n, align 8 - br label %for.cond - -for.end: ; preds = %for.cond - %23 = bitcast %"struct.std::_Bit_iterator"* %retval to i8* - %24 = bitcast %"struct.std::_Bit_iterator"* %__result to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %23, i8* align 8 %24, i64 16, i1 false) - %25 = bitcast %"struct.std::_Bit_iterator"* %retval to { i64*, i32 }* - %26 = load { i64*, i32 }, { i64*, i32 }* %25, align 8 - ret { i64*, i32 } %26 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local dereferenceable(16) %"struct.std::_Bit_iterator"* @_ZNSt13_Bit_iteratormmEv(%"struct.std::_Bit_iterator"* %this) #0 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Bit_iterator"*, align 8 - store %"struct.std::_Bit_iterator"* %this, %"struct.std::_Bit_iterator"** %this.addr, align 8 - %this1 = load %"struct.std::_Bit_iterator"*, %"struct.std::_Bit_iterator"** %this.addr, align 8 - %0 = bitcast %"struct.std::_Bit_iterator"* %this1 to %"struct.std::_Bit_iterator_base"* - call void @_ZNSt18_Bit_iterator_base12_M_bump_downEv(%"struct.std::_Bit_iterator_base"* %0) - ret %"struct.std::_Bit_iterator"* %this1 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local dereferenceable(16) %"struct.std::_Bit_reference"* @_ZNSt14_Bit_referenceaSERKS_(%"struct.std::_Bit_reference"* %this, %"struct.std::_Bit_reference"* dereferenceable(16) %__x) #6 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Bit_reference"*, align 8 - %__x.addr = alloca %"struct.std::_Bit_reference"*, align 8 - store %"struct.std::_Bit_reference"* %this, %"struct.std::_Bit_reference"** %this.addr, align 8 - store %"struct.std::_Bit_reference"* %__x, %"struct.std::_Bit_reference"** %__x.addr, align 8 - %this1 = load %"struct.std::_Bit_reference"*, %"struct.std::_Bit_reference"** %this.addr, align 8 - %0 = load %"struct.std::_Bit_reference"*, %"struct.std::_Bit_reference"** %__x.addr, align 8 - %call = call zeroext i1 @_ZNKSt14_Bit_referencecvbEv(%"struct.std::_Bit_reference"* %0) - %call2 = call dereferenceable(16) %"struct.std::_Bit_reference"* @_ZNSt14_Bit_referenceaSEb(%"struct.std::_Bit_reference"* %this1, i1 zeroext %call) - ret %"struct.std::_Bit_reference"* %call2 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSt18_Bit_iterator_base12_M_bump_downEv(%"struct.std::_Bit_iterator_base"* %this) #6 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Bit_iterator_base"*, align 8 - store %"struct.std::_Bit_iterator_base"* %this, %"struct.std::_Bit_iterator_base"** %this.addr, align 8 - %this1 = load %"struct.std::_Bit_iterator_base"*, %"struct.std::_Bit_iterator_base"** %this.addr, align 8 - %_M_offset = getelementptr inbounds %"struct.std::_Bit_iterator_base", %"struct.std::_Bit_iterator_base"* %this1, i32 0, i32 1 - %0 = load i32, i32* %_M_offset, align 8 - %dec = add i32 %0, -1 - store i32 %dec, i32* %_M_offset, align 8 - %cmp = icmp eq i32 %0, 0 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - %_M_offset2 = getelementptr inbounds %"struct.std::_Bit_iterator_base", %"struct.std::_Bit_iterator_base"* %this1, i32 0, i32 1 - store i32 63, i32* %_M_offset2, align 8 - %_M_p = getelementptr inbounds %"struct.std::_Bit_iterator_base", %"struct.std::_Bit_iterator_base"* %this1, i32 0, i32 0 - %1 = load i64*, i64** %_M_p, align 8 - %incdec.ptr = getelementptr inbounds i64, i64* %1, i32 -1 - store i64* %incdec.ptr, i64** %_M_p, align 8 - br label %if.end - -if.end: ; preds = %if.then, %entry - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local i64 @_ZNKSt6vectorIbSaIbEE8max_sizeEv(%"class.std::vector.0"* %this) #0 comdat align 2 { -entry: - %this.addr = alloca %"class.std::vector.0"*, align 8 - %__isize = alloca i64, align 8 - %__asize = alloca i64, align 8 - store %"class.std::vector.0"* %this, %"class.std::vector.0"** %this.addr, align 8 - %this1 = load %"class.std::vector.0"*, %"class.std::vector.0"** %this.addr, align 8 - store i64 9223372036854775744, i64* %__isize, align 8 - %0 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Bvector_base"* - %call = call dereferenceable(1) %"class.std::allocator.1"* @_ZNKSt13_Bvector_baseISaIbEE20_M_get_Bit_allocatorEv(%"struct.std::_Bvector_base"* %0) - %call2 = call i64 @_ZN9__gnu_cxx14__alloc_traitsISaImEE8max_sizeERKS1_(%"class.std::allocator.1"* dereferenceable(1) %call) - store i64 %call2, i64* %__asize, align 8 - %1 = load i64, i64* %__asize, align 8 - %cmp = icmp ule i64 %1, 144115188075855871 - br i1 %cmp, label %cond.true, label %cond.false - -cond.true: ; preds = %entry - %2 = load i64, i64* %__asize, align 8 - %mul = mul i64 %2, 64 - br label %cond.end - -cond.false: ; preds = %entry - br label %cond.end - -cond.end: ; preds = %cond.false, %cond.true - %cond = phi i64 [ %mul, %cond.true ], [ 9223372036854775744, %cond.false ] - ret i64 %cond -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local i64 @_ZN9__gnu_cxx14__alloc_traitsISaImEE8max_sizeERKS1_(%"class.std::allocator.1"* dereferenceable(1) %__a) #6 comdat align 2 { -entry: - %__a.addr = alloca %"class.std::allocator.1"*, align 8 - store %"class.std::allocator.1"* %__a, %"class.std::allocator.1"** %__a.addr, align 8 - %0 = load %"class.std::allocator.1"*, %"class.std::allocator.1"** %__a.addr, align 8 - %1 = bitcast %"class.std::allocator.1"* %0 to %"class.__gnu_cxx::new_allocator.2"* - %call = call i64 @_ZNK9__gnu_cxx13new_allocatorImE8max_sizeEv(%"class.__gnu_cxx::new_allocator.2"* %1) #3 - ret i64 %call -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local { i64*, i32 } @_ZSt14__copy_move_a2ILb0ESt13_Bit_iteratorS0_ET1_T0_S2_S1_(i64* %__first.coerce0, i32 %__first.coerce1, i64* %__last.coerce0, i32 %__last.coerce1, i64* %__result.coerce0, i32 %__result.coerce1) #0 comdat { -entry: - %retval = alloca %"struct.std::_Bit_iterator", align 8 - %__first = alloca %"struct.std::_Bit_iterator", align 8 - %__last = alloca %"struct.std::_Bit_iterator", align 8 - %__result = alloca %"struct.std::_Bit_iterator", align 8 - %agg.tmp = alloca %"struct.std::_Bit_iterator", align 8 - %agg.tmp1 = alloca %"struct.std::_Bit_iterator", align 8 - %agg.tmp2 = alloca %"struct.std::_Bit_iterator", align 8 - %agg.tmp3 = alloca %"struct.std::_Bit_iterator", align 8 - %agg.tmp5 = alloca %"struct.std::_Bit_iterator", align 8 - %agg.tmp6 = alloca %"struct.std::_Bit_iterator", align 8 - %0 = bitcast %"struct.std::_Bit_iterator"* %__first to { i64*, i32 }* - %1 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %0, i32 0, i32 0 - store i64* %__first.coerce0, i64** %1, align 8 - %2 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %0, i32 0, i32 1 - store i32 %__first.coerce1, i32* %2, align 8 - %3 = bitcast %"struct.std::_Bit_iterator"* %__last to { i64*, i32 }* - %4 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %3, i32 0, i32 0 - store i64* %__last.coerce0, i64** %4, align 8 - %5 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %3, i32 0, i32 1 - store i32 %__last.coerce1, i32* %5, align 8 - %6 = bitcast %"struct.std::_Bit_iterator"* %__result to { i64*, i32 }* - %7 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %6, i32 0, i32 0 - store i64* %__result.coerce0, i64** %7, align 8 - %8 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %6, i32 0, i32 1 - store i32 %__result.coerce1, i32* %8, align 8 - %9 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp1 to i8* - %10 = bitcast %"struct.std::_Bit_iterator"* %__first to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %9, i8* align 8 %10, i64 16, i1 false) - %11 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp1 to { i64*, i32 }* - %12 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %11, i32 0, i32 0 - %13 = load i64*, i64** %12, align 8 - %14 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %11, i32 0, i32 1 - %15 = load i32, i32* %14, align 8 - %call = call { i64*, i32 } @_ZSt12__niter_baseISt13_Bit_iteratorET_S1_(i64* %13, i32 %15) - %16 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp to { i64*, i32 }* - %17 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %16, i32 0, i32 0 - %18 = extractvalue { i64*, i32 } %call, 0 - store i64* %18, i64** %17, align 8 - %19 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %16, i32 0, i32 1 - %20 = extractvalue { i64*, i32 } %call, 1 - store i32 %20, i32* %19, align 8 - %21 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp3 to i8* - %22 = bitcast %"struct.std::_Bit_iterator"* %__last to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %21, i8* align 8 %22, i64 16, i1 false) - %23 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp3 to { i64*, i32 }* - %24 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %23, i32 0, i32 0 - %25 = load i64*, i64** %24, align 8 - %26 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %23, i32 0, i32 1 - %27 = load i32, i32* %26, align 8 - %call4 = call { i64*, i32 } @_ZSt12__niter_baseISt13_Bit_iteratorET_S1_(i64* %25, i32 %27) - %28 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp2 to { i64*, i32 }* - %29 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %28, i32 0, i32 0 - %30 = extractvalue { i64*, i32 } %call4, 0 - store i64* %30, i64** %29, align 8 - %31 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %28, i32 0, i32 1 - %32 = extractvalue { i64*, i32 } %call4, 1 - store i32 %32, i32* %31, align 8 - %33 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp6 to i8* - %34 = bitcast %"struct.std::_Bit_iterator"* %__result to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %33, i8* align 8 %34, i64 16, i1 false) - %35 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp6 to { i64*, i32 }* - %36 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %35, i32 0, i32 0 - %37 = load i64*, i64** %36, align 8 - %38 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %35, i32 0, i32 1 - %39 = load i32, i32* %38, align 8 - %call7 = call { i64*, i32 } @_ZSt12__niter_baseISt13_Bit_iteratorET_S1_(i64* %37, i32 %39) - %40 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp5 to { i64*, i32 }* - %41 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %40, i32 0, i32 0 - %42 = extractvalue { i64*, i32 } %call7, 0 - store i64* %42, i64** %41, align 8 - %43 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %40, i32 0, i32 1 - %44 = extractvalue { i64*, i32 } %call7, 1 - store i32 %44, i32* %43, align 8 - %45 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp to { i64*, i32 }* - %46 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %45, i32 0, i32 0 - %47 = load i64*, i64** %46, align 8 - %48 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %45, i32 0, i32 1 - %49 = load i32, i32* %48, align 8 - %50 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp2 to { i64*, i32 }* - %51 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %50, i32 0, i32 0 - %52 = load i64*, i64** %51, align 8 - %53 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %50, i32 0, i32 1 - %54 = load i32, i32* %53, align 8 - %55 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp5 to { i64*, i32 }* - %56 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %55, i32 0, i32 0 - %57 = load i64*, i64** %56, align 8 - %58 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %55, i32 0, i32 1 - %59 = load i32, i32* %58, align 8 - %call8 = call { i64*, i32 } @_ZSt13__copy_move_aILb0ESt13_Bit_iteratorS0_ET1_T0_S2_S1_(i64* %47, i32 %49, i64* %52, i32 %54, i64* %57, i32 %59) - %60 = bitcast %"struct.std::_Bit_iterator"* %retval to { i64*, i32 }* - %61 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %60, i32 0, i32 0 - %62 = extractvalue { i64*, i32 } %call8, 0 - store i64* %62, i64** %61, align 8 - %63 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %60, i32 0, i32 1 - %64 = extractvalue { i64*, i32 } %call8, 1 - store i32 %64, i32* %63, align 8 - %65 = bitcast %"struct.std::_Bit_iterator"* %retval to { i64*, i32 }* - %66 = load { i64*, i32 }, { i64*, i32 }* %65, align 8 - ret { i64*, i32 } %66 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local { i64*, i32 } @_ZSt13__copy_move_aILb0ESt13_Bit_iteratorS0_ET1_T0_S2_S1_(i64* %__first.coerce0, i32 %__first.coerce1, i64* %__last.coerce0, i32 %__last.coerce1, i64* %__result.coerce0, i32 %__result.coerce1) #0 comdat { -entry: - %retval = alloca %"struct.std::_Bit_iterator", align 8 - %__first = alloca %"struct.std::_Bit_iterator", align 8 - %__last = alloca %"struct.std::_Bit_iterator", align 8 - %__result = alloca %"struct.std::_Bit_iterator", align 8 - %__simple = alloca i8, align 1 - %agg.tmp = alloca %"struct.std::_Bit_iterator", align 8 - %agg.tmp1 = alloca %"struct.std::_Bit_iterator", align 8 - %agg.tmp2 = alloca %"struct.std::_Bit_iterator", align 8 - %0 = bitcast %"struct.std::_Bit_iterator"* %__first to { i64*, i32 }* - %1 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %0, i32 0, i32 0 - store i64* %__first.coerce0, i64** %1, align 8 - %2 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %0, i32 0, i32 1 - store i32 %__first.coerce1, i32* %2, align 8 - %3 = bitcast %"struct.std::_Bit_iterator"* %__last to { i64*, i32 }* - %4 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %3, i32 0, i32 0 - store i64* %__last.coerce0, i64** %4, align 8 - %5 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %3, i32 0, i32 1 - store i32 %__last.coerce1, i32* %5, align 8 - %6 = bitcast %"struct.std::_Bit_iterator"* %__result to { i64*, i32 }* - %7 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %6, i32 0, i32 0 - store i64* %__result.coerce0, i64** %7, align 8 - %8 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %6, i32 0, i32 1 - store i32 %__result.coerce1, i32* %8, align 8 - store i8 0, i8* %__simple, align 1 - %9 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp to i8* - %10 = bitcast %"struct.std::_Bit_iterator"* %__first to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %9, i8* align 8 %10, i64 16, i1 false) - %11 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp1 to i8* - %12 = bitcast %"struct.std::_Bit_iterator"* %__last to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %11, i8* align 8 %12, i64 16, i1 false) - %13 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp2 to i8* - %14 = bitcast %"struct.std::_Bit_iterator"* %__result to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %13, i8* align 8 %14, i64 16, i1 false) - %15 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp to { i64*, i32 }* - %16 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %15, i32 0, i32 0 - %17 = load i64*, i64** %16, align 8 - %18 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %15, i32 0, i32 1 - %19 = load i32, i32* %18, align 8 - %20 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp1 to { i64*, i32 }* - %21 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %20, i32 0, i32 0 - %22 = load i64*, i64** %21, align 8 - %23 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %20, i32 0, i32 1 - %24 = load i32, i32* %23, align 8 - %25 = bitcast %"struct.std::_Bit_iterator"* %agg.tmp2 to { i64*, i32 }* - %26 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %25, i32 0, i32 0 - %27 = load i64*, i64** %26, align 8 - %28 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %25, i32 0, i32 1 - %29 = load i32, i32* %28, align 8 - %call = call { i64*, i32 } @_ZNSt11__copy_moveILb0ELb0ESt26random_access_iterator_tagE8__copy_mISt13_Bit_iteratorS3_EET0_T_S5_S4_(i64* %17, i32 %19, i64* %22, i32 %24, i64* %27, i32 %29) - %30 = bitcast %"struct.std::_Bit_iterator"* %retval to { i64*, i32 }* - %31 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %30, i32 0, i32 0 - %32 = extractvalue { i64*, i32 } %call, 0 - store i64* %32, i64** %31, align 8 - %33 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %30, i32 0, i32 1 - %34 = extractvalue { i64*, i32 } %call, 1 - store i32 %34, i32* %33, align 8 - %35 = bitcast %"struct.std::_Bit_iterator"* %retval to { i64*, i32 }* - %36 = load { i64*, i32 }, { i64*, i32 }* %35, align 8 - ret { i64*, i32 } %36 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local { i64*, i32 } @_ZNSt11__copy_moveILb0ELb0ESt26random_access_iterator_tagE8__copy_mISt13_Bit_iteratorS3_EET0_T_S5_S4_(i64* %__first.coerce0, i32 %__first.coerce1, i64* %__last.coerce0, i32 %__last.coerce1, i64* %__result.coerce0, i32 %__result.coerce1) #0 comdat align 2 { -entry: - %retval = alloca %"struct.std::_Bit_iterator", align 8 - %__first = alloca %"struct.std::_Bit_iterator", align 8 - %__last = alloca %"struct.std::_Bit_iterator", align 8 - %__result = alloca %"struct.std::_Bit_iterator", align 8 - %__n = alloca i64, align 8 - %ref.tmp = alloca %"struct.std::_Bit_reference", align 8 - %ref.tmp2 = alloca %"struct.std::_Bit_reference", align 8 - %0 = bitcast %"struct.std::_Bit_iterator"* %__first to { i64*, i32 }* - %1 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %0, i32 0, i32 0 - store i64* %__first.coerce0, i64** %1, align 8 - %2 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %0, i32 0, i32 1 - store i32 %__first.coerce1, i32* %2, align 8 - %3 = bitcast %"struct.std::_Bit_iterator"* %__last to { i64*, i32 }* - %4 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %3, i32 0, i32 0 - store i64* %__last.coerce0, i64** %4, align 8 - %5 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %3, i32 0, i32 1 - store i32 %__last.coerce1, i32* %5, align 8 - %6 = bitcast %"struct.std::_Bit_iterator"* %__result to { i64*, i32 }* - %7 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %6, i32 0, i32 0 - store i64* %__result.coerce0, i64** %7, align 8 - %8 = getelementptr inbounds { i64*, i32 }, { i64*, i32 }* %6, i32 0, i32 1 - store i32 %__result.coerce1, i32* %8, align 8 - %9 = bitcast %"struct.std::_Bit_iterator"* %__last to %"struct.std::_Bit_iterator_base"* - %10 = bitcast %"struct.std::_Bit_iterator"* %__first to %"struct.std::_Bit_iterator_base"* - %call = call i64 @_ZStmiRKSt18_Bit_iterator_baseS1_(%"struct.std::_Bit_iterator_base"* dereferenceable(16) %9, %"struct.std::_Bit_iterator_base"* dereferenceable(16) %10) - store i64 %call, i64* %__n, align 8 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %11 = load i64, i64* %__n, align 8 - %cmp = icmp sgt i64 %11, 0 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %call1 = call { i64*, i64 } @_ZNKSt13_Bit_iteratordeEv(%"struct.std::_Bit_iterator"* %__first) - %12 = bitcast %"struct.std::_Bit_reference"* %ref.tmp to { i64*, i64 }* - %13 = getelementptr inbounds { i64*, i64 }, { i64*, i64 }* %12, i32 0, i32 0 - %14 = extractvalue { i64*, i64 } %call1, 0 - store i64* %14, i64** %13, align 8 - %15 = getelementptr inbounds { i64*, i64 }, { i64*, i64 }* %12, i32 0, i32 1 - %16 = extractvalue { i64*, i64 } %call1, 1 - store i64 %16, i64* %15, align 8 - %call3 = call { i64*, i64 } @_ZNKSt13_Bit_iteratordeEv(%"struct.std::_Bit_iterator"* %__result) - %17 = bitcast %"struct.std::_Bit_reference"* %ref.tmp2 to { i64*, i64 }* - %18 = getelementptr inbounds { i64*, i64 }, { i64*, i64 }* %17, i32 0, i32 0 - %19 = extractvalue { i64*, i64 } %call3, 0 - store i64* %19, i64** %18, align 8 - %20 = getelementptr inbounds { i64*, i64 }, { i64*, i64 }* %17, i32 0, i32 1 - %21 = extractvalue { i64*, i64 } %call3, 1 - store i64 %21, i64* %20, align 8 - %call4 = call dereferenceable(16) %"struct.std::_Bit_reference"* @_ZNSt14_Bit_referenceaSERKS_(%"struct.std::_Bit_reference"* %ref.tmp2, %"struct.std::_Bit_reference"* dereferenceable(16) %ref.tmp) - %call5 = call dereferenceable(16) %"struct.std::_Bit_iterator"* @_ZNSt13_Bit_iteratorppEv(%"struct.std::_Bit_iterator"* %__first) - %call6 = call dereferenceable(16) %"struct.std::_Bit_iterator"* @_ZNSt13_Bit_iteratorppEv(%"struct.std::_Bit_iterator"* %__result) - br label %for.inc - -for.inc: ; preds = %for.body - %22 = load i64, i64* %__n, align 8 - %dec = add nsw i64 %22, -1 - store i64 %dec, i64* %__n, align 8 - br label %for.cond - -for.end: ; preds = %for.cond - %23 = bitcast %"struct.std::_Bit_iterator"* %retval to i8* - %24 = bitcast %"struct.std::_Bit_iterator"* %__result to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %23, i8* align 8 %24, i64 16, i1 false) - %25 = bitcast %"struct.std::_Bit_iterator"* %retval to { i64*, i32 }* - %26 = load { i64*, i32 }, { i64*, i32 }* %25, align 8 - ret { i64*, i32 } %26 -} - -; Function Attrs: noinline uwtable -define internal void @_GLOBAL__sub_I_main_test_cu.cu() #2 section ".text.startup" { -entry: - call void @__cxx_global_var_init() - ret void -} - -define internal void @__cuda_register_globals(i8** %0) { -entry: - %1 = call i32 @__cudaRegisterFunction(i8** %0, i8* bitcast (void (i8*, i64, i32*)* @_Z12histo_kernelPhlPj to i8*), i8* getelementptr inbounds ([22 x i8], [22 x i8]* @0, i64 0, i64 0), i8* getelementptr inbounds ([22 x i8], [22 x i8]* @0, i64 0, i64 0), i32 -1, i8* null, i8* null, i8* null, i8* null, i32* null) - %2 = call i32 @__cudaRegisterFunction(i8** %0, i8* bitcast (void (i32*, i32*, i32*, i32*, i32*, i32*, i32*, i32*)* @_ZL26vlc_encode_kernel_sm64huffPjPKjS1_S_S_S_S_S_ to i8*), i8* getelementptr inbounds ([50 x i8], [50 x i8]* @1, i64 0, i64 0), i8* getelementptr inbounds ([50 x i8], [50 x i8]* @1, i64 0, i64 0), i32 -1, i8* null, i8* null, i8* null, i8* null, i32* null) - %3 = call i32 @__cudaRegisterFunction(i8** %0, i8* bitcast (void (i32*, i32*, i32*, i32, i32, i32)* @_ZL7prescanILb1ELb0EEvPjPKjS0_iii to i8*), i8* getelementptr inbounds ([34 x i8], [34 x i8]* @2, i64 0, i64 0), i8* getelementptr inbounds ([34 x i8], [34 x i8]* @2, i64 0, i64 0), i32 -1, i8* null, i8* null, i8* null, i8* null, i32* null) - %4 = call i32 @__cudaRegisterFunction(i8** %0, i8* bitcast (void (i32*, i32*, i32*, i32, i32, i32)* @_ZL7prescanILb1ELb1EEvPjPKjS0_iii to i8*), i8* getelementptr inbounds ([34 x i8], [34 x i8]* @3, i64 0, i64 0), i8* getelementptr inbounds ([34 x i8], [34 x i8]* @3, i64 0, i64 0), i32 -1, i8* null, i8* null, i8* null, i8* null, i32* null) - %5 = call i32 @__cudaRegisterFunction(i8** %0, i8* bitcast (void (i32*, i32*, i32, i32, i32)* @_ZL10uniformAddPjS_iii to i8*), i8* getelementptr inbounds ([23 x i8], [23 x i8]* @4, i64 0, i64 0), i8* getelementptr inbounds ([23 x i8], [23 x i8]* @4, i64 0, i64 0), i32 -1, i8* null, i8* null, i8* null, i8* null, i32* null) - %6 = call i32 @__cudaRegisterFunction(i8** %0, i8* bitcast (void (i32*, i32*, i32*, i32, i32, i32)* @_ZL7prescanILb0ELb0EEvPjPKjS0_iii to i8*), i8* getelementptr inbounds ([34 x i8], [34 x i8]* @5, i64 0, i64 0), i8* getelementptr inbounds ([34 x i8], [34 x i8]* @5, i64 0, i64 0), i32 -1, i8* null, i8* null, i8* null, i8* null, i32* null) - %7 = call i32 @__cudaRegisterFunction(i8** %0, i8* bitcast (void (i32*, i32*, i32*, i32, i32, i32)* @_ZL7prescanILb0ELb1EEvPjPKjS0_iii to i8*), i8* getelementptr inbounds ([34 x i8], [34 x i8]* @6, i64 0, i64 0), i8* getelementptr inbounds ([34 x i8], [34 x i8]* @6, i64 0, i64 0), i32 -1, i8* null, i8* null, i8* null, i8* null, i32* null) - %8 = call i32 @__cudaRegisterFunction(i8** %0, i8* bitcast (void (i32*, i32*, i32*, i32*, i32)* @_ZL5pack2PjS_S_S_j to i8*), i8* getelementptr inbounds ([19 x i8], [19 x i8]* @7, i64 0, i64 0), i8* getelementptr inbounds ([19 x i8], [19 x i8]* @7, i64 0, i64 0), i32 -1, i8* null, i8* null, i8* null, i8* null, i32* null) - ret void -} - -declare dso_local i32 @__cudaRegisterFunction(i8**, i8*, i8*, i8*, i32, i8*, i8*, i8*, i8*, i32*) - -declare dso_local i32 @__cudaRegisterVar(i8**, i8*, i8*, i8*, i32, i32, i32, i32) - -declare dso_local i8** @__cudaRegisterFatBinary(i8*) - -define internal void @__cuda_module_ctor(i8* %0) { -entry: - %1 = call i8** @__cudaRegisterFatBinary(i8* bitcast ({ i32, i32, i8*, i8* }* @__cuda_fatbin_wrapper to i8*)) - store i8** %1, i8*** @__cuda_gpubin_handle, align 8 - call void @__cuda_register_globals(i8** %1) - call void @__cudaRegisterFatBinaryEnd(i8** %1) - %2 = call i32 @atexit(void (i8*)* @__cuda_module_dtor) - ret void -} - -declare dso_local void @__cudaRegisterFatBinaryEnd(i8**) - -declare dso_local void @__cudaUnregisterFatBinary(i8**) - -define internal void @__cuda_module_dtor(i8* %0) { -entry: - %1 = load i8**, i8*** @__cuda_gpubin_handle, align 8 - call void @__cudaUnregisterFatBinary(i8** %1) - ret void -} - -declare dso_local i32 @atexit(void (i8*)*) - -attributes #0 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { noinline uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #3 = { nounwind } -attributes #4 = { argmemonly nounwind willreturn } -attributes #5 = { noreturn nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #6 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #7 = { noinline noreturn nounwind } -attributes #8 = { nobuiltin "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #9 = { nobuiltin nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #10 = { nounwind readonly } -attributes #11 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #12 = { noinline norecurse optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #13 = { nounwind readnone speculatable willreturn } -attributes #14 = { nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #15 = { noreturn "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #16 = { noreturn nounwind } -attributes #17 = { builtin } -attributes #18 = { builtin nounwind } -attributes #19 = { noreturn } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]} -!1 = !{i32 1, !"wchar_size", i32 4} -!2 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"} diff --git a/examples/huffman/main_test_cu.cu b/examples/huffman/main_test_cu.cu deleted file mode 100755 index 229250a..0000000 --- a/examples/huffman/main_test_cu.cu +++ /dev/null @@ -1,225 +0,0 @@ -/* - * PAVLE - Parallel Variable-Length Encoder for CUDA. Main file. - * - * Copyright (C) 2009 Ana Balevic - * All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it under - * the terms of the MIT License. Read the full licence: - * http://www.opensource.org/licenses/mit-license.php - * - * If you find this program useful, please contact me and reference PAVLE home - * page in your work. - * - */ - -#include "comparison_helpers.h" -#include "cuda_helpers.h" -#include "load_data.h" -#include "print_helpers.h" -#include "stats_logger.h" -#include "stdafx.h" -#include -#include - -//#include "vlc_kernel_gm32.cu" -//#include "vlc_kernel_sm32.cu" -#include "vlc_kernel_sm64huff.cu" -//#include "vlc_kernel_dpt.cu" -//#include "vlc_kernel_dptt.cu" -//#include "scan_kernel.cu" -#include "cpuencode.h" -#include "pack_kernels.cu" -#include "scan.cu" - -long long get_time() { - struct timeval tv; - gettimeofday(&tv, NULL); - return (tv.tv_sec * 1000000) + tv.tv_usec; -} -void runVLCTest(char *file_name, uint num_block_threads, uint num_blocks = 1); - -extern "C" void cpu_vlc_encode(unsigned int *indata, unsigned int num_elements, - unsigned int *outdata, unsigned int *outsize, - unsigned int *codewords, - unsigned int *codewordlens); - -int main(int argc, char *argv[]) { - if (!InitCUDA()) { - return 0; - } - unsigned int num_block_threads = 256; - if (argc > 1) - for (int i = 1; i < argc; i++) - runVLCTest(argv[i], num_block_threads); - else { - runVLCTest(NULL, num_block_threads, 1024); - } - return 0; -} - -void runVLCTest(char *file_name, uint num_block_threads, uint num_blocks) { - printf("CUDA! Starting VLC Tests!\n"); - unsigned int - num_elements; // uint num_elements = num_blocks * num_block_threads; - unsigned int mem_size; // uint mem_size = num_elements * sizeof(int); - unsigned int symbol_type_size = sizeof(int); - //////// LOAD DATA /////////////// - double H; // entropy - initParams(file_name, num_block_threads, num_blocks, num_elements, mem_size, - symbol_type_size); - printf("Parameters: num_elements: %d, num_blocks: %d, num_block_threads: " - "%d\n----------------------------\n", - num_elements, num_blocks, num_block_threads); - ////////LOAD DATA /////////////// - uint *sourceData = (uint *)malloc(mem_size); - uint *destData = (uint *)malloc(mem_size); - uint *crefData = (uint *)malloc(mem_size); - - uint *codewords = (uint *)malloc(NUM_SYMBOLS * symbol_type_size); - uint *codewordlens = (uint *)malloc(NUM_SYMBOLS * symbol_type_size); - - uint *cw32 = (uint *)malloc(mem_size); - uint *cw32len = (uint *)malloc(mem_size); - uint *cw32idx = (uint *)malloc(mem_size); - - uint *cindex2 = (uint *)malloc(num_blocks * sizeof(int)); - - memset(sourceData, 0, mem_size); - memset(destData, 0, mem_size); - memset(crefData, 0, mem_size); - memset(cw32, 0, mem_size); - memset(cw32len, 0, mem_size); - memset(cw32idx, 0, mem_size); - memset(codewords, 0, NUM_SYMBOLS * symbol_type_size); - memset(codewordlens, 0, NUM_SYMBOLS * symbol_type_size); - memset(cindex2, 0, num_blocks * sizeof(int)); - //////// LOAD DATA /////////////// - loadData(file_name, sourceData, codewords, codewordlens, num_elements, - mem_size, H); - - //////// LOAD DATA /////////////// - - unsigned int *d_sourceData, *d_destData, *d_destDataPacked; - unsigned int *d_codewords, *d_codewordlens; - unsigned int *d_cw32, *d_cw32len, *d_cw32idx, *d_cindex, *d_cindex2; - - CUDA_SAFE_CALL(cudaMalloc((void **)&d_sourceData, mem_size)); - CUDA_SAFE_CALL(cudaMalloc((void **)&d_destData, mem_size)); - CUDA_SAFE_CALL(cudaMalloc((void **)&d_destDataPacked, mem_size)); - - CUDA_SAFE_CALL( - cudaMalloc((void **)&d_codewords, NUM_SYMBOLS * symbol_type_size)); - CUDA_SAFE_CALL( - cudaMalloc((void **)&d_codewordlens, NUM_SYMBOLS * symbol_type_size)); - - CUDA_SAFE_CALL(cudaMalloc((void **)&d_cw32, mem_size)); - CUDA_SAFE_CALL(cudaMalloc((void **)&d_cw32len, mem_size)); - CUDA_SAFE_CALL(cudaMalloc((void **)&d_cw32idx, mem_size)); - - CUDA_SAFE_CALL( - cudaMalloc((void **)&d_cindex, num_blocks * sizeof(unsigned int))); - CUDA_SAFE_CALL( - cudaMalloc((void **)&d_cindex2, num_blocks * sizeof(unsigned int))); - // printf("source data\n"); - // for (int i = 0; i < 200; i++) { - // printf("%d ", sourceData[i]); - // } - // printf("\n"); - // printf("codewords\n"); - // for (int i = 0; i < 200; i++) { - // printf("%d ", codewords[i]); - // } - // printf("\n"); - // printf("codeword lens\n"); - // for (int i = 0; i < 200; i++) { - // printf("%d ", codewordlens[i]); - // } - // printf("\n"); - // return; - CUDA_SAFE_CALL( - cudaMemcpy(d_sourceData, sourceData, mem_size, cudaMemcpyHostToDevice)); - CUDA_SAFE_CALL(cudaMemcpy(d_codewords, codewords, - NUM_SYMBOLS * symbol_type_size, - cudaMemcpyHostToDevice)); - CUDA_SAFE_CALL(cudaMemcpy(d_codewordlens, codewordlens, - NUM_SYMBOLS * symbol_type_size, - cudaMemcpyHostToDevice)); - CUDA_SAFE_CALL( - cudaMemcpy(d_destData, destData, mem_size, cudaMemcpyHostToDevice)); - - dim3 grid_size(num_blocks, 1, 1); - dim3 block_size(num_block_threads, 1, 1); - unsigned int sm_size; - - unsigned int NT = 10; // number of runs for each execution time - - //////////////////* CPU ENCODER */////////////////////////////////// - unsigned int refbytesize; - long long timer = get_time(); - cpu_vlc_encode((unsigned int *)sourceData, num_elements, - (unsigned int *)crefData, &refbytesize, codewords, - codewordlens); - float msec = (float)((get_time() - timer) / 1000.0); - printf("CPU Encoding time (CPU): %f (ms)\n", msec); - printf("CPU Encoded to %d [B]\n", refbytesize); - unsigned int num_ints = refbytesize / 4 + ((refbytesize % 4 == 0) ? 0 : 1); - //////////////////* END CPU */////////////////////////////////// - - //////////////////* SM64HUFF KERNEL */////////////////////////////////// - grid_size.x = num_blocks; - block_size.x = num_block_threads; - sm_size = block_size.x * sizeof(unsigned int); -#ifdef CACHECWLUT - sm_size = 2 * NUM_SYMBOLS * sizeof(int) + block_size.x * sizeof(unsigned int); -#endif - - for (int i = 0; i < NT; i++) { - vlc_encode_kernel_sm64huff<<>>( - d_sourceData, d_codewords, d_codewordlens, -#ifdef TESTING - d_cw32, d_cw32len, d_cw32idx, -#endif - d_destData, d_cindex); // testedOK2 - cudaThreadSynchronize(); - } - // //////////////////* END KERNEL */////////////////////////////////// - -#ifdef TESTING - unsigned int num_scan_elements = grid_size.x; - preallocBlockSums(num_scan_elements); - cudaMemset(d_destDataPacked, 0, mem_size); - printf("Num_blocks to be passed to scan is %d.\n", num_scan_elements); - prescanArray(d_cindex2, d_cindex, num_scan_elements); - pack2<<>>( - (unsigned int *)d_destData, d_cindex, d_cindex2, - (unsigned int *)d_destDataPacked, num_elements / num_scan_elements); - cudaThreadSynchronize(); - CUT_CHECK_ERROR("Pack2 Kernel execution failed\n"); - deallocBlockSums(); - // return; - - CUDA_SAFE_CALL( - cudaMemcpy(destData, d_destDataPacked, mem_size, cudaMemcpyDeviceToHost)); - compare_vectors((unsigned int *)crefData, (unsigned int *)destData, num_ints); -#endif - - free(sourceData); - free(destData); - free(codewords); - free(codewordlens); - free(cw32); - free(cw32len); - free(crefData); - CUDA_SAFE_CALL(cudaFree(d_sourceData)); - CUDA_SAFE_CALL(cudaFree(d_destData)); - CUDA_SAFE_CALL(cudaFree(d_destDataPacked)); - CUDA_SAFE_CALL(cudaFree(d_codewords)); - CUDA_SAFE_CALL(cudaFree(d_codewordlens)); - CUDA_SAFE_CALL(cudaFree(d_cw32)); - CUDA_SAFE_CALL(cudaFree(d_cw32len)); - CUDA_SAFE_CALL(cudaFree(d_cw32idx)); - CUDA_SAFE_CALL(cudaFree(d_cindex)); - CUDA_SAFE_CALL(cudaFree(d_cindex2)); - free(cindex2); -} diff --git a/examples/huffman/pabio_kernels_v2.cu b/examples/huffman/pabio_kernels_v2.cu deleted file mode 100644 index 3474cb2..0000000 --- a/examples/huffman/pabio_kernels_v2.cu +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright Ana Balevic, 2008-2009. All rights reserved. - */ -#ifndef _PABIO_KERNEL2_H_ -#define _PABIO_KERNEL2_H_ - -#include "parameters.h" - -/* PARALLEL PUT BITS IMPLEMENTATION (CUDA1.1+ compatible) -* Set numbits in the destination word out[kc] starting from the position startbit -* Implementation comments: -* Second atomic operation actually sets these bits to the value stored in the codeword; the other bits are left unotuched -* First atomic operation is a necessary prepration - we change only the bits that will be affected by the codeword to be written to 1s -* in order for set bits to work with using atomicand. -* TODOs: benchmark performance 1) gm atomics vs sm atomics; 2) memset at init time vs. atomicOr -*/ -__device__ void static put_bits_atomic2(unsigned int* out, unsigned int kc, - unsigned int startbit, unsigned int numbits, - unsigned int codeword) { - unsigned int cw32 = codeword; - unsigned int restbits = 32-startbit-numbits; - - /* 1. Prepare the memory location */ -#ifndef MEMSET0 //Can remove this part if the contents of the memory are already set to all 0s - unsigned int mask = ((1< 0000...001111 - mask<<=restbits; //fill in zeros at the back positions -> 0000...001111000 -> 11111110000111111111111 (in order not to and other positions) - atomicAnd(&out[kc], ~mask); //set 0s in the destination from startbit in the len of numbits -#endif - - /* 2. Write the codeword */ - cw32 = cw32< 0000...001111 - mask<<=restbits; //fill in zeros at the back positions -> 0000...001111000 -> 11111110000111111111111 (in order not to and other positions) - atomicAnd(&out[kc], ~mask); //set 0s in the destination from startbit in the len of numbits -#endif - - /* 2. Write the codeword */ - if (startbit == 0 && restbits == 0) { - out[kc] = cw32; - } else { - cw32 = cw32<> bit; // cut off those bits that do not fit into the initial - // location in destData[] - atomicOr(&dstData[dword], tmp); // fill up this initial location - tmp = (bit == 0) ? 0 : (dw << 32 - bit); - for (i = 1; i < bitsize / 32; - i++) { // from now on, we have exclusive access to destData[] - dw = srcData[offset + i]; // load next dword from srcData[] - tmp |= dw >> bit; // fill up tmp - dstData[dword + i] = tmp; // write complete dword to destData[] - tmp = (bit == 0) ? 0 : (dw << 32 - bit); - } - // exclusive access to dstData[] ends here - // the remaining block can, or rather should be further optimized - // write the remaining bits in tmp, UNLESS bit is 0 and bitsize is divisible - // by 32, in this case do nothing - if (bit != 0 || bitsize % 32 != 0) - atomicOr(&dstData[dword + i], tmp); - if (bitsize % 32 != 0) { - dw = srcData[offset + i]; - atomicOr(&dstData[dword + i], dw >> bit); - atomicOr(&dstData[dword + i + 1], (bit == 0) ? 0 : (dw << 32 - bit)); - } -} - -#endif diff --git a/examples/huffman/parameters.h b/examples/huffman/parameters.h deleted file mode 100644 index d008df4..0000000 --- a/examples/huffman/parameters.h +++ /dev/null @@ -1,27 +0,0 @@ -#ifndef _PARAMS_H_ -#define _PARAMS_H_ - -typedef unsigned int uint; -typedef unsigned char uint8; - -#define BENCH 0 -/* 0 - MEASURE TIME, NO TESTING -** 1 - TEST -** 2 - TEST & VERBOSE -*/ -#define TESTING - -#define DPT 4 // data (dwords) per thread - -#define CACHECWLUT // MAX DPT = 8 -//#define CACHESRCDATA // MAX DPT = 4 - -#define SMATOMICS - -#define MEMSET0 - -#define MAX_SM_BLOCK_SIZE_GPU 16384 // B - -#define NUM_SYMBOLS 256 // fixed to 256. - -#endif diff --git a/examples/huffman/print_helpers.h b/examples/huffman/print_helpers.h deleted file mode 100644 index e84e990..0000000 --- a/examples/huffman/print_helpers.h +++ /dev/null @@ -1,217 +0,0 @@ -#ifndef _PRINT_HELPERS_H_ -#define _PRINT_HELPERS_H_ - -#include "parameters.h" -#include - -__inline void printdbg_data_bin(const char *filename, unsigned int *data, - unsigned int num_ints) { - FILE *dump = fopen((const char *)filename, "wt"); - for (unsigned int i = 0; i < num_ints; i++) { - unsigned int mask = 0x80000000; - for (unsigned int j = 0; j < 32; j++) { - if (data[i] & mask) - fprintf(dump, "1"); // printf("1"); - else - fprintf(dump, "0"); // printf("0"); - mask = mask >> 1; - } - fprintf(dump, "\n"); - } - fclose(dump); -} -__inline void printdbg_data_int(const char *filename, unsigned int *data, - unsigned int num_ints) { - FILE *dump = fopen((const char *)filename, "wt"); - for (unsigned int i = 0; i < num_ints; i++) { - fprintf(dump, "%d: %d\n", i, data[i]); - } - fclose(dump); -} - -__inline void printdbg_gpu_data_detailed(FILE *gpudump, unsigned int *cw32, - unsigned int *cw32len, - unsigned int *cw32idx, - unsigned int num_elements) { - for (unsigned int i = 0; i < num_elements; i++) { - fprintf(gpudump, "bp: %d, kc: %d, startbit: %d, cwlen: %d, cw:\t\t", - cw32idx[i], cw32idx[i] / 32, cw32idx[i] % 32, cw32len[i]); - // print codeword: - unsigned int mask = 0x80000000; - mask = mask >> (32 - cw32len[i]); - for (unsigned int j = 0; j < cw32len[i]; j++) { - if (cw32[i] & mask) - fprintf(gpudump, "1"); // printf("1"); - else - fprintf(gpudump, "0"); // printf("0"); - mask = mask >> 1; - } - fprintf(gpudump, "\n"); - } -} - -__inline void printdbg_gpu_data_detailed2(const char *filename, - unsigned int *cw32, - unsigned int *cw32len, - unsigned int *cw32idx, - unsigned int num_elements) { - FILE *gpudump = fopen((const char *)filename, "wt"); - for (unsigned int i = 0; i < num_elements; i++) { - fprintf(gpudump, "bp: %d, kc: %d, startbit: %d, cwlen: %d, cw:\t\t", - cw32idx[i], cw32idx[i] / 32, cw32idx[i] % 32, cw32len[i]); - // print codeword: - unsigned int mask = 0x80000000; - mask = mask >> (32 - cw32len[i]); - for (unsigned int j = 0; j < cw32len[i]; j++) { - if (cw32[i] & mask) - fprintf(gpudump, "1"); // printf("1"); - else - fprintf(gpudump, "0"); // printf("0"); - mask = mask >> 1; - } - fprintf(gpudump, "\n"); - } - fclose(gpudump); -} - -/************************************************************************/ -/* BIT PRINTS */ -/************************************************************************/ -__inline void printBits(unsigned char number) { - unsigned char mask = 0x80; - for (unsigned int j = 0; j < 8; j++) { - if (number & mask) - printf("1"); - else - printf("0"); - mask = mask >> 1; - } - printf(" "); -} -__inline void print32Bits(unsigned int number) { - unsigned int mask = 0x80000000; - for (unsigned int j = 0; j < 32; j++) { - if (number & mask) - printf("1"); - else - printf("0"); - mask = mask >> 1; - } - printf("\n"); -} -__inline void print32BitsM(unsigned int marker) { - for (unsigned int j = 0; j < 32; j++) { - if (marker == (j + 1)) - printf("|"); - else - printf("."); - } - printf("\n"); -} -__inline void print_array_char_as_bits(unsigned char *a, unsigned int len) { - - printf( - " ========================= Printing vector =======================\n"); - printf("Total number of elements is %d\n", len); - for (unsigned int i = 0; i < len; i++) { - printf("a[%d]=%d \t", i, a[i]); - printBits(a[i]); - printf("\n"); - } - printf("\n"); - printf( - " ==================================================================\n"); -} - -__inline void print_array_ints_as_bits(unsigned int *a, unsigned int len) { - - printf( - " ========================= Printing vector =======================\n"); - for (unsigned int i = 0; i < len; i++) { - print32Bits(a[i]); - printf("\n"); - } - printf("\n"); - printf( - " ==================================================================\n"); -} - -__inline void print_compare_array_ints_as_bits(unsigned int *a, unsigned int *b, - unsigned int len) { - - printf( - " ========================= Printing vector =======================\n"); - for (unsigned int i = 0; i < len; i++) { - print32Bits(a[i]); - print32Bits(b[i]); - printf("\n"); - } - printf("\n"); - printf( - " ==================================================================\n"); -} - -__inline void print_array_in_hex(unsigned int *a, unsigned int len) { - - printf( - " ========================= Printing vector =======================\n"); - // printf("Total number of elements is %d\n", len); - for (unsigned int i = 0; i < len; i++) { - printf("%#X\t", a[i]); - } - - printf("\n"); - printf( - " ==================================================================\n"); -} - -/************************************************************************/ -/* ARRAY PRINTS */ -/***********************************************************************/ - -template __inline void print_array(T *a, unsigned int len) { - - printf( - " ========================= Printing vector =======================\n"); - printf("Total number of elements is %d\n", len); - for (unsigned int i = 0; i < len; i++) { - printf("a[%d]=%d \t", i, a[i]); - } - - printf("\n"); - printf( - " ==================================================================\n"); -} - -template -__inline void print_rled_arrays(ST *rle_symbols, CT *rle_counts, - unsigned int rle_len) { - ST current_symbol; - CT current_count; - printf(" ========================= Printing RLE vector " - "=======================\n"); - printf(" Total number of RL Pairs is %d\n", rle_len); - for (unsigned int k = 0; k < rle_len; k++) { - current_symbol = rle_symbols[k]; - current_count = rle_counts[k]; - printf("(%d,%d) ,\t", current_symbol, current_count); - } - printf("\n"); -} - -__inline void print_packed_rle_array(unsigned int *rle, unsigned int rle_len) { - unsigned short current_symbol; - unsigned short current_count; - printf(" ========================= Printing RLE vector " - "=======================\n"); - printf(" Total number of RL Pairs is %d\n", rle_len); - for (unsigned int k = 0; k < rle_len; k++) { - current_symbol = (unsigned short)(rle[k] >> 16); // get the higher half-word - current_count = - (unsigned short)rle[k] & 0x0000FFFFF; // get the shorter half-word - printf("(%d,%d) ,\t", current_symbol, current_count); - } - printf("\n"); -} - -#endif // _PRINT_HELPERS_H_ diff --git a/examples/huffman/run.sh b/examples/huffman/run.sh deleted file mode 100644 index 97c57ec..0000000 --- a/examples/huffman/run.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash -set -e -# clang++ main_test_cu.cu --cuda-path=/usr/local/cuda-10.1 --cuda-gpu-arch=sm_61 -L/usr/local/cuda-10.1/lib64 -lcudart_static -ldl -lrt -pthread -save-temps -v -clang -c -emit-llvm cpuencode.cpp -llvm-as main_test_cu-cuda-nvptx64-nvidia-cuda-sm_61.ll -llvm-as main_test_cu-host-x86_64-unknown-linux-gnu.ll - -../../build/compilation/kernelTranslator main_test_cu-cuda-nvptx64-nvidia-cuda-sm_61.bc kernel.bc -../../build/compilation/hostTranslator main_test_cu-host-x86_64-unknown-linux-gnu.bc host.bc - -llc --relocation-model=pic --filetype=obj kernel.bc -llc --relocation-model=pic --filetype=obj host.bc -llc --relocation-model=pic --filetype=obj cpuencode.bc - -g++ -Wall -L../../build/runtime \ - -L../../build/runtime/threadPool -o pavle \ - -fPIC -no-pie host.o kernel.o cpuencode.o -lc -lx86Runtime -lthreadPool -lpthread - -export LD_LIBRARY_PATH=../../build/runtime:../../build/runtime/threadPool:$LD_LIBRARY_PATH -./pavle ../../rodinia-data/huffman/test1024_H2.206587175259.in diff --git a/examples/huffman/scan.cu b/examples/huffman/scan.cu deleted file mode 100755 index 2dd0ddf..0000000 --- a/examples/huffman/scan.cu +++ /dev/null @@ -1,216 +0,0 @@ -/* - * Copyright 1993-2006 NVIDIA Corporation. All rights reserved. - * - * NOTICE TO USER: - * - * This source code is subject to NVIDIA ownership rights under U.S. and - * international Copyright laws. - * - * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE - * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR - * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH - * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. - * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, - * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS - * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE - * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE - * OR PERFORMANCE OF THIS SOURCE CODE. - * - * U.S. Government End Users. This source code is a "commercial item" as - * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of - * "commercial computer software" and "commercial computer software - * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) - * and is provided to the U.S. Government only as a commercial end item. - * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through - * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the - * source code with only those rights set forth herein. - */ - -#ifndef _PRESCAN_CU_ -#define _PRESCAN_CU_ - -// includes, kernels -#include "cutil.h" -#include "scanLargeArray_kernel.cu" -#include -#include - -#define max(a, b) (a > b ? a : b) -inline bool isPowerOfTwo(int n) { return ((n & (n - 1)) == 0); } - -inline int floorPow2(int n) { -#ifdef WIN32 - // method 2 - return 1 << (int)logb((float)n); -#else - // method 1 - // float nf = (float)n; - // return 1 << (((*(int*)&nf) >> 23) - 127); - int exp; - frexp((float)n, &exp); - return 1 << (exp - 1); -#endif -} - -#define BLOCK_SIZE 256 - -static unsigned int **g_scanBlockSums; -static unsigned int g_numEltsAllocated = 0; -static unsigned int g_numLevelsAllocated = 0; - -static void preallocBlockSums(unsigned int maxNumElements) { - assert(g_numEltsAllocated == 0); // shouldn't be called - - g_numEltsAllocated = maxNumElements; - - unsigned int blockSize = BLOCK_SIZE; // max size of the thread blocks - unsigned int numElts = maxNumElements; - int level = 0; - - do { - unsigned int numBlocks = - max(1, (int)ceil((float)numElts / (2.f * blockSize))); - if (numBlocks > 1) - level++; - numElts = numBlocks; - } while (numElts > 1); - - g_scanBlockSums = (unsigned int **)malloc(level * sizeof(unsigned int *)); - g_numLevelsAllocated = level; - numElts = maxNumElements; - level = 0; - - do { - unsigned int numBlocks = - max(1, (int)ceil((float)numElts / (2.f * blockSize))); - if (numBlocks > 1) - CUDA_SAFE_CALL(cudaMalloc((void **)&g_scanBlockSums[level++], - numBlocks * sizeof(unsigned int))); - numElts = numBlocks; - } while (numElts > 1); - - CUT_CHECK_ERROR("preallocBlockSums"); -} - -static void deallocBlockSums() { - for (unsigned int i = 0; i < g_numLevelsAllocated; i++) { - cudaFree(g_scanBlockSums[i]); - } - - CUT_CHECK_ERROR("deallocBlockSums"); - - free((void **)g_scanBlockSums); - - g_scanBlockSums = 0; - g_numEltsAllocated = 0; - g_numLevelsAllocated = 0; -} - -static void prescanArrayRecursive(unsigned int *outArray, - const unsigned int *inArray, int numElements, - int level) { - unsigned int blockSize = BLOCK_SIZE; // max size of the thread blocks - unsigned int numBlocks = - max(1, (int)ceil((float)numElements / (2.f * blockSize))); - unsigned int numThreads; - - if (numBlocks > 1) - numThreads = blockSize; - else if (isPowerOfTwo(numElements)) - numThreads = numElements / 2; - else - numThreads = floorPow2(numElements); - - unsigned int numEltsPerBlock = numThreads * 2; - - // if this is a non-power-of-2 array, the last block will be non-full - // compute the smallest power of 2 able to compute its scan. - unsigned int numEltsLastBlock = - numElements - (numBlocks - 1) * numEltsPerBlock; - unsigned int numThreadsLastBlock = max(1, numEltsLastBlock / 2); - unsigned int np2LastBlock = 0; - unsigned int sharedMemLastBlock = 0; - - if (numEltsLastBlock != numEltsPerBlock) { - np2LastBlock = 1; - - if (!isPowerOfTwo(numEltsLastBlock)) - numThreadsLastBlock = floorPow2(numEltsLastBlock); - - unsigned int extraSpace = (2 * numThreadsLastBlock) / NUM_BANKS; - sharedMemLastBlock = - sizeof(unsigned int) * (2 * numThreadsLastBlock + extraSpace); - } - - // padding space is used to avoid shared memory bank conflicts - unsigned int extraSpace = numEltsPerBlock / NUM_BANKS; - unsigned int sharedMemSize = - sizeof(unsigned int) * (numEltsPerBlock + extraSpace); - -#ifdef DEBUG - if (numBlocks > 1) { - assert(g_numEltsAllocated >= numElements); - } -#endif - - // setup execution parameters - // if NP2, we process the last block separately - dim3 grid(max(1, numBlocks - np2LastBlock), 1, 1); - dim3 threads(numThreads, 1, 1); - - // make sure there are no CUDA errors before we start - CUT_CHECK_ERROR("prescanArrayRecursive before kernels"); - - // execute the scan - if (numBlocks > 1) { - prescan<<>>( - outArray, inArray, g_scanBlockSums[level], numThreads * 2, 0, 0); - cudaThreadSynchronize(); - CUT_CHECK_ERROR("prescanWithBlockSums"); - if (np2LastBlock) { - prescan<<<1, numThreadsLastBlock>>>( - outArray, inArray, g_scanBlockSums[level], numEltsLastBlock, - numBlocks - 1, numElements - numEltsLastBlock); - cudaThreadSynchronize(); - CUT_CHECK_ERROR("prescanNP2WithBlockSums"); - } - - // After scanning all the sub-blocks, we are mostly done. But now we - // need to take all of the last values of the sub-blocks and scan those. - // This will give us a new value that must be sdded to each block to - // get the final results. - // recursive (CPU) call - prescanArrayRecursive(g_scanBlockSums[level], g_scanBlockSums[level], - numBlocks, level + 1); - - uniformAdd<<>>(outArray, g_scanBlockSums[level], - numElements - numEltsLastBlock, 0, 0); - cudaThreadSynchronize(); - CUT_CHECK_ERROR("uniformAdd"); - if (np2LastBlock) { - uniformAdd<<<1, numThreadsLastBlock>>>(outArray, g_scanBlockSums[level], - numEltsLastBlock, numBlocks - 1, - numElements - numEltsLastBlock); - cudaThreadSynchronize(); - CUT_CHECK_ERROR("uniformAdd"); - } - } else if (isPowerOfTwo(numElements)) { - prescan - <<>>(outArray, inArray, 0, numThreads * 2, 0, 0); - cudaThreadSynchronize(); - CUT_CHECK_ERROR("prescan"); - } else { - prescan - <<>>(outArray, inArray, 0, numElements, 0, 0); - cudaThreadSynchronize(); - CUT_CHECK_ERROR("prescanNP2"); - } -} - -static void prescanArray(unsigned int *outArray, unsigned int *inArray, - int numElements) { - prescanArrayRecursive(outArray, inArray, numElements, 0); -} - -#endif // _PRESCAN_CU_ diff --git a/examples/huffman/scanLargeArray_kernel.cu b/examples/huffman/scanLargeArray_kernel.cu deleted file mode 100644 index acfca30..0000000 --- a/examples/huffman/scanLargeArray_kernel.cu +++ /dev/null @@ -1,237 +0,0 @@ -/* - * Copyright 1993-2006 NVIDIA Corporation. All rights reserved. - * - * NOTICE TO USER: - * - * This source code is subject to NVIDIA ownership rights under U.S. and - * international Copyright laws. - * - * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE - * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR - * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH - * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE. - * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, - * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS - * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE - * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE - * OR PERFORMANCE OF THIS SOURCE CODE. - * - * U.S. Government End Users. This source code is a "commercial item" as - * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of - * "commercial computer software" and "commercial computer software - * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995) - * and is provided to the U.S. Government only as a commercial end item. - * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through - * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the - * source code with only those rights set forth herein. - */ - -#ifndef _SCAN_BEST_KERNEL_CU_ -#define _SCAN_BEST_KERNEL_CU_ - -// Define this to more rigorously avoid bank conflicts, -// even at the lower (root) levels of the tree -// Note that due to the higher addressing overhead, performance -// is lower with ZERO_BANK_CONFLICTS enabled. It is provided -// as an example. -//#define ZERO_BANK_CONFLICTS - -// 16 banks on G80 -#define NUM_BANKS 16 -#define LOG_NUM_BANKS 4 - -#ifdef ZERO_BANK_CONFLICTS -#define CONFLICT_FREE_OFFSET(index) \ - ((index) >> LOG_NUM_BANKS + (index) >> (2 * LOG_NUM_BANKS)) -#else -#define CONFLICT_FREE_OFFSET(index) ((index) >> LOG_NUM_BANKS) -#endif - -/////////////////////////////////////////////////////////////////////////////// -// Work-efficient compute implementation of scan, one thread per 2 elements -// Work-efficient: O(log(n)) steps, and O(n) adds. -// Also shared storage efficient: Uses n + n/NUM_BANKS shared memory -- no -// ping-ponging Also avoids most bank conflicts using single-element offsets -// every NUM_BANKS elements. -// -// In addition, If ZERO_BANK_CONFLICTS is defined, uses -// n + n/NUM_BANKS + n/(NUM_BANKS*NUM_BANKS) -// shared memory. If ZERO_BANK_CONFLICTS is defined, avoids ALL bank conflicts -// using single-element offsets every NUM_BANKS elements, plus additional -// single-element offsets after every NUM_BANKS^2 elements. -// -// Uses a balanced tree type algorithm. See Blelloch, 1990 "Prefix Sums -// and Their Applications", or Prins and Chatterjee PRAM course notes: -// http://www.cs.unc.edu/~prins/Classes/203/Handouts/pram.pdf -// -// This work-efficient version is based on the algorithm presented in Guy -// Blelloch's excellent paper "Prefix sums and their applications". -// http://www-2.cs.cmu.edu/afs/cs.cmu.edu/project/scandal/public/papers/CMU-CS-90-190.html -// -// Pro: Work Efficient, very few bank conflicts (or zero if ZERO_BANK_CONFLICTS -// is defined) Con: More instructions to compute bank-conflict-free shared -// memory addressing, and slightly more shared memory storage used. -// - -template -__device__ static void -loadSharedChunkFromMem(unsigned int *s_data, const unsigned int *g_idata, int n, - int baseIndex, int &ai, int &bi, int &mem_ai, - int &mem_bi, int &bankOffsetA, int &bankOffsetB) { - int thid = threadIdx.x; - mem_ai = baseIndex + threadIdx.x; - mem_bi = mem_ai + blockDim.x; - - ai = thid; - bi = thid + blockDim.x; - - // compute spacing to avoid bank conflicts - bankOffsetA = CONFLICT_FREE_OFFSET(ai); - bankOffsetB = CONFLICT_FREE_OFFSET(bi); - - // Cache the computational window in shared memory - // pad values beyond n with zeros - s_data[ai + bankOffsetA] = g_idata[mem_ai]; - - if (isNP2) // compile-time decision - { - s_data[bi + bankOffsetB] = (bi < n) ? g_idata[mem_bi] : 0; - } else { - s_data[bi + bankOffsetB] = g_idata[mem_bi]; - } -} - -template -__device__ static void -storeSharedChunkToMem(unsigned int *g_odata, const unsigned int *s_data, int n, - int ai, int bi, int mem_ai, int mem_bi, int bankOffsetA, - int bankOffsetB) { - __syncthreads(); - - // write results to global memory - g_odata[mem_ai] = s_data[ai + bankOffsetA]; - if (isNP2) // compile-time decision - { - if (bi < n) - g_odata[mem_bi] = s_data[bi + bankOffsetB]; - } else { - g_odata[mem_bi] = s_data[bi + bankOffsetB]; - } -} - -template -__device__ static void clearLastElement(unsigned int *s_data, - unsigned int *g_blockSums, - int blockIndex) { - if (threadIdx.x == 0) { - int index = (blockDim.x << 1) - 1; - index += CONFLICT_FREE_OFFSET(index); - - if (storeSum) // compile-time decision - { - // write this block's total sum to the corresponding index in the - // blockSums array - g_blockSums[blockIndex] = s_data[index]; - } - - // zero the last element in the scan so it will propagate back to the front - s_data[index] = 0; - } -} - -__device__ static unsigned int buildSum(unsigned int *s_data) { - unsigned int thid = threadIdx.x; - unsigned int stride = 1; - - // build the sum in place up the tree - for (int d = blockDim.x; d > 0; d >>= 1) { - __syncthreads(); - - if (thid < d) { - int i = __mul24(__mul24(2, stride), thid); - int ai = i + stride - 1; - int bi = ai + stride; - - ai += CONFLICT_FREE_OFFSET(ai); - bi += CONFLICT_FREE_OFFSET(bi); - - s_data[bi] += s_data[ai]; - } - - stride *= 2; - } - - return stride; -} - -__device__ static void scanRootToLeaves(unsigned int *s_data, - unsigned int stride) { - unsigned int thid = threadIdx.x; - - // traverse down the tree building the scan in place - for (int d = 1; d <= blockDim.x; d *= 2) { - stride >>= 1; - - __syncthreads(); - - if (thid < d) { - int i = __mul24(__mul24(2, stride), thid); - int ai = i + stride - 1; - int bi = ai + stride; - - ai += CONFLICT_FREE_OFFSET(ai); - bi += CONFLICT_FREE_OFFSET(bi); - - unsigned int t = s_data[ai]; - s_data[ai] = s_data[bi]; - s_data[bi] += t; - } - } -} - -template -__device__ static void prescanBlock(unsigned int *data, int blockIndex, - unsigned int *blockSums) { - int stride = buildSum(data); // build the sum in place up the tree - clearLastElement(data, blockSums, - (blockIndex == 0) ? blockIdx.x : blockIndex); - scanRootToLeaves(data, stride); // traverse down tree to build the scan -} - -template -__global__ static void -prescan(unsigned int *g_odata, const unsigned int *g_idata, - unsigned int *g_blockSums, int n, int blockIndex, int baseIndex) { - int ai, bi, mem_ai, mem_bi, bankOffsetA, bankOffsetB; - __shared__ unsigned int s_data[3072]; - - // load data into shared memory - loadSharedChunkFromMem( - s_data, g_idata, n, - (baseIndex == 0) ? __mul24(blockIdx.x, (blockDim.x << 1)) : baseIndex, ai, - bi, mem_ai, mem_bi, bankOffsetA, bankOffsetB); - // scan the data in each block - prescanBlock(s_data, blockIndex, g_blockSums); - // write results to device memory - storeSharedChunkToMem(g_odata, s_data, n, ai, bi, mem_ai, mem_bi, - bankOffsetA, bankOffsetB); -} - -__global__ static void uniformAdd(unsigned int *g_data, unsigned int *uniforms, - int n, int blockOffset, int baseIndex) { - __shared__ unsigned int uni; - if (threadIdx.x == 0) - uni = uniforms[blockIdx.x + blockOffset]; - - unsigned int address = - __mul24(blockIdx.x, (blockDim.x << 1)) + baseIndex + threadIdx.x; - - __syncthreads(); - - // note two adds per thread - g_data[address] += uni; - g_data[address + blockDim.x] += (threadIdx.x + blockDim.x < n) * uni; -} - -#endif // #ifndef _SCAN_BEST_KERNEL_CU_ diff --git a/examples/huffman/stats_logger.cpp b/examples/huffman/stats_logger.cpp deleted file mode 100644 index 460efac..0000000 --- a/examples/huffman/stats_logger.cpp +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright 2009 Tjark Bringewat. All rights reserved. - */ - -#include "stats_logger.h" -#include "stdafx.h" -#include -#include -#include - -std::map filenames; - -void LogStats(const char *graphname, const char *seriesname, float xValue, - float yValue, const char *xAxisQuantity, - const char *yAxisQuantity, const char *xAxisUnit, - const char *yAxisUnit, const char *xAxisScaleType, - const char *yAxisScaleType, unsigned int seriesnumber, - const char *description) { - std::ostringstream temp, temp2; - temp << graphname << "__" << seriesname; - size_t exists = filenames.count(temp.str()); - if (!exists) - filenames[temp.str()] = seriesnumber; - temp2 << graphname << "__" << filenames[temp.str()] << "_" << seriesname - << ".txt"; - FILE *f; - if (!exists) { - f = fopen(temp2.str().c_str(), "wt"); - fprintf(f, "SERIES_NAME\n%s\n", seriesname); - fprintf(f, "X_AXIS_QUANTITY\n%s\n", xAxisQuantity); - fprintf(f, "Y_AXIS_QUANTITY\n%s\n", yAxisQuantity); - fprintf(f, "X_AXIS_UNIT\n%s\n", xAxisUnit); - fprintf(f, "Y_AXIS_UNIT\n%s\n", yAxisUnit); - fprintf(f, "X_AXIS_SCALE_TYPE\n%s\n", xAxisScaleType); - fprintf(f, "Y_AXIS_SCALE_TYPE\n%s\n", yAxisScaleType); - fprintf(f, "DESCRIPTION\n%s\n", description); - fprintf(f, "__DATA__\n"); - } else { - f = fopen(temp2.str().c_str(), "at"); - } - fprintf(f, "%f %f\n", xValue, yValue); - fclose(f); -} diff --git a/examples/huffman/stats_logger.h b/examples/huffman/stats_logger.h deleted file mode 100644 index c9381ab..0000000 --- a/examples/huffman/stats_logger.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Copyright Tjark Bringewat. All rights reserved. - */ - -#ifndef _STATS_LOGGER_H_ -#define _STATS_LOGGER_H_ - -#include -#pragma warning(disable : 4996) - -extern "C" void -LogStats(const char *graphname, const char *seriesname, float xValue, - float yValue, const char *xAxisQuantity, const char *yAxisQuantity, - const char *xAxisUnit = "", const char *yAxisUnit = "", - const char *xAxisScaleType = "lin", const char *yAxisScaleType = "lin", - unsigned int seriesnumber = 0, const char *description = ""); - -inline void LogStats2( - const char *graph, // Groups several functions into one graph. Only appears - // in the file name. - const char *function, // Name of the particular function. Appears in file - // name and legend. - float yValue, float xValue, const char *yAxisName = "Time", - const char *yAxisUnit = "ms", const char *xAxisName = "Data size", - const char *xAxisUnit = "MB", - const char *yAxisScaleType = "lin", // Can be lin or log for linear or - // logarithmic scale, respectively. - const char *xAxisScaleType = "log", - unsigned int fId = - 0, // Determines the order in which different functions are plotted to a - // common graph. Only appears in the file name. - const char *description = "") { - LogStats(graph, function, xValue, yValue, xAxisName, yAxisName, xAxisUnit, - yAxisUnit, xAxisScaleType, yAxisScaleType, fId, description); - if (strcmp(xAxisUnit, "MB") == 0 && strcmp(yAxisUnit, "ms") == 0) { - char buffer[100]; - strcpy(buffer, graph); - strcat(buffer, "_datarate"); - LogStats(buffer, function, xValue, (xValue * 1000.0f) / (yValue * 1024.0f), - xAxisName, "Data rate", xAxisUnit, "GB/s", xAxisScaleType, - yAxisScaleType, fId, description); - } -} - -#endif diff --git a/examples/huffman/stdafx.h b/examples/huffman/stdafx.h deleted file mode 100644 index f75dd45..0000000 --- a/examples/huffman/stdafx.h +++ /dev/null @@ -1,11 +0,0 @@ -#pragma once - -#include "cutil.h" -#include -#include -#include -#include -#include -#include -#include -#include diff --git a/examples/huffman/testdatagen.h b/examples/huffman/testdatagen.h deleted file mode 100644 index 605a5af..0000000 --- a/examples/huffman/testdatagen.h +++ /dev/null @@ -1,83 +0,0 @@ -#ifndef _TESTDATA_GEN_H_ -#define _TESTDATA_GEN_H_ - -#include "parameters.h" - -template -__inline__ void generateRLETestData(T *data, unsigned int num_blocks, - unsigned int num_block_threads) { - unsigned int i, j; - - /* generate first block*/ - for (i = 0; i < num_block_threads; i += 8) { - data[i] = 1; - data[i + 1] = 2; - data[i + 2] = 3; - data[i + 3] = 3; - data[i + 4] = 3; - data[i + 5] = 4; - data[i + 6] = 4; - data[i + 7] = 5; - } - /* copy contents of the first block to all other blocks (for testing only)*/ - for (j = 1; j < num_blocks; j++) - for (i = 0; i < num_block_threads; i++) - *(data + j * num_block_threads + i) = data[i]; -} - -template -__inline__ void generateRLETestDataLongRuns1(T *data, unsigned int num_blocks, - unsigned int num_block_threads, - unsigned int avg_run_len) { - unsigned int i, j; - - /* generate first block*/ - for (i = 0; i < num_block_threads / avg_run_len; i++) - for (j = 0; j < avg_run_len; j++) - data[i * avg_run_len + j] = i; - - /* copy contents of the first block to all other blocks (for testing only)*/ - for (j = 1; j < num_blocks; j++) - for (i = 0; i < num_block_threads; i++) - *(data + j * num_block_threads + i) = data[i]; -} - -// VLE TEST DATA VER2.0 - -// for testing only: generates codewords of the following lengths: 1, 2, 3, 4, -// 4, 5, 6, 7 -// and dummy odewords: 1, 10, 100, 1000, 1000, 10000, 100000, 1000000 -// equals 0x01, 0x02, 0x4, 0x8, 0x8, 0x10, 0x20, 0x40 -// num_symbols =256. Must be multiple of 8. -inline void generateCodewords(unsigned int *codewords, - unsigned int *codewordlens, - unsigned int num_symbols) { - unsigned int idx, i, j, numbits, k; // val, k; - /* Generate codeword lengths*/ - for (j = 0; j < num_symbols / 8; j++) { - for (i = 0; i < 4; i++) { // generate first half of length 1,2 3, 4 - idx = j * 8 + i; - codewordlens[idx] = i % 4 + 1; - } - for (i = 0; i < 4; i++) { // generate first half of length 4, 5, 6, 7 - idx = j * 8 + 4 + i; - codewordlens[idx] = i % 4 + 4; - } - } - /* Generate codewords*/ - for (k = 0; k < num_symbols; k++) { - numbits = codewordlens[k]; - codewords[k] = 0x01 << (numbits - 1); - } -} - -inline void generateData(unsigned int *data, unsigned int num_elements, - unsigned int *codewords, unsigned int *codewordlens, - unsigned int num_symbols) { - unsigned int i; - for (i = 0; i < num_elements; i++) { - data[i] = (unsigned int)(((float)rand() / (RAND_MAX + 1)) * num_symbols); - } -} - -#endif diff --git a/examples/huffman/vlc_kernel_sm64huff.cu b/examples/huffman/vlc_kernel_sm64huff.cu deleted file mode 100755 index 9a88015..0000000 --- a/examples/huffman/vlc_kernel_sm64huff.cu +++ /dev/null @@ -1,160 +0,0 @@ -#ifndef _VLC_SM64HUFF_KERNEL_H_ -#define _VLC_SM64HUFF_KERNEL_H_ - -#include "pabio_kernels_v2.cu" -#include "parameters.h" -#include - -#ifdef SMATOMICS - -/* HUFFMAN-FRIENDLY PAVLE - CHARACTERISTICS: - 1. CACHE CW_LUT INTO SM, LOAD AS 2 INT ARRAYS - 2. PARALLEL PREFIX SUM - 3. PARALLEL BIT I/O USING SHARED-MEMORY ATOMIC OPERATIONS (COMAPTIBLE WITH - CUDA1.3+) - - NOTES & ASSUMPTIONS: - - HUFFMAN-CODING FRIENDLY, SUPPORTS CODEWORDS OF 2X SIZE OF ORIGINAL - SYMBOLS (BYTES). - NUMBER OF THREADS PER BLOCK IS 256; IF YOU WANT TO PLAY - WITH DIFFERENT NUMBERS, THE CW CACHING SHOULD BE MODIFIED (SEE DPT* KERNELS) - - SM usage: 1x size of the input data (REUSE) + size of CWLUT - TURN ON CACHING FOR HIGH ENTROPY DATA! -*/ - -__global__ static void -vlc_encode_kernel_sm64huff(unsigned int *data, const unsigned int *gm_codewords, - const unsigned int *gm_codewordlens, -#ifdef TESTING - unsigned int *cw32, unsigned int *cw32len, - unsigned int *cw32idx, -#endif - unsigned int *out, unsigned int *outidx) { - - unsigned int kn = blockIdx.x * blockDim.x + threadIdx.x; - unsigned int k = threadIdx.x; - unsigned int kc, startbit, wrbits; - - unsigned long long cw64 = 0; - unsigned int val32, codewordlen = 0; - unsigned char tmpbyte, tmpcwlen; - unsigned int tmpcw32; - - __shared__ unsigned int sm[3072]; - __shared__ unsigned int kcmax; - -#ifdef CACHECWLUT - unsigned int *codewords = (unsigned int *)sm; - unsigned int *codewordlens = (unsigned int *)(sm + NUM_SYMBOLS); - unsigned int *as = (unsigned int *)(sm + 2 * NUM_SYMBOLS); - - /* Load the codewords and the original data*/ - codewords[k] = gm_codewords[k]; - codewordlens[k] = gm_codewordlens[k]; - val32 = data[kn]; - __syncthreads(); - for (unsigned int i = 0; i < 4; i++) { - tmpbyte = (unsigned char)(val32 >> ((3 - i) * 8)); - tmpcw32 = codewords[tmpbyte]; - tmpcwlen = codewordlens[tmpbyte]; - cw64 = (cw64 << tmpcwlen) | tmpcw32; - codewordlen += tmpcwlen; - } -#else - unsigned int *as = (unsigned int *)sm; - val32 = data[kn]; - for (unsigned int i = 0; i < 4; i++) { - tmpbyte = (unsigned char)(val32 >> ((3 - i) * 8)); - tmpcw32 = gm_codewords[tmpbyte]; - tmpcwlen = gm_codewordlens[tmpbyte]; - cw64 = (cw64 << tmpcwlen) | tmpcw32; - codewordlen += tmpcwlen; - } -#endif - as[k] = codewordlen; - __syncthreads(); - - /* Prefix sum of codeword lengths (denoted in bits) [inplace implementation] - */ - unsigned int offset = 1; - - /* Build the sum in place up the tree */ - for (unsigned int d = (blockDim.x) >> 1; d > 0; d >>= 1) { - __syncthreads(); - if (k < d) { - unsigned char ai = offset * (2 * k + 1) - 1; - unsigned char bi = offset * (2 * k + 2) - 1; - as[bi] += as[ai]; - } - offset *= 2; - } - - /* scan back down the tree */ - /* clear the last element */ - if (k == 0) - as[blockDim.x - 1] = 0; - - // traverse down the tree building the scan in place - for (unsigned int d = 1; d < blockDim.x; d *= 2) { - offset >>= 1; - __syncthreads(); - if (k < d) { - unsigned char ai = offset * (2 * k + 1) - 1; - unsigned char bi = offset * (2 * k + 2) - 1; - unsigned int t = as[ai]; - as[ai] = as[bi]; - as[bi] += t; - } - } - __syncthreads(); - - if (k == blockDim.x - 1) { - outidx[blockIdx.x] = as[k] + codewordlen; - kcmax = (as[k] + codewordlen) / 32; - // printf("kcmax: %d\n", kcmax); - } - - /* Write the codes */ - kc = as[k] / 32; - startbit = as[k] % 32; - as[k] = 0U; - __syncthreads(); - - /* Part 1*/ - wrbits = codewordlen > (32 - startbit) ? (32 - startbit) : codewordlen; - tmpcw32 = (unsigned int)(cw64 >> (codewordlen - wrbits)); - // if (wrbits == 32) as[kc] = tmpcw32; - // //unnecessary overhead; increases number of branches else - atomicOr(&as[kc], tmpcw32 << (32 - startbit - - wrbits)); // shift left in case it's shorter - // then the available space - codewordlen -= wrbits; - - /*Part 2*/ - if (codewordlen) { - wrbits = codewordlen > 32 ? 32 : codewordlen; - tmpcw32 = - (unsigned int)(cw64 >> (codewordlen - wrbits)) & ((1 << wrbits) - 1); - // if (wrbits == 32) as[kc+1] = tmpcw32; - // else - atomicOr(&as[kc + 1], tmpcw32 << (32 - wrbits)); - codewordlen -= wrbits; - } - - /*Part 3*/ - if (codewordlen) { - tmpcw32 = (unsigned int)(cw64 & ((1 << codewordlen) - 1)); - // if (wrbits == 32) as[kc+2] = tmpcw32; - // else - atomicOr(&as[kc + 2], tmpcw32 << (32 - codewordlen)); - } - - __syncthreads(); - - if (k <= kcmax) - out[kn] = as[k]; -} -////////////////////////////////////////////////////////////////////////////// -#endif - -#endif diff --git a/examples/lud/common-host-x86_64-unknown-linux-gnu.ll b/examples/lud/common-host-x86_64-unknown-linux-gnu.ll deleted file mode 100644 index 194405b..0000000 --- a/examples/lud/common-host-x86_64-unknown-linux-gnu.ll +++ /dev/null @@ -1,1291 +0,0 @@ -; ModuleID = 'common-host-x86_64-unknown-linux-gnu.bc' -source_filename = "./common/common.c" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -%struct.__stopwatch_t = type { %struct.timeval, %struct.timeval } -%struct.timeval = type { i64, i64 } -%struct.timezone = type { i32, i32 } -%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } -%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } - -$_ZSt4fabsf = comdat any - -$_ZSt3expf = comdat any - -@.str = private unnamed_addr constant [3 x i8] c"rb\00", align 1 -@.str.1 = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 -@.str.2 = private unnamed_addr constant [4 x i8] c"%f \00", align 1 -@.str.3 = private unnamed_addr constant [35 x i8] c"dismatch at (%d, %d): (o)%f (n)%f\0A\00", align 1 -@.str.4 = private unnamed_addr constant [6 x i8] c"PASS\0A\00", align 1 -@.str.5 = private unnamed_addr constant [2 x i8] c"\0A\00", align 1 - -; Function Attrs: noinline nounwind optnone uwtable -define dso_local void @stopwatch_start(%struct.__stopwatch_t* %sw) #0 { -entry: - %sw.addr = alloca %struct.__stopwatch_t*, align 8 - store %struct.__stopwatch_t* %sw, %struct.__stopwatch_t** %sw.addr, align 8 - %0 = load %struct.__stopwatch_t*, %struct.__stopwatch_t** %sw.addr, align 8 - %cmp = icmp eq %struct.__stopwatch_t* %0, null - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - br label %return - -if.end: ; preds = %entry - %1 = load %struct.__stopwatch_t*, %struct.__stopwatch_t** %sw.addr, align 8 - %begin = getelementptr inbounds %struct.__stopwatch_t, %struct.__stopwatch_t* %1, i32 0, i32 0 - %2 = bitcast %struct.timeval* %begin to i8* - call void @llvm.memset.p0i8.i64(i8* align 8 %2, i8 0, i64 16, i1 false) - %3 = load %struct.__stopwatch_t*, %struct.__stopwatch_t** %sw.addr, align 8 - %end = getelementptr inbounds %struct.__stopwatch_t, %struct.__stopwatch_t* %3, i32 0, i32 1 - %4 = bitcast %struct.timeval* %end to i8* - call void @llvm.memset.p0i8.i64(i8* align 8 %4, i8 0, i64 16, i1 false) - %5 = load %struct.__stopwatch_t*, %struct.__stopwatch_t** %sw.addr, align 8 - %begin1 = getelementptr inbounds %struct.__stopwatch_t, %struct.__stopwatch_t* %5, i32 0, i32 0 - %call = call i32 @gettimeofday(%struct.timeval* %begin1, %struct.timezone* null) #5 - br label %return - -return: ; preds = %if.end, %if.then - ret void -} - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1 immarg) #1 - -; Function Attrs: nounwind -declare dso_local i32 @gettimeofday(%struct.timeval*, %struct.timezone*) #2 - -; Function Attrs: noinline nounwind optnone uwtable -define dso_local void @stopwatch_stop(%struct.__stopwatch_t* %sw) #0 { -entry: - %sw.addr = alloca %struct.__stopwatch_t*, align 8 - store %struct.__stopwatch_t* %sw, %struct.__stopwatch_t** %sw.addr, align 8 - %0 = load %struct.__stopwatch_t*, %struct.__stopwatch_t** %sw.addr, align 8 - %cmp = icmp eq %struct.__stopwatch_t* %0, null - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - br label %return - -if.end: ; preds = %entry - %1 = load %struct.__stopwatch_t*, %struct.__stopwatch_t** %sw.addr, align 8 - %end = getelementptr inbounds %struct.__stopwatch_t, %struct.__stopwatch_t* %1, i32 0, i32 1 - %call = call i32 @gettimeofday(%struct.timeval* %end, %struct.timezone* null) #5 - br label %return - -return: ; preds = %if.end, %if.then - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define dso_local double @get_interval_by_sec(%struct.__stopwatch_t* %sw) #0 { -entry: - %retval = alloca double, align 8 - %sw.addr = alloca %struct.__stopwatch_t*, align 8 - store %struct.__stopwatch_t* %sw, %struct.__stopwatch_t** %sw.addr, align 8 - %0 = load %struct.__stopwatch_t*, %struct.__stopwatch_t** %sw.addr, align 8 - %cmp = icmp eq %struct.__stopwatch_t* %0, null - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - store double 0.000000e+00, double* %retval, align 8 - br label %return - -if.end: ; preds = %entry - %1 = load %struct.__stopwatch_t*, %struct.__stopwatch_t** %sw.addr, align 8 - %end = getelementptr inbounds %struct.__stopwatch_t, %struct.__stopwatch_t* %1, i32 0, i32 1 - %tv_sec = getelementptr inbounds %struct.timeval, %struct.timeval* %end, i32 0, i32 0 - %2 = load i64, i64* %tv_sec, align 8 - %3 = load %struct.__stopwatch_t*, %struct.__stopwatch_t** %sw.addr, align 8 - %begin = getelementptr inbounds %struct.__stopwatch_t, %struct.__stopwatch_t* %3, i32 0, i32 0 - %tv_sec1 = getelementptr inbounds %struct.timeval, %struct.timeval* %begin, i32 0, i32 0 - %4 = load i64, i64* %tv_sec1, align 8 - %sub = sub nsw i64 %2, %4 - %conv = sitofp i64 %sub to double - %5 = load %struct.__stopwatch_t*, %struct.__stopwatch_t** %sw.addr, align 8 - %end2 = getelementptr inbounds %struct.__stopwatch_t, %struct.__stopwatch_t* %5, i32 0, i32 1 - %tv_usec = getelementptr inbounds %struct.timeval, %struct.timeval* %end2, i32 0, i32 1 - %6 = load i64, i64* %tv_usec, align 8 - %7 = load %struct.__stopwatch_t*, %struct.__stopwatch_t** %sw.addr, align 8 - %begin3 = getelementptr inbounds %struct.__stopwatch_t, %struct.__stopwatch_t* %7, i32 0, i32 0 - %tv_usec4 = getelementptr inbounds %struct.timeval, %struct.timeval* %begin3, i32 0, i32 1 - %8 = load i64, i64* %tv_usec4, align 8 - %sub5 = sub nsw i64 %6, %8 - %conv6 = sitofp i64 %sub5 to double - %div = fdiv double %conv6, 1.000000e+06 - %add = fadd double %conv, %div - store double %add, double* %retval, align 8 - br label %return - -return: ; preds = %if.end, %if.then - %9 = load double, double* %retval, align 8 - ret double %9 -} - -; Function Attrs: noinline nounwind optnone uwtable -define dso_local i32 @get_interval_by_usec(%struct.__stopwatch_t* %sw) #0 { -entry: - %retval = alloca i32, align 4 - %sw.addr = alloca %struct.__stopwatch_t*, align 8 - store %struct.__stopwatch_t* %sw, %struct.__stopwatch_t** %sw.addr, align 8 - %0 = load %struct.__stopwatch_t*, %struct.__stopwatch_t** %sw.addr, align 8 - %cmp = icmp eq %struct.__stopwatch_t* %0, null - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - store i32 0, i32* %retval, align 4 - br label %return - -if.end: ; preds = %entry - %1 = load %struct.__stopwatch_t*, %struct.__stopwatch_t** %sw.addr, align 8 - %end = getelementptr inbounds %struct.__stopwatch_t, %struct.__stopwatch_t* %1, i32 0, i32 1 - %tv_sec = getelementptr inbounds %struct.timeval, %struct.timeval* %end, i32 0, i32 0 - %2 = load i64, i64* %tv_sec, align 8 - %3 = load %struct.__stopwatch_t*, %struct.__stopwatch_t** %sw.addr, align 8 - %begin = getelementptr inbounds %struct.__stopwatch_t, %struct.__stopwatch_t* %3, i32 0, i32 0 - %tv_sec1 = getelementptr inbounds %struct.timeval, %struct.timeval* %begin, i32 0, i32 0 - %4 = load i64, i64* %tv_sec1, align 8 - %sub = sub nsw i64 %2, %4 - %mul = mul nsw i64 %sub, 1000000 - %5 = load %struct.__stopwatch_t*, %struct.__stopwatch_t** %sw.addr, align 8 - %end2 = getelementptr inbounds %struct.__stopwatch_t, %struct.__stopwatch_t* %5, i32 0, i32 1 - %tv_usec = getelementptr inbounds %struct.timeval, %struct.timeval* %end2, i32 0, i32 1 - %6 = load i64, i64* %tv_usec, align 8 - %7 = load %struct.__stopwatch_t*, %struct.__stopwatch_t** %sw.addr, align 8 - %begin3 = getelementptr inbounds %struct.__stopwatch_t, %struct.__stopwatch_t* %7, i32 0, i32 0 - %tv_usec4 = getelementptr inbounds %struct.timeval, %struct.timeval* %begin3, i32 0, i32 1 - %8 = load i64, i64* %tv_usec4, align 8 - %sub5 = sub nsw i64 %6, %8 - %add = add nsw i64 %mul, %sub5 - %conv = trunc i64 %add to i32 - store i32 %conv, i32* %retval, align 4 - br label %return - -return: ; preds = %if.end, %if.then - %9 = load i32, i32* %retval, align 4 - ret i32 %9 -} - -; Function Attrs: noinline optnone uwtable -define dso_local i32 @create_matrix_from_file(float** %mp, i8* %filename, i32* %size_p) #3 { -entry: - %retval = alloca i32, align 4 - %mp.addr = alloca float**, align 8 - %filename.addr = alloca i8*, align 8 - %size_p.addr = alloca i32*, align 8 - %i = alloca i32, align 4 - %j = alloca i32, align 4 - %size = alloca i32, align 4 - %m = alloca float*, align 8 - %fp = alloca %struct._IO_FILE*, align 8 - store float** %mp, float*** %mp.addr, align 8 - store i8* %filename, i8** %filename.addr, align 8 - store i32* %size_p, i32** %size_p.addr, align 8 - store %struct._IO_FILE* null, %struct._IO_FILE** %fp, align 8 - %0 = load i8*, i8** %filename.addr, align 8 - %call = call %struct._IO_FILE* @fopen(i8* %0, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i64 0, i64 0)) - store %struct._IO_FILE* %call, %struct._IO_FILE** %fp, align 8 - %1 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8 - %cmp = icmp eq %struct._IO_FILE* %1, null - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - store i32 1, i32* %retval, align 4 - br label %return - -if.end: ; preds = %entry - %2 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8 - %call1 = call i32 (%struct._IO_FILE*, i8*, ...) @fscanf(%struct._IO_FILE* %2, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.1, i64 0, i64 0), i32* %size) - %3 = load i32, i32* %size, align 4 - %conv = sext i32 %3 to i64 - %mul = mul i64 4, %conv - %4 = load i32, i32* %size, align 4 - %conv2 = sext i32 %4 to i64 - %mul3 = mul i64 %mul, %conv2 - %call4 = call noalias i8* @malloc(i64 %mul3) #5 - %5 = bitcast i8* %call4 to float* - store float* %5, float** %m, align 8 - %6 = load float*, float** %m, align 8 - %cmp5 = icmp eq float* %6, null - br i1 %cmp5, label %if.then6, label %if.end8 - -if.then6: ; preds = %if.end - %7 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8 - %call7 = call i32 @fclose(%struct._IO_FILE* %7) - store i32 1, i32* %retval, align 4 - br label %return - -if.end8: ; preds = %if.end - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc17, %if.end8 - %8 = load i32, i32* %i, align 4 - %9 = load i32, i32* %size, align 4 - %cmp9 = icmp slt i32 %8, %9 - br i1 %cmp9, label %for.body, label %for.end19 - -for.body: ; preds = %for.cond - store i32 0, i32* %j, align 4 - br label %for.cond10 - -for.cond10: ; preds = %for.inc, %for.body - %10 = load i32, i32* %j, align 4 - %11 = load i32, i32* %size, align 4 - %cmp11 = icmp slt i32 %10, %11 - br i1 %cmp11, label %for.body12, label %for.end - -for.body12: ; preds = %for.cond10 - %12 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8 - %13 = load float*, float** %m, align 8 - %14 = load i32, i32* %i, align 4 - %15 = load i32, i32* %size, align 4 - %mul13 = mul nsw i32 %14, %15 - %idx.ext = sext i32 %mul13 to i64 - %add.ptr = getelementptr inbounds float, float* %13, i64 %idx.ext - %16 = load i32, i32* %j, align 4 - %idx.ext14 = sext i32 %16 to i64 - %add.ptr15 = getelementptr inbounds float, float* %add.ptr, i64 %idx.ext14 - %call16 = call i32 (%struct._IO_FILE*, i8*, ...) @fscanf(%struct._IO_FILE* %12, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.2, i64 0, i64 0), float* %add.ptr15) - br label %for.inc - -for.inc: ; preds = %for.body12 - %17 = load i32, i32* %j, align 4 - %inc = add nsw i32 %17, 1 - store i32 %inc, i32* %j, align 4 - br label %for.cond10 - -for.end: ; preds = %for.cond10 - br label %for.inc17 - -for.inc17: ; preds = %for.end - %18 = load i32, i32* %i, align 4 - %inc18 = add nsw i32 %18, 1 - store i32 %inc18, i32* %i, align 4 - br label %for.cond - -for.end19: ; preds = %for.cond - %19 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8 - %call20 = call i32 @fclose(%struct._IO_FILE* %19) - %20 = load i32, i32* %size, align 4 - %21 = load i32*, i32** %size_p.addr, align 8 - store i32 %20, i32* %21, align 4 - %22 = load float*, float** %m, align 8 - %23 = load float**, float*** %mp.addr, align 8 - store float* %22, float** %23, align 8 - store i32 0, i32* %retval, align 4 - br label %return - -return: ; preds = %for.end19, %if.then6, %if.then - %24 = load i32, i32* %retval, align 4 - ret i32 %24 -} - -declare dso_local %struct._IO_FILE* @fopen(i8*, i8*) #4 - -declare dso_local i32 @fscanf(%struct._IO_FILE*, i8*, ...) #4 - -; Function Attrs: nounwind -declare dso_local noalias i8* @malloc(i64) #2 - -declare dso_local i32 @fclose(%struct._IO_FILE*) #4 - -; Function Attrs: noinline nounwind optnone uwtable -define dso_local i32 @create_matrix_from_random(float** %mp, i32 %size) #0 { -entry: - %retval = alloca i32, align 4 - %mp.addr = alloca float**, align 8 - %size.addr = alloca i32, align 4 - %l = alloca float*, align 8 - %u = alloca float*, align 8 - %m = alloca float*, align 8 - %i = alloca i32, align 4 - %j = alloca i32, align 4 - %k = alloca i32, align 4 - store float** %mp, float*** %mp.addr, align 8 - store i32 %size, i32* %size.addr, align 4 - %call = call i64 @time(i64* null) #5 - %conv = trunc i64 %call to i32 - call void @srand(i32 %conv) #5 - %0 = load i32, i32* %size.addr, align 4 - %1 = load i32, i32* %size.addr, align 4 - %mul = mul nsw i32 %0, %1 - %conv1 = sext i32 %mul to i64 - %mul2 = mul i64 %conv1, 4 - %call3 = call noalias i8* @malloc(i64 %mul2) #5 - %2 = bitcast i8* %call3 to float* - store float* %2, float** %l, align 8 - %3 = load float*, float** %l, align 8 - %cmp = icmp eq float* %3, null - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - store i32 1, i32* %retval, align 4 - br label %return - -if.end: ; preds = %entry - %4 = load i32, i32* %size.addr, align 4 - %5 = load i32, i32* %size.addr, align 4 - %mul4 = mul nsw i32 %4, %5 - %conv5 = sext i32 %mul4 to i64 - %mul6 = mul i64 %conv5, 4 - %call7 = call noalias i8* @malloc(i64 %mul6) #5 - %6 = bitcast i8* %call7 to float* - store float* %6, float** %u, align 8 - %7 = load float*, float** %u, align 8 - %cmp8 = icmp eq float* %7, null - br i1 %cmp8, label %if.then9, label %if.end10 - -if.then9: ; preds = %if.end - %8 = load float*, float** %l, align 8 - %9 = bitcast float* %8 to i8* - call void @free(i8* %9) #5 - store i32 1, i32* %retval, align 4 - br label %return - -if.end10: ; preds = %if.end - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc33, %if.end10 - %10 = load i32, i32* %i, align 4 - %11 = load i32, i32* %size.addr, align 4 - %cmp11 = icmp slt i32 %10, %11 - br i1 %cmp11, label %for.body, label %for.end35 - -for.body: ; preds = %for.cond - store i32 0, i32* %j, align 4 - br label %for.cond12 - -for.cond12: ; preds = %for.inc, %for.body - %12 = load i32, i32* %j, align 4 - %13 = load i32, i32* %size.addr, align 4 - %cmp13 = icmp slt i32 %12, %13 - br i1 %cmp13, label %for.body14, label %for.end - -for.body14: ; preds = %for.cond12 - %14 = load i32, i32* %i, align 4 - %15 = load i32, i32* %j, align 4 - %cmp15 = icmp sgt i32 %14, %15 - br i1 %cmp15, label %if.then16, label %if.else - -if.then16: ; preds = %for.body14 - %call17 = call i32 @rand() #5 - %conv18 = sitofp i32 %call17 to float - %div = fdiv float %conv18, 0x41E0000000000000 - %16 = load float*, float** %l, align 8 - %17 = load i32, i32* %i, align 4 - %18 = load i32, i32* %size.addr, align 4 - %mul19 = mul nsw i32 %17, %18 - %19 = load i32, i32* %j, align 4 - %add = add nsw i32 %mul19, %19 - %idxprom = sext i32 %add to i64 - %arrayidx = getelementptr inbounds float, float* %16, i64 %idxprom - store float %div, float* %arrayidx, align 4 - br label %if.end32 - -if.else: ; preds = %for.body14 - %20 = load i32, i32* %i, align 4 - %21 = load i32, i32* %j, align 4 - %cmp20 = icmp eq i32 %20, %21 - br i1 %cmp20, label %if.then21, label %if.else26 - -if.then21: ; preds = %if.else - %22 = load float*, float** %l, align 8 - %23 = load i32, i32* %i, align 4 - %24 = load i32, i32* %size.addr, align 4 - %mul22 = mul nsw i32 %23, %24 - %25 = load i32, i32* %j, align 4 - %add23 = add nsw i32 %mul22, %25 - %idxprom24 = sext i32 %add23 to i64 - %arrayidx25 = getelementptr inbounds float, float* %22, i64 %idxprom24 - store float 1.000000e+00, float* %arrayidx25, align 4 - br label %if.end31 - -if.else26: ; preds = %if.else - %26 = load float*, float** %l, align 8 - %27 = load i32, i32* %i, align 4 - %28 = load i32, i32* %size.addr, align 4 - %mul27 = mul nsw i32 %27, %28 - %29 = load i32, i32* %j, align 4 - %add28 = add nsw i32 %mul27, %29 - %idxprom29 = sext i32 %add28 to i64 - %arrayidx30 = getelementptr inbounds float, float* %26, i64 %idxprom29 - store float 0.000000e+00, float* %arrayidx30, align 4 - br label %if.end31 - -if.end31: ; preds = %if.else26, %if.then21 - br label %if.end32 - -if.end32: ; preds = %if.end31, %if.then16 - br label %for.inc - -for.inc: ; preds = %if.end32 - %30 = load i32, i32* %j, align 4 - %inc = add nsw i32 %30, 1 - store i32 %inc, i32* %j, align 4 - br label %for.cond12 - -for.end: ; preds = %for.cond12 - br label %for.inc33 - -for.inc33: ; preds = %for.end - %31 = load i32, i32* %i, align 4 - %inc34 = add nsw i32 %31, 1 - store i32 %inc34, i32* %i, align 4 - br label %for.cond - -for.end35: ; preds = %for.cond - store i32 0, i32* %j, align 4 - br label %for.cond36 - -for.cond36: ; preds = %for.inc60, %for.end35 - %32 = load i32, i32* %j, align 4 - %33 = load i32, i32* %size.addr, align 4 - %cmp37 = icmp slt i32 %32, %33 - br i1 %cmp37, label %for.body38, label %for.end62 - -for.body38: ; preds = %for.cond36 - store i32 0, i32* %i, align 4 - br label %for.cond39 - -for.cond39: ; preds = %for.inc57, %for.body38 - %34 = load i32, i32* %i, align 4 - %35 = load i32, i32* %size.addr, align 4 - %cmp40 = icmp slt i32 %34, %35 - br i1 %cmp40, label %for.body41, label %for.end59 - -for.body41: ; preds = %for.cond39 - %36 = load i32, i32* %i, align 4 - %37 = load i32, i32* %j, align 4 - %cmp42 = icmp sgt i32 %36, %37 - br i1 %cmp42, label %if.then43, label %if.else48 - -if.then43: ; preds = %for.body41 - %38 = load float*, float** %u, align 8 - %39 = load i32, i32* %j, align 4 - %40 = load i32, i32* %size.addr, align 4 - %mul44 = mul nsw i32 %39, %40 - %41 = load i32, i32* %i, align 4 - %add45 = add nsw i32 %mul44, %41 - %idxprom46 = sext i32 %add45 to i64 - %arrayidx47 = getelementptr inbounds float, float* %38, i64 %idxprom46 - store float 0.000000e+00, float* %arrayidx47, align 4 - br label %if.end56 - -if.else48: ; preds = %for.body41 - %call49 = call i32 @rand() #5 - %conv50 = sitofp i32 %call49 to float - %div51 = fdiv float %conv50, 0x41E0000000000000 - %42 = load float*, float** %u, align 8 - %43 = load i32, i32* %j, align 4 - %44 = load i32, i32* %size.addr, align 4 - %mul52 = mul nsw i32 %43, %44 - %45 = load i32, i32* %i, align 4 - %add53 = add nsw i32 %mul52, %45 - %idxprom54 = sext i32 %add53 to i64 - %arrayidx55 = getelementptr inbounds float, float* %42, i64 %idxprom54 - store float %div51, float* %arrayidx55, align 4 - br label %if.end56 - -if.end56: ; preds = %if.else48, %if.then43 - br label %for.inc57 - -for.inc57: ; preds = %if.end56 - %46 = load i32, i32* %i, align 4 - %inc58 = add nsw i32 %46, 1 - store i32 %inc58, i32* %i, align 4 - br label %for.cond39 - -for.end59: ; preds = %for.cond39 - br label %for.inc60 - -for.inc60: ; preds = %for.end59 - %47 = load i32, i32* %j, align 4 - %inc61 = add nsw i32 %47, 1 - store i32 %inc61, i32* %j, align 4 - br label %for.cond36 - -for.end62: ; preds = %for.cond36 - store i32 0, i32* %i, align 4 - br label %for.cond63 - -for.cond63: ; preds = %for.inc92, %for.end62 - %48 = load i32, i32* %i, align 4 - %49 = load i32, i32* %size.addr, align 4 - %cmp64 = icmp slt i32 %48, %49 - br i1 %cmp64, label %for.body65, label %for.end94 - -for.body65: ; preds = %for.cond63 - store i32 0, i32* %j, align 4 - br label %for.cond66 - -for.cond66: ; preds = %for.inc89, %for.body65 - %50 = load i32, i32* %j, align 4 - %51 = load i32, i32* %size.addr, align 4 - %cmp67 = icmp slt i32 %50, %51 - br i1 %cmp67, label %for.body68, label %for.end91 - -for.body68: ; preds = %for.cond66 - store i32 0, i32* %k, align 4 - br label %for.cond69 - -for.cond69: ; preds = %for.inc86, %for.body68 - %52 = load i32, i32* %k, align 4 - %53 = load i32, i32* %i, align 4 - %54 = load i32, i32* %j, align 4 - %cmp70 = icmp slt i32 %53, %54 - br i1 %cmp70, label %cond.true, label %cond.false - -cond.true: ; preds = %for.cond69 - %55 = load i32, i32* %i, align 4 - br label %cond.end - -cond.false: ; preds = %for.cond69 - %56 = load i32, i32* %j, align 4 - br label %cond.end - -cond.end: ; preds = %cond.false, %cond.true - %cond = phi i32 [ %55, %cond.true ], [ %56, %cond.false ] - %cmp71 = icmp sle i32 %52, %cond - br i1 %cmp71, label %for.body72, label %for.end88 - -for.body72: ; preds = %cond.end - %57 = load float*, float** %l, align 8 - %58 = load i32, i32* %i, align 4 - %59 = load i32, i32* %size.addr, align 4 - %mul73 = mul nsw i32 %58, %59 - %60 = load i32, i32* %k, align 4 - %add74 = add nsw i32 %mul73, %60 - %idxprom75 = sext i32 %add74 to i64 - %arrayidx76 = getelementptr inbounds float, float* %57, i64 %idxprom75 - %61 = load float, float* %arrayidx76, align 4 - %62 = load float*, float** %u, align 8 - %63 = load i32, i32* %j, align 4 - %64 = load i32, i32* %size.addr, align 4 - %mul77 = mul nsw i32 %63, %64 - %65 = load i32, i32* %k, align 4 - %add78 = add nsw i32 %mul77, %65 - %idxprom79 = sext i32 %add78 to i64 - %arrayidx80 = getelementptr inbounds float, float* %62, i64 %idxprom79 - %66 = load float, float* %arrayidx80, align 4 - %mul81 = fmul float %61, %66 - %67 = load float*, float** %m, align 8 - %68 = load i32, i32* %i, align 4 - %69 = load i32, i32* %size.addr, align 4 - %mul82 = mul nsw i32 %68, %69 - %70 = load i32, i32* %j, align 4 - %add83 = add nsw i32 %mul82, %70 - %idxprom84 = sext i32 %add83 to i64 - %arrayidx85 = getelementptr inbounds float, float* %67, i64 %idxprom84 - store float %mul81, float* %arrayidx85, align 4 - br label %for.inc86 - -for.inc86: ; preds = %for.body72 - %71 = load i32, i32* %k, align 4 - %inc87 = add nsw i32 %71, 1 - store i32 %inc87, i32* %k, align 4 - br label %for.cond69 - -for.end88: ; preds = %cond.end - br label %for.inc89 - -for.inc89: ; preds = %for.end88 - %72 = load i32, i32* %j, align 4 - %inc90 = add nsw i32 %72, 1 - store i32 %inc90, i32* %j, align 4 - br label %for.cond66 - -for.end91: ; preds = %for.cond66 - br label %for.inc92 - -for.inc92: ; preds = %for.end91 - %73 = load i32, i32* %i, align 4 - %inc93 = add nsw i32 %73, 1 - store i32 %inc93, i32* %i, align 4 - br label %for.cond63 - -for.end94: ; preds = %for.cond63 - %74 = load float*, float** %l, align 8 - %75 = bitcast float* %74 to i8* - call void @free(i8* %75) #5 - %76 = load float*, float** %u, align 8 - %77 = bitcast float* %76 to i8* - call void @free(i8* %77) #5 - %78 = load float*, float** %m, align 8 - %79 = load float**, float*** %mp.addr, align 8 - store float* %78, float** %79, align 8 - store i32 0, i32* %retval, align 4 - br label %return - -return: ; preds = %for.end94, %if.then9, %if.then - %80 = load i32, i32* %retval, align 4 - ret i32 %80 -} - -; Function Attrs: nounwind -declare dso_local void @srand(i32) #2 - -; Function Attrs: nounwind -declare dso_local i64 @time(i64*) #2 - -; Function Attrs: nounwind -declare dso_local void @free(i8*) #2 - -; Function Attrs: nounwind -declare dso_local i32 @rand() #2 - -; Function Attrs: noinline nounwind optnone uwtable -define dso_local void @matrix_multiply(float* %inputa, float* %inputb, float* %output, i32 %size) #0 { -entry: - %inputa.addr = alloca float*, align 8 - %inputb.addr = alloca float*, align 8 - %output.addr = alloca float*, align 8 - %size.addr = alloca i32, align 4 - %i = alloca i32, align 4 - %j = alloca i32, align 4 - %k = alloca i32, align 4 - store float* %inputa, float** %inputa.addr, align 8 - store float* %inputb, float** %inputb.addr, align 8 - store float* %output, float** %output.addr, align 8 - store i32 %size, i32* %size.addr, align 4 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc19, %entry - %0 = load i32, i32* %i, align 4 - %1 = load i32, i32* %size.addr, align 4 - %cmp = icmp slt i32 %0, %1 - br i1 %cmp, label %for.body, label %for.end21 - -for.body: ; preds = %for.cond - store i32 0, i32* %k, align 4 - br label %for.cond1 - -for.cond1: ; preds = %for.inc16, %for.body - %2 = load i32, i32* %k, align 4 - %3 = load i32, i32* %size.addr, align 4 - %cmp2 = icmp slt i32 %2, %3 - br i1 %cmp2, label %for.body3, label %for.end18 - -for.body3: ; preds = %for.cond1 - store i32 0, i32* %j, align 4 - br label %for.cond4 - -for.cond4: ; preds = %for.inc, %for.body3 - %4 = load i32, i32* %j, align 4 - %5 = load i32, i32* %size.addr, align 4 - %cmp5 = icmp slt i32 %4, %5 - br i1 %cmp5, label %for.body6, label %for.end - -for.body6: ; preds = %for.cond4 - %6 = load float*, float** %inputa.addr, align 8 - %7 = load i32, i32* %i, align 4 - %8 = load i32, i32* %size.addr, align 4 - %mul = mul nsw i32 %7, %8 - %9 = load i32, i32* %k, align 4 - %add = add nsw i32 %mul, %9 - %idxprom = sext i32 %add to i64 - %arrayidx = getelementptr inbounds float, float* %6, i64 %idxprom - %10 = load float, float* %arrayidx, align 4 - %11 = load float*, float** %inputb.addr, align 8 - %12 = load i32, i32* %k, align 4 - %13 = load i32, i32* %size.addr, align 4 - %mul7 = mul nsw i32 %12, %13 - %14 = load i32, i32* %j, align 4 - %add8 = add nsw i32 %mul7, %14 - %idxprom9 = sext i32 %add8 to i64 - %arrayidx10 = getelementptr inbounds float, float* %11, i64 %idxprom9 - %15 = load float, float* %arrayidx10, align 4 - %mul11 = fmul float %10, %15 - %16 = load float*, float** %output.addr, align 8 - %17 = load i32, i32* %i, align 4 - %18 = load i32, i32* %size.addr, align 4 - %mul12 = mul nsw i32 %17, %18 - %19 = load i32, i32* %j, align 4 - %add13 = add nsw i32 %mul12, %19 - %idxprom14 = sext i32 %add13 to i64 - %arrayidx15 = getelementptr inbounds float, float* %16, i64 %idxprom14 - store float %mul11, float* %arrayidx15, align 4 - br label %for.inc - -for.inc: ; preds = %for.body6 - %20 = load i32, i32* %j, align 4 - %inc = add nsw i32 %20, 1 - store i32 %inc, i32* %j, align 4 - br label %for.cond4 - -for.end: ; preds = %for.cond4 - br label %for.inc16 - -for.inc16: ; preds = %for.end - %21 = load i32, i32* %k, align 4 - %inc17 = add nsw i32 %21, 1 - store i32 %inc17, i32* %k, align 4 - br label %for.cond1 - -for.end18: ; preds = %for.cond1 - br label %for.inc19 - -for.inc19: ; preds = %for.end18 - %22 = load i32, i32* %i, align 4 - %inc20 = add nsw i32 %22, 1 - store i32 %inc20, i32* %i, align 4 - br label %for.cond - -for.end21: ; preds = %for.cond - ret void -} - -; Function Attrs: noinline optnone uwtable -define dso_local i32 @lud_verify(float* %m, float* %lu, i32 %matrix_dim) #3 { -entry: - %m.addr = alloca float*, align 8 - %lu.addr = alloca float*, align 8 - %matrix_dim.addr = alloca i32, align 4 - %i = alloca i32, align 4 - %j = alloca i32, align 4 - %k = alloca i32, align 4 - %tmp = alloca float*, align 8 - %sum = alloca float, align 4 - %l = alloca float, align 4 - %u = alloca float, align 4 - store float* %m, float** %m.addr, align 8 - store float* %lu, float** %lu.addr, align 8 - store i32 %matrix_dim, i32* %matrix_dim.addr, align 4 - %0 = load i32, i32* %matrix_dim.addr, align 4 - %1 = load i32, i32* %matrix_dim.addr, align 4 - %mul = mul nsw i32 %0, %1 - %conv = sext i32 %mul to i64 - %mul1 = mul i64 %conv, 4 - %call = call noalias i8* @malloc(i64 %mul1) #5 - %2 = bitcast i8* %call to float* - store float* %2, float** %tmp, align 8 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc24, %entry - %3 = load i32, i32* %i, align 4 - %4 = load i32, i32* %matrix_dim.addr, align 4 - %cmp = icmp slt i32 %3, %4 - br i1 %cmp, label %for.body, label %for.end26 - -for.body: ; preds = %for.cond - store i32 0, i32* %j, align 4 - br label %for.cond2 - -for.cond2: ; preds = %for.inc21, %for.body - %5 = load i32, i32* %j, align 4 - %6 = load i32, i32* %matrix_dim.addr, align 4 - %cmp3 = icmp slt i32 %5, %6 - br i1 %cmp3, label %for.body4, label %for.end23 - -for.body4: ; preds = %for.cond2 - store float 0.000000e+00, float* %sum, align 4 - store i32 0, i32* %k, align 4 - br label %for.cond5 - -for.cond5: ; preds = %for.inc, %for.body4 - %7 = load i32, i32* %k, align 4 - %8 = load i32, i32* %i, align 4 - %9 = load i32, i32* %j, align 4 - %cmp6 = icmp slt i32 %8, %9 - br i1 %cmp6, label %cond.true, label %cond.false - -cond.true: ; preds = %for.cond5 - %10 = load i32, i32* %i, align 4 - br label %cond.end - -cond.false: ; preds = %for.cond5 - %11 = load i32, i32* %j, align 4 - br label %cond.end - -cond.end: ; preds = %cond.false, %cond.true - %cond = phi i32 [ %10, %cond.true ], [ %11, %cond.false ] - %cmp7 = icmp sle i32 %7, %cond - br i1 %cmp7, label %for.body8, label %for.end - -for.body8: ; preds = %cond.end - %12 = load i32, i32* %i, align 4 - %13 = load i32, i32* %k, align 4 - %cmp9 = icmp eq i32 %12, %13 - br i1 %cmp9, label %if.then, label %if.else - -if.then: ; preds = %for.body8 - store float 1.000000e+00, float* %l, align 4 - br label %if.end - -if.else: ; preds = %for.body8 - %14 = load float*, float** %lu.addr, align 8 - %15 = load i32, i32* %i, align 4 - %16 = load i32, i32* %matrix_dim.addr, align 4 - %mul10 = mul nsw i32 %15, %16 - %17 = load i32, i32* %k, align 4 - %add = add nsw i32 %mul10, %17 - %idxprom = sext i32 %add to i64 - %arrayidx = getelementptr inbounds float, float* %14, i64 %idxprom - %18 = load float, float* %arrayidx, align 4 - store float %18, float* %l, align 4 - br label %if.end - -if.end: ; preds = %if.else, %if.then - %19 = load float*, float** %lu.addr, align 8 - %20 = load i32, i32* %k, align 4 - %21 = load i32, i32* %matrix_dim.addr, align 4 - %mul11 = mul nsw i32 %20, %21 - %22 = load i32, i32* %j, align 4 - %add12 = add nsw i32 %mul11, %22 - %idxprom13 = sext i32 %add12 to i64 - %arrayidx14 = getelementptr inbounds float, float* %19, i64 %idxprom13 - %23 = load float, float* %arrayidx14, align 4 - store float %23, float* %u, align 4 - %24 = load float, float* %l, align 4 - %25 = load float, float* %u, align 4 - %mul15 = fmul float %24, %25 - %26 = load float, float* %sum, align 4 - %add16 = fadd float %26, %mul15 - store float %add16, float* %sum, align 4 - br label %for.inc - -for.inc: ; preds = %if.end - %27 = load i32, i32* %k, align 4 - %inc = add nsw i32 %27, 1 - store i32 %inc, i32* %k, align 4 - br label %for.cond5 - -for.end: ; preds = %cond.end - %28 = load float, float* %sum, align 4 - %29 = load float*, float** %tmp, align 8 - %30 = load i32, i32* %i, align 4 - %31 = load i32, i32* %matrix_dim.addr, align 4 - %mul17 = mul nsw i32 %30, %31 - %32 = load i32, i32* %j, align 4 - %add18 = add nsw i32 %mul17, %32 - %idxprom19 = sext i32 %add18 to i64 - %arrayidx20 = getelementptr inbounds float, float* %29, i64 %idxprom19 - store float %28, float* %arrayidx20, align 4 - br label %for.inc21 - -for.inc21: ; preds = %for.end - %33 = load i32, i32* %j, align 4 - %inc22 = add nsw i32 %33, 1 - store i32 %inc22, i32* %j, align 4 - br label %for.cond2 - -for.end23: ; preds = %for.cond2 - br label %for.inc24 - -for.inc24: ; preds = %for.end23 - %34 = load i32, i32* %i, align 4 - %inc25 = add nsw i32 %34, 1 - store i32 %inc25, i32* %i, align 4 - br label %for.cond - -for.end26: ; preds = %for.cond - store i32 0, i32* %i, align 4 - br label %for.cond27 - -for.cond27: ; preds = %for.inc60, %for.end26 - %35 = load i32, i32* %i, align 4 - %36 = load i32, i32* %matrix_dim.addr, align 4 - %cmp28 = icmp slt i32 %35, %36 - br i1 %cmp28, label %for.body29, label %for.end62 - -for.body29: ; preds = %for.cond27 - store i32 0, i32* %j, align 4 - br label %for.cond30 - -for.cond30: ; preds = %for.inc57, %for.body29 - %37 = load i32, i32* %j, align 4 - %38 = load i32, i32* %matrix_dim.addr, align 4 - %cmp31 = icmp slt i32 %37, %38 - br i1 %cmp31, label %for.body32, label %for.end59 - -for.body32: ; preds = %for.cond30 - %39 = load float*, float** %m.addr, align 8 - %40 = load i32, i32* %i, align 4 - %41 = load i32, i32* %matrix_dim.addr, align 4 - %mul33 = mul nsw i32 %40, %41 - %42 = load i32, i32* %j, align 4 - %add34 = add nsw i32 %mul33, %42 - %idxprom35 = sext i32 %add34 to i64 - %arrayidx36 = getelementptr inbounds float, float* %39, i64 %idxprom35 - %43 = load float, float* %arrayidx36, align 4 - %44 = load float*, float** %tmp, align 8 - %45 = load i32, i32* %i, align 4 - %46 = load i32, i32* %matrix_dim.addr, align 4 - %mul37 = mul nsw i32 %45, %46 - %47 = load i32, i32* %j, align 4 - %add38 = add nsw i32 %mul37, %47 - %idxprom39 = sext i32 %add38 to i64 - %arrayidx40 = getelementptr inbounds float, float* %44, i64 %idxprom39 - %48 = load float, float* %arrayidx40, align 4 - %sub = fsub float %43, %48 - %call41 = call float @_ZSt4fabsf(float %sub) - %conv42 = fpext float %call41 to double - %cmp43 = fcmp ogt double %conv42, 1.000000e-04 - br i1 %cmp43, label %if.then44, label %if.end56 - -if.then44: ; preds = %for.body32 - %49 = load i32, i32* %i, align 4 - %50 = load i32, i32* %j, align 4 - %51 = load float*, float** %m.addr, align 8 - %52 = load i32, i32* %i, align 4 - %53 = load i32, i32* %matrix_dim.addr, align 4 - %mul45 = mul nsw i32 %52, %53 - %54 = load i32, i32* %j, align 4 - %add46 = add nsw i32 %mul45, %54 - %idxprom47 = sext i32 %add46 to i64 - %arrayidx48 = getelementptr inbounds float, float* %51, i64 %idxprom47 - %55 = load float, float* %arrayidx48, align 4 - %conv49 = fpext float %55 to double - %56 = load float*, float** %tmp, align 8 - %57 = load i32, i32* %i, align 4 - %58 = load i32, i32* %matrix_dim.addr, align 4 - %mul50 = mul nsw i32 %57, %58 - %59 = load i32, i32* %j, align 4 - %add51 = add nsw i32 %mul50, %59 - %idxprom52 = sext i32 %add51 to i64 - %arrayidx53 = getelementptr inbounds float, float* %56, i64 %idxprom52 - %60 = load float, float* %arrayidx53, align 4 - %conv54 = fpext float %60 to double - %call55 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([35 x i8], [35 x i8]* @.str.3, i64 0, i64 0), i32 %49, i32 %50, double %conv49, double %conv54) - br label %if.end56 - -if.end56: ; preds = %if.then44, %for.body32 - br label %for.inc57 - -for.inc57: ; preds = %if.end56 - %61 = load i32, i32* %j, align 4 - %inc58 = add nsw i32 %61, 1 - store i32 %inc58, i32* %j, align 4 - br label %for.cond30 - -for.end59: ; preds = %for.cond30 - br label %for.inc60 - -for.inc60: ; preds = %for.end59 - %62 = load i32, i32* %i, align 4 - %inc61 = add nsw i32 %62, 1 - store i32 %inc61, i32* %i, align 4 - br label %for.cond27 - -for.end62: ; preds = %for.cond27 - %call63 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.4, i64 0, i64 0)) - %63 = load float*, float** %tmp, align 8 - %64 = bitcast float* %63 to i8* - call void @free(i8* %64) #5 - ret i32 0 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local float @_ZSt4fabsf(float %__x) #0 comdat { -entry: - %__x.addr = alloca float, align 4 - store float %__x, float* %__x.addr, align 4 - %0 = load float, float* %__x.addr, align 4 - %1 = call float @llvm.fabs.f32(float %0) - ret float %1 -} - -declare dso_local i32 @printf(i8*, ...) #4 - -; Function Attrs: noinline nounwind optnone uwtable -define dso_local void @matrix_duplicate(float* %src, float** %dst, i32 %matrix_dim) #0 { -entry: - %src.addr = alloca float*, align 8 - %dst.addr = alloca float**, align 8 - %matrix_dim.addr = alloca i32, align 4 - %s = alloca i32, align 4 - %p = alloca float*, align 8 - store float* %src, float** %src.addr, align 8 - store float** %dst, float*** %dst.addr, align 8 - store i32 %matrix_dim, i32* %matrix_dim.addr, align 4 - %0 = load i32, i32* %matrix_dim.addr, align 4 - %1 = load i32, i32* %matrix_dim.addr, align 4 - %mul = mul nsw i32 %0, %1 - %conv = sext i32 %mul to i64 - %mul1 = mul i64 %conv, 4 - %conv2 = trunc i64 %mul1 to i32 - store i32 %conv2, i32* %s, align 4 - %2 = load i32, i32* %s, align 4 - %conv3 = sext i32 %2 to i64 - %call = call noalias i8* @malloc(i64 %conv3) #5 - %3 = bitcast i8* %call to float* - store float* %3, float** %p, align 8 - %4 = load float*, float** %p, align 8 - %5 = bitcast float* %4 to i8* - %6 = load float*, float** %src.addr, align 8 - %7 = bitcast float* %6 to i8* - %8 = load i32, i32* %s, align 4 - %conv4 = sext i32 %8 to i64 - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %5, i8* align 4 %7, i64 %conv4, i1 false) - %9 = load float*, float** %p, align 8 - %10 = load float**, float*** %dst.addr, align 8 - store float* %9, float** %10, align 8 - ret void -} - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1 - -; Function Attrs: noinline optnone uwtable -define dso_local void @print_matrix(float* %m, i32 %matrix_dim) #3 { -entry: - %m.addr = alloca float*, align 8 - %matrix_dim.addr = alloca i32, align 4 - %i = alloca i32, align 4 - %j = alloca i32, align 4 - store float* %m, float** %m.addr, align 8 - store i32 %matrix_dim, i32* %matrix_dim.addr, align 4 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc5, %entry - %0 = load i32, i32* %i, align 4 - %1 = load i32, i32* %matrix_dim.addr, align 4 - %cmp = icmp slt i32 %0, %1 - br i1 %cmp, label %for.body, label %for.end7 - -for.body: ; preds = %for.cond - store i32 0, i32* %j, align 4 - br label %for.cond1 - -for.cond1: ; preds = %for.inc, %for.body - %2 = load i32, i32* %j, align 4 - %3 = load i32, i32* %matrix_dim.addr, align 4 - %cmp2 = icmp slt i32 %2, %3 - br i1 %cmp2, label %for.body3, label %for.end - -for.body3: ; preds = %for.cond1 - %4 = load float*, float** %m.addr, align 8 - %5 = load i32, i32* %i, align 4 - %6 = load i32, i32* %matrix_dim.addr, align 4 - %mul = mul nsw i32 %5, %6 - %7 = load i32, i32* %j, align 4 - %add = add nsw i32 %mul, %7 - %idxprom = sext i32 %add to i64 - %arrayidx = getelementptr inbounds float, float* %4, i64 %idxprom - %8 = load float, float* %arrayidx, align 4 - %conv = fpext float %8 to double - %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.2, i64 0, i64 0), double %conv) - br label %for.inc - -for.inc: ; preds = %for.body3 - %9 = load i32, i32* %j, align 4 - %inc = add nsw i32 %9, 1 - store i32 %inc, i32* %j, align 4 - br label %for.cond1 - -for.end: ; preds = %for.cond1 - %call4 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.5, i64 0, i64 0)) - br label %for.inc5 - -for.inc5: ; preds = %for.end - %10 = load i32, i32* %i, align 4 - %inc6 = add nsw i32 %10, 1 - store i32 %inc6, i32* %i, align 4 - br label %for.cond - -for.end7: ; preds = %for.cond - ret void -} - -; Function Attrs: noinline optnone uwtable -define dso_local i32 @create_matrix(float** %mp, i32 %size) #3 { -entry: - %retval = alloca i32, align 4 - %mp.addr = alloca float**, align 8 - %size.addr = alloca i32, align 4 - %m = alloca float*, align 8 - %i = alloca i32, align 4 - %j = alloca i32, align 4 - %lamda = alloca float, align 4 - %saved_stack = alloca i8*, align 8 - %__vla_expr0 = alloca i64, align 8 - %coe_i = alloca float, align 4 - %cleanup.dest.slot = alloca i32, align 4 - store float** %mp, float*** %mp.addr, align 8 - store i32 %size, i32* %size.addr, align 4 - store float 0xBF50624DE0000000, float* %lamda, align 4 - %0 = load i32, i32* %size.addr, align 4 - %mul = mul nsw i32 2, %0 - %sub = sub nsw i32 %mul, 1 - %1 = zext i32 %sub to i64 - %2 = call i8* @llvm.stacksave() - store i8* %2, i8** %saved_stack, align 8 - %vla = alloca float, i64 %1, align 16 - store i64 %1, i64* %__vla_expr0, align 8 - store float 0.000000e+00, float* %coe_i, align 4 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %3 = load i32, i32* %i, align 4 - %4 = load i32, i32* %size.addr, align 4 - %cmp = icmp slt i32 %3, %4 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %5 = load float, float* %lamda, align 4 - %6 = load i32, i32* %i, align 4 - %conv = sitofp i32 %6 to float - %mul1 = fmul float %5, %conv - %call = call float @_ZSt3expf(float %mul1) - %mul2 = fmul float 1.000000e+01, %call - store float %mul2, float* %coe_i, align 4 - %7 = load i32, i32* %size.addr, align 4 - %sub3 = sub nsw i32 %7, 1 - %8 = load i32, i32* %i, align 4 - %add = add nsw i32 %sub3, %8 - store i32 %add, i32* %j, align 4 - %9 = load float, float* %coe_i, align 4 - %10 = load i32, i32* %j, align 4 - %idxprom = sext i32 %10 to i64 - %arrayidx = getelementptr inbounds float, float* %vla, i64 %idxprom - store float %9, float* %arrayidx, align 4 - %11 = load i32, i32* %size.addr, align 4 - %sub4 = sub nsw i32 %11, 1 - %12 = load i32, i32* %i, align 4 - %sub5 = sub nsw i32 %sub4, %12 - store i32 %sub5, i32* %j, align 4 - %13 = load float, float* %coe_i, align 4 - %14 = load i32, i32* %j, align 4 - %idxprom6 = sext i32 %14 to i64 - %arrayidx7 = getelementptr inbounds float, float* %vla, i64 %idxprom6 - store float %13, float* %arrayidx7, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %15 = load i32, i32* %i, align 4 - %inc = add nsw i32 %15, 1 - store i32 %inc, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %16 = load i32, i32* %size.addr, align 4 - %conv8 = sext i32 %16 to i64 - %mul9 = mul i64 4, %conv8 - %17 = load i32, i32* %size.addr, align 4 - %conv10 = sext i32 %17 to i64 - %mul11 = mul i64 %mul9, %conv10 - %call12 = call noalias i8* @malloc(i64 %mul11) #5 - %18 = bitcast i8* %call12 to float* - store float* %18, float** %m, align 8 - %19 = load float*, float** %m, align 8 - %cmp13 = icmp eq float* %19, null - br i1 %cmp13, label %if.then, label %if.end - -if.then: ; preds = %for.end - store i32 1, i32* %retval, align 4 - store i32 1, i32* %cleanup.dest.slot, align 4 - br label %cleanup - -if.end: ; preds = %for.end - store i32 0, i32* %i, align 4 - br label %for.cond14 - -for.cond14: ; preds = %for.inc32, %if.end - %20 = load i32, i32* %i, align 4 - %21 = load i32, i32* %size.addr, align 4 - %cmp15 = icmp slt i32 %20, %21 - br i1 %cmp15, label %for.body16, label %for.end34 - -for.body16: ; preds = %for.cond14 - store i32 0, i32* %j, align 4 - br label %for.cond17 - -for.cond17: ; preds = %for.inc29, %for.body16 - %22 = load i32, i32* %j, align 4 - %23 = load i32, i32* %size.addr, align 4 - %cmp18 = icmp slt i32 %22, %23 - br i1 %cmp18, label %for.body19, label %for.end31 - -for.body19: ; preds = %for.cond17 - %24 = load i32, i32* %size.addr, align 4 - %sub20 = sub nsw i32 %24, 1 - %25 = load i32, i32* %i, align 4 - %sub21 = sub nsw i32 %sub20, %25 - %26 = load i32, i32* %j, align 4 - %add22 = add nsw i32 %sub21, %26 - %idxprom23 = sext i32 %add22 to i64 - %arrayidx24 = getelementptr inbounds float, float* %vla, i64 %idxprom23 - %27 = load float, float* %arrayidx24, align 4 - %28 = load float*, float** %m, align 8 - %29 = load i32, i32* %i, align 4 - %30 = load i32, i32* %size.addr, align 4 - %mul25 = mul nsw i32 %29, %30 - %31 = load i32, i32* %j, align 4 - %add26 = add nsw i32 %mul25, %31 - %idxprom27 = sext i32 %add26 to i64 - %arrayidx28 = getelementptr inbounds float, float* %28, i64 %idxprom27 - store float %27, float* %arrayidx28, align 4 - br label %for.inc29 - -for.inc29: ; preds = %for.body19 - %32 = load i32, i32* %j, align 4 - %inc30 = add nsw i32 %32, 1 - store i32 %inc30, i32* %j, align 4 - br label %for.cond17 - -for.end31: ; preds = %for.cond17 - br label %for.inc32 - -for.inc32: ; preds = %for.end31 - %33 = load i32, i32* %i, align 4 - %inc33 = add nsw i32 %33, 1 - store i32 %inc33, i32* %i, align 4 - br label %for.cond14 - -for.end34: ; preds = %for.cond14 - %34 = load float*, float** %m, align 8 - %35 = load float**, float*** %mp.addr, align 8 - store float* %34, float** %35, align 8 - store i32 0, i32* %retval, align 4 - store i32 1, i32* %cleanup.dest.slot, align 4 - br label %cleanup - -cleanup: ; preds = %for.end34, %if.then - %36 = load i8*, i8** %saved_stack, align 8 - call void @llvm.stackrestore(i8* %36) - %37 = load i32, i32* %retval, align 4 - ret i32 %37 -} - -; Function Attrs: nounwind -declare i8* @llvm.stacksave() #5 - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local float @_ZSt3expf(float %__x) #0 comdat { -entry: - %__x.addr = alloca float, align 4 - store float %__x, float* %__x.addr, align 4 - %0 = load float, float* %__x.addr, align 4 - %call = call float @expf(float %0) #5 - ret float %call -} - -; Function Attrs: nounwind -declare void @llvm.stackrestore(i8*) #5 - -; Function Attrs: nounwind readnone speculatable willreturn -declare float @llvm.fabs.f32(float) #6 - -; Function Attrs: nounwind -declare dso_local float @expf(float) #2 - -attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { argmemonly nounwind willreturn } -attributes #2 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #3 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #4 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #5 = { nounwind } -attributes #6 = { nounwind readnone speculatable willreturn } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]} -!1 = !{i32 1, !"wchar_size", i32 4} -!2 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"} diff --git a/examples/lud/lud-host-x86_64-unknown-linux-gnu.ll b/examples/lud/lud-host-x86_64-unknown-linux-gnu.ll deleted file mode 100644 index 2044338..0000000 --- a/examples/lud/lud-host-x86_64-unknown-linux-gnu.ll +++ /dev/null @@ -1,326 +0,0 @@ -; ModuleID = 'lud-host-x86_64-unknown-linux-gnu.bc' -source_filename = "cuda/lud.cu" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -%struct.option = type { i8*, i32, i32*, i32 } -%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } -%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } -%struct.__stopwatch_t = type { %struct.timeval, %struct.timeval } -%struct.timeval = type { i64, i64 } - -@.str = private unnamed_addr constant [29 x i8] c"WG size of kernel = %d X %d\0A\00", align 1 -@.str.1 = private unnamed_addr constant [8 x i8] c"::vs:i:\00", align 1 -@_ZL12long_options = internal global [4 x %struct.option] [%struct.option { i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.15, i32 0, i32 0), i32 1, i32* null, i32 105 }, %struct.option { i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.16, i32 0, i32 0), i32 1, i32* null, i32 115 }, %struct.option { i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.17, i32 0, i32 0), i32 0, i32* null, i32 118 }, %struct.option zeroinitializer], align 16 -@optarg = external dso_local global i8*, align 8 -@_ZL9do_verify = internal global i32 0, align 4 -@.str.2 = private unnamed_addr constant [44 x i8] c"Generate input matrix internally, size =%d\0A\00", align 1 -@stderr = external dso_local global %struct._IO_FILE*, align 8 -@.str.3 = private unnamed_addr constant [16 x i8] c"invalid option\0A\00", align 1 -@.str.4 = private unnamed_addr constant [18 x i8] c"missing argument\0A\00", align 1 -@.str.5 = private unnamed_addr constant [47 x i8] c"Usage: %s [-v] [-s matrix_size|-i input_file]\0A\00", align 1 -@optind = external dso_local global i32, align 4 -@.str.6 = private unnamed_addr constant [29 x i8] c"Reading matrix from file %s\0A\00", align 1 -@.str.7 = private unnamed_addr constant [34 x i8] c"error create matrix from file %s\0A\00", align 1 -@.str.8 = private unnamed_addr constant [36 x i8] c"Creating matrix internally size=%d\0A\00", align 1 -@.str.9 = private unnamed_addr constant [40 x i8] c"error create matrix internally size=%d\0A\00", align 1 -@.str.10 = private unnamed_addr constant [26 x i8] c"No input file specified!\0A\00", align 1 -@.str.11 = private unnamed_addr constant [12 x i8] c"Before LUD\0A\00", align 1 -@.str.12 = private unnamed_addr constant [24 x i8] c"Time consumed(ms): %lf\0A\00", align 1 -@.str.13 = private unnamed_addr constant [11 x i8] c"After LUD\0A\00", align 1 -@.str.14 = private unnamed_addr constant [15 x i8] c">>>Verify<<<<\0A\00", align 1 -@.str.15 = private unnamed_addr constant [6 x i8] c"input\00", align 1 -@.str.16 = private unnamed_addr constant [5 x i8] c"size\00", align 1 -@.str.17 = private unnamed_addr constant [7 x i8] c"verify\00", align 1 - -; Function Attrs: noinline norecurse optnone uwtable -define dso_local i32 @main(i32 %argc, i8** %argv) #0 { -entry: - %retval = alloca i32, align 4 - %argc.addr = alloca i32, align 4 - %argv.addr = alloca i8**, align 8 - %matrix_dim = alloca i32, align 4 - %opt = alloca i32, align 4 - %option_index = alloca i32, align 4 - %ret = alloca i32, align 4 - %input_file = alloca i8*, align 8 - %m = alloca float*, align 8 - %d_m = alloca float*, align 8 - %mm = alloca float*, align 8 - %sw = alloca %struct.__stopwatch_t, align 8 - store i32 0, i32* %retval, align 4 - store i32 %argc, i32* %argc.addr, align 4 - store i8** %argv, i8*** %argv.addr, align 8 - %call = call i32 @cudaSetDevice(i32 0) - %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([29 x i8], [29 x i8]* @.str, i64 0, i64 0), i32 16, i32 16) - store i32 32, i32* %matrix_dim, align 4 - store i32 0, i32* %option_index, align 4 - store i8* null, i8** %input_file, align 8 - br label %while.cond - -while.cond: ; preds = %sw.epilog, %entry - %0 = load i32, i32* %argc.addr, align 4 - %1 = load i8**, i8*** %argv.addr, align 8 - %call2 = call i32 @getopt_long(i32 %0, i8** %1, i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str.1, i64 0, i64 0), %struct.option* getelementptr inbounds ([4 x %struct.option], [4 x %struct.option]* @_ZL12long_options, i64 0, i64 0), i32* %option_index) #5 - store i32 %call2, i32* %opt, align 4 - %cmp = icmp ne i32 %call2, -1 - br i1 %cmp, label %while.body, label %while.end - -while.body: ; preds = %while.cond - %2 = load i32, i32* %opt, align 4 - switch i32 %2, label %sw.default [ - i32 105, label %sw.bb - i32 118, label %sw.bb3 - i32 115, label %sw.bb4 - i32 63, label %sw.bb7 - i32 58, label %sw.bb9 - ] - -sw.bb: ; preds = %while.body - %3 = load i8*, i8** @optarg, align 8 - store i8* %3, i8** %input_file, align 8 - br label %sw.epilog - -sw.bb3: ; preds = %while.body - store i32 1, i32* @_ZL9do_verify, align 4 - br label %sw.epilog - -sw.bb4: ; preds = %while.body - %4 = load i8*, i8** @optarg, align 8 - %call5 = call i32 @atoi(i8* %4) #6 - store i32 %call5, i32* %matrix_dim, align 4 - %5 = load i32, i32* %matrix_dim, align 4 - %call6 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([44 x i8], [44 x i8]* @.str.2, i64 0, i64 0), i32 %5) - br label %sw.epilog - -sw.bb7: ; preds = %while.body - %6 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %call8 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %6, i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.3, i64 0, i64 0)) - br label %sw.epilog - -sw.bb9: ; preds = %while.body - %7 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %call10 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %7, i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str.4, i64 0, i64 0)) - br label %sw.epilog - -sw.default: ; preds = %while.body - %8 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %9 = load i8**, i8*** %argv.addr, align 8 - %arrayidx = getelementptr inbounds i8*, i8** %9, i64 0 - %10 = load i8*, i8** %arrayidx, align 8 - %call11 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %8, i8* getelementptr inbounds ([47 x i8], [47 x i8]* @.str.5, i64 0, i64 0), i8* %10) - call void @exit(i32 1) #7 - unreachable - -sw.epilog: ; preds = %sw.bb9, %sw.bb7, %sw.bb4, %sw.bb3, %sw.bb - br label %while.cond - -while.end: ; preds = %while.cond - %11 = load i32, i32* @optind, align 4 - %12 = load i32, i32* %argc.addr, align 4 - %cmp12 = icmp slt i32 %11, %12 - br i1 %cmp12, label %if.then, label %lor.lhs.false - -lor.lhs.false: ; preds = %while.end - %13 = load i32, i32* @optind, align 4 - %cmp13 = icmp eq i32 %13, 1 - br i1 %cmp13, label %if.then, label %if.end - -if.then: ; preds = %lor.lhs.false, %while.end - %14 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %15 = load i8**, i8*** %argv.addr, align 8 - %arrayidx14 = getelementptr inbounds i8*, i8** %15, i64 0 - %16 = load i8*, i8** %arrayidx14, align 8 - %call15 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %14, i8* getelementptr inbounds ([47 x i8], [47 x i8]* @.str.5, i64 0, i64 0), i8* %16) - call void @exit(i32 1) #7 - unreachable - -if.end: ; preds = %lor.lhs.false - %17 = load i8*, i8** %input_file, align 8 - %tobool = icmp ne i8* %17, null - br i1 %tobool, label %if.then16, label %if.else - -if.then16: ; preds = %if.end - %18 = load i8*, i8** %input_file, align 8 - %call17 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([29 x i8], [29 x i8]* @.str.6, i64 0, i64 0), i8* %18) - %19 = load i8*, i8** %input_file, align 8 - %call18 = call i32 @create_matrix_from_file(float** %m, i8* %19, i32* %matrix_dim) - store i32 %call18, i32* %ret, align 4 - %20 = load i32, i32* %ret, align 4 - %cmp19 = icmp ne i32 %20, 0 - br i1 %cmp19, label %if.then20, label %if.end22 - -if.then20: ; preds = %if.then16 - store float* null, float** %m, align 8 - %21 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %22 = load i8*, i8** %input_file, align 8 - %call21 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %21, i8* getelementptr inbounds ([34 x i8], [34 x i8]* @.str.7, i64 0, i64 0), i8* %22) - call void @exit(i32 1) #7 - unreachable - -if.end22: ; preds = %if.then16 - br label %if.end34 - -if.else: ; preds = %if.end - %23 = load i32, i32* %matrix_dim, align 4 - %tobool23 = icmp ne i32 %23, 0 - br i1 %tobool23, label %if.then24, label %if.else31 - -if.then24: ; preds = %if.else - %24 = load i32, i32* %matrix_dim, align 4 - %call25 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str.8, i64 0, i64 0), i32 %24) - %25 = load i32, i32* %matrix_dim, align 4 - %call26 = call i32 @create_matrix(float** %m, i32 %25) - store i32 %call26, i32* %ret, align 4 - %26 = load i32, i32* %ret, align 4 - %cmp27 = icmp ne i32 %26, 0 - br i1 %cmp27, label %if.then28, label %if.end30 - -if.then28: ; preds = %if.then24 - store float* null, float** %m, align 8 - %27 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %28 = load i32, i32* %matrix_dim, align 4 - %call29 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %27, i8* getelementptr inbounds ([40 x i8], [40 x i8]* @.str.9, i64 0, i64 0), i32 %28) - call void @exit(i32 1) #7 - unreachable - -if.end30: ; preds = %if.then24 - br label %if.end33 - -if.else31: ; preds = %if.else - %call32 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([26 x i8], [26 x i8]* @.str.10, i64 0, i64 0)) - call void @exit(i32 1) #7 - unreachable - -if.end33: ; preds = %if.end30 - br label %if.end34 - -if.end34: ; preds = %if.end33, %if.end22 - %29 = load i32, i32* @_ZL9do_verify, align 4 - %tobool35 = icmp ne i32 %29, 0 - br i1 %tobool35, label %if.then36, label %if.end38 - -if.then36: ; preds = %if.end34 - %call37 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.11, i64 0, i64 0)) - %30 = load float*, float** %m, align 8 - %31 = load i32, i32* %matrix_dim, align 4 - call void @matrix_duplicate(float* %30, float** %mm, i32 %31) - br label %if.end38 - -if.end38: ; preds = %if.then36, %if.end34 - %32 = bitcast float** %d_m to i8** - %33 = load i32, i32* %matrix_dim, align 4 - %34 = load i32, i32* %matrix_dim, align 4 - %mul = mul nsw i32 %33, %34 - %conv = sext i32 %mul to i64 - %mul39 = mul i64 %conv, 4 - %call40 = call i32 @cudaMalloc(i8** %32, i64 %mul39) - call void @stopwatch_start(%struct.__stopwatch_t* %sw) - %35 = load float*, float** %d_m, align 8 - %36 = bitcast float* %35 to i8* - %37 = load float*, float** %m, align 8 - %38 = bitcast float* %37 to i8* - %39 = load i32, i32* %matrix_dim, align 4 - %40 = load i32, i32* %matrix_dim, align 4 - %mul41 = mul nsw i32 %39, %40 - %conv42 = sext i32 %mul41 to i64 - %mul43 = mul i64 %conv42, 4 - %call44 = call i32 @cudaMemcpy(i8* %36, i8* %38, i64 %mul43, i32 1) - %41 = load float*, float** %d_m, align 8 - %42 = load i32, i32* %matrix_dim, align 4 - call void @_Z8lud_cudaPfi(float* %41, i32 %42) - %43 = load float*, float** %m, align 8 - %44 = bitcast float* %43 to i8* - %45 = load float*, float** %d_m, align 8 - %46 = bitcast float* %45 to i8* - %47 = load i32, i32* %matrix_dim, align 4 - %48 = load i32, i32* %matrix_dim, align 4 - %mul45 = mul nsw i32 %47, %48 - %conv46 = sext i32 %mul45 to i64 - %mul47 = mul i64 %conv46, 4 - %call48 = call i32 @cudaMemcpy(i8* %44, i8* %46, i64 %mul47, i32 2) - call void @stopwatch_stop(%struct.__stopwatch_t* %sw) - %call49 = call double @get_interval_by_sec(%struct.__stopwatch_t* %sw) - %mul50 = fmul contract double 1.000000e+03, %call49 - %call51 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.12, i64 0, i64 0), double %mul50) - %49 = load float*, float** %d_m, align 8 - %50 = bitcast float* %49 to i8* - %call52 = call i32 @cudaFree(i8* %50) - %51 = load i32, i32* @_ZL9do_verify, align 4 - %tobool53 = icmp ne i32 %51, 0 - br i1 %tobool53, label %if.then54, label %if.end58 - -if.then54: ; preds = %if.end38 - %call55 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.13, i64 0, i64 0)) - %call56 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str.14, i64 0, i64 0)) - %52 = load float*, float** %mm, align 8 - %53 = load float*, float** %m, align 8 - %54 = load i32, i32* %matrix_dim, align 4 - %call57 = call i32 @lud_verify(float* %52, float* %53, i32 %54) - %55 = load float*, float** %mm, align 8 - %56 = bitcast float* %55 to i8* - call void @free(i8* %56) #5 - br label %if.end58 - -if.end58: ; preds = %if.then54, %if.end38 - %57 = load float*, float** %m, align 8 - %58 = bitcast float* %57 to i8* - call void @free(i8* %58) #5 - ret i32 0 -} - -declare dso_local i32 @cudaSetDevice(i32) #1 - -declare dso_local i32 @printf(i8*, ...) #1 - -; Function Attrs: nounwind -declare dso_local i32 @getopt_long(i32, i8**, i8*, %struct.option*, i32*) #2 - -; Function Attrs: nounwind readonly -declare dso_local i32 @atoi(i8*) #3 - -declare dso_local i32 @fprintf(%struct._IO_FILE*, i8*, ...) #1 - -; Function Attrs: noreturn nounwind -declare dso_local void @exit(i32) #4 - -declare dso_local i32 @create_matrix_from_file(float**, i8*, i32*) #1 - -declare dso_local i32 @create_matrix(float**, i32) #1 - -declare dso_local void @matrix_duplicate(float*, float**, i32) #1 - -declare dso_local i32 @cudaMalloc(i8**, i64) #1 - -declare dso_local void @stopwatch_start(%struct.__stopwatch_t*) #1 - -declare dso_local i32 @cudaMemcpy(i8*, i8*, i64, i32) #1 - -declare dso_local void @_Z8lud_cudaPfi(float*, i32) #1 - -declare dso_local void @stopwatch_stop(%struct.__stopwatch_t*) #1 - -declare dso_local double @get_interval_by_sec(%struct.__stopwatch_t*) #1 - -declare dso_local i32 @cudaFree(i8*) #1 - -declare dso_local i32 @lud_verify(float*, float*, i32) #1 - -; Function Attrs: nounwind -declare dso_local void @free(i8*) #2 - -attributes #0 = { noinline norecurse optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #3 = { nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #4 = { noreturn nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #5 = { nounwind } -attributes #6 = { nounwind readonly } -attributes #7 = { noreturn nounwind } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]} -!1 = !{i32 1, !"wchar_size", i32 4} -!2 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"} diff --git a/examples/lud/lud_kernel-cuda-nvptx64-nvidia-cuda-sm_50.ll b/examples/lud/lud_kernel-cuda-nvptx64-nvidia-cuda-sm_50.ll deleted file mode 100644 index 9bdbe7d..0000000 --- a/examples/lud/lud_kernel-cuda-nvptx64-nvidia-cuda-sm_50.ll +++ /dev/null @@ -1,1001 +0,0 @@ -; ModuleID = 'lud_kernel-cuda-nvptx64-nvidia-cuda-sm_50.bc' -source_filename = "cuda/lud_kernel.cu" -target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" -target triple = "nvptx64-nvidia-cuda" - -%struct.__cuda_builtin_threadIdx_t = type { i8 } -%struct.__cuda_builtin_blockIdx_t = type { i8 } -%struct.cudaFuncAttributes = type { i64, i64, i64, i32, i32, i32, i32, i32, i32, i32 } - -$_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv = comdat any - -$_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv = comdat any - -$_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_yEv = comdat any - -$_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_yEv = comdat any - -@_ZZ12lud_diagonalPfiiE6shadow = internal addrspace(3) global [16 x [16 x float]] undef, align 4 -@threadIdx = extern_weak dso_local addrspace(1) global %struct.__cuda_builtin_threadIdx_t, align 1 -@_ZZ13lud_perimeterPfiiE3dia = internal addrspace(3) global [16 x [16 x float]] undef, align 4 -@_ZZ13lud_perimeterPfiiE8peri_row = internal addrspace(3) global [16 x [16 x float]] undef, align 4 -@_ZZ13lud_perimeterPfiiE8peri_col = internal addrspace(3) global [16 x [16 x float]] undef, align 4 -@blockIdx = extern_weak dso_local addrspace(1) global %struct.__cuda_builtin_blockIdx_t, align 1 -@_ZZ12lud_internalPfiiE8peri_row = internal addrspace(3) global [16 x [16 x float]] undef, align 4 -@_ZZ12lud_internalPfiiE8peri_col = internal addrspace(3) global [16 x [16 x float]] undef, align 4 - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaMalloc(i8** %p, i64 %s) #0 { -entry: - %p.addr = alloca i8**, align 8 - %s.addr = alloca i64, align 8 - store i8** %p, i8*** %p.addr, align 8 - store i64 %s, i64* %s.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaFuncGetAttributes(%struct.cudaFuncAttributes* %p, i8* %c) #0 { -entry: - %p.addr = alloca %struct.cudaFuncAttributes*, align 8 - %c.addr = alloca i8*, align 8 - store %struct.cudaFuncAttributes* %p, %struct.cudaFuncAttributes** %p.addr, align 8 - store i8* %c, i8** %c.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaDeviceGetAttribute(i32* %value, i32 %attr, i32 %device) #0 { -entry: - %value.addr = alloca i32*, align 8 - %attr.addr = alloca i32, align 4 - %device.addr = alloca i32, align 4 - store i32* %value, i32** %value.addr, align 8 - store i32 %attr, i32* %attr.addr, align 4 - store i32 %device, i32* %device.addr, align 4 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaGetDevice(i32* %device) #0 { -entry: - %device.addr = alloca i32*, align 8 - store i32* %device, i32** %device.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaOccupancyMaxActiveBlocksPerMultiprocessor(i32* %numBlocks, i8* %func, i32 %blockSize, i64 %dynamicSmemSize) #0 { -entry: - %numBlocks.addr = alloca i32*, align 8 - %func.addr = alloca i8*, align 8 - %blockSize.addr = alloca i32, align 4 - %dynamicSmemSize.addr = alloca i64, align 8 - store i32* %numBlocks, i32** %numBlocks.addr, align 8 - store i8* %func, i8** %func.addr, align 8 - store i32 %blockSize, i32* %blockSize.addr, align 4 - store i64 %dynamicSmemSize, i64* %dynamicSmemSize.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(i32* %numBlocks, i8* %func, i32 %blockSize, i64 %dynamicSmemSize, i32 %flags) #0 { -entry: - %numBlocks.addr = alloca i32*, align 8 - %func.addr = alloca i8*, align 8 - %blockSize.addr = alloca i32, align 4 - %dynamicSmemSize.addr = alloca i64, align 8 - %flags.addr = alloca i32, align 4 - store i32* %numBlocks, i32** %numBlocks.addr, align 8 - store i8* %func, i8** %func.addr, align 8 - store i32 %blockSize, i32* %blockSize.addr, align 4 - store i64 %dynamicSmemSize, i64* %dynamicSmemSize.addr, align 8 - store i32 %flags, i32* %flags.addr, align 4 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define dso_local void @_Z12lud_diagonalPfii(float* %m, i32 %matrix_dim, i32 %offset) #0 { -entry: - %m.addr = alloca float*, align 8 - %matrix_dim.addr = alloca i32, align 4 - %offset.addr = alloca i32, align 4 - %i = alloca i32, align 4 - %j = alloca i32, align 4 - %array_offset = alloca i32, align 4 - store float* %m, float** %m.addr, align 8 - store i32 %matrix_dim, i32* %matrix_dim.addr, align 4 - store i32 %offset, i32* %offset.addr, align 4 - %0 = load i32, i32* %offset.addr, align 4 - %1 = load i32, i32* %matrix_dim.addr, align 4 - %mul = mul nsw i32 %0, %1 - %2 = load i32, i32* %offset.addr, align 4 - %add = add nsw i32 %mul, %2 - store i32 %add, i32* %array_offset, align 4 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %3 = load i32, i32* %i, align 4 - %cmp = icmp slt i32 %3, 16 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %4 = load float*, float** %m.addr, align 8 - %5 = load i32, i32* %array_offset, align 4 - %call = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - %add1 = add i32 %5, %call - %idxprom = zext i32 %add1 to i64 - %arrayidx = getelementptr inbounds float, float* %4, i64 %idxprom - %6 = load float, float* %arrayidx, align 4 - %7 = load i32, i32* %i, align 4 - %idxprom2 = sext i32 %7 to i64 - %arrayidx3 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ12lud_diagonalPfiiE6shadow to [16 x [16 x float]]*), i64 0, i64 %idxprom2 - %call4 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - %idxprom5 = zext i32 %call4 to i64 - %arrayidx6 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx3, i64 0, i64 %idxprom5 - store float %6, float* %arrayidx6, align 4 - %8 = load i32, i32* %matrix_dim.addr, align 4 - %9 = load i32, i32* %array_offset, align 4 - %add7 = add nsw i32 %9, %8 - store i32 %add7, i32* %array_offset, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %10 = load i32, i32* %i, align 4 - %inc = add nsw i32 %10, 1 - store i32 %inc, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - call void @llvm.nvvm.barrier0() - store i32 0, i32* %i, align 4 - br label %for.cond8 - -for.cond8: ; preds = %for.inc72, %for.end - %11 = load i32, i32* %i, align 4 - %cmp9 = icmp slt i32 %11, 15 - br i1 %cmp9, label %for.body10, label %for.end74 - -for.body10: ; preds = %for.cond8 - %call11 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - %12 = load i32, i32* %i, align 4 - %cmp12 = icmp ugt i32 %call11, %12 - br i1 %cmp12, label %if.then, label %if.end - -if.then: ; preds = %for.body10 - store i32 0, i32* %j, align 4 - br label %for.cond13 - -for.cond13: ; preds = %for.inc31, %if.then - %13 = load i32, i32* %j, align 4 - %14 = load i32, i32* %i, align 4 - %cmp14 = icmp slt i32 %13, %14 - br i1 %cmp14, label %for.body15, label %for.end33 - -for.body15: ; preds = %for.cond13 - %call16 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - %idxprom17 = zext i32 %call16 to i64 - %arrayidx18 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ12lud_diagonalPfiiE6shadow to [16 x [16 x float]]*), i64 0, i64 %idxprom17 - %15 = load i32, i32* %j, align 4 - %idxprom19 = sext i32 %15 to i64 - %arrayidx20 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx18, i64 0, i64 %idxprom19 - %16 = load float, float* %arrayidx20, align 4 - %17 = load i32, i32* %j, align 4 - %idxprom21 = sext i32 %17 to i64 - %arrayidx22 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ12lud_diagonalPfiiE6shadow to [16 x [16 x float]]*), i64 0, i64 %idxprom21 - %18 = load i32, i32* %i, align 4 - %idxprom23 = sext i32 %18 to i64 - %arrayidx24 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx22, i64 0, i64 %idxprom23 - %19 = load float, float* %arrayidx24, align 4 - %mul25 = fmul contract float %16, %19 - %call26 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - %idxprom27 = zext i32 %call26 to i64 - %arrayidx28 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ12lud_diagonalPfiiE6shadow to [16 x [16 x float]]*), i64 0, i64 %idxprom27 - %20 = load i32, i32* %i, align 4 - %idxprom29 = sext i32 %20 to i64 - %arrayidx30 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx28, i64 0, i64 %idxprom29 - %21 = load float, float* %arrayidx30, align 4 - %sub = fsub contract float %21, %mul25 - store float %sub, float* %arrayidx30, align 4 - br label %for.inc31 - -for.inc31: ; preds = %for.body15 - %22 = load i32, i32* %j, align 4 - %inc32 = add nsw i32 %22, 1 - store i32 %inc32, i32* %j, align 4 - br label %for.cond13 - -for.end33: ; preds = %for.cond13 - %23 = load i32, i32* %i, align 4 - %idxprom34 = sext i32 %23 to i64 - %arrayidx35 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ12lud_diagonalPfiiE6shadow to [16 x [16 x float]]*), i64 0, i64 %idxprom34 - %24 = load i32, i32* %i, align 4 - %idxprom36 = sext i32 %24 to i64 - %arrayidx37 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx35, i64 0, i64 %idxprom36 - %25 = load float, float* %arrayidx37, align 4 - %call38 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - %idxprom39 = zext i32 %call38 to i64 - %arrayidx40 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ12lud_diagonalPfiiE6shadow to [16 x [16 x float]]*), i64 0, i64 %idxprom39 - %26 = load i32, i32* %i, align 4 - %idxprom41 = sext i32 %26 to i64 - %arrayidx42 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx40, i64 0, i64 %idxprom41 - %27 = load float, float* %arrayidx42, align 4 - %div = fdiv float %27, %25 - store float %div, float* %arrayidx42, align 4 - br label %if.end - -if.end: ; preds = %for.end33, %for.body10 - call void @llvm.nvvm.barrier0() - %call43 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - %28 = load i32, i32* %i, align 4 - %cmp44 = icmp ugt i32 %call43, %28 - br i1 %cmp44, label %if.then45, label %if.end71 - -if.then45: ; preds = %if.end - store i32 0, i32* %j, align 4 - br label %for.cond46 - -for.cond46: ; preds = %for.inc68, %if.then45 - %29 = load i32, i32* %j, align 4 - %30 = load i32, i32* %i, align 4 - %add47 = add nsw i32 %30, 1 - %cmp48 = icmp slt i32 %29, %add47 - br i1 %cmp48, label %for.body49, label %for.end70 - -for.body49: ; preds = %for.cond46 - %31 = load i32, i32* %i, align 4 - %add50 = add nsw i32 %31, 1 - %idxprom51 = sext i32 %add50 to i64 - %arrayidx52 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ12lud_diagonalPfiiE6shadow to [16 x [16 x float]]*), i64 0, i64 %idxprom51 - %32 = load i32, i32* %j, align 4 - %idxprom53 = sext i32 %32 to i64 - %arrayidx54 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx52, i64 0, i64 %idxprom53 - %33 = load float, float* %arrayidx54, align 4 - %34 = load i32, i32* %j, align 4 - %idxprom55 = sext i32 %34 to i64 - %arrayidx56 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ12lud_diagonalPfiiE6shadow to [16 x [16 x float]]*), i64 0, i64 %idxprom55 - %call57 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - %idxprom58 = zext i32 %call57 to i64 - %arrayidx59 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx56, i64 0, i64 %idxprom58 - %35 = load float, float* %arrayidx59, align 4 - %mul60 = fmul contract float %33, %35 - %36 = load i32, i32* %i, align 4 - %add61 = add nsw i32 %36, 1 - %idxprom62 = sext i32 %add61 to i64 - %arrayidx63 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ12lud_diagonalPfiiE6shadow to [16 x [16 x float]]*), i64 0, i64 %idxprom62 - %call64 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - %idxprom65 = zext i32 %call64 to i64 - %arrayidx66 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx63, i64 0, i64 %idxprom65 - %37 = load float, float* %arrayidx66, align 4 - %sub67 = fsub contract float %37, %mul60 - store float %sub67, float* %arrayidx66, align 4 - br label %for.inc68 - -for.inc68: ; preds = %for.body49 - %38 = load i32, i32* %j, align 4 - %inc69 = add nsw i32 %38, 1 - store i32 %inc69, i32* %j, align 4 - br label %for.cond46 - -for.end70: ; preds = %for.cond46 - br label %if.end71 - -if.end71: ; preds = %for.end70, %if.end - call void @llvm.nvvm.barrier0() - br label %for.inc72 - -for.inc72: ; preds = %if.end71 - %39 = load i32, i32* %i, align 4 - %inc73 = add nsw i32 %39, 1 - store i32 %inc73, i32* %i, align 4 - br label %for.cond8 - -for.end74: ; preds = %for.cond8 - %40 = load i32, i32* %offset.addr, align 4 - %add75 = add nsw i32 %40, 1 - %41 = load i32, i32* %matrix_dim.addr, align 4 - %mul76 = mul nsw i32 %add75, %41 - %42 = load i32, i32* %offset.addr, align 4 - %add77 = add nsw i32 %mul76, %42 - store i32 %add77, i32* %array_offset, align 4 - store i32 1, i32* %i, align 4 - br label %for.cond78 - -for.cond78: ; preds = %for.inc91, %for.end74 - %43 = load i32, i32* %i, align 4 - %cmp79 = icmp slt i32 %43, 16 - br i1 %cmp79, label %for.body80, label %for.end93 - -for.body80: ; preds = %for.cond78 - %44 = load i32, i32* %i, align 4 - %idxprom81 = sext i32 %44 to i64 - %arrayidx82 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ12lud_diagonalPfiiE6shadow to [16 x [16 x float]]*), i64 0, i64 %idxprom81 - %call83 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - %idxprom84 = zext i32 %call83 to i64 - %arrayidx85 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx82, i64 0, i64 %idxprom84 - %45 = load float, float* %arrayidx85, align 4 - %46 = load float*, float** %m.addr, align 8 - %47 = load i32, i32* %array_offset, align 4 - %call86 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - %add87 = add i32 %47, %call86 - %idxprom88 = zext i32 %add87 to i64 - %arrayidx89 = getelementptr inbounds float, float* %46, i64 %idxprom88 - store float %45, float* %arrayidx89, align 4 - %48 = load i32, i32* %matrix_dim.addr, align 4 - %49 = load i32, i32* %array_offset, align 4 - %add90 = add nsw i32 %49, %48 - store i32 %add90, i32* %array_offset, align 4 - br label %for.inc91 - -for.inc91: ; preds = %for.body80 - %50 = load i32, i32* %i, align 4 - %inc92 = add nsw i32 %50, 1 - store i32 %inc92, i32* %i, align 4 - br label %for.cond78 - -for.end93: ; preds = %for.cond78 - ret void -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() - ret i32 %0 -} - -; Function Attrs: convergent nounwind -declare void @llvm.nvvm.barrier0() #2 - -; Function Attrs: convergent noinline nounwind optnone -define dso_local void @_Z13lud_perimeterPfii(float* %m, i32 %matrix_dim, i32 %offset) #0 { -entry: - %m.addr = alloca float*, align 8 - %matrix_dim.addr = alloca i32, align 4 - %offset.addr = alloca i32, align 4 - %i = alloca i32, align 4 - %j = alloca i32, align 4 - %array_offset = alloca i32, align 4 - %idx = alloca i32, align 4 - store float* %m, float** %m.addr, align 8 - store i32 %matrix_dim, i32* %matrix_dim.addr, align 4 - store i32 %offset, i32* %offset.addr, align 4 - %call = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - %cmp = icmp ult i32 %call, 16 - br i1 %cmp, label %if.then, label %if.else - -if.then: ; preds = %entry - %call1 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - store i32 %call1, i32* %idx, align 4 - %0 = load i32, i32* %offset.addr, align 4 - %1 = load i32, i32* %matrix_dim.addr, align 4 - %mul = mul nsw i32 %0, %1 - %2 = load i32, i32* %offset.addr, align 4 - %add = add nsw i32 %mul, %2 - store i32 %add, i32* %array_offset, align 4 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %if.then - %3 = load i32, i32* %i, align 4 - %cmp2 = icmp slt i32 %3, 8 - br i1 %cmp2, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %4 = load float*, float** %m.addr, align 8 - %5 = load i32, i32* %array_offset, align 4 - %6 = load i32, i32* %idx, align 4 - %add3 = add nsw i32 %5, %6 - %idxprom = sext i32 %add3 to i64 - %arrayidx = getelementptr inbounds float, float* %4, i64 %idxprom - %7 = load float, float* %arrayidx, align 4 - %8 = load i32, i32* %i, align 4 - %idxprom4 = sext i32 %8 to i64 - %arrayidx5 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ13lud_perimeterPfiiE3dia to [16 x [16 x float]]*), i64 0, i64 %idxprom4 - %9 = load i32, i32* %idx, align 4 - %idxprom6 = sext i32 %9 to i64 - %arrayidx7 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx5, i64 0, i64 %idxprom6 - store float %7, float* %arrayidx7, align 4 - %10 = load i32, i32* %matrix_dim.addr, align 4 - %11 = load i32, i32* %array_offset, align 4 - %add8 = add nsw i32 %11, %10 - store i32 %add8, i32* %array_offset, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %12 = load i32, i32* %i, align 4 - %inc = add nsw i32 %12, 1 - store i32 %inc, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %13 = load i32, i32* %offset.addr, align 4 - %14 = load i32, i32* %matrix_dim.addr, align 4 - %mul9 = mul nsw i32 %13, %14 - %15 = load i32, i32* %offset.addr, align 4 - %add10 = add nsw i32 %mul9, %15 - store i32 %add10, i32* %array_offset, align 4 - store i32 0, i32* %i, align 4 - br label %for.cond11 - -for.cond11: ; preds = %for.inc26, %for.end - %16 = load i32, i32* %i, align 4 - %cmp12 = icmp slt i32 %16, 16 - br i1 %cmp12, label %for.body13, label %for.end28 - -for.body13: ; preds = %for.cond11 - %17 = load float*, float** %m.addr, align 8 - %18 = load i32, i32* %array_offset, align 4 - %call14 = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #2 - %add15 = add i32 %call14, 1 - %mul16 = mul i32 %add15, 16 - %add17 = add i32 %18, %mul16 - %19 = load i32, i32* %idx, align 4 - %add18 = add i32 %add17, %19 - %idxprom19 = zext i32 %add18 to i64 - %arrayidx20 = getelementptr inbounds float, float* %17, i64 %idxprom19 - %20 = load float, float* %arrayidx20, align 4 - %21 = load i32, i32* %i, align 4 - %idxprom21 = sext i32 %21 to i64 - %arrayidx22 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ13lud_perimeterPfiiE8peri_row to [16 x [16 x float]]*), i64 0, i64 %idxprom21 - %22 = load i32, i32* %idx, align 4 - %idxprom23 = sext i32 %22 to i64 - %arrayidx24 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx22, i64 0, i64 %idxprom23 - store float %20, float* %arrayidx24, align 4 - %23 = load i32, i32* %matrix_dim.addr, align 4 - %24 = load i32, i32* %array_offset, align 4 - %add25 = add nsw i32 %24, %23 - store i32 %add25, i32* %array_offset, align 4 - br label %for.inc26 - -for.inc26: ; preds = %for.body13 - %25 = load i32, i32* %i, align 4 - %inc27 = add nsw i32 %25, 1 - store i32 %inc27, i32* %i, align 4 - br label %for.cond11 - -for.end28: ; preds = %for.cond11 - br label %if.end - -if.else: ; preds = %entry - %call29 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - %sub = sub i32 %call29, 16 - store i32 %sub, i32* %idx, align 4 - %26 = load i32, i32* %offset.addr, align 4 - %add30 = add nsw i32 %26, 8 - %27 = load i32, i32* %matrix_dim.addr, align 4 - %mul31 = mul nsw i32 %add30, %27 - %28 = load i32, i32* %offset.addr, align 4 - %add32 = add nsw i32 %mul31, %28 - store i32 %add32, i32* %array_offset, align 4 - store i32 8, i32* %i, align 4 - br label %for.cond33 - -for.cond33: ; preds = %for.inc44, %if.else - %29 = load i32, i32* %i, align 4 - %cmp34 = icmp slt i32 %29, 16 - br i1 %cmp34, label %for.body35, label %for.end46 - -for.body35: ; preds = %for.cond33 - %30 = load float*, float** %m.addr, align 8 - %31 = load i32, i32* %array_offset, align 4 - %32 = load i32, i32* %idx, align 4 - %add36 = add nsw i32 %31, %32 - %idxprom37 = sext i32 %add36 to i64 - %arrayidx38 = getelementptr inbounds float, float* %30, i64 %idxprom37 - %33 = load float, float* %arrayidx38, align 4 - %34 = load i32, i32* %i, align 4 - %idxprom39 = sext i32 %34 to i64 - %arrayidx40 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ13lud_perimeterPfiiE3dia to [16 x [16 x float]]*), i64 0, i64 %idxprom39 - %35 = load i32, i32* %idx, align 4 - %idxprom41 = sext i32 %35 to i64 - %arrayidx42 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx40, i64 0, i64 %idxprom41 - store float %33, float* %arrayidx42, align 4 - %36 = load i32, i32* %matrix_dim.addr, align 4 - %37 = load i32, i32* %array_offset, align 4 - %add43 = add nsw i32 %37, %36 - store i32 %add43, i32* %array_offset, align 4 - br label %for.inc44 - -for.inc44: ; preds = %for.body35 - %38 = load i32, i32* %i, align 4 - %inc45 = add nsw i32 %38, 1 - store i32 %inc45, i32* %i, align 4 - br label %for.cond33 - -for.end46: ; preds = %for.cond33 - %39 = load i32, i32* %offset.addr, align 4 - %call47 = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #2 - %add48 = add i32 %call47, 1 - %mul49 = mul i32 %add48, 16 - %add50 = add i32 %39, %mul49 - %40 = load i32, i32* %matrix_dim.addr, align 4 - %mul51 = mul i32 %add50, %40 - %41 = load i32, i32* %offset.addr, align 4 - %add52 = add i32 %mul51, %41 - store i32 %add52, i32* %array_offset, align 4 - store i32 0, i32* %i, align 4 - br label %for.cond53 - -for.cond53: ; preds = %for.inc64, %for.end46 - %42 = load i32, i32* %i, align 4 - %cmp54 = icmp slt i32 %42, 16 - br i1 %cmp54, label %for.body55, label %for.end66 - -for.body55: ; preds = %for.cond53 - %43 = load float*, float** %m.addr, align 8 - %44 = load i32, i32* %array_offset, align 4 - %45 = load i32, i32* %idx, align 4 - %add56 = add nsw i32 %44, %45 - %idxprom57 = sext i32 %add56 to i64 - %arrayidx58 = getelementptr inbounds float, float* %43, i64 %idxprom57 - %46 = load float, float* %arrayidx58, align 4 - %47 = load i32, i32* %i, align 4 - %idxprom59 = sext i32 %47 to i64 - %arrayidx60 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ13lud_perimeterPfiiE8peri_col to [16 x [16 x float]]*), i64 0, i64 %idxprom59 - %48 = load i32, i32* %idx, align 4 - %idxprom61 = sext i32 %48 to i64 - %arrayidx62 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx60, i64 0, i64 %idxprom61 - store float %46, float* %arrayidx62, align 4 - %49 = load i32, i32* %matrix_dim.addr, align 4 - %50 = load i32, i32* %array_offset, align 4 - %add63 = add nsw i32 %50, %49 - store i32 %add63, i32* %array_offset, align 4 - br label %for.inc64 - -for.inc64: ; preds = %for.body55 - %51 = load i32, i32* %i, align 4 - %inc65 = add nsw i32 %51, 1 - store i32 %inc65, i32* %i, align 4 - br label %for.cond53 - -for.end66: ; preds = %for.cond53 - br label %if.end - -if.end: ; preds = %for.end66, %for.end28 - call void @llvm.nvvm.barrier0() - %call67 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - %cmp68 = icmp ult i32 %call67, 16 - br i1 %cmp68, label %if.then69, label %if.else97 - -if.then69: ; preds = %if.end - %call70 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - store i32 %call70, i32* %idx, align 4 - store i32 1, i32* %i, align 4 - br label %for.cond71 - -for.cond71: ; preds = %for.inc94, %if.then69 - %52 = load i32, i32* %i, align 4 - %cmp72 = icmp slt i32 %52, 16 - br i1 %cmp72, label %for.body73, label %for.end96 - -for.body73: ; preds = %for.cond71 - store i32 0, i32* %j, align 4 - br label %for.cond74 - -for.cond74: ; preds = %for.inc91, %for.body73 - %53 = load i32, i32* %j, align 4 - %54 = load i32, i32* %i, align 4 - %cmp75 = icmp slt i32 %53, %54 - br i1 %cmp75, label %for.body76, label %for.end93 - -for.body76: ; preds = %for.cond74 - %55 = load i32, i32* %i, align 4 - %idxprom77 = sext i32 %55 to i64 - %arrayidx78 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ13lud_perimeterPfiiE3dia to [16 x [16 x float]]*), i64 0, i64 %idxprom77 - %56 = load i32, i32* %j, align 4 - %idxprom79 = sext i32 %56 to i64 - %arrayidx80 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx78, i64 0, i64 %idxprom79 - %57 = load float, float* %arrayidx80, align 4 - %58 = load i32, i32* %j, align 4 - %idxprom81 = sext i32 %58 to i64 - %arrayidx82 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ13lud_perimeterPfiiE8peri_row to [16 x [16 x float]]*), i64 0, i64 %idxprom81 - %59 = load i32, i32* %idx, align 4 - %idxprom83 = sext i32 %59 to i64 - %arrayidx84 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx82, i64 0, i64 %idxprom83 - %60 = load float, float* %arrayidx84, align 4 - %mul85 = fmul contract float %57, %60 - %61 = load i32, i32* %i, align 4 - %idxprom86 = sext i32 %61 to i64 - %arrayidx87 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ13lud_perimeterPfiiE8peri_row to [16 x [16 x float]]*), i64 0, i64 %idxprom86 - %62 = load i32, i32* %idx, align 4 - %idxprom88 = sext i32 %62 to i64 - %arrayidx89 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx87, i64 0, i64 %idxprom88 - %63 = load float, float* %arrayidx89, align 4 - %sub90 = fsub contract float %63, %mul85 - store float %sub90, float* %arrayidx89, align 4 - br label %for.inc91 - -for.inc91: ; preds = %for.body76 - %64 = load i32, i32* %j, align 4 - %inc92 = add nsw i32 %64, 1 - store i32 %inc92, i32* %j, align 4 - br label %for.cond74 - -for.end93: ; preds = %for.cond74 - br label %for.inc94 - -for.inc94: ; preds = %for.end93 - %65 = load i32, i32* %i, align 4 - %inc95 = add nsw i32 %65, 1 - store i32 %inc95, i32* %i, align 4 - br label %for.cond71 - -for.end96: ; preds = %for.cond71 - br label %if.end134 - -if.else97: ; preds = %if.end - %call98 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - %sub99 = sub i32 %call98, 16 - store i32 %sub99, i32* %idx, align 4 - store i32 0, i32* %i, align 4 - br label %for.cond100 - -for.cond100: ; preds = %for.inc131, %if.else97 - %66 = load i32, i32* %i, align 4 - %cmp101 = icmp slt i32 %66, 16 - br i1 %cmp101, label %for.body102, label %for.end133 - -for.body102: ; preds = %for.cond100 - store i32 0, i32* %j, align 4 - br label %for.cond103 - -for.cond103: ; preds = %for.inc120, %for.body102 - %67 = load i32, i32* %j, align 4 - %68 = load i32, i32* %i, align 4 - %cmp104 = icmp slt i32 %67, %68 - br i1 %cmp104, label %for.body105, label %for.end122 - -for.body105: ; preds = %for.cond103 - %69 = load i32, i32* %idx, align 4 - %idxprom106 = sext i32 %69 to i64 - %arrayidx107 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ13lud_perimeterPfiiE8peri_col to [16 x [16 x float]]*), i64 0, i64 %idxprom106 - %70 = load i32, i32* %j, align 4 - %idxprom108 = sext i32 %70 to i64 - %arrayidx109 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx107, i64 0, i64 %idxprom108 - %71 = load float, float* %arrayidx109, align 4 - %72 = load i32, i32* %j, align 4 - %idxprom110 = sext i32 %72 to i64 - %arrayidx111 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ13lud_perimeterPfiiE3dia to [16 x [16 x float]]*), i64 0, i64 %idxprom110 - %73 = load i32, i32* %i, align 4 - %idxprom112 = sext i32 %73 to i64 - %arrayidx113 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx111, i64 0, i64 %idxprom112 - %74 = load float, float* %arrayidx113, align 4 - %mul114 = fmul contract float %71, %74 - %75 = load i32, i32* %idx, align 4 - %idxprom115 = sext i32 %75 to i64 - %arrayidx116 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ13lud_perimeterPfiiE8peri_col to [16 x [16 x float]]*), i64 0, i64 %idxprom115 - %76 = load i32, i32* %i, align 4 - %idxprom117 = sext i32 %76 to i64 - %arrayidx118 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx116, i64 0, i64 %idxprom117 - %77 = load float, float* %arrayidx118, align 4 - %sub119 = fsub contract float %77, %mul114 - store float %sub119, float* %arrayidx118, align 4 - br label %for.inc120 - -for.inc120: ; preds = %for.body105 - %78 = load i32, i32* %j, align 4 - %inc121 = add nsw i32 %78, 1 - store i32 %inc121, i32* %j, align 4 - br label %for.cond103 - -for.end122: ; preds = %for.cond103 - %79 = load i32, i32* %i, align 4 - %idxprom123 = sext i32 %79 to i64 - %arrayidx124 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ13lud_perimeterPfiiE3dia to [16 x [16 x float]]*), i64 0, i64 %idxprom123 - %80 = load i32, i32* %i, align 4 - %idxprom125 = sext i32 %80 to i64 - %arrayidx126 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx124, i64 0, i64 %idxprom125 - %81 = load float, float* %arrayidx126, align 4 - %82 = load i32, i32* %idx, align 4 - %idxprom127 = sext i32 %82 to i64 - %arrayidx128 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ13lud_perimeterPfiiE8peri_col to [16 x [16 x float]]*), i64 0, i64 %idxprom127 - %83 = load i32, i32* %i, align 4 - %idxprom129 = sext i32 %83 to i64 - %arrayidx130 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx128, i64 0, i64 %idxprom129 - %84 = load float, float* %arrayidx130, align 4 - %div = fdiv float %84, %81 - store float %div, float* %arrayidx130, align 4 - br label %for.inc131 - -for.inc131: ; preds = %for.end122 - %85 = load i32, i32* %i, align 4 - %inc132 = add nsw i32 %85, 1 - store i32 %inc132, i32* %i, align 4 - br label %for.cond100 - -for.end133: ; preds = %for.cond100 - br label %if.end134 - -if.end134: ; preds = %for.end133, %for.end96 - call void @llvm.nvvm.barrier0() - %call135 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - %cmp136 = icmp ult i32 %call135, 16 - br i1 %cmp136, label %if.then137, label %if.else160 - -if.then137: ; preds = %if.end134 - %call138 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - store i32 %call138, i32* %idx, align 4 - %86 = load i32, i32* %offset.addr, align 4 - %add139 = add nsw i32 %86, 1 - %87 = load i32, i32* %matrix_dim.addr, align 4 - %mul140 = mul nsw i32 %add139, %87 - %88 = load i32, i32* %offset.addr, align 4 - %add141 = add nsw i32 %mul140, %88 - store i32 %add141, i32* %array_offset, align 4 - store i32 1, i32* %i, align 4 - br label %for.cond142 - -for.cond142: ; preds = %for.inc157, %if.then137 - %89 = load i32, i32* %i, align 4 - %cmp143 = icmp slt i32 %89, 16 - br i1 %cmp143, label %for.body144, label %for.end159 - -for.body144: ; preds = %for.cond142 - %90 = load i32, i32* %i, align 4 - %idxprom145 = sext i32 %90 to i64 - %arrayidx146 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ13lud_perimeterPfiiE8peri_row to [16 x [16 x float]]*), i64 0, i64 %idxprom145 - %91 = load i32, i32* %idx, align 4 - %idxprom147 = sext i32 %91 to i64 - %arrayidx148 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx146, i64 0, i64 %idxprom147 - %92 = load float, float* %arrayidx148, align 4 - %93 = load float*, float** %m.addr, align 8 - %94 = load i32, i32* %array_offset, align 4 - %call149 = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #2 - %add150 = add i32 %call149, 1 - %mul151 = mul i32 %add150, 16 - %add152 = add i32 %94, %mul151 - %95 = load i32, i32* %idx, align 4 - %add153 = add i32 %add152, %95 - %idxprom154 = zext i32 %add153 to i64 - %arrayidx155 = getelementptr inbounds float, float* %93, i64 %idxprom154 - store float %92, float* %arrayidx155, align 4 - %96 = load i32, i32* %matrix_dim.addr, align 4 - %97 = load i32, i32* %array_offset, align 4 - %add156 = add nsw i32 %97, %96 - store i32 %add156, i32* %array_offset, align 4 - br label %for.inc157 - -for.inc157: ; preds = %for.body144 - %98 = load i32, i32* %i, align 4 - %inc158 = add nsw i32 %98, 1 - store i32 %inc158, i32* %i, align 4 - br label %for.cond142 - -for.end159: ; preds = %for.cond142 - br label %if.end183 - -if.else160: ; preds = %if.end134 - %call161 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - %sub162 = sub i32 %call161, 16 - store i32 %sub162, i32* %idx, align 4 - %99 = load i32, i32* %offset.addr, align 4 - %call163 = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #2 - %add164 = add i32 %call163, 1 - %mul165 = mul i32 %add164, 16 - %add166 = add i32 %99, %mul165 - %100 = load i32, i32* %matrix_dim.addr, align 4 - %mul167 = mul i32 %add166, %100 - %101 = load i32, i32* %offset.addr, align 4 - %add168 = add i32 %mul167, %101 - store i32 %add168, i32* %array_offset, align 4 - store i32 0, i32* %i, align 4 - br label %for.cond169 - -for.cond169: ; preds = %for.inc180, %if.else160 - %102 = load i32, i32* %i, align 4 - %cmp170 = icmp slt i32 %102, 16 - br i1 %cmp170, label %for.body171, label %for.end182 - -for.body171: ; preds = %for.cond169 - %103 = load i32, i32* %i, align 4 - %idxprom172 = sext i32 %103 to i64 - %arrayidx173 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ13lud_perimeterPfiiE8peri_col to [16 x [16 x float]]*), i64 0, i64 %idxprom172 - %104 = load i32, i32* %idx, align 4 - %idxprom174 = sext i32 %104 to i64 - %arrayidx175 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx173, i64 0, i64 %idxprom174 - %105 = load float, float* %arrayidx175, align 4 - %106 = load float*, float** %m.addr, align 8 - %107 = load i32, i32* %array_offset, align 4 - %108 = load i32, i32* %idx, align 4 - %add176 = add nsw i32 %107, %108 - %idxprom177 = sext i32 %add176 to i64 - %arrayidx178 = getelementptr inbounds float, float* %106, i64 %idxprom177 - store float %105, float* %arrayidx178, align 4 - %109 = load i32, i32* %matrix_dim.addr, align 4 - %110 = load i32, i32* %array_offset, align 4 - %add179 = add nsw i32 %110, %109 - store i32 %add179, i32* %array_offset, align 4 - br label %for.inc180 - -for.inc180: ; preds = %for.body171 - %111 = load i32, i32* %i, align 4 - %inc181 = add nsw i32 %111, 1 - store i32 %inc181, i32* %i, align 4 - br label %for.cond169 - -for.end182: ; preds = %for.cond169 - br label %if.end183 - -if.end183: ; preds = %for.end182, %for.end159 - ret void -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() - ret i32 %0 -} - -; Function Attrs: convergent noinline nounwind optnone -define dso_local void @_Z12lud_internalPfii(float* %m, i32 %matrix_dim, i32 %offset) #0 { -entry: - %m.addr = alloca float*, align 8 - %matrix_dim.addr = alloca i32, align 4 - %offset.addr = alloca i32, align 4 - %i = alloca i32, align 4 - %sum = alloca float, align 4 - %global_row_id = alloca i32, align 4 - %global_col_id = alloca i32, align 4 - store float* %m, float** %m.addr, align 8 - store i32 %matrix_dim, i32* %matrix_dim.addr, align 4 - store i32 %offset, i32* %offset.addr, align 4 - %0 = load i32, i32* %offset.addr, align 4 - %call = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_yEv() #2 - %add = add i32 %call, 1 - %mul = mul i32 %add, 16 - %add1 = add i32 %0, %mul - store i32 %add1, i32* %global_row_id, align 4 - %1 = load i32, i32* %offset.addr, align 4 - %call2 = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #2 - %add3 = add i32 %call2, 1 - %mul4 = mul i32 %add3, 16 - %add5 = add i32 %1, %mul4 - store i32 %add5, i32* %global_col_id, align 4 - %2 = load float*, float** %m.addr, align 8 - %3 = load i32, i32* %offset.addr, align 4 - %call6 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_yEv() #2 - %add7 = add i32 %3, %call6 - %4 = load i32, i32* %matrix_dim.addr, align 4 - %mul8 = mul i32 %add7, %4 - %5 = load i32, i32* %global_col_id, align 4 - %add9 = add i32 %mul8, %5 - %call10 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - %add11 = add i32 %add9, %call10 - %idxprom = zext i32 %add11 to i64 - %arrayidx = getelementptr inbounds float, float* %2, i64 %idxprom - %6 = load float, float* %arrayidx, align 4 - %call12 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_yEv() #2 - %idxprom13 = zext i32 %call12 to i64 - %arrayidx14 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ12lud_internalPfiiE8peri_row to [16 x [16 x float]]*), i64 0, i64 %idxprom13 - %call15 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - %idxprom16 = zext i32 %call15 to i64 - %arrayidx17 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx14, i64 0, i64 %idxprom16 - store float %6, float* %arrayidx17, align 4 - %7 = load float*, float** %m.addr, align 8 - %8 = load i32, i32* %global_row_id, align 4 - %call18 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_yEv() #2 - %add19 = add i32 %8, %call18 - %9 = load i32, i32* %matrix_dim.addr, align 4 - %mul20 = mul i32 %add19, %9 - %10 = load i32, i32* %offset.addr, align 4 - %add21 = add i32 %mul20, %10 - %call22 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - %add23 = add i32 %add21, %call22 - %idxprom24 = zext i32 %add23 to i64 - %arrayidx25 = getelementptr inbounds float, float* %7, i64 %idxprom24 - %11 = load float, float* %arrayidx25, align 4 - %call26 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_yEv() #2 - %idxprom27 = zext i32 %call26 to i64 - %arrayidx28 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ12lud_internalPfiiE8peri_col to [16 x [16 x float]]*), i64 0, i64 %idxprom27 - %call29 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - %idxprom30 = zext i32 %call29 to i64 - %arrayidx31 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx28, i64 0, i64 %idxprom30 - store float %11, float* %arrayidx31, align 4 - call void @llvm.nvvm.barrier0() - store float 0.000000e+00, float* %sum, align 4 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %12 = load i32, i32* %i, align 4 - %cmp = icmp slt i32 %12, 16 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %call32 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_yEv() #2 - %idxprom33 = zext i32 %call32 to i64 - %arrayidx34 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ12lud_internalPfiiE8peri_col to [16 x [16 x float]]*), i64 0, i64 %idxprom33 - %13 = load i32, i32* %i, align 4 - %idxprom35 = sext i32 %13 to i64 - %arrayidx36 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx34, i64 0, i64 %idxprom35 - %14 = load float, float* %arrayidx36, align 4 - %15 = load i32, i32* %i, align 4 - %idxprom37 = sext i32 %15 to i64 - %arrayidx38 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ12lud_internalPfiiE8peri_row to [16 x [16 x float]]*), i64 0, i64 %idxprom37 - %call39 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - %idxprom40 = zext i32 %call39 to i64 - %arrayidx41 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx38, i64 0, i64 %idxprom40 - %16 = load float, float* %arrayidx41, align 4 - %mul42 = fmul contract float %14, %16 - %17 = load float, float* %sum, align 4 - %add43 = fadd contract float %17, %mul42 - store float %add43, float* %sum, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %18 = load i32, i32* %i, align 4 - %inc = add nsw i32 %18, 1 - store i32 %inc, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %19 = load float, float* %sum, align 4 - %20 = load float*, float** %m.addr, align 8 - %21 = load i32, i32* %global_row_id, align 4 - %call44 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_yEv() #2 - %add45 = add i32 %21, %call44 - %22 = load i32, i32* %matrix_dim.addr, align 4 - %mul46 = mul i32 %add45, %22 - %23 = load i32, i32* %global_col_id, align 4 - %add47 = add i32 %mul46, %23 - %call48 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - %add49 = add i32 %add47, %call48 - %idxprom50 = zext i32 %add49 to i64 - %arrayidx51 = getelementptr inbounds float, float* %20, i64 %idxprom50 - %24 = load float, float* %arrayidx51, align 4 - %sub = fsub contract float %24, %19 - store float %sub, float* %arrayidx51, align 4 - ret void -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_yEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y() - ret i32 %0 -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_yEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.y() - ret i32 %0 -} - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() #3 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() #3 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.y() #3 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.tid.y() #3 - -attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_50" "target-features"="+ptx64,+sm_50" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { alwaysinline convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_50" "target-features"="+ptx64,+sm_50" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { convergent nounwind } -attributes #3 = { nounwind readnone } - -!llvm.module.flags = !{!0, !1, !2} -!nvvm.annotations = !{!3, !4, !5, !6, !7, !6, !8, !8, !8, !8, !9, !9, !8} -!llvm.ident = !{!10} -!nvvmir.version = !{!11} - -!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]} -!1 = !{i32 1, !"wchar_size", i32 4} -!2 = !{i32 4, !"nvvm-reflect-ftz", i32 0} -!3 = !{void (float*, i32, i32)* @_Z12lud_diagonalPfii, !"kernel", i32 1} -!4 = !{void (float*, i32, i32)* @_Z13lud_perimeterPfii, !"kernel", i32 1} -!5 = !{void (float*, i32, i32)* @_Z12lud_internalPfii, !"kernel", i32 1} -!6 = !{null, !"align", i32 8} -!7 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080} -!8 = !{null, !"align", i32 16} -!9 = !{null, !"align", i32 16, !"align", i32 65552, !"align", i32 131088} -!10 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"} -!11 = !{i32 1, i32 4} diff --git a/examples/lud/lud_kernel-host-x86_64-unknown-linux-gnu.ll b/examples/lud/lud_kernel-host-x86_64-unknown-linux-gnu.ll deleted file mode 100644 index 65c51b5..0000000 --- a/examples/lud/lud_kernel-host-x86_64-unknown-linux-gnu.ll +++ /dev/null @@ -1,452 +0,0 @@ -; ModuleID = 'lud_kernel-host-x86_64-unknown-linux-gnu.bc' -source_filename = "cuda/lud_kernel.cu" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -%struct.dim3 = type { i32, i32, i32 } -%struct.CUstream_st = type opaque - -$_ZN4dim3C2Ejjj = comdat any - -@0 = private unnamed_addr constant [21 x i8] c"_Z12lud_diagonalPfii\00", align 1 -@1 = private unnamed_addr constant [22 x i8] c"_Z13lud_perimeterPfii\00", align 1 -@2 = private unnamed_addr constant [21 x i8] c"_Z12lud_internalPfii\00", align 1 -@3 = private constant [51057 x i8] c"P\EDU\BA\01\00\10\00`\C7\00\00\00\00\00\00\02\00\01\01@\00\00\00\E8\AE\00\00\00\00\00\00\00\00\00\00\00\00\00\00\07\00\01\002\00\00\00\00\00\00\00\00\00\00\00\11\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\7FELF\02\01\013\07\00\00\00\00\00\00\00\02\00\BE\00e\00\00\00\00\00\00\00\00\00\00\00@\AE\00\00\00\00\00\00\C0\A9\00\00\00\00\00\002\052\00@\008\00\03\00@\00\12\00\01\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00.text._Z12lud_internalPfii\00.nv.info._Z12lud_internalPfii\00.nv.shared._Z12lud_internalPfii\00.nv.global\00.nv.constant0._Z12lud_internalPfii\00.text._Z13lud_perimeterPfii\00.nv.info._Z13lud_perimeterPfii\00.nv.shared._Z13lud_perimeterPfii\00.nv.constant0._Z13lud_perimeterPfii\00.text._Z12lud_diagonalPfii\00.nv.info._Z12lud_diagonalPfii\00.nv.shared._Z12lud_diagonalPfii\00.nv.constant0._Z12lud_diagonalPfii\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00_Z12lud_internalPfii\00.text._Z12lud_internalPfii\00.nv.info._Z12lud_internalPfii\00.nv.shared._Z12lud_internalPfii\00.nv.global\00threadIdx\00blockIdx\00$___ZZ12lud_internalPfiiE8peri_row__905\00$___ZZ12lud_internalPfiiE8peri_col__907\00.nv.constant0._Z12lud_internalPfii\00_param\00_Z13lud_perimeterPfii\00.text._Z13lud_perimeterPfii\00.nv.info._Z13lud_perimeterPfii\00.nv.shared._Z13lud_perimeterPfii\00$_Z13lud_perimeterPfii$__cuda_sm3x_div_rn_noftz_f32\00$_Z13lud_perimeterPfii$__cuda_sm3x_div_rn_noftz_f32_slowpath\00$___ZZ13lud_perimeterPfiiE3dia__430\00$___ZZ13lud_perimeterPfiiE8peri_row__432\00$___ZZ13lud_perimeterPfiiE8peri_col__434\00.nv.constant0._Z13lud_perimeterPfii\00_Z12lud_diagonalPfii\00.text._Z12lud_diagonalPfii\00.nv.info._Z12lud_diagonalPfii\00.nv.shared._Z12lud_diagonalPfii\00$_Z12lud_diagonalPfii$__cuda_sm3x_div_rn_noftz_f32\00$_Z12lud_diagonalPfii$__cuda_sm3x_div_rn_noftz_f32_slowpath\00$___ZZ12lud_diagonalPfiiE6shadow__186\00.nv.constant0._Z12lud_diagonalPfii\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00G\00\00\00\03\00\0B\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\80\00\00\00\03\00\0E\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\A0\00\00\00\03\00\0F\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\AB\00\00\00\01\00\0F\00\00\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\B5\00\00\00\01\00\0F\00\01\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\0E\01\00\00\03\00\08\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00N\01\00\00\03\00\0C\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\89\01\00\00\03\00\10\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\AA\01\00\00\22\00\0C\00PN\00\00\00\00\00\00`\01\00\00\00\00\00\00\DE\01\00\00\22\00\0C\00\B0O\00\00\00\00\00\00P\08\00\00\00\00\00\00\91\02\00\00\03\00\09\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\CA\02\00\00\03\00\0D\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\03\03\00\00\03\00\11\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00#\03\00\00\22\00\0D\00\E0$\00\00\00\00\00\00`\01\00\00\00\00\00\00V\03\00\00\22\00\0D\00@&\00\00\00\00\00\00@\08\00\00\00\00\00\00\B8\03\00\00\03\00\0A\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\002\00\00\00\12\10\0B\00\00\00\00\00\00\00\00\00@\15\00\00\00\00\00\008\01\00\00\12\10\0C\00\00\00\00\00\00\00\00\00\00X\00\00\00\00\00\00\B5\02\00\00\12\10\0D\00\00\00\00\00\00\00\00\00\80.\00\00\00\00\00\00\04/\08\00\13\00\00\00\11\00\00\00\04#\08\00\0F\00\00\00\00\00\00\00\04\12\08\00\0F\00\00\00\00\00\00\00\04\11\08\00\0F\00\00\00\00\00\00\00\04#\08\00\0E\00\00\00\00\00\00\00\04\12\08\00\0E\00\00\00\00\00\00\00\04\11\08\00\0E\00\00\00\00\00\00\00\04#\08\00\13\00\00\00\00\00\00\00\04\12\08\00\13\00\00\00 \00\00\00\04\11\08\00\13\00\00\00 \00\00\00\04/\08\00\12\00\00\00\11\00\00\00\04#\08\00\0A\00\00\00\00\00\00\00\04\12\08\00\0A\00\00\00\00\00\00\00\04\11\08\00\0A\00\00\00\00\00\00\00\04#\08\00\09\00\00\00\00\00\00\00\04\12\08\00\09\00\00\00\00\00\00\00\04\11\08\00\09\00\00\00\00\00\00\00\04#\08\00\12\00\00\00\00\00\00\00\04\12\08\00\12\00\00\00 \00\00\00\04\11\08\00\12\00\00\00 \00\00\00\04/\08\00\11\00\00\00\0E\00\00\00\04#\08\00\11\00\00\00\00\00\00\00\04\12\08\00\11\00\00\00 \00\00\00\04\11\08\00\11\00\00\00 \00\00\00\010\00\00\01*\00\00\04\0A\08\00\06\00\00\00@\01\10\00\03\19\10\00\04\17\0C\00\00\00\00\00\02\00\0C\00\00\F0\11\00\04\17\0C\00\00\00\00\00\01\00\08\00\00\F0\11\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0!\00\03\1B\FF\00\04\1D\08\00\08\03\00\00\F8\03\00\00\04\1C\04\000\15\00\00\04\1E\04\00 \00\00\00\010\00\00\01*\00\00\04\0A\08\00\0B\00\00\00@\01\10\00\03\19\10\00\04\17\0C\00\00\00\00\00\02\00\0C\00\00\F0\11\00\04\17\0C\00\00\00\00\00\01\00\08\00\00\F0\11\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0!\00\03\1B\FF\00\04\1D\10\00\B8\0E\00\00(\1D\00\00XB\00\00(F\00\00\04\1C\04\00HN\00\00\04\1E\04\00\90\00\00\00\010\00\00\01*\00\00\04\0A\08\00\10\00\00\00@\01\10\00\03\19\10\00\04\17\0C\00\00\00\00\00\02\00\0C\00\00\F0\11\00\04\17\0C\00\00\00\00\00\01\00\08\00\00\F0\11\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0!\00\03\1B\FF\00\04\1C\04\00\D8$\00\00\04\1E\04\00\90\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\EF\1F\E0\FD\03!\00\D3rd<3>;\0A\0Amov.u2\00\1B,e\00b;\0Acvta\8D\00\04%\00\13,\\\00\22ld\C8\00\02\18\00nrd2, [\CE\00\1E])\00\1F1)\00\01a0];\0Ast#\00\81[%SP+0],,\00\0A\16\00\128\16\00\222;\B6\00\01\D8\00a1, 999(\00\02g\00\00\EF\00\18[\9E\01\03M\00\AF1;\0Aret;\0A\0A}\D6\01\1A\FE\02FuncGetAttributes\E1\01\0D#\00\0E\EC\01\0F+\00\06\0F\F7\01\1B\1F1\F7\01Q\1F1\F7\01!\0E\D9\00\0F\02\02\0F\0E8\01\0F\0D\02\8DhDevice\B4\00\0E\0E\02\0E$\00\0F\0F\02\00/32,\00\0B\1F1,\00\18\1F2<\02\13\1F2<\02\1F\1D4<\02\1F2<\02\0C\1F2<\02\13\01_\00\04;\02\0F\D9\00\07\1D]4\00\1F14\00\06\0Fp\02\10\0E\9A\01\0Fq\02\12(32q\02\0B\15\00!12\16\00\09\86\02\1F3\86\02\15\1F3\86\02#2Get\CB\00\0E}\02\05\1B\00\04\DA\00\0F\1C\02\13?3[8W\04.\0F\1B\02\0D\1F3W\04\19\04\B3\01\0D\D0\00\0F\AA\01\06\0F\05\04W\F0\04OccupancyMaxActiveB5\08\FE\03sPerMultiprocessor\9F\01\0F;\00\16\0EB\06\0FC\00%\0EJ\04\0FC\00\1E\1F2\86\00/\1F3\88\02\13O4[32\89\02\1C\1D3\89\02\1F4\89\02\0C\1F4\89\02\19\133\89\02\0F\F1\00\1E\0F\BC\04\00\0FK\00$/2]w\07\00\0FL\00$\0F\1F\05\01\0F\98\00%\0F\A7\07\1D\097\05\186M\05\04,\00\2224-\00\183\CF\03\1F2\CF\03\15\1F2\CF\03L\9FWithFlags\D8\03(\05D\00\0E\E1\03\0FL\00'\0F\EA\030\0CL\00\1F2\98\008\1F3\98\008\1F4H\04\13O5[40\EC\08.\0FH\04\0D\1F5\EC\08\1C\0F\F9\00+\1F]\9C\040\0D\9A\01\0F\A5\040\0D:\02\0F\AE\041\0D\DB\02\0F\B7\041\0D|\03\0F\C0\04I\08-\00\1F3$\0A6\F6\14visible .entry _Z12lud_diagonalPfii\9D\04\00\97\00\0F\22\00\01\0E{\04\00\93\00\0F*\00\08\1F1*\00\0F\0F\8B\0C\1B\1F6\E7\07\18\95pred %p<8\A0\03\10fA\01[f<16>\C3\03-56\C4\03 67\C5\03P\09.shaH\00\03\93\00\124\93\00\1FZ\C9\00\00\C0E6shadow[102t\03\0F\00\04\08\1F6\00\04\1C\0F8\01\01\0F\EA\0C\0B\0F\94\01\01\0F\12\03\0C\0F\F1\01\09\13]\B1\00#to\FB\12\07\A0\03\02\F9\02\01\F8\0F\0A\1C\00\183\FF\02\0B+\03\1F3!\0D\1A&ld\FC\02\04\1B\00\07\16\00$4,F\00\A1;\0Amul.lo.s\19\00\225,4\00\83%r4;\0Aadd\17\00%6,\1C\00\0Ar\00\03\86\03\186o\03O7, 0\C2\03\02\F2\047;\0Abra.uni LBB6_1;\0A\08\00\16:\9A\00%8,3\00\92;\0Asetp.gt\85\002p1, \00\C215;\0A@%p1 braI\00\1B4Y\00\132Y\00\122Y\00\02]\01455,Q\01\08\0A\01\04q\00\06\82\02\02\18\00\989, %tid.x\08\01350,4\00\00\22\00\01\C4\01\00_\00\034\00\22d5(\01\00\FE\00$hl\18\03457,\1C\00\132L\00\03\19\00$8,\98\00\01'\00\01\96\00\02q\03\111\AC\00\00#\00\02\AC\00\13s`\00\199\1E\01\06d\00460, \00\176H\03_rd61,|\03\0B\03\88\02\02\B4\03\056\00\02\A5\02)61\B0\00863,\1D\00\1D0\FF\00$64\18\01\08\9B\00$5,\1C\00\0A\FF\00866,V\00\1153\02\11f3\02\00\1D\00!],\08\01\07\AB\01'51\B6\02\06\16\00\182\C1\01\07\AB\01#3,\1E\00\00:\00\0F\B6\02\02+538\02\1338\02\173\91\02(54t\01\07`\00#5,\1E\00\1F1\ED\02\02/55\EE\02\04\D84:\0Abar.sync 0s\02\1F98\03\05\1B9J\00\135J\00\175\A7\00/109\03\07#2,!\00!14:\03\162:\03+20\\\00\136\\\00(6:\17\0C\166\0D\03\05y\01-27r\00\22le\1C\004p4,6\001r27t\00\164t\00+12t\00\137t\00\197t\00\1D8\0F\01\132|\0C\1B8@\00\138@\00\178\10\01\142+\03\192\EF\03\1F3'\01\00\13e'\01#5,8\00\00'\00\01\B5\00\175\B5\00\1B1u\00\139u\00\189\B5\00&45)\01\0B\1D\03\114\BF\04)45\9F\03/42\9F\03!\1243\03)42i\03444,h\00\196i\03\01\95\00\066\00\184R\04(46'\01\08N\00\02\B6\04\1D4\B6\04\02\FD\04\05U\00\09\B6\04\120\B6\04+48L\00\159L\00\0A\9A\00)50\9A\00\199\EC\04\1F1\EC\04\04\02\D3\03\01 \00\0BP\05'3,U\00(52\9A\00\131P\05*3]3\00\184\CD\00\0A3\00\1323\00\00W\044neg\17\00!3,\EA\00\84;\0Afma.rn\17\00&4,\1D\00\111\06\00\192\D1\04\2254\D1\04\0C\AB\06$10\05\07\170\BE\02\0A\97\01\06u\04\02\96\01\00\1E\00\1F1\1B\03\02/47\1B\03\04711:\\\01/16\\\01\04417, \00\0AH\06/18\A9\02!\121\F9\01)18u\01820,\1D\00\197\82\00%21\82\00\0Ax\02722,=\00'21\AB\01\03\AA\01)22\C2\07/31\8C\03\06\112*\02*31z\00$4,\1C\00\0A\A6\02(25\B0\00*24\1D\00'6,$\00\09\97\00\133\97\00\00d\016div-\02\224,\1E\00*%f$\02\122\F5\06\0D#\02\04\CF\08/12;\06\05/32\A1\05\00/33\A1\05\07$6,6\001r33\EC\04\176\EC\04\0Ca\05\141\19\07(13\EE\04/34\A3\05\05\1D3\C4\00\144B\00\174\E7\02/35\A5\05\03\1F6s\07\03337,\1E\00\1C1\BC\05#7,O\00\00&\00\01\D0\00\177\D0\00\0Dy\0A\04B\07\181C\07/40w\00\03\03\BA\05\00@\07\02J\024s64J\03\122\91\03\191-\03\1F2-\03\22\132`\02\198\96\02\00\B1\06\03h\00\0A\96\02\01\DF\02\056\00(30<\05/32\D6\05\04\00\13\02\03 \00\0A^\03\00\CB\01\06U\00'33\C7\02\03\8B\0A+34K\00\155K\00\0B\99\00\196\99\00\195V\0B\1F2\94\03\06\02\DD\01\0B\D4\06438,\1C\00\0B\B0\00'9,l\00'38\B0\00\136\B0\00\1A9\EB\05)40\E2\00\082\00\137\B7\06\190\EA\05\00{\00+f5\E8\05$9,\1A\00\00z\00)f7\C0\03\114\FD\02\1Cf\AF\09\141T\09(16n\02\1F3\E3\05\04\02E\0B\1F3\E3\05\06\0F[\03\07+7:\1A\00\05\FE\05\1A89\04\0A@\00\04\CA\08(19\9B\03\1F8\84\03\04\02 \09\1F8\F7\0A\05\1F3\AD\0A\05\182\E0\06(11\97\0E\06^\00\00u\07\02\1E\00'1;.\00\1F3\AF\0E\05\01w\07\046\00\193J\00&5, \00\0F\FF\0B\03\191\9B\02/16\D3\00\05,16_\0E\04\CB\0B\182\BA\0E/17\82\0B\07#3,!\00\03\BC\0E\173\82\0B\0D\BD\0E\05\BE\0E\182\B4\07\1F4\0F\09\05\02\C2\08\0D\F9\0D\0F\83\04!\02\FE\07\0A\CF\03&8,\1A\00\0A1\01\1F8\CC\03\06\01$\02\1A1\C9\04\00\1D\0A\02\1B\00\0A\CA\03\01.\09\04g\00(10\82\09\02\97\03\131\7F\11\04T\08\1F2\C7\0F\01/19\06\0E\03\02/\08\151\91\00\0B\A9\00\01\B1\09:r20\AA\00$4,\1C\00\0B\AB\00$5,\82\00\01'\00\09\FD\03\2215\E1\09\08\22\0D\0F\B1\0E\02/22\AB\00\04#3,\1E\00\00:\00\0F\B2\02\02\1C2\B1\0E$23,\02\08\B3\0E\1F2\B3\0E\04\02\A5\08\1F2\B3\0E\06/25\E9\02\06/4:\1A\15\0A\103g\02\9Eperimeter\1B\15\0E#\00\0F\1C\15\05\0F+\00\0A\1F1+\00\0C\0F\1E\15\1F\1F7\1E\15 ,14\1F\15,20\1F\15=135 \15?122!\15\0B\0E\CD\00TE3dia\1F\15\0F:\00\1B\108\0F\00O_row?\00)?col\9D\15\10\1F7\9D\15\1F\0E\B8\01\0F\9E\15\0E\0E\16\02\0F\9F\15\0F\0Eu\02\0F\A0\15t\09\9D\18\04F\05\044\06\16u\EF\14\1E3\EF\14:7_1\C3\07\137[\1297_1\9E\0B\07\C2\14\09L\04\158\A9\07\0E\FB\0A\0B'\16\1F1y\07\05$42\14\0B\02\FE\0A\07\C0\08'3, \00\0F\C7\04\02\194t\1E/44\F8\12\05\0B\EB\08\137\D8\1587_2H\09\1F5w\07\07#4,!\00\0B\84\12;7_5Z\00\133Z\00(3:k\06/10l\06\02(27\C2\05\07\18\00\05L\16\1B8\D6\08$9,8\00\01'\00\0DG\0C\121\06\07*12<\15\141k\06.117\07%3,\A4\00\02+\00\08;\07\149<\07\193A\16/11G\08\05\141\DD\06\1D1K\08\101\1B\09\0F\A0\04\08\0F\FB\0F\03\04K\10+16\B8\00*8, \00\1F5\0E\01\00\030\01\0BB\08\049\10\1E1E\08\047\10\04^\00*20\9D\07\2221\9E\07\199W\0A\1F0\A0\07\01?131\A1\07\03\1210\0F\2213\AC\01\1F3\DD\02\03;132\\\02\134\\\02\08\FA\0E?133\A4\07\03\121\8A\0D\01 \00\0F\8F\0A\04\1F3\17\03\05\1E5B\12\0D\D2\03\1F7\D2\03\06\148[\12\02G\12\07\D2\03'9, \00\0F\D2\03\03\189\D2\03/50\D2\03\05\1C5\BA\04\136\BB\00.6:\E8\17\0F\D2\03\01\02u\17\151\16\05\165\D3\03\1B9[\00\137[\00\187\D3\03/98\D2\03\02/15\06\1A\03\01\FB\02C%ctaG\05#hl\E1\07\03\BF\02\01 \00\09*\1B\03\82\02\05 \00\195g\00\1F9!\04\05(0,<\00\1A9\1D\00$1,$\00.16l\0B\03,\03\1B1n\0B\03o\0B\0E+\03401,\0A\01\02)\00\088\04\1388\04*018\04/028\04\05503,\22\00\0B8\04?04,\9E\08\0E\0F=\04\03\130\8D\04+04\BC\00*6, \00\1E3=\04%07E\01\09\A8\00\04\F0\00\1E7\12\01*9,^\00\1A8=\04\2209=\04\198\ED\05\1F2=\04\02/23=\04\04\02_\14\121\E6\0B?122=\04\03\1C2\DC\03\138\C6\02\08\DC\18?125=\04\04\02\8E\14\01 \00\0F=\04\04/26\82\03\04+9:T\08\149U\08\190\E2\18\0F\17\1D\02\12,\1A\00?-16l\08\02\08\05\03\1F6\FA\0F\03\227,\1C\00\08d\01\1F8\AC\04\05$9,2\00\198\FF\00$0,\1D\00\0F\A7\04\03*10\DD\03\01o\00\0F&\01\01\1C1a\09\05b\09\09\F2\0F\1F2t\1B\0D\152\A9\04\172\BF\09\0C\E4\01$12^\00\09\B1\1E/16\AC\04\01/31}\08\02/32[\04\03\02\F2\13\02\B3\06.32.\03\147\03\16\08,\03\03+\03\0E*\03\159\9D\17\03\D7\17\0C\84\17)198\04/207\04\04%21\CE\17\0A5\04/22l\08\1F\132\98\0B\0B1\18(4,\1D\00\0F\BC\15\00\145\15\01\08\99\00\06\D0\17\0BO\00(7,V\00\1A6\BA\17\117\22\04\09y!\1F4 \04\01/35\1F\04\03'36\C1\16\1F4\BD\02\02\1C3\BA\03$137\02\08\FD\0F/37\B9\13\04#8,\1E\00\0F\B9\13\04\1F8\F4\02\06/4:\8C\13\00*12*\07\1F4)\07\04\05\88\13\0A'\07\02\A9\04\1F5\B9\13\00$7,\8E\13\186Y\05\0F\0D\04\06\03-\0A\147\E3\11\0F\FD\11\04\0F\B9\22\03\192\12\04/21\B9\08\05\1D2\12\04\145\1E\01.5:\DC\11\0F\04\14\06.22\04\14=7_1|\01\04\96\05(16\12\04\1F4\11\04\01/23\11\04\02/24\11\04\03\03\05\12\02m\12\1D4\15\03\145\09\12\08\C3!\02\FD\13\0C\12\03\147z\14\024\14\0C\B8\13\197\0A\04\1F8\09\04\04\149\FE\13\0C<\08\1F,\9A\10\0E\0Fx\0C\04\05I\14\09 \0C\17,\1D\00\1E9\07\05\05\0F\01\0F\08\14B\1F6\08\14\02\1F7\08\14\04#8,\1E\00\00:\00\0F\B6\02\03\0D0\02\04\ED\0A.17\04!\0E\0B\04\140\EA \0F\0B\04\04\1F0\ED\02\06\1F8&\08\06\1F9\9C\1C\05/52\E2\10\05\02.%\06\12\03\166\12\03\1C2\CE\0B$20\FC\0F\09\8F\08/64\E5\10\0A(64\8A\00\1F6\17\09\06,65o\00\04\FD\04\09\E3\17/66\DF\03\07\02w\08\156\CD\00\179\CD\00\0C\AF\01\152\C9\10\182\CD\00/10\F4\22\06-10D\00\04\FE\05\09\FB\15/10\F8\22\03/11\F9\22\08\02\FE\02\03\07\0B2110p\12\07\A0\04\0D5\0A$24}\00(4:\E5\03\0F\B4!\05482, \00\0A\E8\03/83\EF\07\1F\128\98\03*83\A8\18(5,\1D\00\192}\04\0F\E6\22\05\128\C4\04\1D8n 888,U\00\09\E6\22\03\D8\1F\1B8\E6\22\138\CB\04\0D\02\03\02\15\03\00\1E\00\09>\02\02\C6\02\03\1D\00\190\1A\00#4, \00\09\D5\15/75\EC\04\05\02\18\03\037\00\195K\00&7, \00\0F\83\1F\03)77\C5\00\1F8\908\06\0CT\11$45d\01\08;\0E/79\EC\04\08#1,\22\00\04\EC\04\07\A8#\1D4`\00\041\07(46\EC\04/57\82\0A\05\03\228\1D7\1A\0B\1F9I\08$\05)8\0A\8B7(1,\1D\00\09\BD\06\1F0\EC\04\08\02\0B8\1A8?)\07'8\0B\F17(4,m\00\1F3\A03\00)64\EC\04\1F6\B39\02\1F8\A0!\04\02e\12$81\94\00\0C\AD\00\013\02*82\AD\00\03\F3*\1D6\AD\00(8,\BE8\0A\E9-\2268\85\04\08\B8\07\1F8\84\04\02/84\AC\00\04#5,\1E\00\00:\00\0F\C6\02\02\1D8\7F\04\04\\\10\184\\\10\1F8~\04\04\02\DB\12\1F8|\04\06/87\FD\02\06\1F8{\04\06/9:!?\0F_inter!?\0A\08\22\00\0F!?\0B\0F*\00\00\1F1*\00\08\0F!?\22\1F8\03* \1D2!?\0D\02*\1F4\E5B\00\1F3!?\10\08\C9\00\0E\C5)\0F>\00 \0F\C4)\13\1F8a?$\0Ex\01\0Fa?\0D\0E\D4\01\0Fa?\0E\0E1\02\0Fa?\84\0A(!\02:\07\18y:\07\04*!\0F^?\07\0E\13!/16\E6\06\03\0E*!\0C\85\00\1F9\94\1D\04\04(!\0B\BA$\02\93\08\0EW!\06\951\0FL\08\03\04\8D\09\0E\A5\1C\0F7\1E\13\12t&\01\0D\BD1\02\97\18\184G\00\1F6\06\1E\06\177Q\1E\0E9\1E\0E\95)\0C7\1E\07\06\1E\1F0S\09\01&21M\1E\1E2\FC>\155\B7/\0F8\1D1\0B]\00\148\09\01\08V\18\1E,4\04\0F[%\12\04\03\1D\09h*\07\86\1D\0F7\1D\00\04x)\0F@1W\0E\02#\0FA1\1B\096\02\1F2\EFB\06\0C\0D\1F\0E\B3\1D\0B\A7$\03\97\1D\145\9D\1D\0F\B7\1D\04\0E \02$17\B9\1D\0Fi#8\088#.0,\06\06\0F\11\0A\10\05\89#\0Fm;\02\04V+\0A\1D\00\04o#\0F\C7\01\01$23\0A#\0F\98\11\03/29\D7?\06\1F9z!\03\0A\0A\1E\138\ED.\1986D/307D\0C.30I/\1B88D\138I+)8_\96\1D&40\A3\04\0CH<\148\E6.\08\8A\01\1F9\8A\01#/30\AF\14\02\181/\15\0F\E5\14\00\04\D78\09\9A\14\0F\D3\16\05\04\E4\14\0D\9B\03)35\FD\14\1F4P9\00\1F5\E5\14\03\1E3\E5\14\0F\9A\04\0E\0F\E4\14*\07\9A\01\0F\E2=\07\06\9A0\08i\18\03\FF\17\1D0\B4\18\182,\15/41\1C:\00,2]\B4\17\04<\17\0E :\1F6\A2\17\02\05\EA\02\1Cf\D2\02\133w\02\08\C9\1F/42\8D<\04\05_1\0F\D7\0B\03/43/\03\04+4:\D0\16\08\EA\1A\062'\0Fq(\0D\0Ft>\00\1Fyp(\09\0E\10'\0D\85\05\03\F6&\143\FC&\0E\08>/28\08>\01\027\00\09\12\02\1F3\0E\11\02\03\B4:\147\E7&\0C\C6\03\145\103\0F\08(#\0CX\17\102\C2\07?subX\17\0F\037(\B05;\0Aret;\0A\0A}\0A\00\00\00\00\00", section ".nv_fatbin", align 8 -@__cuda_fatbin_wrapper = internal constant { i32, i32, i8*, i8* } { i32 1180844977, i32 1, i8* getelementptr inbounds ([51057 x i8], [51057 x i8]* @3, i64 0, i64 0), i8* null }, section ".nvFatBinSegment", align 8 -@__cuda_gpubin_handle = internal global i8** null, align 8 -@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* bitcast (void (i8*)* @__cuda_module_ctor to void ()*), i8* null }] - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z12lud_diagonalPfii(float* %m, i32 %matrix_dim, i32 %offset) #0 { -entry: - %m.addr = alloca float*, align 8 - %matrix_dim.addr = alloca i32, align 4 - %offset.addr = alloca i32, align 4 - %grid_dim = alloca %struct.dim3, align 8 - %block_dim = alloca %struct.dim3, align 8 - %shmem_size = alloca i64, align 8 - %stream = alloca i8*, align 8 - %grid_dim.coerce = alloca { i64, i32 }, align 8 - %block_dim.coerce = alloca { i64, i32 }, align 8 - store float* %m, float** %m.addr, align 8 - store i32 %matrix_dim, i32* %matrix_dim.addr, align 4 - store i32 %offset, i32* %offset.addr, align 4 - %kernel_args = alloca i8*, i64 3, align 16 - %0 = bitcast float** %m.addr to i8* - %1 = getelementptr i8*, i8** %kernel_args, i32 0 - store i8* %0, i8** %1 - %2 = bitcast i32* %matrix_dim.addr to i8* - %3 = getelementptr i8*, i8** %kernel_args, i32 1 - store i8* %2, i8** %3 - %4 = bitcast i32* %offset.addr to i8* - %5 = getelementptr i8*, i8** %kernel_args, i32 2 - store i8* %4, i8** %5 - %6 = call i32 @__cudaPopCallConfiguration(%struct.dim3* %grid_dim, %struct.dim3* %block_dim, i64* %shmem_size, i8** %stream) - %7 = load i64, i64* %shmem_size, align 8 - %8 = load i8*, i8** %stream, align 8 - %9 = bitcast { i64, i32 }* %grid_dim.coerce to i8* - %10 = bitcast %struct.dim3* %grid_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %9, i8* align 8 %10, i64 12, i1 false) - %11 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 0 - %12 = load i64, i64* %11, align 8 - %13 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 1 - %14 = load i32, i32* %13, align 8 - %15 = bitcast { i64, i32 }* %block_dim.coerce to i8* - %16 = bitcast %struct.dim3* %block_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %15, i8* align 8 %16, i64 12, i1 false) - %17 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 0 - %18 = load i64, i64* %17, align 8 - %19 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 1 - %20 = load i32, i32* %19, align 8 - %21 = bitcast i8* %8 to %struct.CUstream_st* - %call = call i32 @cudaLaunchKernel(i8* bitcast (void (float*, i32, i32)* @_Z12lud_diagonalPfii to i8*), i64 %12, i32 %14, i64 %18, i32 %20, i8** %kernel_args, i64 %7, %struct.CUstream_st* %21) - br label %setup.end - -setup.end: ; preds = %entry - ret void -} - -declare dso_local i32 @__cudaPopCallConfiguration(%struct.dim3*, %struct.dim3*, i64*, i8**) - -declare dso_local i32 @cudaLaunchKernel(i8*, i64, i32, i64, i32, i8**, i64, %struct.CUstream_st*) - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1 - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z13lud_perimeterPfii(float* %m, i32 %matrix_dim, i32 %offset) #0 { -entry: - %m.addr = alloca float*, align 8 - %matrix_dim.addr = alloca i32, align 4 - %offset.addr = alloca i32, align 4 - %grid_dim = alloca %struct.dim3, align 8 - %block_dim = alloca %struct.dim3, align 8 - %shmem_size = alloca i64, align 8 - %stream = alloca i8*, align 8 - %grid_dim.coerce = alloca { i64, i32 }, align 8 - %block_dim.coerce = alloca { i64, i32 }, align 8 - store float* %m, float** %m.addr, align 8 - store i32 %matrix_dim, i32* %matrix_dim.addr, align 4 - store i32 %offset, i32* %offset.addr, align 4 - %kernel_args = alloca i8*, i64 3, align 16 - %0 = bitcast float** %m.addr to i8* - %1 = getelementptr i8*, i8** %kernel_args, i32 0 - store i8* %0, i8** %1 - %2 = bitcast i32* %matrix_dim.addr to i8* - %3 = getelementptr i8*, i8** %kernel_args, i32 1 - store i8* %2, i8** %3 - %4 = bitcast i32* %offset.addr to i8* - %5 = getelementptr i8*, i8** %kernel_args, i32 2 - store i8* %4, i8** %5 - %6 = call i32 @__cudaPopCallConfiguration(%struct.dim3* %grid_dim, %struct.dim3* %block_dim, i64* %shmem_size, i8** %stream) - %7 = load i64, i64* %shmem_size, align 8 - %8 = load i8*, i8** %stream, align 8 - %9 = bitcast { i64, i32 }* %grid_dim.coerce to i8* - %10 = bitcast %struct.dim3* %grid_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %9, i8* align 8 %10, i64 12, i1 false) - %11 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 0 - %12 = load i64, i64* %11, align 8 - %13 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 1 - %14 = load i32, i32* %13, align 8 - %15 = bitcast { i64, i32 }* %block_dim.coerce to i8* - %16 = bitcast %struct.dim3* %block_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %15, i8* align 8 %16, i64 12, i1 false) - %17 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 0 - %18 = load i64, i64* %17, align 8 - %19 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 1 - %20 = load i32, i32* %19, align 8 - %21 = bitcast i8* %8 to %struct.CUstream_st* - %call = call i32 @cudaLaunchKernel(i8* bitcast (void (float*, i32, i32)* @_Z13lud_perimeterPfii to i8*), i64 %12, i32 %14, i64 %18, i32 %20, i8** %kernel_args, i64 %7, %struct.CUstream_st* %21) - br label %setup.end - -setup.end: ; preds = %entry - ret void -} - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z12lud_internalPfii(float* %m, i32 %matrix_dim, i32 %offset) #0 { -entry: - %m.addr = alloca float*, align 8 - %matrix_dim.addr = alloca i32, align 4 - %offset.addr = alloca i32, align 4 - %grid_dim = alloca %struct.dim3, align 8 - %block_dim = alloca %struct.dim3, align 8 - %shmem_size = alloca i64, align 8 - %stream = alloca i8*, align 8 - %grid_dim.coerce = alloca { i64, i32 }, align 8 - %block_dim.coerce = alloca { i64, i32 }, align 8 - store float* %m, float** %m.addr, align 8 - store i32 %matrix_dim, i32* %matrix_dim.addr, align 4 - store i32 %offset, i32* %offset.addr, align 4 - %kernel_args = alloca i8*, i64 3, align 16 - %0 = bitcast float** %m.addr to i8* - %1 = getelementptr i8*, i8** %kernel_args, i32 0 - store i8* %0, i8** %1 - %2 = bitcast i32* %matrix_dim.addr to i8* - %3 = getelementptr i8*, i8** %kernel_args, i32 1 - store i8* %2, i8** %3 - %4 = bitcast i32* %offset.addr to i8* - %5 = getelementptr i8*, i8** %kernel_args, i32 2 - store i8* %4, i8** %5 - %6 = call i32 @__cudaPopCallConfiguration(%struct.dim3* %grid_dim, %struct.dim3* %block_dim, i64* %shmem_size, i8** %stream) - %7 = load i64, i64* %shmem_size, align 8 - %8 = load i8*, i8** %stream, align 8 - %9 = bitcast { i64, i32 }* %grid_dim.coerce to i8* - %10 = bitcast %struct.dim3* %grid_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %9, i8* align 8 %10, i64 12, i1 false) - %11 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 0 - %12 = load i64, i64* %11, align 8 - %13 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 1 - %14 = load i32, i32* %13, align 8 - %15 = bitcast { i64, i32 }* %block_dim.coerce to i8* - %16 = bitcast %struct.dim3* %block_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %15, i8* align 8 %16, i64 12, i1 false) - %17 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 0 - %18 = load i64, i64* %17, align 8 - %19 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 1 - %20 = load i32, i32* %19, align 8 - %21 = bitcast i8* %8 to %struct.CUstream_st* - %call = call i32 @cudaLaunchKernel(i8* bitcast (void (float*, i32, i32)* @_Z12lud_internalPfii to i8*), i64 %12, i32 %14, i64 %18, i32 %20, i8** %kernel_args, i64 %7, %struct.CUstream_st* %21) - br label %setup.end - -setup.end: ; preds = %entry - ret void -} - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z8lud_cudaPfi(float* %m, i32 %matrix_dim) #0 { -entry: - %m.addr = alloca float*, align 8 - %matrix_dim.addr = alloca i32, align 4 - %i = alloca i32, align 4 - %dimBlock = alloca %struct.dim3, align 4 - %m_debug = alloca float*, align 8 - %agg.tmp = alloca %struct.dim3, align 4 - %agg.tmp2 = alloca %struct.dim3, align 4 - %agg.tmp.coerce = alloca { i64, i32 }, align 4 - %agg.tmp2.coerce = alloca { i64, i32 }, align 4 - %agg.tmp5 = alloca %struct.dim3, align 4 - %agg.tmp8 = alloca %struct.dim3, align 4 - %agg.tmp5.coerce = alloca { i64, i32 }, align 4 - %agg.tmp8.coerce = alloca { i64, i32 }, align 4 - %dimGrid = alloca %struct.dim3, align 4 - %agg.tmp20 = alloca %struct.dim3, align 4 - %agg.tmp21 = alloca %struct.dim3, align 4 - %agg.tmp20.coerce = alloca { i64, i32 }, align 4 - %agg.tmp21.coerce = alloca { i64, i32 }, align 4 - %agg.tmp27 = alloca %struct.dim3, align 4 - %agg.tmp28 = alloca %struct.dim3, align 4 - %agg.tmp27.coerce = alloca { i64, i32 }, align 4 - %agg.tmp28.coerce = alloca { i64, i32 }, align 4 - store float* %m, float** %m.addr, align 8 - store i32 %matrix_dim, i32* %matrix_dim.addr, align 4 - store i32 0, i32* %i, align 4 - call void @_ZN4dim3C2Ejjj(%struct.dim3* %dimBlock, i32 16, i32 16, i32 1) - %0 = load i32, i32* %matrix_dim.addr, align 4 - %1 = load i32, i32* %matrix_dim.addr, align 4 - %mul = mul nsw i32 %0, %1 - %conv = sext i32 %mul to i64 - %mul1 = mul i64 %conv, 4 - %call = call noalias i8* @malloc(i64 %mul1) #5 - %2 = bitcast i8* %call to float* - store float* %2, float** %m_debug, align 8 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %3 = load i32, i32* %i, align 4 - %4 = load i32, i32* %matrix_dim.addr, align 4 - %sub = sub nsw i32 %4, 16 - %cmp = icmp slt i32 %3, %sub - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - call void @_ZN4dim3C2Ejjj(%struct.dim3* %agg.tmp, i32 1, i32 1, i32 1) - call void @_ZN4dim3C2Ejjj(%struct.dim3* %agg.tmp2, i32 16, i32 1, i32 1) - %5 = bitcast { i64, i32 }* %agg.tmp.coerce to i8* - %6 = bitcast %struct.dim3* %agg.tmp to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %5, i8* align 4 %6, i64 12, i1 false) - %7 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 0 - %8 = load i64, i64* %7, align 4 - %9 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 1 - %10 = load i32, i32* %9, align 4 - %11 = bitcast { i64, i32 }* %agg.tmp2.coerce to i8* - %12 = bitcast %struct.dim3* %agg.tmp2 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %11, i8* align 4 %12, i64 12, i1 false) - %13 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp2.coerce, i32 0, i32 0 - %14 = load i64, i64* %13, align 4 - %15 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp2.coerce, i32 0, i32 1 - %16 = load i32, i32* %15, align 4 - %call3 = call i32 @__cudaPushCallConfiguration(i64 %8, i32 %10, i64 %14, i32 %16, i64 0, i8* null) - %tobool = icmp ne i32 %call3, 0 - br i1 %tobool, label %kcall.end, label %kcall.configok - -kcall.configok: ; preds = %for.body - %17 = load float*, float** %m.addr, align 8 - %18 = load i32, i32* %matrix_dim.addr, align 4 - %19 = load i32, i32* %i, align 4 - call void @_Z12lud_diagonalPfii(float* %17, i32 %18, i32 %19) - br label %kcall.end - -kcall.end: ; preds = %kcall.configok, %for.body - %call4 = call i32 @cudaDeviceSynchronize() - %20 = load i32, i32* %matrix_dim.addr, align 4 - %21 = load i32, i32* %i, align 4 - %sub6 = sub nsw i32 %20, %21 - %div = sdiv i32 %sub6, 16 - %sub7 = sub nsw i32 %div, 1 - call void @_ZN4dim3C2Ejjj(%struct.dim3* %agg.tmp5, i32 %sub7, i32 1, i32 1) - call void @_ZN4dim3C2Ejjj(%struct.dim3* %agg.tmp8, i32 32, i32 1, i32 1) - %22 = bitcast { i64, i32 }* %agg.tmp5.coerce to i8* - %23 = bitcast %struct.dim3* %agg.tmp5 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %22, i8* align 4 %23, i64 12, i1 false) - %24 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp5.coerce, i32 0, i32 0 - %25 = load i64, i64* %24, align 4 - %26 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp5.coerce, i32 0, i32 1 - %27 = load i32, i32* %26, align 4 - %28 = bitcast { i64, i32 }* %agg.tmp8.coerce to i8* - %29 = bitcast %struct.dim3* %agg.tmp8 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %28, i8* align 4 %29, i64 12, i1 false) - %30 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp8.coerce, i32 0, i32 0 - %31 = load i64, i64* %30, align 4 - %32 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp8.coerce, i32 0, i32 1 - %33 = load i32, i32* %32, align 4 - %call9 = call i32 @__cudaPushCallConfiguration(i64 %25, i32 %27, i64 %31, i32 %33, i64 0, i8* null) - %tobool10 = icmp ne i32 %call9, 0 - br i1 %tobool10, label %kcall.end12, label %kcall.configok11 - -kcall.configok11: ; preds = %kcall.end - %34 = load float*, float** %m.addr, align 8 - %35 = load i32, i32* %matrix_dim.addr, align 4 - %36 = load i32, i32* %i, align 4 - call void @_Z13lud_perimeterPfii(float* %34, i32 %35, i32 %36) - br label %kcall.end12 - -kcall.end12: ; preds = %kcall.configok11, %kcall.end - %call13 = call i32 @cudaDeviceSynchronize() - %37 = load i32, i32* %matrix_dim.addr, align 4 - %38 = load i32, i32* %i, align 4 - %sub14 = sub nsw i32 %37, %38 - %div15 = sdiv i32 %sub14, 16 - %sub16 = sub nsw i32 %div15, 1 - %39 = load i32, i32* %matrix_dim.addr, align 4 - %40 = load i32, i32* %i, align 4 - %sub17 = sub nsw i32 %39, %40 - %div18 = sdiv i32 %sub17, 16 - %sub19 = sub nsw i32 %div18, 1 - call void @_ZN4dim3C2Ejjj(%struct.dim3* %dimGrid, i32 %sub16, i32 %sub19, i32 1) - %41 = bitcast %struct.dim3* %agg.tmp20 to i8* - %42 = bitcast %struct.dim3* %dimGrid to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %41, i8* align 4 %42, i64 12, i1 false) - %43 = bitcast %struct.dim3* %agg.tmp21 to i8* - %44 = bitcast %struct.dim3* %dimBlock to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %43, i8* align 4 %44, i64 12, i1 false) - %45 = bitcast { i64, i32 }* %agg.tmp20.coerce to i8* - %46 = bitcast %struct.dim3* %agg.tmp20 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %45, i8* align 4 %46, i64 12, i1 false) - %47 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp20.coerce, i32 0, i32 0 - %48 = load i64, i64* %47, align 4 - %49 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp20.coerce, i32 0, i32 1 - %50 = load i32, i32* %49, align 4 - %51 = bitcast { i64, i32 }* %agg.tmp21.coerce to i8* - %52 = bitcast %struct.dim3* %agg.tmp21 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %51, i8* align 4 %52, i64 12, i1 false) - %53 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp21.coerce, i32 0, i32 0 - %54 = load i64, i64* %53, align 4 - %55 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp21.coerce, i32 0, i32 1 - %56 = load i32, i32* %55, align 4 - %call22 = call i32 @__cudaPushCallConfiguration(i64 %48, i32 %50, i64 %54, i32 %56, i64 0, i8* null) - %tobool23 = icmp ne i32 %call22, 0 - br i1 %tobool23, label %kcall.end25, label %kcall.configok24 - -kcall.configok24: ; preds = %kcall.end12 - %57 = load float*, float** %m.addr, align 8 - %58 = load i32, i32* %matrix_dim.addr, align 4 - %59 = load i32, i32* %i, align 4 - call void @_Z12lud_internalPfii(float* %57, i32 %58, i32 %59) - br label %kcall.end25 - -kcall.end25: ; preds = %kcall.configok24, %kcall.end12 - %call26 = call i32 @cudaDeviceSynchronize() - br label %for.inc - -for.inc: ; preds = %kcall.end25 - %60 = load i32, i32* %i, align 4 - %add = add nsw i32 %60, 16 - store i32 %add, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - call void @_ZN4dim3C2Ejjj(%struct.dim3* %agg.tmp27, i32 1, i32 1, i32 1) - call void @_ZN4dim3C2Ejjj(%struct.dim3* %agg.tmp28, i32 16, i32 1, i32 1) - %61 = bitcast { i64, i32 }* %agg.tmp27.coerce to i8* - %62 = bitcast %struct.dim3* %agg.tmp27 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %61, i8* align 4 %62, i64 12, i1 false) - %63 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp27.coerce, i32 0, i32 0 - %64 = load i64, i64* %63, align 4 - %65 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp27.coerce, i32 0, i32 1 - %66 = load i32, i32* %65, align 4 - %67 = bitcast { i64, i32 }* %agg.tmp28.coerce to i8* - %68 = bitcast %struct.dim3* %agg.tmp28 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %67, i8* align 4 %68, i64 12, i1 false) - %69 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp28.coerce, i32 0, i32 0 - %70 = load i64, i64* %69, align 4 - %71 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp28.coerce, i32 0, i32 1 - %72 = load i32, i32* %71, align 4 - %call29 = call i32 @__cudaPushCallConfiguration(i64 %64, i32 %66, i64 %70, i32 %72, i64 0, i8* null) - %tobool30 = icmp ne i32 %call29, 0 - br i1 %tobool30, label %kcall.end32, label %kcall.configok31 - -kcall.configok31: ; preds = %for.end - %73 = load float*, float** %m.addr, align 8 - %74 = load i32, i32* %matrix_dim.addr, align 4 - %75 = load i32, i32* %i, align 4 - call void @_Z12lud_diagonalPfii(float* %73, i32 %74, i32 %75) - br label %kcall.end32 - -kcall.end32: ; preds = %kcall.configok31, %for.end - %call33 = call i32 @cudaDeviceSynchronize() - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN4dim3C2Ejjj(%struct.dim3* %this, i32 %vx, i32 %vy, i32 %vz) unnamed_addr #2 comdat align 2 { -entry: - %this.addr = alloca %struct.dim3*, align 8 - %vx.addr = alloca i32, align 4 - %vy.addr = alloca i32, align 4 - %vz.addr = alloca i32, align 4 - store %struct.dim3* %this, %struct.dim3** %this.addr, align 8 - store i32 %vx, i32* %vx.addr, align 4 - store i32 %vy, i32* %vy.addr, align 4 - store i32 %vz, i32* %vz.addr, align 4 - %this1 = load %struct.dim3*, %struct.dim3** %this.addr, align 8 - %x = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 0 - %0 = load i32, i32* %vx.addr, align 4 - store i32 %0, i32* %x, align 4 - %y = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 1 - %1 = load i32, i32* %vy.addr, align 4 - store i32 %1, i32* %y, align 4 - %z = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 2 - %2 = load i32, i32* %vz.addr, align 4 - store i32 %2, i32* %z, align 4 - ret void -} - -; Function Attrs: nounwind -declare dso_local noalias i8* @malloc(i64) #3 - -declare dso_local i32 @__cudaPushCallConfiguration(i64, i32, i64, i32, i64, i8*) #4 - -declare dso_local i32 @cudaDeviceSynchronize() #4 - -define internal void @__cuda_register_globals(i8** %0) { -entry: - %1 = call i32 @__cudaRegisterFunction(i8** %0, i8* bitcast (void (float*, i32, i32)* @_Z12lud_diagonalPfii to i8*), i8* getelementptr inbounds ([21 x i8], [21 x i8]* @0, i64 0, i64 0), i8* getelementptr inbounds ([21 x i8], [21 x i8]* @0, i64 0, i64 0), i32 -1, i8* null, i8* null, i8* null, i8* null, i32* null) - %2 = call i32 @__cudaRegisterFunction(i8** %0, i8* bitcast (void (float*, i32, i32)* @_Z13lud_perimeterPfii to i8*), i8* getelementptr inbounds ([22 x i8], [22 x i8]* @1, i64 0, i64 0), i8* getelementptr inbounds ([22 x i8], [22 x i8]* @1, i64 0, i64 0), i32 -1, i8* null, i8* null, i8* null, i8* null, i32* null) - %3 = call i32 @__cudaRegisterFunction(i8** %0, i8* bitcast (void (float*, i32, i32)* @_Z12lud_internalPfii to i8*), i8* getelementptr inbounds ([21 x i8], [21 x i8]* @2, i64 0, i64 0), i8* getelementptr inbounds ([21 x i8], [21 x i8]* @2, i64 0, i64 0), i32 -1, i8* null, i8* null, i8* null, i8* null, i32* null) - ret void -} - -declare dso_local i32 @__cudaRegisterFunction(i8**, i8*, i8*, i8*, i32, i8*, i8*, i8*, i8*, i32*) - -declare dso_local i32 @__cudaRegisterVar(i8**, i8*, i8*, i8*, i32, i32, i32, i32) - -declare dso_local i8** @__cudaRegisterFatBinary(i8*) - -define internal void @__cuda_module_ctor(i8* %0) { -entry: - %1 = call i8** @__cudaRegisterFatBinary(i8* bitcast ({ i32, i32, i8*, i8* }* @__cuda_fatbin_wrapper to i8*)) - store i8** %1, i8*** @__cuda_gpubin_handle, align 8 - call void @__cuda_register_globals(i8** %1) - call void @__cudaRegisterFatBinaryEnd(i8** %1) - %2 = call i32 @atexit(void (i8*)* @__cuda_module_dtor) - ret void -} - -declare dso_local void @__cudaRegisterFatBinaryEnd(i8**) - -declare dso_local void @__cudaUnregisterFatBinary(i8**) - -define internal void @__cuda_module_dtor(i8* %0) { -entry: - %1 = load i8**, i8*** @__cuda_gpubin_handle, align 8 - call void @__cudaUnregisterFatBinary(i8** %1) - ret void -} - -declare dso_local i32 @atexit(void (i8*)*) - -attributes #0 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { argmemonly nounwind willreturn } -attributes #2 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #3 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #4 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #5 = { nounwind } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]} -!1 = !{i32 1, !"wchar_size", i32 4} -!2 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"} diff --git a/examples/lud/run.sh b/examples/lud/run.sh deleted file mode 100644 index 793ed32..0000000 --- a/examples/lud/run.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash -set -e -llvm-as lud_kernel-cuda-nvptx64-nvidia-cuda-sm_50.ll -llvm-as lud_kernel-host-x86_64-unknown-linux-gnu.ll -llvm-as common-host-x86_64-unknown-linux-gnu.ll -llvm-as lud-host-x86_64-unknown-linux-gnu.ll -../../build/compilation/kernelTranslator lud_kernel-cuda-nvptx64-nvidia-cuda-sm_50.bc kernel.bc -../../build/compilation/hostTranslator lud_kernel-host-x86_64-unknown-linux-gnu.bc kernel_host.bc - -llc --relocation-model=pic --filetype=obj kernel.bc -llc --relocation-model=pic --filetype=obj kernel_host.bc -llc --relocation-model=pic --filetype=obj lud-host-x86_64-unknown-linux-gnu.bc -o host.o -llc --relocation-model=pic --filetype=obj common-host-x86_64-unknown-linux-gnu.bc -o common.o - -g++ -Wall -L../../build/runtime \ - -L../../build/runtime/threadPool \ - -o lud_cuda -fPIC -no-pie host.o kernel_host.o kernel.o common.o -lc -lx86Runtime -lthreadPool -lpthread -export LD_LIBRARY_PATH=../../build/runtime:../../build/runtime/threadPool:$LD_LIBRARY_PATH -./lud_cuda -s 256 -v > res.log -if grep -q "PASS" res.log; then - echo "Pass" -else - echo "Error result" - exit 1 -fi diff --git a/examples/microbench/cudamemcpy_test.cc b/examples/microbench/cudamemcpy_test.cc deleted file mode 100644 index b329a32..0000000 --- a/examples/microbench/cudamemcpy_test.cc +++ /dev/null @@ -1,40 +0,0 @@ -#include - -__global__ void saxpy(int n, float a, float *x, float *y) { - int i = blockIdx.x * blockDim.x + threadIdx.x; - if (i < n) - y[i] = a * x[i] + y[i]; -} - -int main(void) { - int N = 1 << 20; - float *x, *y, *d_x, *d_y; - x = (float *)malloc(N * sizeof(float)); - y = (float *)malloc(N * sizeof(float)); - - cudaMalloc(&d_x, N * sizeof(float)); - cudaMalloc(&d_y, N * sizeof(float)); - - for (int i = 0; i < N; i++) { - x[i] = 1.0f; - y[i] = 2.0f; - } - - cudaMemcpy(d_x, x, N * sizeof(float), cudaMemcpyHostToDevice); - cudaMemcpy(d_y, y, N * sizeof(float), cudaMemcpyHostToDevice); - - // Perform SAXPY on 1M elements - // saxpy<<<(N+255)/256, 256>>>(N, 2.0f, d_x, d_y); - - cudaMemcpy(y, d_y, N * sizeof(float), cudaMemcpyDeviceToHost); - - float maxError = 0.0f; - for (int i = 0; i < N; i++) - maxError = max(maxError, abs(y[i] - 4.0f)); - printf("Max error: %f\n", maxError); - - cudaFree(d_x); - cudaFree(d_y); - free(x); - free(y); -} diff --git a/examples/microbench/dummy_kernel.cc b/examples/microbench/dummy_kernel.cc deleted file mode 100644 index d9f8673..0000000 --- a/examples/microbench/dummy_kernel.cc +++ /dev/null @@ -1,39 +0,0 @@ -#include - -__global__ void saxpy(void) { - int i = blockIdx.x * blockDim.x + threadIdx.x; - printf("block_id:%d thread_id:%d \n", i) -} - -int main(void) { - int N = 1 << 20; - float *x, *y, *d_x, *d_y; - x = (float *)malloc(N * sizeof(float)); - y = (float *)malloc(N * sizeof(float)); - - cudaMalloc(&d_x, N * sizeof(float)); - cudaMalloc(&d_y, N * sizeof(float)); - - for (int i = 0; i < N; i++) { - x[i] = 1.0f; - y[i] = 2.0f; - } - - cudaMemcpy(d_x, x, N * sizeof(float), cudaMemcpyHostToDevice); - cudaMemcpy(d_y, y, N * sizeof(float), cudaMemcpyHostToDevice); - - // Perform SAXPY on 1M elements - saxpy<<<(1, 1)>>>; - - cudaMemcpy(y, d_y, N * sizeof(float), cudaMemcpyDeviceToHost); - - float maxError = 0.0f; - for (int i = 0; i < N; i++) - maxError = max(maxError, abs(y[i] - 4.0f)); - printf("Max error: %f\n", maxError); - - cudaFree(d_x); - cudaFree(d_y); - free(x); - free(y); -} diff --git a/examples/microbench/kerne_arg.cc b/examples/microbench/kerne_arg.cc deleted file mode 100644 index ce91e63..0000000 --- a/examples/microbench/kerne_arg.cc +++ /dev/null @@ -1,36 +0,0 @@ -#include - -__global__ void saxpy(int N) { printf("hello!: %d\n", N); } - -int main(void) { - int N = 1 << 20; - float *x, *y, *d_x, *d_y; - x = (float *)malloc(N * sizeof(float)); - y = (float *)malloc(N * sizeof(float)); - - cudaMalloc(&d_x, N * sizeof(float)); - cudaMalloc(&d_y, N * sizeof(float)); - - for (int i = 0; i < N; i++) { - x[i] = 1.0f; - y[i] = 2.0f; - } - - cudaMemcpy(d_x, x, N * sizeof(float), cudaMemcpyHostToDevice); - cudaMemcpy(d_y, y, N * sizeof(float), cudaMemcpyHostToDevice); - - // Perform SAXPY on 1M elements - saxpy<<<(1, 1)>>>(N); - - cudaMemcpy(y, d_y, N * sizeof(float), cudaMemcpyDeviceToHost); - - float maxError = 0.0f; - for (int i = 0; i < N; i++) - maxError = max(maxError, abs(y[i] - 4.0f)); - printf("Max error: %f\n", maxError); - - cudaFree(d_x); - cudaFree(d_y); - free(x); - free(y); -} diff --git a/examples/microbench/one_thread_kernel.cc b/examples/microbench/one_thread_kernel.cc deleted file mode 100644 index 6df23f0..0000000 --- a/examples/microbench/one_thread_kernel.cc +++ /dev/null @@ -1,36 +0,0 @@ -#include - -__global__ void saxpy(void) { printf("hello!\n"); } - -int main(void) { - int N = 1 << 20; - float *x, *y, *d_x, *d_y; - x = (float *)malloc(N * sizeof(float)); - y = (float *)malloc(N * sizeof(float)); - - cudaMalloc(&d_x, N * sizeof(float)); - cudaMalloc(&d_y, N * sizeof(float)); - - for (int i = 0; i < N; i++) { - x[i] = 1.0f; - y[i] = 2.0f; - } - - cudaMemcpy(d_x, x, N * sizeof(float), cudaMemcpyHostToDevice); - cudaMemcpy(d_y, y, N * sizeof(float), cudaMemcpyHostToDevice); - - // Perform SAXPY on 1M elements - saxpy<<<(1, 1)>>>; - - cudaMemcpy(y, d_y, N * sizeof(float), cudaMemcpyDeviceToHost); - - float maxError = 0.0f; - for (int i = 0; i < N; i++) - maxError = max(maxError, abs(y[i] - 4.0f)); - printf("Max error: %f\n", maxError); - - cudaFree(d_x); - cudaFree(d_y); - free(x); - free(y); -} diff --git a/examples/myocyte/run.sh b/examples/myocyte/run.sh deleted file mode 100644 index 2edff3e..0000000 --- a/examples/myocyte/run.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -set -e -llvm-as main-cuda-nvptx64-nvidia-cuda-sm_61.ll -llvm-as main-host-x86_64-unknown-linux-gnu.ll -../../build/compilation/kernelTranslator main-cuda-nvptx64-nvidia-cuda-sm_61.bc kernel.bc -../../build/compilation/hostTranslator main-host-x86_64-unknown-linux-gnu.bc host.bc - -llc --relocation-model=pic --filetype=obj kernel.bc -llc --relocation-model=pic --filetype=obj host.bc - -g++ -Wall -L../../build/runtime -L../../build/runtime/threadPool \ - -o myocyte.out -fPIC -no-pie host.o kernel.o \ - -lc -lx86Runtime -lthreadPool -lpthread -lm - -./myocyte.out 100 1 0 -if grep -q "1.3705539" output.txt; then - echo "Pass" -else - echo "Error result" - exit 1 -fi diff --git a/examples/nn/filelist_4 b/examples/nn/filelist_4 deleted file mode 100644 index 23ef193..0000000 --- a/examples/nn/filelist_4 +++ /dev/null @@ -1,4 +0,0 @@ -../../rodinia-data/nn/cane4_0.db -../../rodinia-data/nn/cane4_1.db -../../rodinia-data/nn/cane4_2.db -../../rodinia-data/nn/cane4_3.db diff --git a/examples/nn/nn_cuda-cuda-nvptx64-nvidia-cuda-sm_61.ll b/examples/nn/nn_cuda-cuda-nvptx64-nvidia-cuda-sm_61.ll deleted file mode 100644 index 761c56c..0000000 --- a/examples/nn/nn_cuda-cuda-nvptx64-nvidia-cuda-sm_61.ll +++ /dev/null @@ -1,271 +0,0 @@ -; ModuleID = 'nn_cuda-cuda-nvptx64-nvidia-cuda-sm_61.bc' -source_filename = "nn_cuda.cu" -target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" -target triple = "nvptx64-nvidia-cuda" - -%struct.__cuda_builtin_blockDim_t = type { i8 } -%struct.__cuda_builtin_gridDim_t = type { i8 } -%struct.__cuda_builtin_blockIdx_t = type { i8 } -%struct.__cuda_builtin_threadIdx_t = type { i8 } -%struct.cudaFuncAttributes = type { i64, i64, i64, i32, i32, i32, i32, i32, i32, i32 } -%struct.latLong = type { float, float } - -$_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv = comdat any - -$_ZN24__cuda_builtin_gridDim_t17__fetch_builtin_xEv = comdat any - -$_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_yEv = comdat any - -$_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv = comdat any - -$_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv = comdat any - -@blockDim = extern_weak dso_local addrspace(1) global %struct.__cuda_builtin_blockDim_t, align 1 -@gridDim = extern_weak dso_local addrspace(1) global %struct.__cuda_builtin_gridDim_t, align 1 -@blockIdx = extern_weak dso_local addrspace(1) global %struct.__cuda_builtin_blockIdx_t, align 1 -@threadIdx = extern_weak dso_local addrspace(1) global %struct.__cuda_builtin_threadIdx_t, align 1 - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaMalloc(i8** %p, i64 %s) #0 { -entry: - %p.addr = alloca i8**, align 8 - %s.addr = alloca i64, align 8 - store i8** %p, i8*** %p.addr, align 8 - store i64 %s, i64* %s.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaFuncGetAttributes(%struct.cudaFuncAttributes* %p, i8* %c) #0 { -entry: - %p.addr = alloca %struct.cudaFuncAttributes*, align 8 - %c.addr = alloca i8*, align 8 - store %struct.cudaFuncAttributes* %p, %struct.cudaFuncAttributes** %p.addr, align 8 - store i8* %c, i8** %c.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaDeviceGetAttribute(i32* %value, i32 %attr, i32 %device) #0 { -entry: - %value.addr = alloca i32*, align 8 - %attr.addr = alloca i32, align 4 - %device.addr = alloca i32, align 4 - store i32* %value, i32** %value.addr, align 8 - store i32 %attr, i32* %attr.addr, align 4 - store i32 %device, i32* %device.addr, align 4 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaGetDevice(i32* %device) #0 { -entry: - %device.addr = alloca i32*, align 8 - store i32* %device, i32** %device.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaOccupancyMaxActiveBlocksPerMultiprocessor(i32* %numBlocks, i8* %func, i32 %blockSize, i64 %dynamicSmemSize) #0 { -entry: - %numBlocks.addr = alloca i32*, align 8 - %func.addr = alloca i8*, align 8 - %blockSize.addr = alloca i32, align 4 - %dynamicSmemSize.addr = alloca i64, align 8 - store i32* %numBlocks, i32** %numBlocks.addr, align 8 - store i8* %func, i8** %func.addr, align 8 - store i32 %blockSize, i32* %blockSize.addr, align 4 - store i64 %dynamicSmemSize, i64* %dynamicSmemSize.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(i32* %numBlocks, i8* %func, i32 %blockSize, i64 %dynamicSmemSize, i32 %flags) #0 { -entry: - %numBlocks.addr = alloca i32*, align 8 - %func.addr = alloca i8*, align 8 - %blockSize.addr = alloca i32, align 4 - %dynamicSmemSize.addr = alloca i64, align 8 - %flags.addr = alloca i32, align 4 - store i32* %numBlocks, i32** %numBlocks.addr, align 8 - store i8* %func, i8** %func.addr, align 8 - store i32 %blockSize, i32* %blockSize.addr, align 4 - store i64 %dynamicSmemSize, i64* %dynamicSmemSize.addr, align 8 - store i32 %flags, i32* %flags.addr, align 4 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define dso_local void @_Z6euclidP7latLongPfiff(%struct.latLong* %d_locations, float* %d_distances, i32 %numRecords, float %lat, float %lng) #0 { -entry: - %d_locations.addr = alloca %struct.latLong*, align 8 - %d_distances.addr = alloca float*, align 8 - %numRecords.addr = alloca i32, align 4 - %lat.addr = alloca float, align 4 - %lng.addr = alloca float, align 4 - %globalId = alloca i32, align 4 - %latLong = alloca %struct.latLong*, align 8 - %dist = alloca float*, align 8 - store %struct.latLong* %d_locations, %struct.latLong** %d_locations.addr, align 8 - store float* %d_distances, float** %d_distances.addr, align 8 - store i32 %numRecords, i32* %numRecords.addr, align 4 - store float %lat, float* %lat.addr, align 4 - store float %lng, float* %lng.addr, align 4 - %call = call i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv() #4 - %call1 = call i32 @_ZN24__cuda_builtin_gridDim_t17__fetch_builtin_xEv() #4 - %call2 = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_yEv() #4 - %mul = mul i32 %call1, %call2 - %call3 = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #4 - %add = add i32 %mul, %call3 - %mul4 = mul i32 %call, %add - %call5 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #4 - %add6 = add i32 %mul4, %call5 - store i32 %add6, i32* %globalId, align 4 - %0 = load %struct.latLong*, %struct.latLong** %d_locations.addr, align 8 - %1 = load i32, i32* %globalId, align 4 - %idx.ext = sext i32 %1 to i64 - %add.ptr = getelementptr inbounds %struct.latLong, %struct.latLong* %0, i64 %idx.ext - store %struct.latLong* %add.ptr, %struct.latLong** %latLong, align 8 - %2 = load i32, i32* %globalId, align 4 - %3 = load i32, i32* %numRecords.addr, align 4 - %cmp = icmp slt i32 %2, %3 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - %4 = load float*, float** %d_distances.addr, align 8 - %5 = load i32, i32* %globalId, align 4 - %idx.ext7 = sext i32 %5 to i64 - %add.ptr8 = getelementptr inbounds float, float* %4, i64 %idx.ext7 - store float* %add.ptr8, float** %dist, align 8 - %6 = load float, float* %lat.addr, align 4 - %7 = load %struct.latLong*, %struct.latLong** %latLong, align 8 - %lat9 = getelementptr inbounds %struct.latLong, %struct.latLong* %7, i32 0, i32 0 - %8 = load float, float* %lat9, align 4 - %sub = fsub contract float %6, %8 - %9 = load float, float* %lat.addr, align 4 - %10 = load %struct.latLong*, %struct.latLong** %latLong, align 8 - %lat10 = getelementptr inbounds %struct.latLong, %struct.latLong* %10, i32 0, i32 0 - %11 = load float, float* %lat10, align 4 - %sub11 = fsub contract float %9, %11 - %mul12 = fmul contract float %sub, %sub11 - %12 = load float, float* %lng.addr, align 4 - %13 = load %struct.latLong*, %struct.latLong** %latLong, align 8 - %lng13 = getelementptr inbounds %struct.latLong, %struct.latLong* %13, i32 0, i32 1 - %14 = load float, float* %lng13, align 4 - %sub14 = fsub contract float %12, %14 - %15 = load float, float* %lng.addr, align 4 - %16 = load %struct.latLong*, %struct.latLong** %latLong, align 8 - %lng15 = getelementptr inbounds %struct.latLong, %struct.latLong* %16, i32 0, i32 1 - %17 = load float, float* %lng15, align 4 - %sub16 = fsub contract float %15, %17 - %mul17 = fmul contract float %sub14, %sub16 - %add18 = fadd contract float %mul12, %mul17 - %call19 = call float @_ZL4sqrtf(float %add18) #4 - %18 = load float*, float** %dist, align 8 - store float %call19, float* %18, align 4 - br label %if.end - -if.end: ; preds = %if.then, %entry - ret void -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() - ret i32 %0 -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN24__cuda_builtin_gridDim_t17__fetch_builtin_xEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x() - ret i32 %0 -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_yEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y() - ret i32 %0 -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() - ret i32 %0 -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() - ret i32 %0 -} - -; Function Attrs: alwaysinline convergent nounwind -define internal float @_ZL4sqrtf(float %__x) #1 { -entry: - %__x.addr = alloca float, align 4 - store float %__x, float* %__x.addr, align 4 - %0 = load float, float* %__x.addr, align 4 - %call = call float @_ZL5sqrtff(float %0) #4 - ret float %call -} - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x() #2 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.x() #2 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.y() #2 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() #2 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() #2 - -; Function Attrs: alwaysinline convergent nounwind -define internal float @_ZL5sqrtff(float %__a) #1 { -entry: - %__a.addr = alloca float, align 4 - store float %__a, float* %__a.addr, align 4 - %0 = load float, float* %__a.addr, align 4 - %call = call float @__nv_sqrtf(float %0) #4 - ret float %call -} - -; Function Attrs: alwaysinline convergent inlinehint nounwind -define internal float @__nv_sqrtf(float %x) #3 { - %1 = call float @llvm.nvvm.sqrt.f(float %x) - ret float %1 -} - -; Function Attrs: nounwind readnone -declare float @llvm.nvvm.sqrt.f(float) #2 - -attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_61" "target-features"="+ptx64,+sm_61" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { alwaysinline convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_61" "target-features"="+ptx64,+sm_61" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind readnone } -attributes #3 = { alwaysinline convergent inlinehint nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #4 = { convergent nounwind } - -!llvm.module.flags = !{!0, !1, !2} -!nvvm.annotations = !{!3, !4, !5, !4, !6, !6, !6, !6, !7, !7, !6} -!llvm.ident = !{!8} -!nvvmir.version = !{!9} - -!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]} -!1 = !{i32 1, !"wchar_size", i32 4} -!2 = !{i32 4, !"nvvm-reflect-ftz", i32 0} -!3 = !{void (%struct.latLong*, float*, i32, float, float)* @_Z6euclidP7latLongPfiff, !"kernel", i32 1} -!4 = !{null, !"align", i32 8} -!5 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080} -!6 = !{null, !"align", i32 16} -!7 = !{null, !"align", i32 16, !"align", i32 65552, !"align", i32 131088} -!8 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"} -!9 = !{i32 1, i32 4} diff --git a/examples/nn/nn_cuda-host-x86_64-unknown-linux-gnu.ll b/examples/nn/nn_cuda-host-x86_64-unknown-linux-gnu.ll deleted file mode 100644 index 785a93f..0000000 --- a/examples/nn/nn_cuda-host-x86_64-unknown-linux-gnu.ll +++ /dev/null @@ -1,3691 +0,0 @@ -; ModuleID = 'nn_cuda-host-x86_64-unknown-linux-gnu.bc' -source_filename = "nn_cuda.cu" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } -%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } -%struct.latLong = type { float, float } -%struct.dim3 = type { i32, i32, i32 } -%struct.CUstream_st = type opaque -%"class.std::vector" = type { %"struct.std::_Vector_base" } -%"struct.std::_Vector_base" = type { %"struct.std::_Vector_base >::_Vector_impl" } -%"struct.std::_Vector_base >::_Vector_impl" = type { %struct.record*, %struct.record*, %struct.record* } -%struct.record = type { [53 x i8], float } -%"class.std::vector.0" = type { %"struct.std::_Vector_base.1" } -%"struct.std::_Vector_base.1" = type { %"struct.std::_Vector_base >::_Vector_impl" } -%"struct.std::_Vector_base >::_Vector_impl" = type { %struct.latLong*, %struct.latLong*, %struct.latLong* } -%struct.cudaDeviceProp = type { [256 x i8], %struct.CUuuid_st, [8 x i8], i32, i64, i64, i32, i32, i64, i32, [3 x i32], [3 x i32], i32, i64, i32, i32, i64, i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, [2 x i32], [2 x i32], [3 x i32], [2 x i32], [3 x i32], [3 x i32], i32, [2 x i32], [3 x i32], [2 x i32], i32, [2 x i32], [3 x i32], [2 x i32], [3 x i32], i32, [2 x i32], i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i32, i32 } -%struct.CUuuid_st = type { [16 x i8] } -%"class.std::allocator.2" = type { i8 } -%"class.std::allocator" = type { i8 } -%"class.__gnu_cxx::__normal_iterator" = type { %struct.latLong* } -%"class.__gnu_cxx::__normal_iterator.5" = type { %struct.record* } -%"class.__gnu_cxx::new_allocator" = type { i8 } -%"class.__gnu_cxx::new_allocator.3" = type { i8 } - -$_ZNSt6vectorI6recordSaIS0_EEC2Ev = comdat any - -$_ZNSt6vectorI7latLongSaIS0_EEC2Ev = comdat any - -$_ZN4dim3C2Ejjj = comdat any - -$_ZNSt6vectorI7latLongSaIS0_EEixEm = comdat any - -$_ZNSt6vectorI6recordSaIS0_EEixEm = comdat any - -$_ZNSt6vectorI7latLongSaIS0_EED2Ev = comdat any - -$__clang_call_terminate = comdat any - -$_ZNSt6vectorI6recordSaIS0_EED2Ev = comdat any - -$_ZNSt6vectorI7latLongSaIS0_EE9push_backERKS0_ = comdat any - -$_ZNSt6vectorI6recordSaIS0_EE9push_backERKS0_ = comdat any - -$_ZNSt12_Vector_baseI6recordSaIS0_EEC2Ev = comdat any - -$_ZNSt12_Vector_baseI6recordSaIS0_EE12_Vector_implC2Ev = comdat any - -$_ZNSaI6recordEC2Ev = comdat any - -$_ZN9__gnu_cxx13new_allocatorI6recordEC2Ev = comdat any - -$_ZSt8_DestroyIP6recordS0_EvT_S2_RSaIT0_E = comdat any - -$_ZNSt12_Vector_baseI6recordSaIS0_EE19_M_get_Tp_allocatorEv = comdat any - -$_ZNSt12_Vector_baseI6recordSaIS0_EED2Ev = comdat any - -$_ZSt8_DestroyIP6recordEvT_S2_ = comdat any - -$_ZNSt12_Destroy_auxILb1EE9__destroyIP6recordEEvT_S4_ = comdat any - -$_ZNSt12_Vector_baseI6recordSaIS0_EE13_M_deallocateEPS0_m = comdat any - -$_ZNSt12_Vector_baseI6recordSaIS0_EE12_Vector_implD2Ev = comdat any - -$_ZN9__gnu_cxx14__alloc_traitsISaI6recordEE10deallocateERS2_PS1_m = comdat any - -$_ZN9__gnu_cxx13new_allocatorI6recordE10deallocateEPS1_m = comdat any - -$_ZNSaI6recordED2Ev = comdat any - -$_ZN9__gnu_cxx13new_allocatorI6recordED2Ev = comdat any - -$_ZNSt12_Vector_baseI7latLongSaIS0_EEC2Ev = comdat any - -$_ZNSt12_Vector_baseI7latLongSaIS0_EE12_Vector_implC2Ev = comdat any - -$_ZNSaI7latLongEC2Ev = comdat any - -$_ZN9__gnu_cxx13new_allocatorI7latLongEC2Ev = comdat any - -$_ZSt8_DestroyIP7latLongS0_EvT_S2_RSaIT0_E = comdat any - -$_ZNSt12_Vector_baseI7latLongSaIS0_EE19_M_get_Tp_allocatorEv = comdat any - -$_ZNSt12_Vector_baseI7latLongSaIS0_EED2Ev = comdat any - -$_ZSt8_DestroyIP7latLongEvT_S2_ = comdat any - -$_ZNSt12_Destroy_auxILb1EE9__destroyIP7latLongEEvT_S4_ = comdat any - -$_ZNSt12_Vector_baseI7latLongSaIS0_EE13_M_deallocateEPS0_m = comdat any - -$_ZNSt12_Vector_baseI7latLongSaIS0_EE12_Vector_implD2Ev = comdat any - -$_ZN9__gnu_cxx14__alloc_traitsISaI7latLongEE10deallocateERS2_PS1_m = comdat any - -$_ZN9__gnu_cxx13new_allocatorI7latLongE10deallocateEPS1_m = comdat any - -$_ZNSaI7latLongED2Ev = comdat any - -$_ZN9__gnu_cxx13new_allocatorI7latLongED2Ev = comdat any - -$_ZN9__gnu_cxx14__alloc_traitsISaI7latLongEE9constructIS1_EEvRS2_PS1_RKT_ = comdat any - -$_ZNSt6vectorI7latLongSaIS0_EE17_M_realloc_insertEN9__gnu_cxx17__normal_iteratorIPS0_S2_EERKS0_ = comdat any - -$_ZNSt6vectorI7latLongSaIS0_EE3endEv = comdat any - -$_ZN9__gnu_cxx13new_allocatorI7latLongE9constructEPS1_RKS1_ = comdat any - -$_ZNKSt6vectorI7latLongSaIS0_EE12_M_check_lenEmPKc = comdat any - -$_ZN9__gnu_cxxmiIP7latLongSt6vectorIS1_SaIS1_EEEENS_17__normal_iteratorIT_T0_E15difference_typeERKS9_SC_ = comdat any - -$_ZNSt6vectorI7latLongSaIS0_EE5beginEv = comdat any - -$_ZNSt12_Vector_baseI7latLongSaIS0_EE11_M_allocateEm = comdat any - -$_ZSt34__uninitialized_move_if_noexcept_aIP7latLongS1_SaIS0_EET0_T_S4_S3_RT1_ = comdat any - -$_ZNK9__gnu_cxx17__normal_iteratorIP7latLongSt6vectorIS1_SaIS1_EEE4baseEv = comdat any - -$_ZN9__gnu_cxx14__alloc_traitsISaI7latLongEE7destroyERS2_PS1_ = comdat any - -$_ZNKSt6vectorI7latLongSaIS0_EE8max_sizeEv = comdat any - -$_ZNKSt6vectorI7latLongSaIS0_EE4sizeEv = comdat any - -$_ZSt3maxImERKT_S2_S2_ = comdat any - -$_ZN9__gnu_cxx14__alloc_traitsISaI7latLongEE8max_sizeERKS2_ = comdat any - -$_ZNKSt12_Vector_baseI7latLongSaIS0_EE19_M_get_Tp_allocatorEv = comdat any - -$_ZNK9__gnu_cxx13new_allocatorI7latLongE8max_sizeEv = comdat any - -$_ZN9__gnu_cxx17__normal_iteratorIP7latLongSt6vectorIS1_SaIS1_EEEC2ERKS2_ = comdat any - -$_ZN9__gnu_cxx14__alloc_traitsISaI7latLongEE8allocateERS2_m = comdat any - -$_ZN9__gnu_cxx13new_allocatorI7latLongE8allocateEmPKv = comdat any - -$_ZSt22__uninitialized_copy_aIP7latLongS1_S0_ET0_T_S3_S2_RSaIT1_E = comdat any - -$_ZSt18uninitialized_copyIP7latLongS1_ET0_T_S3_S2_ = comdat any - -$_ZNSt20__uninitialized_copyILb1EE13__uninit_copyIP7latLongS3_EET0_T_S5_S4_ = comdat any - -$_ZSt4copyIP7latLongS1_ET0_T_S3_S2_ = comdat any - -$_ZSt14__copy_move_a2ILb0EP7latLongS1_ET1_T0_S3_S2_ = comdat any - -$_ZSt12__miter_baseIP7latLongET_S2_ = comdat any - -$_ZSt13__copy_move_aILb0EP7latLongS1_ET1_T0_S3_S2_ = comdat any - -$_ZSt12__niter_baseIP7latLongET_S2_ = comdat any - -$_ZNSt11__copy_moveILb0ELb1ESt26random_access_iterator_tagE8__copy_mI7latLongEEPT_PKS4_S7_S5_ = comdat any - -$_ZN9__gnu_cxx13new_allocatorI7latLongE7destroyEPS1_ = comdat any - -$_ZN9__gnu_cxx14__alloc_traitsISaI6recordEE9constructIS1_EEvRS2_PS1_RKT_ = comdat any - -$_ZNSt6vectorI6recordSaIS0_EE17_M_realloc_insertEN9__gnu_cxx17__normal_iteratorIPS0_S2_EERKS0_ = comdat any - -$_ZNSt6vectorI6recordSaIS0_EE3endEv = comdat any - -$_ZN9__gnu_cxx13new_allocatorI6recordE9constructEPS1_RKS1_ = comdat any - -$_ZNKSt6vectorI6recordSaIS0_EE12_M_check_lenEmPKc = comdat any - -$_ZN9__gnu_cxxmiIP6recordSt6vectorIS1_SaIS1_EEEENS_17__normal_iteratorIT_T0_E15difference_typeERKS9_SC_ = comdat any - -$_ZNSt6vectorI6recordSaIS0_EE5beginEv = comdat any - -$_ZNSt12_Vector_baseI6recordSaIS0_EE11_M_allocateEm = comdat any - -$_ZSt34__uninitialized_move_if_noexcept_aIP6recordS1_SaIS0_EET0_T_S4_S3_RT1_ = comdat any - -$_ZNK9__gnu_cxx17__normal_iteratorIP6recordSt6vectorIS1_SaIS1_EEE4baseEv = comdat any - -$_ZN9__gnu_cxx14__alloc_traitsISaI6recordEE7destroyERS2_PS1_ = comdat any - -$_ZNKSt6vectorI6recordSaIS0_EE8max_sizeEv = comdat any - -$_ZNKSt6vectorI6recordSaIS0_EE4sizeEv = comdat any - -$_ZN9__gnu_cxx14__alloc_traitsISaI6recordEE8max_sizeERKS2_ = comdat any - -$_ZNKSt12_Vector_baseI6recordSaIS0_EE19_M_get_Tp_allocatorEv = comdat any - -$_ZNK9__gnu_cxx13new_allocatorI6recordE8max_sizeEv = comdat any - -$_ZN9__gnu_cxx17__normal_iteratorIP6recordSt6vectorIS1_SaIS1_EEEC2ERKS2_ = comdat any - -$_ZN9__gnu_cxx14__alloc_traitsISaI6recordEE8allocateERS2_m = comdat any - -$_ZN9__gnu_cxx13new_allocatorI6recordE8allocateEmPKv = comdat any - -$_ZSt22__uninitialized_copy_aIP6recordS1_S0_ET0_T_S3_S2_RSaIT1_E = comdat any - -$_ZSt18uninitialized_copyIP6recordS1_ET0_T_S3_S2_ = comdat any - -$_ZNSt20__uninitialized_copyILb1EE13__uninit_copyIP6recordS3_EET0_T_S5_S4_ = comdat any - -$_ZSt4copyIP6recordS1_ET0_T_S3_S2_ = comdat any - -$_ZSt14__copy_move_a2ILb0EP6recordS1_ET1_T0_S3_S2_ = comdat any - -$_ZSt12__miter_baseIP6recordET_S2_ = comdat any - -$_ZSt13__copy_move_aILb0EP6recordS1_ET1_T0_S3_S2_ = comdat any - -$_ZSt12__niter_baseIP6recordET_S2_ = comdat any - -$_ZNSt11__copy_moveILb0ELb1ESt26random_access_iterator_tagE8__copy_mI6recordEEPT_PKS4_S7_S5_ = comdat any - -$_ZN9__gnu_cxx13new_allocatorI6recordE7destroyEPS1_ = comdat any - -@.str = private unnamed_addr constant [12 x i8] c"before all\0A\00", align 1 -@.str.1 = private unnamed_addr constant [18 x i8] c"after before all\0A\00", align 1 -@.str.2 = private unnamed_addr constant [13 x i8] c"before call\0A\00", align 1 -@.str.3 = private unnamed_addr constant [12 x i8] c"after call\0A\00", align 1 -@.str.4 = private unnamed_addr constant [13 x i8] c"before find\0A\00", align 1 -@.str.5 = private unnamed_addr constant [12 x i8] c"after find\0A\00", align 1 -@.str.6 = private unnamed_addr constant [20 x i8] c"%s --> Distance=%f\0A\00", align 1 -@.str.7 = private unnamed_addr constant [2 x i8] c"r\00", align 1 -@.str.8 = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1 -@stderr = external dso_local global %struct._IO_FILE*, align 8 -@.str.9 = private unnamed_addr constant [24 x i8] c"error reading filelist\0A\00", align 1 -@.str.10 = private unnamed_addr constant [20 x i8] c"error opening a db\0A\00", align 1 -@.str.11 = private unnamed_addr constant [24 x i8] c"Nearest Neighbor Usage\0A\00", align 1 -@.str.12 = private unnamed_addr constant [2 x i8] c"\0A\00", align 1 -@.str.13 = private unnamed_addr constant [90 x i8] c"nearestNeighbor [filename] -r [int] -lat [float] -lng [float] [-hqt] [-p [int] -d [int]]\0A\00", align 1 -@.str.14 = private unnamed_addr constant [10 x i8] c"example:\0A\00", align 1 -@.str.15 = private unnamed_addr constant [55 x i8] c"$ ./nearestNeighbor filelist.txt -r 5 -lat 30 -lng 90\0A\00", align 1 -@.str.16 = private unnamed_addr constant [59 x i8] c"filename the filename that lists the data input files\0A\00", align 1 -@.str.17 = private unnamed_addr constant [60 x i8] c"-r [int] the number of records to return (default: 10)\0A\00", align 1 -@.str.18 = private unnamed_addr constant [62 x i8] c"-lat [float] the latitude for nearest neighbors (default: 0)\0A\00", align 1 -@.str.19 = private unnamed_addr constant [63 x i8] c"-lng [float] the longitude for nearest neighbors (default: 0)\0A\00", align 1 -@.str.20 = private unnamed_addr constant [36 x i8] c"-h, --help Display the help file\0A\00", align 1 -@.str.21 = private unnamed_addr constant [52 x i8] c"-q Quiet mode. Suppress all text output.\0A\00", align 1 -@.str.22 = private unnamed_addr constant [40 x i8] c"-t Print timing information.\0A\00", align 1 -@.str.23 = private unnamed_addr constant [73 x i8] c"-p [int] Choose the platform (must choose both platform and device)\0A\00", align 1 -@.str.24 = private unnamed_addr constant [71 x i8] c"-d [int] Choose the device (must choose both platform and device)\0A\00", align 1 -@.str.25 = private unnamed_addr constant [60 x i8] c"Notes: 1. The filename is required as the first parameter.\0A\00", align 1 -@.str.26 = private unnamed_addr constant [61 x i8] c" 2. If you declare either the device or the platform,\0A\00", align 1 -@.str.27 = private unnamed_addr constant [35 x i8] c" you must declare both.\0A\0A\00", align 1 -@.str.28 = private unnamed_addr constant [26 x i8] c"vector::_M_realloc_insert\00", align 1 -@0 = private unnamed_addr constant [24 x i8] c"_Z6euclidP7latLongPfiff\00", align 1 -@1 = private constant [8313 x i8] c"P\EDU\BA\01\00\10\00h \00\00\00\00\00\00\02\00\01\01@\00\00\00\A8\19\00\00\00\00\00\00\00\00\00\00\00\00\00\00\07\00\01\00=\00\00\00\00\00\00\00\00\00\00\00\11\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\7FELF\02\01\013\07\00\00\00\00\00\00\00\02\00\BE\00e\00\00\00\00\00\00\00\00\00\00\00\00\19\00\00\00\00\00\00\C0\16\00\00\00\00\00\00=\05=\00@\008\00\03\00@\00\09\00\01\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00.text._Z6euclidP7latLongPfiff\00.nv.info._Z6euclidP7latLongPfiff\00.nv.shared._Z6euclidP7latLongPfiff\00.nv.global\00.nv.constant0._Z6euclidP7latLongPfiff\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00_Z6euclidP7latLongPfiff\00.text._Z6euclidP7latLongPfiff\00.nv.info._Z6euclidP7latLongPfiff\00.nv.shared._Z6euclidP7latLongPfiff\00.nv.global\00blockDim\00gridDim\00blockIdx\00threadIdx\00$_Z6euclidP7latLongPfiff$__cuda_sm20_sqrt_rn_f32_slowpath\00.nv.constant0._Z6euclidP7latLongPfiff\00_param\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00J\00\00\00\03\00\07\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\AC\00\00\00\03\00\08\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\B7\00\00\00\01\00\08\00\01\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\C0\00\00\00\01\00\08\00\03\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\C8\00\00\00\01\00\08\00\00\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\D1\00\00\00\01\00\08\00\02\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\DB\00\00\00\22\00\07\00\D0\0E\00\00\00\00\00\00p\02\00\00\00\00\00\00\15\01\00\00\03\00\06\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\002\00\00\00\12\10\07\00\00\00\00\00\00\00\00\00@\11\00\00\00\00\00\00\04/\08\00\09\00\00\00\0D\00\00\00\04#\08\00\07\00\00\00\00\00\00\00\04\12\08\00\07\00\00\00\00\00\00\00\04\11\08\00\07\00\00\00\00\00\00\00\04#\08\00\09\00\00\00\00\00\00\00\04\12\08\00\09\00\00\008\00\00\00\04\11\08\00\09\00\00\008\00\00\00\010\00\00\01*\00\00\04\0A\08\00\08\00\00\00@\01\1C\00\03\19\1C\00\04\17\0C\00\00\00\00\00\04\00\18\00\00\F0\11\00\04\17\0C\00\00\00\00\00\03\00\14\00\00\F0\11\00\04\17\0C\00\00\00\00\00\02\00\10\00\00\F0\11\00\04\17\0C\00\00\00\00\00\01\00\08\00\00\F0!\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0!\00\03\1B\FF\00\04\1D\08\00\18\04\00\008\04\00\00\04\1C\04\00\C8\0E\00\00\04\1E\04\00\90\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\EF\1F\E0\FD\03!\00\D3rd<3>;\0A\0Amov.u2\00\1B,e\00b;\0Acvta\8D\00\04%\00\13,\\\00\22ld\C8\00\02\18\00nrd2, [\CE\00\1E])\00\1F1)\00\01a0];\0Ast#\00\81[%SP+0],,\00\0A\16\00\128\16\00\222;\B6\00\01\D8\00a1, 999(\00\02g\00\00\EF\00\18[\9E\01\03M\00\AF1;\0Aret;\0A\0A}\D6\01\1A\FE\02FuncGetAttributes\E1\01\0D#\00\0E\EC\01\0F+\00\06\0F\F7\01\1B\1F1\F7\01Q\1F1\F7\01!\0E\D9\00\0F\02\02\0F\0E8\01\0F\0D\02\8DhDevice\B4\00\0E\0E\02\0E$\00\0F\0F\02\00/32,\00\0B\1F1,\00\18\1F2<\02\13\1F2<\02\1F\1D4<\02\1F2<\02\0C\1F2<\02\13\01_\00\04;\02\0F\D9\00\07\1D]4\00\1F14\00\06\0Fp\02\10\0E\9A\01\0Fq\02\12(32q\02\0B\15\00!12\16\00\09\86\02\1F3\86\02\15\1F3\86\02#2Get\CB\00\0E}\02\05\1B\00\04\DA\00\0F\1C\02\13?3[8W\04.\0F\1B\02\0D\1F3W\04\19\04\B3\01\0D\D0\00\0F\AA\01\06\0F\05\04W\F0\04OccupancyMaxActiveBV\08\FE\03sPerMultiprocessor\9F\01\0F;\00\16\0EB\06\0FC\00%\0EJ\04\0FC\00\1E\1F2\86\00/\1F3\88\02\13O4[32\89\02\1C\1D3\89\02\1F4\89\02\0C\1F4\89\02\19\133\89\02\0F\F1\00\1E\0F\BC\04\00\0FK\00$/2]w\07\00\0FL\00$\0F\1F\05\01\0F\98\00%\0F\A7\07\1D\097\05\186M\05\04,\00\2224-\00\183\CF\03\1F2\CF\03\15\1F2\CF\03L\9FWithFlags\D8\03(\05D\00\0E\E1\03\0FL\00'\0F\EA\030\0CL\00\1F2\98\008\1F3\98\008\1F4H\04\13O5[40\EC\08.\0FH\04\0D\1F5\EC\08\1C\0F\F9\00+\1F]\9C\040\0D\9A\01\0F\A5\040\0D:\02\0F\AE\041\0D\DB\02\0F\B7\041\0D|\03\0F\C0\04I\08-\00\1F3$\0A6\F6\17visible .entry _Z6euclidP7latLongPfiff\A0\04\00\9A\00\0F%\00\04\0E\81\04\0F-\00\0F\07b\04\00\C6\00\0F-\00\0B\07C\04\1Ff-\00\0E\1F3-\00\12\0F\05\04\1B?6[5\F1\0C\16wpred %p\D7\0A\00\87\00k%f<14>)\04\1E1r\08/17+\04\0C\1F6+\04\12\02s\00\00-\03\0F\00\01\0C\1D]5\00\1F15\00\0E\0F\EC\03\00\0F5\00\0F\0F\CD\03\01\0F\A0\00\0F\0F\AE\03\01\0Fl\00\0F#0]%\01#to\99\13\07\E8\04\02\82\03\01[\0E\0A\1C\00\144q\03\0F;\00\03\145\D3\03\0F;\00\00\116\1C\00\1F5\EF\03\02\1A6\16\00\03\EF\03*d4\D9\03'24\06\04\15f\16\00\01D\00\1Bf\16\00\02\05\04+f2\DB\08{%ntid.x\1B\04\\%ncta\18\00\00\EB\00\02\17\00\B1y;\0Amul.lo.s\1A\00#5,7\00(r41\00\1561\00cx;\0Aadd.\00$7,3\00\1B6H\00#8,\95\00(r7H\00\\9, %tF\00410,2\00\1B9\EF\04\03\1C\05\110\06\02\03m\01$7,`\01\01\16\00\02D\005d8,3\00T;\0AshlR\03#9,\1E\00\133s\00\03E\02#0,L\00\00$\00\0A\A3\01\144\BF\05\03w\00\02\E2\02\181a\00\06\17\00%2,\C1\01\92;\0Asetp.ge\94\003p1,8\00\00'\00\F2\0B;\0A@%p1 bra LBB6_2;\0Abra.uni\10\0021;\0A\08\00\17:\EB\00\05u\00)16\ED\00/12\EE\00\04413, \00\1A2\F0\00$4,Q\00\01'\00\0B\F2\00\03\AC\02!14f\00\02\09\04%3,\85\02\08\7F\01515,'\01\07.\00\104\16\00\00\1E\00\00\91\00Sub.rn\19\00\225,L\00(%f]\00%6,\CC\02\07\16\00\147E\00,+4G\00\228,6\002%f7k\02\05\1A\00$9,\1F\00f8;\0Afma\1A\00\01\E5\01%f5\05\00\1A9H\03\124]\03\170\81\00\05q\01*4],\00\120,\00\181,\00\04\85\01\01\FE\077qrty\00\01\F0\00)128\01%6,m\01\07\\\00\22rd\12\04;f13\12\02\132\12\02\B02:\0Aret;\0A\0A}\0A\00\00\00\00\00", section ".nv_fatbin", align 8 -@__cuda_fatbin_wrapper = internal constant { i32, i32, i8*, i8* } { i32 1180844977, i32 1, i8* getelementptr inbounds ([8313 x i8], [8313 x i8]* @1, i64 0, i64 0), i8* null }, section ".nvFatBinSegment", align 8 -@__cuda_gpubin_handle = internal global i8** null, align 8 -@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* bitcast (void (i8*)* @__cuda_module_ctor to void ()*), i8* null }] - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z6euclidP7latLongPfiff(%struct.latLong* %d_locations, float* %d_distances, i32 %numRecords, float %lat, float %lng) #0 { -entry: - %d_locations.addr = alloca %struct.latLong*, align 8 - %d_distances.addr = alloca float*, align 8 - %numRecords.addr = alloca i32, align 4 - %lat.addr = alloca float, align 4 - %lng.addr = alloca float, align 4 - %grid_dim = alloca %struct.dim3, align 8 - %block_dim = alloca %struct.dim3, align 8 - %shmem_size = alloca i64, align 8 - %stream = alloca i8*, align 8 - %grid_dim.coerce = alloca { i64, i32 }, align 8 - %block_dim.coerce = alloca { i64, i32 }, align 8 - store %struct.latLong* %d_locations, %struct.latLong** %d_locations.addr, align 8 - store float* %d_distances, float** %d_distances.addr, align 8 - store i32 %numRecords, i32* %numRecords.addr, align 4 - store float %lat, float* %lat.addr, align 4 - store float %lng, float* %lng.addr, align 4 - %kernel_args = alloca i8*, i64 5, align 16 - %0 = bitcast %struct.latLong** %d_locations.addr to i8* - %1 = getelementptr i8*, i8** %kernel_args, i32 0 - store i8* %0, i8** %1 - %2 = bitcast float** %d_distances.addr to i8* - %3 = getelementptr i8*, i8** %kernel_args, i32 1 - store i8* %2, i8** %3 - %4 = bitcast i32* %numRecords.addr to i8* - %5 = getelementptr i8*, i8** %kernel_args, i32 2 - store i8* %4, i8** %5 - %6 = bitcast float* %lat.addr to i8* - %7 = getelementptr i8*, i8** %kernel_args, i32 3 - store i8* %6, i8** %7 - %8 = bitcast float* %lng.addr to i8* - %9 = getelementptr i8*, i8** %kernel_args, i32 4 - store i8* %8, i8** %9 - %10 = call i32 @__cudaPopCallConfiguration(%struct.dim3* %grid_dim, %struct.dim3* %block_dim, i64* %shmem_size, i8** %stream) - %11 = load i64, i64* %shmem_size, align 8 - %12 = load i8*, i8** %stream, align 8 - %13 = bitcast { i64, i32 }* %grid_dim.coerce to i8* - %14 = bitcast %struct.dim3* %grid_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %13, i8* align 8 %14, i64 12, i1 false) - %15 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 0 - %16 = load i64, i64* %15, align 8 - %17 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 1 - %18 = load i32, i32* %17, align 8 - %19 = bitcast { i64, i32 }* %block_dim.coerce to i8* - %20 = bitcast %struct.dim3* %block_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %19, i8* align 8 %20, i64 12, i1 false) - %21 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 0 - %22 = load i64, i64* %21, align 8 - %23 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 1 - %24 = load i32, i32* %23, align 8 - %25 = bitcast i8* %12 to %struct.CUstream_st* - %call = call i32 @cudaLaunchKernel(i8* bitcast (void (%struct.latLong*, float*, i32, float, float)* @_Z6euclidP7latLongPfiff to i8*), i64 %16, i32 %18, i64 %22, i32 %24, i8** %kernel_args, i64 %11, %struct.CUstream_st* %25) - br label %setup.end - -setup.end: ; preds = %entry - ret void -} - -declare dso_local i32 @__cudaPopCallConfiguration(%struct.dim3*, %struct.dim3*, i64*, i8**) - -declare dso_local i32 @cudaLaunchKernel(i8*, i64, i32, i64, i32, i8**, i64, %struct.CUstream_st*) - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #1 - -; Function Attrs: noinline norecurse optnone uwtable -define dso_local i32 @main(i32 %argc, i8** %argv) #2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -entry: - %retval = alloca i32, align 4 - %argc.addr = alloca i32, align 4 - %argv.addr = alloca i8**, align 8 - %i = alloca i32, align 4 - %lat = alloca float, align 4 - %lng = alloca float, align 4 - %quiet = alloca i32, align 4 - %timing = alloca i32, align 4 - %platform = alloca i32, align 4 - %device = alloca i32, align 4 - %records = alloca %"class.std::vector", align 8 - %locations = alloca %"class.std::vector.0", align 8 - %exn.slot = alloca i8* - %ehselector.slot = alloca i32 - %filename = alloca [100 x i8], align 16 - %resultsCount = alloca i32, align 4 - %cleanup.dest.slot = alloca i32, align 4 - %numRecords = alloca i32, align 4 - %distances = alloca float*, align 8 - %d_locations = alloca %struct.latLong*, align 8 - %d_distances = alloca float*, align 8 - %deviceProp = alloca %struct.cudaDeviceProp, align 8 - %maxGridX = alloca i64, align 8 - %threadsPerBlock = alloca i64, align 8 - %totalDeviceMemory = alloca i64, align 8 - %freeDeviceMemory = alloca i64, align 8 - %blocks = alloca i64, align 8 - %gridY = alloca i64, align 8 - %gridX = alloca i64, align 8 - %gridDim = alloca %struct.dim3, align 4 - %agg.tmp = alloca %struct.dim3, align 4 - %agg.tmp46 = alloca %struct.dim3, align 4 - %agg.tmp.coerce = alloca { i64, i32 }, align 4 - %agg.tmp46.coerce = alloca { i64, i32 }, align 4 - store i32 0, i32* %retval, align 4 - store i32 %argc, i32* %argc.addr, align 4 - store i8** %argv, i8*** %argv.addr, align 8 - %call = call i32 @cudaSetDevice(i32 0) - store i32 0, i32* %i, align 4 - store i32 0, i32* %quiet, align 4 - store i32 0, i32* %timing, align 4 - store i32 0, i32* %platform, align 4 - store i32 0, i32* %device, align 4 - call void @_ZNSt6vectorI6recordSaIS0_EEC2Ev(%"class.std::vector"* %records) - invoke void @_ZNSt6vectorI7latLongSaIS0_EEC2Ev(%"class.std::vector.0"* %locations) - to label %invoke.cont unwind label %lpad - -invoke.cont: ; preds = %entry - store i32 10, i32* %resultsCount, align 4 - %0 = load i32, i32* %argc.addr, align 4 - %1 = load i8**, i8*** %argv.addr, align 8 - %arraydecay = getelementptr inbounds [100 x i8], [100 x i8]* %filename, i64 0, i64 0 - %call3 = invoke i32 @_Z16parseCommandlineiPPcS_PiPfS2_S1_S1_S1_S1_(i32 %0, i8** %1, i8* %arraydecay, i32* %resultsCount, float* %lat, float* %lng, i32* %quiet, i32* %timing, i32* %platform, i32* %device) - to label %invoke.cont2 unwind label %lpad1 - -invoke.cont2: ; preds = %invoke.cont - %tobool = icmp ne i32 %call3, 0 - br i1 %tobool, label %if.then, label %if.end - -if.then: ; preds = %invoke.cont2 - invoke void @_Z10printUsagev() - to label %invoke.cont4 unwind label %lpad1 - -invoke.cont4: ; preds = %if.then - store i32 0, i32* %retval, align 4 - store i32 1, i32* %cleanup.dest.slot, align 4 - br label %cleanup - -lpad: ; preds = %cleanup, %entry - %2 = landingpad { i8*, i32 } - cleanup - %3 = extractvalue { i8*, i32 } %2, 0 - store i8* %3, i8** %exn.slot, align 8 - %4 = extractvalue { i8*, i32 } %2, 1 - store i32 %4, i32* %ehselector.slot, align 4 - br label %ehcleanup - -lpad1: ; preds = %invoke.cont80, %if.end79, %invoke.cont74, %invoke.cont70, %for.body, %invoke.cont63, %invoke.cont61, %invoke.cont59, %invoke.cont55, %invoke.cont53, %kcall.end, %kcall.configok, %invoke.cont48, %invoke.cont44, %invoke.cont42, %invoke.cont38, %invoke.cont36, %invoke.cont32, %invoke.cont27, %invoke.cont16, %invoke.cont14, %invoke.cont12, %if.end11, %invoke.cont5, %if.end, %if.then, %invoke.cont - %5 = landingpad { i8*, i32 } - cleanup - %6 = extractvalue { i8*, i32 } %5, 0 - store i8* %6, i8** %exn.slot, align 8 - %7 = extractvalue { i8*, i32 } %5, 1 - store i32 %7, i32* %ehselector.slot, align 4 - invoke void @_ZNSt6vectorI7latLongSaIS0_EED2Ev(%"class.std::vector.0"* %locations) - to label %invoke.cont85 unwind label %terminate.lpad - -if.end: ; preds = %invoke.cont2 - %call6 = invoke i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str, i64 0, i64 0)) - to label %invoke.cont5 unwind label %lpad1 - -invoke.cont5: ; preds = %if.end - %arraydecay7 = getelementptr inbounds [100 x i8], [100 x i8]* %filename, i64 0, i64 0 - %call9 = invoke i32 @_Z8loadDataPcRSt6vectorI6recordSaIS1_EERS0_I7latLongSaIS5_EE(i8* %arraydecay7, %"class.std::vector"* dereferenceable(24) %records, %"class.std::vector.0"* dereferenceable(24) %locations) - to label %invoke.cont8 unwind label %lpad1 - -invoke.cont8: ; preds = %invoke.cont5 - store i32 %call9, i32* %numRecords, align 4 - %8 = load i32, i32* %resultsCount, align 4 - %9 = load i32, i32* %numRecords, align 4 - %cmp = icmp sgt i32 %8, %9 - br i1 %cmp, label %if.then10, label %if.end11 - -if.then10: ; preds = %invoke.cont8 - %10 = load i32, i32* %numRecords, align 4 - store i32 %10, i32* %resultsCount, align 4 - br label %if.end11 - -if.end11: ; preds = %if.then10, %invoke.cont8 - %call13 = invoke i32 (i8*, ...) @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str.1, i64 0, i64 0)) - to label %invoke.cont12 unwind label %lpad1 - -invoke.cont12: ; preds = %if.end11 - %call15 = invoke i32 @cudaGetDeviceProperties(%struct.cudaDeviceProp* %deviceProp, i32 0) - to label %invoke.cont14 unwind label %lpad1 - -invoke.cont14: ; preds = %invoke.cont12 - %call17 = invoke i32 @cudaDeviceSynchronize() - to label %invoke.cont16 unwind label %lpad1 - -invoke.cont16: ; preds = %invoke.cont14 - %maxGridSize = getelementptr inbounds %struct.cudaDeviceProp, %struct.cudaDeviceProp* %deviceProp, i32 0, i32 11 - %arrayidx = getelementptr inbounds [3 x i32], [3 x i32]* %maxGridSize, i64 0, i64 0 - %11 = load i32, i32* %arrayidx, align 8 - %conv = sext i32 %11 to i64 - store i64 %conv, i64* %maxGridX, align 8 - store i64 256, i64* %threadsPerBlock, align 8 - %12 = load i32, i32* %numRecords, align 4 - %conv18 = sext i32 %12 to i64 - %13 = load i64, i64* %threadsPerBlock, align 8 - %add = add i64 %conv18, %13 - %sub = sub i64 %add, 1 - %14 = load i64, i64* %threadsPerBlock, align 8 - %div = udiv i64 %sub, %14 - store i64 %div, i64* %blocks, align 8 - %15 = load i64, i64* %blocks, align 8 - %16 = load i64, i64* %maxGridX, align 8 - %add19 = add i64 %15, %16 - %sub20 = sub i64 %add19, 1 - %17 = load i64, i64* %maxGridX, align 8 - %div21 = udiv i64 %sub20, %17 - store i64 %div21, i64* %gridY, align 8 - %18 = load i64, i64* %blocks, align 8 - %19 = load i64, i64* %gridY, align 8 - %add22 = add i64 %18, %19 - %sub23 = sub i64 %add22, 1 - %20 = load i64, i64* %gridY, align 8 - %div24 = udiv i64 %sub23, %20 - store i64 %div24, i64* %gridX, align 8 - %21 = load i64, i64* %gridX, align 8 - %conv25 = trunc i64 %21 to i32 - %22 = load i64, i64* %gridY, align 8 - %conv26 = trunc i64 %22 to i32 - invoke void @_ZN4dim3C2Ejjj(%struct.dim3* %gridDim, i32 %conv25, i32 %conv26, i32 1) - to label %invoke.cont27 unwind label %lpad1 - -invoke.cont27: ; preds = %invoke.cont16 - %23 = load i32, i32* %numRecords, align 4 - %conv28 = sext i32 %23 to i64 - %mul = mul i64 4, %conv28 - %call29 = call noalias i8* @malloc(i64 %mul) #12 - %24 = bitcast i8* %call29 to float* - store float* %24, float** %distances, align 8 - %25 = bitcast %struct.latLong** %d_locations to i8** - %26 = load i32, i32* %numRecords, align 4 - %conv30 = sext i32 %26 to i64 - %mul31 = mul i64 8, %conv30 - %call33 = invoke i32 @cudaMalloc(i8** %25, i64 %mul31) - to label %invoke.cont32 unwind label %lpad1 - -invoke.cont32: ; preds = %invoke.cont27 - %27 = bitcast float** %d_distances to i8** - %28 = load i32, i32* %numRecords, align 4 - %conv34 = sext i32 %28 to i64 - %mul35 = mul i64 4, %conv34 - %call37 = invoke i32 @cudaMalloc(i8** %27, i64 %mul35) - to label %invoke.cont36 unwind label %lpad1 - -invoke.cont36: ; preds = %invoke.cont32 - %29 = load %struct.latLong*, %struct.latLong** %d_locations, align 8 - %30 = bitcast %struct.latLong* %29 to i8* - %call39 = invoke dereferenceable(8) %struct.latLong* @_ZNSt6vectorI7latLongSaIS0_EEixEm(%"class.std::vector.0"* %locations, i64 0) - to label %invoke.cont38 unwind label %lpad1 - -invoke.cont38: ; preds = %invoke.cont36 - %31 = bitcast %struct.latLong* %call39 to i8* - %32 = load i32, i32* %numRecords, align 4 - %conv40 = sext i32 %32 to i64 - %mul41 = mul i64 8, %conv40 - %call43 = invoke i32 @cudaMemcpy(i8* %30, i8* %31, i64 %mul41, i32 1) - to label %invoke.cont42 unwind label %lpad1 - -invoke.cont42: ; preds = %invoke.cont38 - %call45 = invoke i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str.2, i64 0, i64 0)) - to label %invoke.cont44 unwind label %lpad1 - -invoke.cont44: ; preds = %invoke.cont42 - %33 = bitcast %struct.dim3* %agg.tmp to i8* - %34 = bitcast %struct.dim3* %gridDim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %33, i8* align 4 %34, i64 12, i1 false) - %35 = load i64, i64* %threadsPerBlock, align 8 - %conv47 = trunc i64 %35 to i32 - invoke void @_ZN4dim3C2Ejjj(%struct.dim3* %agg.tmp46, i32 %conv47, i32 1, i32 1) - to label %invoke.cont48 unwind label %lpad1 - -invoke.cont48: ; preds = %invoke.cont44 - %36 = bitcast { i64, i32 }* %agg.tmp.coerce to i8* - %37 = bitcast %struct.dim3* %agg.tmp to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %36, i8* align 4 %37, i64 12, i1 false) - %38 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 0 - %39 = load i64, i64* %38, align 4 - %40 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 1 - %41 = load i32, i32* %40, align 4 - %42 = bitcast { i64, i32 }* %agg.tmp46.coerce to i8* - %43 = bitcast %struct.dim3* %agg.tmp46 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %42, i8* align 4 %43, i64 12, i1 false) - %44 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp46.coerce, i32 0, i32 0 - %45 = load i64, i64* %44, align 4 - %46 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp46.coerce, i32 0, i32 1 - %47 = load i32, i32* %46, align 4 - %call50 = invoke i32 @__cudaPushCallConfiguration(i64 %39, i32 %41, i64 %45, i32 %47, i64 0, i8* null) - to label %invoke.cont49 unwind label %lpad1 - -invoke.cont49: ; preds = %invoke.cont48 - %tobool51 = icmp ne i32 %call50, 0 - br i1 %tobool51, label %kcall.end, label %kcall.configok - -kcall.configok: ; preds = %invoke.cont49 - %48 = load %struct.latLong*, %struct.latLong** %d_locations, align 8 - %49 = load float*, float** %d_distances, align 8 - %50 = load i32, i32* %numRecords, align 4 - %51 = load float, float* %lat, align 4 - %52 = load float, float* %lng, align 4 - invoke void @_Z6euclidP7latLongPfiff(%struct.latLong* %48, float* %49, i32 %50, float %51, float %52) - to label %invoke.cont52 unwind label %lpad1 - -invoke.cont52: ; preds = %kcall.configok - br label %kcall.end - -kcall.end: ; preds = %invoke.cont52, %invoke.cont49 - %call54 = invoke i32 @cudaDeviceSynchronize() - to label %invoke.cont53 unwind label %lpad1 - -invoke.cont53: ; preds = %kcall.end - %call56 = invoke i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.3, i64 0, i64 0)) - to label %invoke.cont55 unwind label %lpad1 - -invoke.cont55: ; preds = %invoke.cont53 - %53 = load float*, float** %distances, align 8 - %54 = bitcast float* %53 to i8* - %55 = load float*, float** %d_distances, align 8 - %56 = bitcast float* %55 to i8* - %57 = load i32, i32* %numRecords, align 4 - %conv57 = sext i32 %57 to i64 - %mul58 = mul i64 4, %conv57 - %call60 = invoke i32 @cudaMemcpy(i8* %54, i8* %56, i64 %mul58, i32 2) - to label %invoke.cont59 unwind label %lpad1 - -invoke.cont59: ; preds = %invoke.cont55 - %call62 = invoke i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str.4, i64 0, i64 0)) - to label %invoke.cont61 unwind label %lpad1 - -invoke.cont61: ; preds = %invoke.cont59 - %58 = load float*, float** %distances, align 8 - %59 = load i32, i32* %numRecords, align 4 - %60 = load i32, i32* %resultsCount, align 4 - invoke void @_Z10findLowestRSt6vectorI6recordSaIS0_EEPfii(%"class.std::vector"* dereferenceable(24) %records, float* %58, i32 %59, i32 %60) - to label %invoke.cont63 unwind label %lpad1 - -invoke.cont63: ; preds = %invoke.cont61 - %call65 = invoke i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.5, i64 0, i64 0)) - to label %invoke.cont64 unwind label %lpad1 - -invoke.cont64: ; preds = %invoke.cont63 - %61 = load i32, i32* %quiet, align 4 - %tobool66 = icmp ne i32 %61, 0 - br i1 %tobool66, label %if.end79, label %if.then67 - -if.then67: ; preds = %invoke.cont64 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %if.then67 - %62 = load i32, i32* %i, align 4 - %63 = load i32, i32* %resultsCount, align 4 - %cmp68 = icmp slt i32 %62, %63 - br i1 %cmp68, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %64 = load i32, i32* %i, align 4 - %conv69 = sext i32 %64 to i64 - %call71 = invoke dereferenceable(60) %struct.record* @_ZNSt6vectorI6recordSaIS0_EEixEm(%"class.std::vector"* %records, i64 %conv69) - to label %invoke.cont70 unwind label %lpad1 - -invoke.cont70: ; preds = %for.body - %recString = getelementptr inbounds %struct.record, %struct.record* %call71, i32 0, i32 0 - %arraydecay72 = getelementptr inbounds [53 x i8], [53 x i8]* %recString, i64 0, i64 0 - %65 = load i32, i32* %i, align 4 - %conv73 = sext i32 %65 to i64 - %call75 = invoke dereferenceable(60) %struct.record* @_ZNSt6vectorI6recordSaIS0_EEixEm(%"class.std::vector"* %records, i64 %conv73) - to label %invoke.cont74 unwind label %lpad1 - -invoke.cont74: ; preds = %invoke.cont70 - %distance = getelementptr inbounds %struct.record, %struct.record* %call75, i32 0, i32 1 - %66 = load float, float* %distance, align 4 - %conv76 = fpext float %66 to double - %call78 = invoke i32 (i8*, ...) @printf(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str.6, i64 0, i64 0), i8* %arraydecay72, double %conv76) - to label %invoke.cont77 unwind label %lpad1 - -invoke.cont77: ; preds = %invoke.cont74 - br label %for.inc - -for.inc: ; preds = %invoke.cont77 - %67 = load i32, i32* %i, align 4 - %inc = add nsw i32 %67, 1 - store i32 %inc, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - br label %if.end79 - -if.end79: ; preds = %for.end, %invoke.cont64 - %68 = load float*, float** %distances, align 8 - %69 = bitcast float* %68 to i8* - call void @free(i8* %69) #12 - %70 = load %struct.latLong*, %struct.latLong** %d_locations, align 8 - %71 = bitcast %struct.latLong* %70 to i8* - %call81 = invoke i32 @cudaFree(i8* %71) - to label %invoke.cont80 unwind label %lpad1 - -invoke.cont80: ; preds = %if.end79 - %72 = load float*, float** %d_distances, align 8 - %73 = bitcast float* %72 to i8* - %call83 = invoke i32 @cudaFree(i8* %73) - to label %invoke.cont82 unwind label %lpad1 - -invoke.cont82: ; preds = %invoke.cont80 - store i32 0, i32* %cleanup.dest.slot, align 4 - br label %cleanup - -cleanup: ; preds = %invoke.cont82, %invoke.cont4 - invoke void @_ZNSt6vectorI7latLongSaIS0_EED2Ev(%"class.std::vector.0"* %locations) - to label %invoke.cont84 unwind label %lpad - -invoke.cont84: ; preds = %cleanup - call void @_ZNSt6vectorI6recordSaIS0_EED2Ev(%"class.std::vector"* %records) - %cleanup.dest = load i32, i32* %cleanup.dest.slot, align 4 - switch i32 %cleanup.dest, label %unreachable [ - i32 0, label %cleanup.cont - i32 1, label %cleanup.cont - ] - -cleanup.cont: ; preds = %invoke.cont84, %invoke.cont84 - %74 = load i32, i32* %retval, align 4 - ret i32 %74 - -invoke.cont85: ; preds = %lpad1 - br label %ehcleanup - -ehcleanup: ; preds = %invoke.cont85, %lpad - invoke void @_ZNSt6vectorI6recordSaIS0_EED2Ev(%"class.std::vector"* %records) - to label %invoke.cont87 unwind label %terminate.lpad - -invoke.cont87: ; preds = %ehcleanup - br label %eh.resume - -eh.resume: ; preds = %invoke.cont87 - %exn = load i8*, i8** %exn.slot, align 8 - %sel = load i32, i32* %ehselector.slot, align 4 - %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0 - %lpad.val88 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1 - resume { i8*, i32 } %lpad.val88 - -terminate.lpad: ; preds = %ehcleanup, %lpad1 - %75 = landingpad { i8*, i32 } - catch i8* null - %76 = extractvalue { i8*, i32 } %75, 0 - call void @__clang_call_terminate(i8* %76) #13 - unreachable - -unreachable: ; preds = %invoke.cont84 - unreachable -} - -declare dso_local i32 @cudaSetDevice(i32) #3 - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt6vectorI6recordSaIS0_EEC2Ev(%"class.std::vector"* %this) unnamed_addr #0 comdat align 2 { -entry: - %this.addr = alloca %"class.std::vector"*, align 8 - store %"class.std::vector"* %this, %"class.std::vector"** %this.addr, align 8 - %this1 = load %"class.std::vector"*, %"class.std::vector"** %this.addr, align 8 - %0 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - call void @_ZNSt12_Vector_baseI6recordSaIS0_EEC2Ev(%"struct.std::_Vector_base"* %0) - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt6vectorI7latLongSaIS0_EEC2Ev(%"class.std::vector.0"* %this) unnamed_addr #0 comdat align 2 { -entry: - %this.addr = alloca %"class.std::vector.0"*, align 8 - store %"class.std::vector.0"* %this, %"class.std::vector.0"** %this.addr, align 8 - %this1 = load %"class.std::vector.0"*, %"class.std::vector.0"** %this.addr, align 8 - %0 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Vector_base.1"* - call void @_ZNSt12_Vector_baseI7latLongSaIS0_EEC2Ev(%"struct.std::_Vector_base.1"* %0) - ret void -} - -declare dso_local i32 @__gxx_personality_v0(...) - -; Function Attrs: noinline optnone uwtable -define dso_local i32 @_Z16parseCommandlineiPPcS_PiPfS2_S1_S1_S1_S1_(i32 %argc, i8** %argv, i8* %filename, i32* %r, float* %lat, float* %lng, i32* %q, i32* %t, i32* %p, i32* %d) #0 { -entry: - %retval = alloca i32, align 4 - %argc.addr = alloca i32, align 4 - %argv.addr = alloca i8**, align 8 - %filename.addr = alloca i8*, align 8 - %r.addr = alloca i32*, align 8 - %lat.addr = alloca float*, align 8 - %lng.addr = alloca float*, align 8 - %q.addr = alloca i32*, align 8 - %t.addr = alloca i32*, align 8 - %p.addr = alloca i32*, align 8 - %d.addr = alloca i32*, align 8 - %i = alloca i32, align 4 - %flag = alloca i8, align 1 - store i32 %argc, i32* %argc.addr, align 4 - store i8** %argv, i8*** %argv.addr, align 8 - store i8* %filename, i8** %filename.addr, align 8 - store i32* %r, i32** %r.addr, align 8 - store float* %lat, float** %lat.addr, align 8 - store float* %lng, float** %lng.addr, align 8 - store i32* %q, i32** %q.addr, align 8 - store i32* %t, i32** %t.addr, align 8 - store i32* %p, i32** %p.addr, align 8 - store i32* %d, i32** %d.addr, align 8 - %0 = load i32, i32* %argc.addr, align 4 - %cmp = icmp slt i32 %0, 2 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - store i32 1, i32* %retval, align 4 - br label %return - -if.end: ; preds = %entry - %1 = load i8*, i8** %filename.addr, align 8 - %2 = load i8**, i8*** %argv.addr, align 8 - %arrayidx = getelementptr inbounds i8*, i8** %2, i64 1 - %3 = load i8*, i8** %arrayidx, align 8 - %call = call i8* @strncpy(i8* %1, i8* %3, i64 100) - store i32 1, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %if.end - %4 = load i32, i32* %i, align 4 - %5 = load i32, i32* %argc.addr, align 4 - %cmp1 = icmp slt i32 %4, %5 - br i1 %cmp1, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %6 = load i8**, i8*** %argv.addr, align 8 - %7 = load i32, i32* %i, align 4 - %idxprom = sext i32 %7 to i64 - %arrayidx2 = getelementptr inbounds i8*, i8** %6, i64 %idxprom - %8 = load i8*, i8** %arrayidx2, align 8 - %arrayidx3 = getelementptr inbounds i8, i8* %8, i64 0 - %9 = load i8, i8* %arrayidx3, align 1 - %conv = sext i8 %9 to i32 - %cmp4 = icmp eq i32 %conv, 45 - br i1 %cmp4, label %if.then5, label %if.end44 - -if.then5: ; preds = %for.body - %10 = load i8**, i8*** %argv.addr, align 8 - %11 = load i32, i32* %i, align 4 - %idxprom6 = sext i32 %11 to i64 - %arrayidx7 = getelementptr inbounds i8*, i8** %10, i64 %idxprom6 - %12 = load i8*, i8** %arrayidx7, align 8 - %arrayidx8 = getelementptr inbounds i8, i8* %12, i64 1 - %13 = load i8, i8* %arrayidx8, align 1 - store i8 %13, i8* %flag, align 1 - %14 = load i8, i8* %flag, align 1 - %conv9 = sext i8 %14 to i32 - switch i32 %conv9, label %sw.epilog [ - i32 114, label %sw.bb - i32 108, label %sw.bb13 - i32 104, label %sw.bb31 - i32 113, label %sw.bb32 - i32 116, label %sw.bb33 - i32 112, label %sw.bb34 - i32 100, label %sw.bb39 - ] - -sw.bb: ; preds = %if.then5 - %15 = load i32, i32* %i, align 4 - %inc = add nsw i32 %15, 1 - store i32 %inc, i32* %i, align 4 - %16 = load i8**, i8*** %argv.addr, align 8 - %17 = load i32, i32* %i, align 4 - %idxprom10 = sext i32 %17 to i64 - %arrayidx11 = getelementptr inbounds i8*, i8** %16, i64 %idxprom10 - %18 = load i8*, i8** %arrayidx11, align 8 - %call12 = call i32 @atoi(i8* %18) #14 - %19 = load i32*, i32** %r.addr, align 8 - store i32 %call12, i32* %19, align 4 - br label %sw.epilog - -sw.bb13: ; preds = %if.then5 - %20 = load i8**, i8*** %argv.addr, align 8 - %21 = load i32, i32* %i, align 4 - %idxprom14 = sext i32 %21 to i64 - %arrayidx15 = getelementptr inbounds i8*, i8** %20, i64 %idxprom14 - %22 = load i8*, i8** %arrayidx15, align 8 - %arrayidx16 = getelementptr inbounds i8, i8* %22, i64 2 - %23 = load i8, i8* %arrayidx16, align 1 - %conv17 = sext i8 %23 to i32 - %cmp18 = icmp eq i32 %conv17, 97 - br i1 %cmp18, label %if.then19, label %if.else - -if.then19: ; preds = %sw.bb13 - %24 = load i8**, i8*** %argv.addr, align 8 - %25 = load i32, i32* %i, align 4 - %add = add nsw i32 %25, 1 - %idxprom20 = sext i32 %add to i64 - %arrayidx21 = getelementptr inbounds i8*, i8** %24, i64 %idxprom20 - %26 = load i8*, i8** %arrayidx21, align 8 - %call22 = call double @atof(i8* %26) #14 - %conv23 = fptrunc double %call22 to float - %27 = load float*, float** %lat.addr, align 8 - store float %conv23, float* %27, align 4 - br label %if.end29 - -if.else: ; preds = %sw.bb13 - %28 = load i8**, i8*** %argv.addr, align 8 - %29 = load i32, i32* %i, align 4 - %add24 = add nsw i32 %29, 1 - %idxprom25 = sext i32 %add24 to i64 - %arrayidx26 = getelementptr inbounds i8*, i8** %28, i64 %idxprom25 - %30 = load i8*, i8** %arrayidx26, align 8 - %call27 = call double @atof(i8* %30) #14 - %conv28 = fptrunc double %call27 to float - %31 = load float*, float** %lng.addr, align 8 - store float %conv28, float* %31, align 4 - br label %if.end29 - -if.end29: ; preds = %if.else, %if.then19 - %32 = load i32, i32* %i, align 4 - %inc30 = add nsw i32 %32, 1 - store i32 %inc30, i32* %i, align 4 - br label %sw.epilog - -sw.bb31: ; preds = %if.then5 - store i32 1, i32* %retval, align 4 - br label %return - -sw.bb32: ; preds = %if.then5 - %33 = load i32*, i32** %q.addr, align 8 - store i32 1, i32* %33, align 4 - br label %sw.epilog - -sw.bb33: ; preds = %if.then5 - %34 = load i32*, i32** %t.addr, align 8 - store i32 1, i32* %34, align 4 - br label %sw.epilog - -sw.bb34: ; preds = %if.then5 - %35 = load i32, i32* %i, align 4 - %inc35 = add nsw i32 %35, 1 - store i32 %inc35, i32* %i, align 4 - %36 = load i8**, i8*** %argv.addr, align 8 - %37 = load i32, i32* %i, align 4 - %idxprom36 = sext i32 %37 to i64 - %arrayidx37 = getelementptr inbounds i8*, i8** %36, i64 %idxprom36 - %38 = load i8*, i8** %arrayidx37, align 8 - %call38 = call i32 @atoi(i8* %38) #14 - %39 = load i32*, i32** %p.addr, align 8 - store i32 %call38, i32* %39, align 4 - br label %sw.epilog - -sw.bb39: ; preds = %if.then5 - %40 = load i32, i32* %i, align 4 - %inc40 = add nsw i32 %40, 1 - store i32 %inc40, i32* %i, align 4 - %41 = load i8**, i8*** %argv.addr, align 8 - %42 = load i32, i32* %i, align 4 - %idxprom41 = sext i32 %42 to i64 - %arrayidx42 = getelementptr inbounds i8*, i8** %41, i64 %idxprom41 - %43 = load i8*, i8** %arrayidx42, align 8 - %call43 = call i32 @atoi(i8* %43) #14 - %44 = load i32*, i32** %d.addr, align 8 - store i32 %call43, i32* %44, align 4 - br label %sw.epilog - -sw.epilog: ; preds = %sw.bb39, %sw.bb34, %sw.bb33, %sw.bb32, %if.end29, %sw.bb, %if.then5 - br label %if.end44 - -if.end44: ; preds = %sw.epilog, %for.body - br label %for.inc - -for.inc: ; preds = %if.end44 - %45 = load i32, i32* %i, align 4 - %inc45 = add nsw i32 %45, 1 - store i32 %inc45, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %46 = load i32*, i32** %d.addr, align 8 - %47 = load i32, i32* %46, align 4 - %cmp46 = icmp sge i32 %47, 0 - br i1 %cmp46, label %land.lhs.true, label %lor.lhs.false - -land.lhs.true: ; preds = %for.end - %48 = load i32*, i32** %p.addr, align 8 - %49 = load i32, i32* %48, align 4 - %cmp47 = icmp slt i32 %49, 0 - br i1 %cmp47, label %if.then51, label %lor.lhs.false - -lor.lhs.false: ; preds = %land.lhs.true, %for.end - %50 = load i32*, i32** %p.addr, align 8 - %51 = load i32, i32* %50, align 4 - %cmp48 = icmp sge i32 %51, 0 - br i1 %cmp48, label %land.lhs.true49, label %if.end52 - -land.lhs.true49: ; preds = %lor.lhs.false - %52 = load i32*, i32** %d.addr, align 8 - %53 = load i32, i32* %52, align 4 - %cmp50 = icmp slt i32 %53, 0 - br i1 %cmp50, label %if.then51, label %if.end52 - -if.then51: ; preds = %land.lhs.true49, %land.lhs.true - store i32 1, i32* %retval, align 4 - br label %return - -if.end52: ; preds = %land.lhs.true49, %lor.lhs.false - store i32 0, i32* %retval, align 4 - br label %return - -return: ; preds = %if.end52, %if.then51, %sw.bb31, %if.then - %54 = load i32, i32* %retval, align 4 - ret i32 %54 -} - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z10printUsagev() #0 { -entry: - %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.11, i64 0, i64 0)) - %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.12, i64 0, i64 0)) - %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([90 x i8], [90 x i8]* @.str.13, i64 0, i64 0)) - %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.12, i64 0, i64 0)) - %call4 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.14, i64 0, i64 0)) - %call5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([55 x i8], [55 x i8]* @.str.15, i64 0, i64 0)) - %call6 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.12, i64 0, i64 0)) - %call7 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([59 x i8], [59 x i8]* @.str.16, i64 0, i64 0)) - %call8 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @.str.17, i64 0, i64 0)) - %call9 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([62 x i8], [62 x i8]* @.str.18, i64 0, i64 0)) - %call10 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([63 x i8], [63 x i8]* @.str.19, i64 0, i64 0)) - %call11 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.12, i64 0, i64 0)) - %call12 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str.20, i64 0, i64 0)) - %call13 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([52 x i8], [52 x i8]* @.str.21, i64 0, i64 0)) - %call14 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([40 x i8], [40 x i8]* @.str.22, i64 0, i64 0)) - %call15 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.12, i64 0, i64 0)) - %call16 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([73 x i8], [73 x i8]* @.str.23, i64 0, i64 0)) - %call17 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([71 x i8], [71 x i8]* @.str.24, i64 0, i64 0)) - %call18 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.12, i64 0, i64 0)) - %call19 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.12, i64 0, i64 0)) - %call20 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([60 x i8], [60 x i8]* @.str.25, i64 0, i64 0)) - %call21 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([61 x i8], [61 x i8]* @.str.26, i64 0, i64 0)) - %call22 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([35 x i8], [35 x i8]* @.str.27, i64 0, i64 0)) - ret void -} - -declare dso_local i32 @printf(i8*, ...) #3 - -; Function Attrs: noinline optnone uwtable -define dso_local i32 @_Z8loadDataPcRSt6vectorI6recordSaIS1_EERS0_I7latLongSaIS5_EE(i8* %filename, %"class.std::vector"* dereferenceable(24) %records, %"class.std::vector.0"* dereferenceable(24) %locations) #0 { -entry: - %filename.addr = alloca i8*, align 8 - %records.addr = alloca %"class.std::vector"*, align 8 - %locations.addr = alloca %"class.std::vector.0"*, align 8 - %flist = alloca %struct._IO_FILE*, align 8 - %fp = alloca %struct._IO_FILE*, align 8 - %i = alloca i32, align 4 - %dbname = alloca [64 x i8], align 16 - %recNum = alloca i32, align 4 - %record = alloca %struct.record, align 4 - %latLong = alloca %struct.latLong, align 4 - %substr = alloca [6 x i8], align 1 - store i8* %filename, i8** %filename.addr, align 8 - store %"class.std::vector"* %records, %"class.std::vector"** %records.addr, align 8 - store %"class.std::vector.0"* %locations, %"class.std::vector.0"** %locations.addr, align 8 - store i32 0, i32* %i, align 4 - store i32 0, i32* %recNum, align 4 - %0 = load i8*, i8** %filename.addr, align 8 - %call = call %struct._IO_FILE* @fopen(i8* %0, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.7, i64 0, i64 0)) - store %struct._IO_FILE* %call, %struct._IO_FILE** %flist, align 8 - br label %while.cond - -while.cond: ; preds = %while.end, %entry - %1 = load %struct._IO_FILE*, %struct._IO_FILE** %flist, align 8 - %call1 = call i32 @feof(%struct._IO_FILE* %1) #12 - %tobool = icmp ne i32 %call1, 0 - %lnot = xor i1 %tobool, true - br i1 %lnot, label %while.body, label %while.end48 - -while.body: ; preds = %while.cond - %2 = load %struct._IO_FILE*, %struct._IO_FILE** %flist, align 8 - %arraydecay = getelementptr inbounds [64 x i8], [64 x i8]* %dbname, i64 0, i64 0 - %call2 = call i32 (%struct._IO_FILE*, i8*, ...) @fscanf(%struct._IO_FILE* %2, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.8, i64 0, i64 0), i8* %arraydecay) - %cmp = icmp ne i32 %call2, 1 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %while.body - %3 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %call3 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %3, i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.9, i64 0, i64 0)) - call void @exit(i32 0) #13 - unreachable - -if.end: ; preds = %while.body - %arraydecay4 = getelementptr inbounds [64 x i8], [64 x i8]* %dbname, i64 0, i64 0 - %call5 = call %struct._IO_FILE* @fopen(i8* %arraydecay4, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.7, i64 0, i64 0)) - store %struct._IO_FILE* %call5, %struct._IO_FILE** %fp, align 8 - %4 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8 - %tobool6 = icmp ne %struct._IO_FILE* %4, null - br i1 %tobool6, label %if.end9, label %if.then7 - -if.then7: ; preds = %if.end - %call8 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str.10, i64 0, i64 0)) - call void @exit(i32 1) #13 - unreachable - -if.end9: ; preds = %if.end - br label %while.cond10 - -while.cond10: ; preds = %for.end41, %if.end9 - %5 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8 - %call11 = call i32 @feof(%struct._IO_FILE* %5) #12 - %tobool12 = icmp ne i32 %call11, 0 - %lnot13 = xor i1 %tobool12, true - br i1 %lnot13, label %while.body14, label %while.end - -while.body14: ; preds = %while.cond10 - %recString = getelementptr inbounds %struct.record, %struct.record* %record, i32 0, i32 0 - %arraydecay15 = getelementptr inbounds [53 x i8], [53 x i8]* %recString, i64 0, i64 0 - %6 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8 - %call16 = call i8* @fgets(i8* %arraydecay15, i32 49, %struct._IO_FILE* %6) - %7 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8 - %call17 = call i32 @fgetc(%struct._IO_FILE* %7) - %8 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8 - %call18 = call i32 @feof(%struct._IO_FILE* %8) #12 - %tobool19 = icmp ne i32 %call18, 0 - br i1 %tobool19, label %if.then20, label %if.end21 - -if.then20: ; preds = %while.body14 - br label %while.end - -if.end21: ; preds = %while.body14 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %if.end21 - %9 = load i32, i32* %i, align 4 - %cmp22 = icmp slt i32 %9, 5 - br i1 %cmp22, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %recString23 = getelementptr inbounds %struct.record, %struct.record* %record, i32 0, i32 0 - %arraydecay24 = getelementptr inbounds [53 x i8], [53 x i8]* %recString23, i64 0, i64 0 - %10 = load i32, i32* %i, align 4 - %idx.ext = sext i32 %10 to i64 - %add.ptr = getelementptr inbounds i8, i8* %arraydecay24, i64 %idx.ext - %add.ptr25 = getelementptr inbounds i8, i8* %add.ptr, i64 28 - %11 = load i8, i8* %add.ptr25, align 1 - %12 = load i32, i32* %i, align 4 - %idxprom = sext i32 %12 to i64 - %arrayidx = getelementptr inbounds [6 x i8], [6 x i8]* %substr, i64 0, i64 %idxprom - store i8 %11, i8* %arrayidx, align 1 - br label %for.inc - -for.inc: ; preds = %for.body - %13 = load i32, i32* %i, align 4 - %inc = add nsw i32 %13, 1 - store i32 %inc, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %arrayidx26 = getelementptr inbounds [6 x i8], [6 x i8]* %substr, i64 0, i64 5 - store i8 0, i8* %arrayidx26, align 1 - %arraydecay27 = getelementptr inbounds [6 x i8], [6 x i8]* %substr, i64 0, i64 0 - %call28 = call double @atof(i8* %arraydecay27) #14 - %conv = fptrunc double %call28 to float - %lat = getelementptr inbounds %struct.latLong, %struct.latLong* %latLong, i32 0, i32 0 - store float %conv, float* %lat, align 4 - store i32 0, i32* %i, align 4 - br label %for.cond29 - -for.cond29: ; preds = %for.inc39, %for.end - %14 = load i32, i32* %i, align 4 - %cmp30 = icmp slt i32 %14, 5 - br i1 %cmp30, label %for.body31, label %for.end41 - -for.body31: ; preds = %for.cond29 - %recString32 = getelementptr inbounds %struct.record, %struct.record* %record, i32 0, i32 0 - %arraydecay33 = getelementptr inbounds [53 x i8], [53 x i8]* %recString32, i64 0, i64 0 - %15 = load i32, i32* %i, align 4 - %idx.ext34 = sext i32 %15 to i64 - %add.ptr35 = getelementptr inbounds i8, i8* %arraydecay33, i64 %idx.ext34 - %add.ptr36 = getelementptr inbounds i8, i8* %add.ptr35, i64 33 - %16 = load i8, i8* %add.ptr36, align 1 - %17 = load i32, i32* %i, align 4 - %idxprom37 = sext i32 %17 to i64 - %arrayidx38 = getelementptr inbounds [6 x i8], [6 x i8]* %substr, i64 0, i64 %idxprom37 - store i8 %16, i8* %arrayidx38, align 1 - br label %for.inc39 - -for.inc39: ; preds = %for.body31 - %18 = load i32, i32* %i, align 4 - %inc40 = add nsw i32 %18, 1 - store i32 %inc40, i32* %i, align 4 - br label %for.cond29 - -for.end41: ; preds = %for.cond29 - %arrayidx42 = getelementptr inbounds [6 x i8], [6 x i8]* %substr, i64 0, i64 5 - store i8 0, i8* %arrayidx42, align 1 - %arraydecay43 = getelementptr inbounds [6 x i8], [6 x i8]* %substr, i64 0, i64 0 - %call44 = call double @atof(i8* %arraydecay43) #14 - %conv45 = fptrunc double %call44 to float - %lng = getelementptr inbounds %struct.latLong, %struct.latLong* %latLong, i32 0, i32 1 - store float %conv45, float* %lng, align 4 - %19 = load %"class.std::vector.0"*, %"class.std::vector.0"** %locations.addr, align 8 - call void @_ZNSt6vectorI7latLongSaIS0_EE9push_backERKS0_(%"class.std::vector.0"* %19, %struct.latLong* dereferenceable(8) %latLong) - %20 = load %"class.std::vector"*, %"class.std::vector"** %records.addr, align 8 - call void @_ZNSt6vectorI6recordSaIS0_EE9push_backERKS0_(%"class.std::vector"* %20, %struct.record* dereferenceable(60) %record) - %21 = load i32, i32* %recNum, align 4 - %inc46 = add nsw i32 %21, 1 - store i32 %inc46, i32* %recNum, align 4 - br label %while.cond10 - -while.end: ; preds = %if.then20, %while.cond10 - %22 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8 - %call47 = call i32 @fclose(%struct._IO_FILE* %22) - br label %while.cond - -while.end48: ; preds = %while.cond - %23 = load %struct._IO_FILE*, %struct._IO_FILE** %flist, align 8 - %call49 = call i32 @fclose(%struct._IO_FILE* %23) - %24 = load i32, i32* %recNum, align 4 - ret i32 %24 -} - -declare dso_local i32 @cudaGetDeviceProperties(%struct.cudaDeviceProp*, i32) #3 - -declare dso_local i32 @cudaDeviceSynchronize() #3 - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN4dim3C2Ejjj(%struct.dim3* %this, i32 %vx, i32 %vy, i32 %vz) unnamed_addr #4 comdat align 2 { -entry: - %this.addr = alloca %struct.dim3*, align 8 - %vx.addr = alloca i32, align 4 - %vy.addr = alloca i32, align 4 - %vz.addr = alloca i32, align 4 - store %struct.dim3* %this, %struct.dim3** %this.addr, align 8 - store i32 %vx, i32* %vx.addr, align 4 - store i32 %vy, i32* %vy.addr, align 4 - store i32 %vz, i32* %vz.addr, align 4 - %this1 = load %struct.dim3*, %struct.dim3** %this.addr, align 8 - %x = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 0 - %0 = load i32, i32* %vx.addr, align 4 - store i32 %0, i32* %x, align 4 - %y = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 1 - %1 = load i32, i32* %vy.addr, align 4 - store i32 %1, i32* %y, align 4 - %z = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 2 - %2 = load i32, i32* %vz.addr, align 4 - store i32 %2, i32* %z, align 4 - ret void -} - -; Function Attrs: nounwind -declare dso_local noalias i8* @malloc(i64) #5 - -declare dso_local i32 @cudaMalloc(i8**, i64) #3 - -declare dso_local i32 @cudaMemcpy(i8*, i8*, i64, i32) #3 - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local dereferenceable(8) %struct.latLong* @_ZNSt6vectorI7latLongSaIS0_EEixEm(%"class.std::vector.0"* %this, i64 %__n) #4 comdat align 2 { -entry: - %this.addr = alloca %"class.std::vector.0"*, align 8 - %__n.addr = alloca i64, align 8 - store %"class.std::vector.0"* %this, %"class.std::vector.0"** %this.addr, align 8 - store i64 %__n, i64* %__n.addr, align 8 - %this1 = load %"class.std::vector.0"*, %"class.std::vector.0"** %this.addr, align 8 - %0 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Vector_base.1"* - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base.1", %"struct.std::_Vector_base.1"* %0, i32 0, i32 0 - %_M_start = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl, i32 0, i32 0 - %1 = load %struct.latLong*, %struct.latLong** %_M_start, align 8 - %2 = load i64, i64* %__n.addr, align 8 - %add.ptr = getelementptr inbounds %struct.latLong, %struct.latLong* %1, i64 %2 - ret %struct.latLong* %add.ptr -} - -declare dso_local i32 @__cudaPushCallConfiguration(i64, i32, i64, i32, i64, i8*) #3 - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z10findLowestRSt6vectorI6recordSaIS0_EEPfii(%"class.std::vector"* dereferenceable(24) %records, float* %distances, i32 %numRecords, i32 %topN) #0 { -entry: - %records.addr = alloca %"class.std::vector"*, align 8 - %distances.addr = alloca float*, align 8 - %numRecords.addr = alloca i32, align 4 - %topN.addr = alloca i32, align 4 - %i = alloca i32, align 4 - %j = alloca i32, align 4 - %val = alloca float, align 4 - %minLoc = alloca i32, align 4 - %tempRec = alloca %struct.record*, align 8 - %tempDist = alloca float, align 4 - store %"class.std::vector"* %records, %"class.std::vector"** %records.addr, align 8 - store float* %distances, float** %distances.addr, align 8 - store i32 %numRecords, i32* %numRecords.addr, align 4 - store i32 %topN, i32* %topN.addr, align 4 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc25, %entry - %0 = load i32, i32* %i, align 4 - %1 = load i32, i32* %topN.addr, align 4 - %cmp = icmp slt i32 %0, %1 - br i1 %cmp, label %for.body, label %for.end27 - -for.body: ; preds = %for.cond - %2 = load i32, i32* %i, align 4 - store i32 %2, i32* %minLoc, align 4 - %3 = load i32, i32* %i, align 4 - store i32 %3, i32* %j, align 4 - br label %for.cond1 - -for.cond1: ; preds = %for.inc, %for.body - %4 = load i32, i32* %j, align 4 - %5 = load i32, i32* %numRecords.addr, align 4 - %cmp2 = icmp slt i32 %4, %5 - br i1 %cmp2, label %for.body3, label %for.end - -for.body3: ; preds = %for.cond1 - %6 = load float*, float** %distances.addr, align 8 - %7 = load i32, i32* %j, align 4 - %idxprom = sext i32 %7 to i64 - %arrayidx = getelementptr inbounds float, float* %6, i64 %idxprom - %8 = load float, float* %arrayidx, align 4 - store float %8, float* %val, align 4 - %9 = load float, float* %val, align 4 - %10 = load float*, float** %distances.addr, align 8 - %11 = load i32, i32* %minLoc, align 4 - %idxprom4 = sext i32 %11 to i64 - %arrayidx5 = getelementptr inbounds float, float* %10, i64 %idxprom4 - %12 = load float, float* %arrayidx5, align 4 - %cmp6 = fcmp olt float %9, %12 - br i1 %cmp6, label %if.then, label %if.end - -if.then: ; preds = %for.body3 - %13 = load i32, i32* %j, align 4 - store i32 %13, i32* %minLoc, align 4 - br label %if.end - -if.end: ; preds = %if.then, %for.body3 - br label %for.inc - -for.inc: ; preds = %if.end - %14 = load i32, i32* %j, align 4 - %inc = add nsw i32 %14, 1 - store i32 %inc, i32* %j, align 4 - br label %for.cond1 - -for.end: ; preds = %for.cond1 - %15 = load %"class.std::vector"*, %"class.std::vector"** %records.addr, align 8 - %16 = load i32, i32* %i, align 4 - %conv = sext i32 %16 to i64 - %call = call dereferenceable(60) %struct.record* @_ZNSt6vectorI6recordSaIS0_EEixEm(%"class.std::vector"* %15, i64 %conv) - store %struct.record* %call, %struct.record** %tempRec, align 8 - %17 = load %"class.std::vector"*, %"class.std::vector"** %records.addr, align 8 - %18 = load i32, i32* %minLoc, align 4 - %conv7 = sext i32 %18 to i64 - %call8 = call dereferenceable(60) %struct.record* @_ZNSt6vectorI6recordSaIS0_EEixEm(%"class.std::vector"* %17, i64 %conv7) - %19 = load %"class.std::vector"*, %"class.std::vector"** %records.addr, align 8 - %20 = load i32, i32* %i, align 4 - %conv9 = sext i32 %20 to i64 - %call10 = call dereferenceable(60) %struct.record* @_ZNSt6vectorI6recordSaIS0_EEixEm(%"class.std::vector"* %19, i64 %conv9) - %21 = bitcast %struct.record* %call10 to i8* - %22 = bitcast %struct.record* %call8 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %21, i8* align 4 %22, i64 60, i1 false) - %23 = load %struct.record*, %struct.record** %tempRec, align 8 - %24 = load %"class.std::vector"*, %"class.std::vector"** %records.addr, align 8 - %25 = load i32, i32* %minLoc, align 4 - %conv11 = sext i32 %25 to i64 - %call12 = call dereferenceable(60) %struct.record* @_ZNSt6vectorI6recordSaIS0_EEixEm(%"class.std::vector"* %24, i64 %conv11) - %26 = bitcast %struct.record* %call12 to i8* - %27 = bitcast %struct.record* %23 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %26, i8* align 4 %27, i64 60, i1 false) - %28 = load float*, float** %distances.addr, align 8 - %29 = load i32, i32* %i, align 4 - %idxprom13 = sext i32 %29 to i64 - %arrayidx14 = getelementptr inbounds float, float* %28, i64 %idxprom13 - %30 = load float, float* %arrayidx14, align 4 - store float %30, float* %tempDist, align 4 - %31 = load float*, float** %distances.addr, align 8 - %32 = load i32, i32* %minLoc, align 4 - %idxprom15 = sext i32 %32 to i64 - %arrayidx16 = getelementptr inbounds float, float* %31, i64 %idxprom15 - %33 = load float, float* %arrayidx16, align 4 - %34 = load float*, float** %distances.addr, align 8 - %35 = load i32, i32* %i, align 4 - %idxprom17 = sext i32 %35 to i64 - %arrayidx18 = getelementptr inbounds float, float* %34, i64 %idxprom17 - store float %33, float* %arrayidx18, align 4 - %36 = load float, float* %tempDist, align 4 - %37 = load float*, float** %distances.addr, align 8 - %38 = load i32, i32* %minLoc, align 4 - %idxprom19 = sext i32 %38 to i64 - %arrayidx20 = getelementptr inbounds float, float* %37, i64 %idxprom19 - store float %36, float* %arrayidx20, align 4 - %39 = load float*, float** %distances.addr, align 8 - %40 = load i32, i32* %i, align 4 - %idxprom21 = sext i32 %40 to i64 - %arrayidx22 = getelementptr inbounds float, float* %39, i64 %idxprom21 - %41 = load float, float* %arrayidx22, align 4 - %42 = load %"class.std::vector"*, %"class.std::vector"** %records.addr, align 8 - %43 = load i32, i32* %i, align 4 - %conv23 = sext i32 %43 to i64 - %call24 = call dereferenceable(60) %struct.record* @_ZNSt6vectorI6recordSaIS0_EEixEm(%"class.std::vector"* %42, i64 %conv23) - %distance = getelementptr inbounds %struct.record, %struct.record* %call24, i32 0, i32 1 - store float %41, float* %distance, align 4 - br label %for.inc25 - -for.inc25: ; preds = %for.end - %44 = load i32, i32* %i, align 4 - %inc26 = add nsw i32 %44, 1 - store i32 %inc26, i32* %i, align 4 - br label %for.cond - -for.end27: ; preds = %for.cond - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local dereferenceable(60) %struct.record* @_ZNSt6vectorI6recordSaIS0_EEixEm(%"class.std::vector"* %this, i64 %__n) #4 comdat align 2 { -entry: - %this.addr = alloca %"class.std::vector"*, align 8 - %__n.addr = alloca i64, align 8 - store %"class.std::vector"* %this, %"class.std::vector"** %this.addr, align 8 - store i64 %__n, i64* %__n.addr, align 8 - %this1 = load %"class.std::vector"*, %"class.std::vector"** %this.addr, align 8 - %0 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %0, i32 0, i32 0 - %_M_start = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl, i32 0, i32 0 - %1 = load %struct.record*, %struct.record** %_M_start, align 8 - %2 = load i64, i64* %__n.addr, align 8 - %add.ptr = getelementptr inbounds %struct.record, %struct.record* %1, i64 %2 - ret %struct.record* %add.ptr -} - -; Function Attrs: nounwind -declare dso_local void @free(i8*) #5 - -declare dso_local i32 @cudaFree(i8*) #3 - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt6vectorI7latLongSaIS0_EED2Ev(%"class.std::vector.0"* %this) unnamed_addr #0 comdat align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -entry: - %this.addr = alloca %"class.std::vector.0"*, align 8 - %exn.slot = alloca i8* - %ehselector.slot = alloca i32 - store %"class.std::vector.0"* %this, %"class.std::vector.0"** %this.addr, align 8 - %this1 = load %"class.std::vector.0"*, %"class.std::vector.0"** %this.addr, align 8 - %0 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Vector_base.1"* - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base.1", %"struct.std::_Vector_base.1"* %0, i32 0, i32 0 - %_M_start = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl, i32 0, i32 0 - %1 = load %struct.latLong*, %struct.latLong** %_M_start, align 8 - %2 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Vector_base.1"* - %_M_impl2 = getelementptr inbounds %"struct.std::_Vector_base.1", %"struct.std::_Vector_base.1"* %2, i32 0, i32 0 - %_M_finish = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl2, i32 0, i32 1 - %3 = load %struct.latLong*, %struct.latLong** %_M_finish, align 8 - %4 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Vector_base.1"* - %call = invoke dereferenceable(1) %"class.std::allocator.2"* @_ZNSt12_Vector_baseI7latLongSaIS0_EE19_M_get_Tp_allocatorEv(%"struct.std::_Vector_base.1"* %4) - to label %invoke.cont unwind label %lpad - -invoke.cont: ; preds = %entry - invoke void @_ZSt8_DestroyIP7latLongS0_EvT_S2_RSaIT0_E(%struct.latLong* %1, %struct.latLong* %3, %"class.std::allocator.2"* dereferenceable(1) %call) - to label %invoke.cont3 unwind label %lpad - -invoke.cont3: ; preds = %invoke.cont - %5 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Vector_base.1"* - call void @_ZNSt12_Vector_baseI7latLongSaIS0_EED2Ev(%"struct.std::_Vector_base.1"* %5) - ret void - -lpad: ; preds = %invoke.cont, %entry - %6 = landingpad { i8*, i32 } - cleanup - %7 = extractvalue { i8*, i32 } %6, 0 - store i8* %7, i8** %exn.slot, align 8 - %8 = extractvalue { i8*, i32 } %6, 1 - store i32 %8, i32* %ehselector.slot, align 4 - %9 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Vector_base.1"* - invoke void @_ZNSt12_Vector_baseI7latLongSaIS0_EED2Ev(%"struct.std::_Vector_base.1"* %9) - to label %invoke.cont4 unwind label %terminate.lpad - -invoke.cont4: ; preds = %lpad - br label %eh.resume - -eh.resume: ; preds = %invoke.cont4 - %exn = load i8*, i8** %exn.slot, align 8 - %sel = load i32, i32* %ehselector.slot, align 4 - %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0 - %lpad.val5 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1 - resume { i8*, i32 } %lpad.val5 - -terminate.lpad: ; preds = %lpad - %10 = landingpad { i8*, i32 } - catch i8* null - %11 = extractvalue { i8*, i32 } %10, 0 - call void @__clang_call_terminate(i8* %11) #13 - unreachable -} - -; Function Attrs: noinline noreturn nounwind -define linkonce_odr hidden void @__clang_call_terminate(i8* %0) #6 comdat { - %2 = call i8* @__cxa_begin_catch(i8* %0) #12 - call void @_ZSt9terminatev() #13 - unreachable -} - -declare dso_local i8* @__cxa_begin_catch(i8*) - -declare dso_local void @_ZSt9terminatev() - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt6vectorI6recordSaIS0_EED2Ev(%"class.std::vector"* %this) unnamed_addr #0 comdat align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -entry: - %this.addr = alloca %"class.std::vector"*, align 8 - %exn.slot = alloca i8* - %ehselector.slot = alloca i32 - store %"class.std::vector"* %this, %"class.std::vector"** %this.addr, align 8 - %this1 = load %"class.std::vector"*, %"class.std::vector"** %this.addr, align 8 - %0 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %0, i32 0, i32 0 - %_M_start = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl, i32 0, i32 0 - %1 = load %struct.record*, %struct.record** %_M_start, align 8 - %2 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl2 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %2, i32 0, i32 0 - %_M_finish = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl2, i32 0, i32 1 - %3 = load %struct.record*, %struct.record** %_M_finish, align 8 - %4 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %call = invoke dereferenceable(1) %"class.std::allocator"* @_ZNSt12_Vector_baseI6recordSaIS0_EE19_M_get_Tp_allocatorEv(%"struct.std::_Vector_base"* %4) - to label %invoke.cont unwind label %lpad - -invoke.cont: ; preds = %entry - invoke void @_ZSt8_DestroyIP6recordS0_EvT_S2_RSaIT0_E(%struct.record* %1, %struct.record* %3, %"class.std::allocator"* dereferenceable(1) %call) - to label %invoke.cont3 unwind label %lpad - -invoke.cont3: ; preds = %invoke.cont - %5 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - call void @_ZNSt12_Vector_baseI6recordSaIS0_EED2Ev(%"struct.std::_Vector_base"* %5) - ret void - -lpad: ; preds = %invoke.cont, %entry - %6 = landingpad { i8*, i32 } - cleanup - %7 = extractvalue { i8*, i32 } %6, 0 - store i8* %7, i8** %exn.slot, align 8 - %8 = extractvalue { i8*, i32 } %6, 1 - store i32 %8, i32* %ehselector.slot, align 4 - %9 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - invoke void @_ZNSt12_Vector_baseI6recordSaIS0_EED2Ev(%"struct.std::_Vector_base"* %9) - to label %invoke.cont4 unwind label %terminate.lpad - -invoke.cont4: ; preds = %lpad - br label %eh.resume - -eh.resume: ; preds = %invoke.cont4 - %exn = load i8*, i8** %exn.slot, align 8 - %sel = load i32, i32* %ehselector.slot, align 4 - %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0 - %lpad.val5 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1 - resume { i8*, i32 } %lpad.val5 - -terminate.lpad: ; preds = %lpad - %10 = landingpad { i8*, i32 } - catch i8* null - %11 = extractvalue { i8*, i32 } %10, 0 - call void @__clang_call_terminate(i8* %11) #13 - unreachable -} - -declare dso_local %struct._IO_FILE* @fopen(i8*, i8*) #3 - -; Function Attrs: nounwind -declare dso_local i32 @feof(%struct._IO_FILE*) #5 - -declare dso_local i32 @fscanf(%struct._IO_FILE*, i8*, ...) #3 - -declare dso_local i32 @fprintf(%struct._IO_FILE*, i8*, ...) #3 - -; Function Attrs: noreturn nounwind -declare dso_local void @exit(i32) #7 - -declare dso_local i8* @fgets(i8*, i32, %struct._IO_FILE*) #3 - -declare dso_local i32 @fgetc(%struct._IO_FILE*) #3 - -; Function Attrs: nounwind readonly -declare dso_local double @atof(i8*) #8 - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt6vectorI7latLongSaIS0_EE9push_backERKS0_(%"class.std::vector.0"* %this, %struct.latLong* dereferenceable(8) %__x) #0 comdat align 2 { -entry: - %this.addr = alloca %"class.std::vector.0"*, align 8 - %__x.addr = alloca %struct.latLong*, align 8 - %agg.tmp = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - store %"class.std::vector.0"* %this, %"class.std::vector.0"** %this.addr, align 8 - store %struct.latLong* %__x, %struct.latLong** %__x.addr, align 8 - %this1 = load %"class.std::vector.0"*, %"class.std::vector.0"** %this.addr, align 8 - %0 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Vector_base.1"* - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base.1", %"struct.std::_Vector_base.1"* %0, i32 0, i32 0 - %_M_finish = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl, i32 0, i32 1 - %1 = load %struct.latLong*, %struct.latLong** %_M_finish, align 8 - %2 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Vector_base.1"* - %_M_impl2 = getelementptr inbounds %"struct.std::_Vector_base.1", %"struct.std::_Vector_base.1"* %2, i32 0, i32 0 - %_M_end_of_storage = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl2, i32 0, i32 2 - %3 = load %struct.latLong*, %struct.latLong** %_M_end_of_storage, align 8 - %cmp = icmp ne %struct.latLong* %1, %3 - br i1 %cmp, label %if.then, label %if.else - -if.then: ; preds = %entry - %4 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Vector_base.1"* - %_M_impl3 = getelementptr inbounds %"struct.std::_Vector_base.1", %"struct.std::_Vector_base.1"* %4, i32 0, i32 0 - %5 = bitcast %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl3 to %"class.std::allocator.2"* - %6 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Vector_base.1"* - %_M_impl4 = getelementptr inbounds %"struct.std::_Vector_base.1", %"struct.std::_Vector_base.1"* %6, i32 0, i32 0 - %_M_finish5 = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl4, i32 0, i32 1 - %7 = load %struct.latLong*, %struct.latLong** %_M_finish5, align 8 - %8 = load %struct.latLong*, %struct.latLong** %__x.addr, align 8 - call void @_ZN9__gnu_cxx14__alloc_traitsISaI7latLongEE9constructIS1_EEvRS2_PS1_RKT_(%"class.std::allocator.2"* dereferenceable(1) %5, %struct.latLong* %7, %struct.latLong* dereferenceable(8) %8) - %9 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Vector_base.1"* - %_M_impl6 = getelementptr inbounds %"struct.std::_Vector_base.1", %"struct.std::_Vector_base.1"* %9, i32 0, i32 0 - %_M_finish7 = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl6, i32 0, i32 1 - %10 = load %struct.latLong*, %struct.latLong** %_M_finish7, align 8 - %incdec.ptr = getelementptr inbounds %struct.latLong, %struct.latLong* %10, i32 1 - store %struct.latLong* %incdec.ptr, %struct.latLong** %_M_finish7, align 8 - br label %if.end - -if.else: ; preds = %entry - %call = call %struct.latLong* @_ZNSt6vectorI7latLongSaIS0_EE3endEv(%"class.std::vector.0"* %this1) - %coerce.dive = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %agg.tmp, i32 0, i32 0 - store %struct.latLong* %call, %struct.latLong** %coerce.dive, align 8 - %11 = load %struct.latLong*, %struct.latLong** %__x.addr, align 8 - %coerce.dive8 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %agg.tmp, i32 0, i32 0 - %12 = load %struct.latLong*, %struct.latLong** %coerce.dive8, align 8 - call void @_ZNSt6vectorI7latLongSaIS0_EE17_M_realloc_insertEN9__gnu_cxx17__normal_iteratorIPS0_S2_EERKS0_(%"class.std::vector.0"* %this1, %struct.latLong* %12, %struct.latLong* dereferenceable(8) %11) - br label %if.end - -if.end: ; preds = %if.else, %if.then - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt6vectorI6recordSaIS0_EE9push_backERKS0_(%"class.std::vector"* %this, %struct.record* dereferenceable(60) %__x) #0 comdat align 2 { -entry: - %this.addr = alloca %"class.std::vector"*, align 8 - %__x.addr = alloca %struct.record*, align 8 - %agg.tmp = alloca %"class.__gnu_cxx::__normal_iterator.5", align 8 - store %"class.std::vector"* %this, %"class.std::vector"** %this.addr, align 8 - store %struct.record* %__x, %struct.record** %__x.addr, align 8 - %this1 = load %"class.std::vector"*, %"class.std::vector"** %this.addr, align 8 - %0 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %0, i32 0, i32 0 - %_M_finish = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl, i32 0, i32 1 - %1 = load %struct.record*, %struct.record** %_M_finish, align 8 - %2 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl2 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %2, i32 0, i32 0 - %_M_end_of_storage = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl2, i32 0, i32 2 - %3 = load %struct.record*, %struct.record** %_M_end_of_storage, align 8 - %cmp = icmp ne %struct.record* %1, %3 - br i1 %cmp, label %if.then, label %if.else - -if.then: ; preds = %entry - %4 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl3 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %4, i32 0, i32 0 - %5 = bitcast %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl3 to %"class.std::allocator"* - %6 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl4 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %6, i32 0, i32 0 - %_M_finish5 = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl4, i32 0, i32 1 - %7 = load %struct.record*, %struct.record** %_M_finish5, align 8 - %8 = load %struct.record*, %struct.record** %__x.addr, align 8 - call void @_ZN9__gnu_cxx14__alloc_traitsISaI6recordEE9constructIS1_EEvRS2_PS1_RKT_(%"class.std::allocator"* dereferenceable(1) %5, %struct.record* %7, %struct.record* dereferenceable(60) %8) - %9 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl6 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %9, i32 0, i32 0 - %_M_finish7 = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl6, i32 0, i32 1 - %10 = load %struct.record*, %struct.record** %_M_finish7, align 8 - %incdec.ptr = getelementptr inbounds %struct.record, %struct.record* %10, i32 1 - store %struct.record* %incdec.ptr, %struct.record** %_M_finish7, align 8 - br label %if.end - -if.else: ; preds = %entry - %call = call %struct.record* @_ZNSt6vectorI6recordSaIS0_EE3endEv(%"class.std::vector"* %this1) - %coerce.dive = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.5", %"class.__gnu_cxx::__normal_iterator.5"* %agg.tmp, i32 0, i32 0 - store %struct.record* %call, %struct.record** %coerce.dive, align 8 - %11 = load %struct.record*, %struct.record** %__x.addr, align 8 - %coerce.dive8 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.5", %"class.__gnu_cxx::__normal_iterator.5"* %agg.tmp, i32 0, i32 0 - %12 = load %struct.record*, %struct.record** %coerce.dive8, align 8 - call void @_ZNSt6vectorI6recordSaIS0_EE17_M_realloc_insertEN9__gnu_cxx17__normal_iteratorIPS0_S2_EERKS0_(%"class.std::vector"* %this1, %struct.record* %12, %struct.record* dereferenceable(60) %11) - br label %if.end - -if.end: ; preds = %if.else, %if.then - ret void -} - -declare dso_local i32 @fclose(%struct._IO_FILE*) #3 - -declare dso_local i8* @strncpy(i8*, i8*, i64) #3 - -; Function Attrs: nounwind readonly -declare dso_local i32 @atoi(i8*) #8 - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt12_Vector_baseI6recordSaIS0_EEC2Ev(%"struct.std::_Vector_base"* %this) unnamed_addr #0 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Vector_base"*, align 8 - store %"struct.std::_Vector_base"* %this, %"struct.std::_Vector_base"** %this.addr, align 8 - %this1 = load %"struct.std::_Vector_base"*, %"struct.std::_Vector_base"** %this.addr, align 8 - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %this1, i32 0, i32 0 - call void @_ZNSt12_Vector_baseI6recordSaIS0_EE12_Vector_implC2Ev(%"struct.std::_Vector_base >::_Vector_impl"* %_M_impl) - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSt12_Vector_baseI6recordSaIS0_EE12_Vector_implC2Ev(%"struct.std::_Vector_base >::_Vector_impl"* %this) unnamed_addr #4 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Vector_base >::_Vector_impl"*, align 8 - store %"struct.std::_Vector_base >::_Vector_impl"* %this, %"struct.std::_Vector_base >::_Vector_impl"** %this.addr, align 8 - %this1 = load %"struct.std::_Vector_base >::_Vector_impl"*, %"struct.std::_Vector_base >::_Vector_impl"** %this.addr, align 8 - %0 = bitcast %"struct.std::_Vector_base >::_Vector_impl"* %this1 to %"class.std::allocator"* - call void @_ZNSaI6recordEC2Ev(%"class.std::allocator"* %0) #12 - %_M_start = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %this1, i32 0, i32 0 - store %struct.record* null, %struct.record** %_M_start, align 8 - %_M_finish = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %this1, i32 0, i32 1 - store %struct.record* null, %struct.record** %_M_finish, align 8 - %_M_end_of_storage = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %this1, i32 0, i32 2 - store %struct.record* null, %struct.record** %_M_end_of_storage, align 8 - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSaI6recordEC2Ev(%"class.std::allocator"* %this) unnamed_addr #4 comdat align 2 { -entry: - %this.addr = alloca %"class.std::allocator"*, align 8 - store %"class.std::allocator"* %this, %"class.std::allocator"** %this.addr, align 8 - %this1 = load %"class.std::allocator"*, %"class.std::allocator"** %this.addr, align 8 - %0 = bitcast %"class.std::allocator"* %this1 to %"class.__gnu_cxx::new_allocator"* - call void @_ZN9__gnu_cxx13new_allocatorI6recordEC2Ev(%"class.__gnu_cxx::new_allocator"* %0) #12 - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx13new_allocatorI6recordEC2Ev(%"class.__gnu_cxx::new_allocator"* %this) unnamed_addr #4 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::new_allocator"*, align 8 - store %"class.__gnu_cxx::new_allocator"* %this, %"class.__gnu_cxx::new_allocator"** %this.addr, align 8 - %this1 = load %"class.__gnu_cxx::new_allocator"*, %"class.__gnu_cxx::new_allocator"** %this.addr, align 8 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZSt8_DestroyIP6recordS0_EvT_S2_RSaIT0_E(%struct.record* %__first, %struct.record* %__last, %"class.std::allocator"* dereferenceable(1) %0) #0 comdat { -entry: - %__first.addr = alloca %struct.record*, align 8 - %__last.addr = alloca %struct.record*, align 8 - %.addr = alloca %"class.std::allocator"*, align 8 - store %struct.record* %__first, %struct.record** %__first.addr, align 8 - store %struct.record* %__last, %struct.record** %__last.addr, align 8 - store %"class.std::allocator"* %0, %"class.std::allocator"** %.addr, align 8 - %1 = load %struct.record*, %struct.record** %__first.addr, align 8 - %2 = load %struct.record*, %struct.record** %__last.addr, align 8 - call void @_ZSt8_DestroyIP6recordEvT_S2_(%struct.record* %1, %struct.record* %2) - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local dereferenceable(1) %"class.std::allocator"* @_ZNSt12_Vector_baseI6recordSaIS0_EE19_M_get_Tp_allocatorEv(%"struct.std::_Vector_base"* %this) #4 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Vector_base"*, align 8 - store %"struct.std::_Vector_base"* %this, %"struct.std::_Vector_base"** %this.addr, align 8 - %this1 = load %"struct.std::_Vector_base"*, %"struct.std::_Vector_base"** %this.addr, align 8 - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %this1, i32 0, i32 0 - %0 = bitcast %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl to %"class.std::allocator"* - ret %"class.std::allocator"* %0 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt12_Vector_baseI6recordSaIS0_EED2Ev(%"struct.std::_Vector_base"* %this) unnamed_addr #0 comdat align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -entry: - %this.addr = alloca %"struct.std::_Vector_base"*, align 8 - %exn.slot = alloca i8* - %ehselector.slot = alloca i32 - store %"struct.std::_Vector_base"* %this, %"struct.std::_Vector_base"** %this.addr, align 8 - %this1 = load %"struct.std::_Vector_base"*, %"struct.std::_Vector_base"** %this.addr, align 8 - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %this1, i32 0, i32 0 - %_M_start = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl, i32 0, i32 0 - %0 = load %struct.record*, %struct.record** %_M_start, align 8 - %_M_impl2 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %this1, i32 0, i32 0 - %_M_end_of_storage = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl2, i32 0, i32 2 - %1 = load %struct.record*, %struct.record** %_M_end_of_storage, align 8 - %_M_impl3 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %this1, i32 0, i32 0 - %_M_start4 = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl3, i32 0, i32 0 - %2 = load %struct.record*, %struct.record** %_M_start4, align 8 - %sub.ptr.lhs.cast = ptrtoint %struct.record* %1 to i64 - %sub.ptr.rhs.cast = ptrtoint %struct.record* %2 to i64 - %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast - %sub.ptr.div = sdiv exact i64 %sub.ptr.sub, 60 - invoke void @_ZNSt12_Vector_baseI6recordSaIS0_EE13_M_deallocateEPS0_m(%"struct.std::_Vector_base"* %this1, %struct.record* %0, i64 %sub.ptr.div) - to label %invoke.cont unwind label %lpad - -invoke.cont: ; preds = %entry - %_M_impl5 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %this1, i32 0, i32 0 - call void @_ZNSt12_Vector_baseI6recordSaIS0_EE12_Vector_implD2Ev(%"struct.std::_Vector_base >::_Vector_impl"* %_M_impl5) #12 - ret void - -lpad: ; preds = %entry - %3 = landingpad { i8*, i32 } - cleanup - %4 = extractvalue { i8*, i32 } %3, 0 - store i8* %4, i8** %exn.slot, align 8 - %5 = extractvalue { i8*, i32 } %3, 1 - store i32 %5, i32* %ehselector.slot, align 4 - %_M_impl6 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %this1, i32 0, i32 0 - call void @_ZNSt12_Vector_baseI6recordSaIS0_EE12_Vector_implD2Ev(%"struct.std::_Vector_base >::_Vector_impl"* %_M_impl6) #12 - br label %eh.resume - -eh.resume: ; preds = %lpad - %exn = load i8*, i8** %exn.slot, align 8 - %sel = load i32, i32* %ehselector.slot, align 4 - %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0 - %lpad.val7 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1 - resume { i8*, i32 } %lpad.val7 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZSt8_DestroyIP6recordEvT_S2_(%struct.record* %__first, %struct.record* %__last) #0 comdat { -entry: - %__first.addr = alloca %struct.record*, align 8 - %__last.addr = alloca %struct.record*, align 8 - store %struct.record* %__first, %struct.record** %__first.addr, align 8 - store %struct.record* %__last, %struct.record** %__last.addr, align 8 - %0 = load %struct.record*, %struct.record** %__first.addr, align 8 - %1 = load %struct.record*, %struct.record** %__last.addr, align 8 - call void @_ZNSt12_Destroy_auxILb1EE9__destroyIP6recordEEvT_S4_(%struct.record* %0, %struct.record* %1) - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSt12_Destroy_auxILb1EE9__destroyIP6recordEEvT_S4_(%struct.record* %0, %struct.record* %1) #4 comdat align 2 { -entry: - %.addr = alloca %struct.record*, align 8 - %.addr1 = alloca %struct.record*, align 8 - store %struct.record* %0, %struct.record** %.addr, align 8 - store %struct.record* %1, %struct.record** %.addr1, align 8 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt12_Vector_baseI6recordSaIS0_EE13_M_deallocateEPS0_m(%"struct.std::_Vector_base"* %this, %struct.record* %__p, i64 %__n) #0 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Vector_base"*, align 8 - %__p.addr = alloca %struct.record*, align 8 - %__n.addr = alloca i64, align 8 - store %"struct.std::_Vector_base"* %this, %"struct.std::_Vector_base"** %this.addr, align 8 - store %struct.record* %__p, %struct.record** %__p.addr, align 8 - store i64 %__n, i64* %__n.addr, align 8 - %this1 = load %"struct.std::_Vector_base"*, %"struct.std::_Vector_base"** %this.addr, align 8 - %0 = load %struct.record*, %struct.record** %__p.addr, align 8 - %tobool = icmp ne %struct.record* %0, null - br i1 %tobool, label %if.then, label %if.end - -if.then: ; preds = %entry - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %this1, i32 0, i32 0 - %1 = bitcast %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl to %"class.std::allocator"* - %2 = load %struct.record*, %struct.record** %__p.addr, align 8 - %3 = load i64, i64* %__n.addr, align 8 - call void @_ZN9__gnu_cxx14__alloc_traitsISaI6recordEE10deallocateERS2_PS1_m(%"class.std::allocator"* dereferenceable(1) %1, %struct.record* %2, i64 %3) - br label %if.end - -if.end: ; preds = %if.then, %entry - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSt12_Vector_baseI6recordSaIS0_EE12_Vector_implD2Ev(%"struct.std::_Vector_base >::_Vector_impl"* %this) unnamed_addr #4 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Vector_base >::_Vector_impl"*, align 8 - store %"struct.std::_Vector_base >::_Vector_impl"* %this, %"struct.std::_Vector_base >::_Vector_impl"** %this.addr, align 8 - %this1 = load %"struct.std::_Vector_base >::_Vector_impl"*, %"struct.std::_Vector_base >::_Vector_impl"** %this.addr, align 8 - %0 = bitcast %"struct.std::_Vector_base >::_Vector_impl"* %this1 to %"class.std::allocator"* - call void @_ZNSaI6recordED2Ev(%"class.std::allocator"* %0) #12 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx14__alloc_traitsISaI6recordEE10deallocateERS2_PS1_m(%"class.std::allocator"* dereferenceable(1) %__a, %struct.record* %__p, i64 %__n) #0 comdat align 2 { -entry: - %__a.addr = alloca %"class.std::allocator"*, align 8 - %__p.addr = alloca %struct.record*, align 8 - %__n.addr = alloca i64, align 8 - store %"class.std::allocator"* %__a, %"class.std::allocator"** %__a.addr, align 8 - store %struct.record* %__p, %struct.record** %__p.addr, align 8 - store i64 %__n, i64* %__n.addr, align 8 - %0 = load %"class.std::allocator"*, %"class.std::allocator"** %__a.addr, align 8 - %1 = bitcast %"class.std::allocator"* %0 to %"class.__gnu_cxx::new_allocator"* - %2 = load %struct.record*, %struct.record** %__p.addr, align 8 - %3 = load i64, i64* %__n.addr, align 8 - call void @_ZN9__gnu_cxx13new_allocatorI6recordE10deallocateEPS1_m(%"class.__gnu_cxx::new_allocator"* %1, %struct.record* %2, i64 %3) - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx13new_allocatorI6recordE10deallocateEPS1_m(%"class.__gnu_cxx::new_allocator"* %this, %struct.record* %__p, i64 %0) #4 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::new_allocator"*, align 8 - %__p.addr = alloca %struct.record*, align 8 - %.addr = alloca i64, align 8 - store %"class.__gnu_cxx::new_allocator"* %this, %"class.__gnu_cxx::new_allocator"** %this.addr, align 8 - store %struct.record* %__p, %struct.record** %__p.addr, align 8 - store i64 %0, i64* %.addr, align 8 - %this1 = load %"class.__gnu_cxx::new_allocator"*, %"class.__gnu_cxx::new_allocator"** %this.addr, align 8 - %1 = load %struct.record*, %struct.record** %__p.addr, align 8 - %2 = bitcast %struct.record* %1 to i8* - call void @_ZdlPv(i8* %2) #12 - ret void -} - -; Function Attrs: nobuiltin nounwind -declare dso_local void @_ZdlPv(i8*) #9 - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSaI6recordED2Ev(%"class.std::allocator"* %this) unnamed_addr #4 comdat align 2 { -entry: - %this.addr = alloca %"class.std::allocator"*, align 8 - store %"class.std::allocator"* %this, %"class.std::allocator"** %this.addr, align 8 - %this1 = load %"class.std::allocator"*, %"class.std::allocator"** %this.addr, align 8 - %0 = bitcast %"class.std::allocator"* %this1 to %"class.__gnu_cxx::new_allocator"* - call void @_ZN9__gnu_cxx13new_allocatorI6recordED2Ev(%"class.__gnu_cxx::new_allocator"* %0) #12 - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx13new_allocatorI6recordED2Ev(%"class.__gnu_cxx::new_allocator"* %this) unnamed_addr #4 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::new_allocator"*, align 8 - store %"class.__gnu_cxx::new_allocator"* %this, %"class.__gnu_cxx::new_allocator"** %this.addr, align 8 - %this1 = load %"class.__gnu_cxx::new_allocator"*, %"class.__gnu_cxx::new_allocator"** %this.addr, align 8 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt12_Vector_baseI7latLongSaIS0_EEC2Ev(%"struct.std::_Vector_base.1"* %this) unnamed_addr #0 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Vector_base.1"*, align 8 - store %"struct.std::_Vector_base.1"* %this, %"struct.std::_Vector_base.1"** %this.addr, align 8 - %this1 = load %"struct.std::_Vector_base.1"*, %"struct.std::_Vector_base.1"** %this.addr, align 8 - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base.1", %"struct.std::_Vector_base.1"* %this1, i32 0, i32 0 - call void @_ZNSt12_Vector_baseI7latLongSaIS0_EE12_Vector_implC2Ev(%"struct.std::_Vector_base >::_Vector_impl"* %_M_impl) - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSt12_Vector_baseI7latLongSaIS0_EE12_Vector_implC2Ev(%"struct.std::_Vector_base >::_Vector_impl"* %this) unnamed_addr #4 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Vector_base >::_Vector_impl"*, align 8 - store %"struct.std::_Vector_base >::_Vector_impl"* %this, %"struct.std::_Vector_base >::_Vector_impl"** %this.addr, align 8 - %this1 = load %"struct.std::_Vector_base >::_Vector_impl"*, %"struct.std::_Vector_base >::_Vector_impl"** %this.addr, align 8 - %0 = bitcast %"struct.std::_Vector_base >::_Vector_impl"* %this1 to %"class.std::allocator.2"* - call void @_ZNSaI7latLongEC2Ev(%"class.std::allocator.2"* %0) #12 - %_M_start = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %this1, i32 0, i32 0 - store %struct.latLong* null, %struct.latLong** %_M_start, align 8 - %_M_finish = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %this1, i32 0, i32 1 - store %struct.latLong* null, %struct.latLong** %_M_finish, align 8 - %_M_end_of_storage = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %this1, i32 0, i32 2 - store %struct.latLong* null, %struct.latLong** %_M_end_of_storage, align 8 - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSaI7latLongEC2Ev(%"class.std::allocator.2"* %this) unnamed_addr #4 comdat align 2 { -entry: - %this.addr = alloca %"class.std::allocator.2"*, align 8 - store %"class.std::allocator.2"* %this, %"class.std::allocator.2"** %this.addr, align 8 - %this1 = load %"class.std::allocator.2"*, %"class.std::allocator.2"** %this.addr, align 8 - %0 = bitcast %"class.std::allocator.2"* %this1 to %"class.__gnu_cxx::new_allocator.3"* - call void @_ZN9__gnu_cxx13new_allocatorI7latLongEC2Ev(%"class.__gnu_cxx::new_allocator.3"* %0) #12 - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx13new_allocatorI7latLongEC2Ev(%"class.__gnu_cxx::new_allocator.3"* %this) unnamed_addr #4 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::new_allocator.3"*, align 8 - store %"class.__gnu_cxx::new_allocator.3"* %this, %"class.__gnu_cxx::new_allocator.3"** %this.addr, align 8 - %this1 = load %"class.__gnu_cxx::new_allocator.3"*, %"class.__gnu_cxx::new_allocator.3"** %this.addr, align 8 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZSt8_DestroyIP7latLongS0_EvT_S2_RSaIT0_E(%struct.latLong* %__first, %struct.latLong* %__last, %"class.std::allocator.2"* dereferenceable(1) %0) #0 comdat { -entry: - %__first.addr = alloca %struct.latLong*, align 8 - %__last.addr = alloca %struct.latLong*, align 8 - %.addr = alloca %"class.std::allocator.2"*, align 8 - store %struct.latLong* %__first, %struct.latLong** %__first.addr, align 8 - store %struct.latLong* %__last, %struct.latLong** %__last.addr, align 8 - store %"class.std::allocator.2"* %0, %"class.std::allocator.2"** %.addr, align 8 - %1 = load %struct.latLong*, %struct.latLong** %__first.addr, align 8 - %2 = load %struct.latLong*, %struct.latLong** %__last.addr, align 8 - call void @_ZSt8_DestroyIP7latLongEvT_S2_(%struct.latLong* %1, %struct.latLong* %2) - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local dereferenceable(1) %"class.std::allocator.2"* @_ZNSt12_Vector_baseI7latLongSaIS0_EE19_M_get_Tp_allocatorEv(%"struct.std::_Vector_base.1"* %this) #4 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Vector_base.1"*, align 8 - store %"struct.std::_Vector_base.1"* %this, %"struct.std::_Vector_base.1"** %this.addr, align 8 - %this1 = load %"struct.std::_Vector_base.1"*, %"struct.std::_Vector_base.1"** %this.addr, align 8 - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base.1", %"struct.std::_Vector_base.1"* %this1, i32 0, i32 0 - %0 = bitcast %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl to %"class.std::allocator.2"* - ret %"class.std::allocator.2"* %0 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt12_Vector_baseI7latLongSaIS0_EED2Ev(%"struct.std::_Vector_base.1"* %this) unnamed_addr #0 comdat align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -entry: - %this.addr = alloca %"struct.std::_Vector_base.1"*, align 8 - %exn.slot = alloca i8* - %ehselector.slot = alloca i32 - store %"struct.std::_Vector_base.1"* %this, %"struct.std::_Vector_base.1"** %this.addr, align 8 - %this1 = load %"struct.std::_Vector_base.1"*, %"struct.std::_Vector_base.1"** %this.addr, align 8 - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base.1", %"struct.std::_Vector_base.1"* %this1, i32 0, i32 0 - %_M_start = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl, i32 0, i32 0 - %0 = load %struct.latLong*, %struct.latLong** %_M_start, align 8 - %_M_impl2 = getelementptr inbounds %"struct.std::_Vector_base.1", %"struct.std::_Vector_base.1"* %this1, i32 0, i32 0 - %_M_end_of_storage = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl2, i32 0, i32 2 - %1 = load %struct.latLong*, %struct.latLong** %_M_end_of_storage, align 8 - %_M_impl3 = getelementptr inbounds %"struct.std::_Vector_base.1", %"struct.std::_Vector_base.1"* %this1, i32 0, i32 0 - %_M_start4 = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl3, i32 0, i32 0 - %2 = load %struct.latLong*, %struct.latLong** %_M_start4, align 8 - %sub.ptr.lhs.cast = ptrtoint %struct.latLong* %1 to i64 - %sub.ptr.rhs.cast = ptrtoint %struct.latLong* %2 to i64 - %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast - %sub.ptr.div = sdiv exact i64 %sub.ptr.sub, 8 - invoke void @_ZNSt12_Vector_baseI7latLongSaIS0_EE13_M_deallocateEPS0_m(%"struct.std::_Vector_base.1"* %this1, %struct.latLong* %0, i64 %sub.ptr.div) - to label %invoke.cont unwind label %lpad - -invoke.cont: ; preds = %entry - %_M_impl5 = getelementptr inbounds %"struct.std::_Vector_base.1", %"struct.std::_Vector_base.1"* %this1, i32 0, i32 0 - call void @_ZNSt12_Vector_baseI7latLongSaIS0_EE12_Vector_implD2Ev(%"struct.std::_Vector_base >::_Vector_impl"* %_M_impl5) #12 - ret void - -lpad: ; preds = %entry - %3 = landingpad { i8*, i32 } - cleanup - %4 = extractvalue { i8*, i32 } %3, 0 - store i8* %4, i8** %exn.slot, align 8 - %5 = extractvalue { i8*, i32 } %3, 1 - store i32 %5, i32* %ehselector.slot, align 4 - %_M_impl6 = getelementptr inbounds %"struct.std::_Vector_base.1", %"struct.std::_Vector_base.1"* %this1, i32 0, i32 0 - call void @_ZNSt12_Vector_baseI7latLongSaIS0_EE12_Vector_implD2Ev(%"struct.std::_Vector_base >::_Vector_impl"* %_M_impl6) #12 - br label %eh.resume - -eh.resume: ; preds = %lpad - %exn = load i8*, i8** %exn.slot, align 8 - %sel = load i32, i32* %ehselector.slot, align 4 - %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0 - %lpad.val7 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1 - resume { i8*, i32 } %lpad.val7 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZSt8_DestroyIP7latLongEvT_S2_(%struct.latLong* %__first, %struct.latLong* %__last) #0 comdat { -entry: - %__first.addr = alloca %struct.latLong*, align 8 - %__last.addr = alloca %struct.latLong*, align 8 - store %struct.latLong* %__first, %struct.latLong** %__first.addr, align 8 - store %struct.latLong* %__last, %struct.latLong** %__last.addr, align 8 - %0 = load %struct.latLong*, %struct.latLong** %__first.addr, align 8 - %1 = load %struct.latLong*, %struct.latLong** %__last.addr, align 8 - call void @_ZNSt12_Destroy_auxILb1EE9__destroyIP7latLongEEvT_S4_(%struct.latLong* %0, %struct.latLong* %1) - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSt12_Destroy_auxILb1EE9__destroyIP7latLongEEvT_S4_(%struct.latLong* %0, %struct.latLong* %1) #4 comdat align 2 { -entry: - %.addr = alloca %struct.latLong*, align 8 - %.addr1 = alloca %struct.latLong*, align 8 - store %struct.latLong* %0, %struct.latLong** %.addr, align 8 - store %struct.latLong* %1, %struct.latLong** %.addr1, align 8 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt12_Vector_baseI7latLongSaIS0_EE13_M_deallocateEPS0_m(%"struct.std::_Vector_base.1"* %this, %struct.latLong* %__p, i64 %__n) #0 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Vector_base.1"*, align 8 - %__p.addr = alloca %struct.latLong*, align 8 - %__n.addr = alloca i64, align 8 - store %"struct.std::_Vector_base.1"* %this, %"struct.std::_Vector_base.1"** %this.addr, align 8 - store %struct.latLong* %__p, %struct.latLong** %__p.addr, align 8 - store i64 %__n, i64* %__n.addr, align 8 - %this1 = load %"struct.std::_Vector_base.1"*, %"struct.std::_Vector_base.1"** %this.addr, align 8 - %0 = load %struct.latLong*, %struct.latLong** %__p.addr, align 8 - %tobool = icmp ne %struct.latLong* %0, null - br i1 %tobool, label %if.then, label %if.end - -if.then: ; preds = %entry - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base.1", %"struct.std::_Vector_base.1"* %this1, i32 0, i32 0 - %1 = bitcast %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl to %"class.std::allocator.2"* - %2 = load %struct.latLong*, %struct.latLong** %__p.addr, align 8 - %3 = load i64, i64* %__n.addr, align 8 - call void @_ZN9__gnu_cxx14__alloc_traitsISaI7latLongEE10deallocateERS2_PS1_m(%"class.std::allocator.2"* dereferenceable(1) %1, %struct.latLong* %2, i64 %3) - br label %if.end - -if.end: ; preds = %if.then, %entry - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSt12_Vector_baseI7latLongSaIS0_EE12_Vector_implD2Ev(%"struct.std::_Vector_base >::_Vector_impl"* %this) unnamed_addr #4 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Vector_base >::_Vector_impl"*, align 8 - store %"struct.std::_Vector_base >::_Vector_impl"* %this, %"struct.std::_Vector_base >::_Vector_impl"** %this.addr, align 8 - %this1 = load %"struct.std::_Vector_base >::_Vector_impl"*, %"struct.std::_Vector_base >::_Vector_impl"** %this.addr, align 8 - %0 = bitcast %"struct.std::_Vector_base >::_Vector_impl"* %this1 to %"class.std::allocator.2"* - call void @_ZNSaI7latLongED2Ev(%"class.std::allocator.2"* %0) #12 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx14__alloc_traitsISaI7latLongEE10deallocateERS2_PS1_m(%"class.std::allocator.2"* dereferenceable(1) %__a, %struct.latLong* %__p, i64 %__n) #0 comdat align 2 { -entry: - %__a.addr = alloca %"class.std::allocator.2"*, align 8 - %__p.addr = alloca %struct.latLong*, align 8 - %__n.addr = alloca i64, align 8 - store %"class.std::allocator.2"* %__a, %"class.std::allocator.2"** %__a.addr, align 8 - store %struct.latLong* %__p, %struct.latLong** %__p.addr, align 8 - store i64 %__n, i64* %__n.addr, align 8 - %0 = load %"class.std::allocator.2"*, %"class.std::allocator.2"** %__a.addr, align 8 - %1 = bitcast %"class.std::allocator.2"* %0 to %"class.__gnu_cxx::new_allocator.3"* - %2 = load %struct.latLong*, %struct.latLong** %__p.addr, align 8 - %3 = load i64, i64* %__n.addr, align 8 - call void @_ZN9__gnu_cxx13new_allocatorI7latLongE10deallocateEPS1_m(%"class.__gnu_cxx::new_allocator.3"* %1, %struct.latLong* %2, i64 %3) - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx13new_allocatorI7latLongE10deallocateEPS1_m(%"class.__gnu_cxx::new_allocator.3"* %this, %struct.latLong* %__p, i64 %0) #4 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::new_allocator.3"*, align 8 - %__p.addr = alloca %struct.latLong*, align 8 - %.addr = alloca i64, align 8 - store %"class.__gnu_cxx::new_allocator.3"* %this, %"class.__gnu_cxx::new_allocator.3"** %this.addr, align 8 - store %struct.latLong* %__p, %struct.latLong** %__p.addr, align 8 - store i64 %0, i64* %.addr, align 8 - %this1 = load %"class.__gnu_cxx::new_allocator.3"*, %"class.__gnu_cxx::new_allocator.3"** %this.addr, align 8 - %1 = load %struct.latLong*, %struct.latLong** %__p.addr, align 8 - %2 = bitcast %struct.latLong* %1 to i8* - call void @_ZdlPv(i8* %2) #12 - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZNSaI7latLongED2Ev(%"class.std::allocator.2"* %this) unnamed_addr #4 comdat align 2 { -entry: - %this.addr = alloca %"class.std::allocator.2"*, align 8 - store %"class.std::allocator.2"* %this, %"class.std::allocator.2"** %this.addr, align 8 - %this1 = load %"class.std::allocator.2"*, %"class.std::allocator.2"** %this.addr, align 8 - %0 = bitcast %"class.std::allocator.2"* %this1 to %"class.__gnu_cxx::new_allocator.3"* - call void @_ZN9__gnu_cxx13new_allocatorI7latLongED2Ev(%"class.__gnu_cxx::new_allocator.3"* %0) #12 - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx13new_allocatorI7latLongED2Ev(%"class.__gnu_cxx::new_allocator.3"* %this) unnamed_addr #4 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::new_allocator.3"*, align 8 - store %"class.__gnu_cxx::new_allocator.3"* %this, %"class.__gnu_cxx::new_allocator.3"** %this.addr, align 8 - %this1 = load %"class.__gnu_cxx::new_allocator.3"*, %"class.__gnu_cxx::new_allocator.3"** %this.addr, align 8 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx14__alloc_traitsISaI7latLongEE9constructIS1_EEvRS2_PS1_RKT_(%"class.std::allocator.2"* dereferenceable(1) %__a, %struct.latLong* %__p, %struct.latLong* dereferenceable(8) %__arg) #0 comdat align 2 { -entry: - %__a.addr = alloca %"class.std::allocator.2"*, align 8 - %__p.addr = alloca %struct.latLong*, align 8 - %__arg.addr = alloca %struct.latLong*, align 8 - store %"class.std::allocator.2"* %__a, %"class.std::allocator.2"** %__a.addr, align 8 - store %struct.latLong* %__p, %struct.latLong** %__p.addr, align 8 - store %struct.latLong* %__arg, %struct.latLong** %__arg.addr, align 8 - %0 = load %"class.std::allocator.2"*, %"class.std::allocator.2"** %__a.addr, align 8 - %1 = bitcast %"class.std::allocator.2"* %0 to %"class.__gnu_cxx::new_allocator.3"* - %2 = load %struct.latLong*, %struct.latLong** %__p.addr, align 8 - %3 = load %struct.latLong*, %struct.latLong** %__arg.addr, align 8 - call void @_ZN9__gnu_cxx13new_allocatorI7latLongE9constructEPS1_RKS1_(%"class.__gnu_cxx::new_allocator.3"* %1, %struct.latLong* %2, %struct.latLong* dereferenceable(8) %3) - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt6vectorI7latLongSaIS0_EE17_M_realloc_insertEN9__gnu_cxx17__normal_iteratorIPS0_S2_EERKS0_(%"class.std::vector.0"* %this, %struct.latLong* %__position.coerce, %struct.latLong* dereferenceable(8) %__x) #0 comdat align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -entry: - %__position = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %this.addr = alloca %"class.std::vector.0"*, align 8 - %__x.addr = alloca %struct.latLong*, align 8 - %__len = alloca i64, align 8 - %__elems_before = alloca i64, align 8 - %ref.tmp = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %__new_start = alloca %struct.latLong*, align 8 - %__new_finish = alloca %struct.latLong*, align 8 - %exn.slot = alloca i8* - %ehselector.slot = alloca i32 - %coerce.dive = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %__position, i32 0, i32 0 - store %struct.latLong* %__position.coerce, %struct.latLong** %coerce.dive, align 8 - store %"class.std::vector.0"* %this, %"class.std::vector.0"** %this.addr, align 8 - store %struct.latLong* %__x, %struct.latLong** %__x.addr, align 8 - %this1 = load %"class.std::vector.0"*, %"class.std::vector.0"** %this.addr, align 8 - %call = call i64 @_ZNKSt6vectorI7latLongSaIS0_EE12_M_check_lenEmPKc(%"class.std::vector.0"* %this1, i64 1, i8* getelementptr inbounds ([26 x i8], [26 x i8]* @.str.28, i64 0, i64 0)) - store i64 %call, i64* %__len, align 8 - %call2 = call %struct.latLong* @_ZNSt6vectorI7latLongSaIS0_EE5beginEv(%"class.std::vector.0"* %this1) - %coerce.dive3 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %ref.tmp, i32 0, i32 0 - store %struct.latLong* %call2, %struct.latLong** %coerce.dive3, align 8 - %call4 = call i64 @_ZN9__gnu_cxxmiIP7latLongSt6vectorIS1_SaIS1_EEEENS_17__normal_iteratorIT_T0_E15difference_typeERKS9_SC_(%"class.__gnu_cxx::__normal_iterator"* dereferenceable(8) %__position, %"class.__gnu_cxx::__normal_iterator"* dereferenceable(8) %ref.tmp) - store i64 %call4, i64* %__elems_before, align 8 - %0 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Vector_base.1"* - %1 = load i64, i64* %__len, align 8 - %call5 = call %struct.latLong* @_ZNSt12_Vector_baseI7latLongSaIS0_EE11_M_allocateEm(%"struct.std::_Vector_base.1"* %0, i64 %1) - store %struct.latLong* %call5, %struct.latLong** %__new_start, align 8 - %2 = load %struct.latLong*, %struct.latLong** %__new_start, align 8 - store %struct.latLong* %2, %struct.latLong** %__new_finish, align 8 - %3 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Vector_base.1"* - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base.1", %"struct.std::_Vector_base.1"* %3, i32 0, i32 0 - %4 = bitcast %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl to %"class.std::allocator.2"* - %5 = load %struct.latLong*, %struct.latLong** %__new_start, align 8 - %6 = load i64, i64* %__elems_before, align 8 - %add.ptr = getelementptr inbounds %struct.latLong, %struct.latLong* %5, i64 %6 - %7 = load %struct.latLong*, %struct.latLong** %__x.addr, align 8 - invoke void @_ZN9__gnu_cxx14__alloc_traitsISaI7latLongEE9constructIS1_EEvRS2_PS1_RKT_(%"class.std::allocator.2"* dereferenceable(1) %4, %struct.latLong* %add.ptr, %struct.latLong* dereferenceable(8) %7) - to label %invoke.cont unwind label %lpad - -invoke.cont: ; preds = %entry - store %struct.latLong* null, %struct.latLong** %__new_finish, align 8 - %8 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Vector_base.1"* - %_M_impl6 = getelementptr inbounds %"struct.std::_Vector_base.1", %"struct.std::_Vector_base.1"* %8, i32 0, i32 0 - %_M_start = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl6, i32 0, i32 0 - %9 = load %struct.latLong*, %struct.latLong** %_M_start, align 8 - %call8 = invoke dereferenceable(8) %struct.latLong** @_ZNK9__gnu_cxx17__normal_iteratorIP7latLongSt6vectorIS1_SaIS1_EEE4baseEv(%"class.__gnu_cxx::__normal_iterator"* %__position) - to label %invoke.cont7 unwind label %lpad - -invoke.cont7: ; preds = %invoke.cont - %10 = load %struct.latLong*, %struct.latLong** %call8, align 8 - %11 = load %struct.latLong*, %struct.latLong** %__new_start, align 8 - %12 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Vector_base.1"* - %call10 = invoke dereferenceable(1) %"class.std::allocator.2"* @_ZNSt12_Vector_baseI7latLongSaIS0_EE19_M_get_Tp_allocatorEv(%"struct.std::_Vector_base.1"* %12) - to label %invoke.cont9 unwind label %lpad - -invoke.cont9: ; preds = %invoke.cont7 - %call12 = invoke %struct.latLong* @_ZSt34__uninitialized_move_if_noexcept_aIP7latLongS1_SaIS0_EET0_T_S4_S3_RT1_(%struct.latLong* %9, %struct.latLong* %10, %struct.latLong* %11, %"class.std::allocator.2"* dereferenceable(1) %call10) - to label %invoke.cont11 unwind label %lpad - -invoke.cont11: ; preds = %invoke.cont9 - store %struct.latLong* %call12, %struct.latLong** %__new_finish, align 8 - %13 = load %struct.latLong*, %struct.latLong** %__new_finish, align 8 - %incdec.ptr = getelementptr inbounds %struct.latLong, %struct.latLong* %13, i32 1 - store %struct.latLong* %incdec.ptr, %struct.latLong** %__new_finish, align 8 - %call14 = invoke dereferenceable(8) %struct.latLong** @_ZNK9__gnu_cxx17__normal_iteratorIP7latLongSt6vectorIS1_SaIS1_EEE4baseEv(%"class.__gnu_cxx::__normal_iterator"* %__position) - to label %invoke.cont13 unwind label %lpad - -invoke.cont13: ; preds = %invoke.cont11 - %14 = load %struct.latLong*, %struct.latLong** %call14, align 8 - %15 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Vector_base.1"* - %_M_impl15 = getelementptr inbounds %"struct.std::_Vector_base.1", %"struct.std::_Vector_base.1"* %15, i32 0, i32 0 - %_M_finish = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl15, i32 0, i32 1 - %16 = load %struct.latLong*, %struct.latLong** %_M_finish, align 8 - %17 = load %struct.latLong*, %struct.latLong** %__new_finish, align 8 - %18 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Vector_base.1"* - %call17 = invoke dereferenceable(1) %"class.std::allocator.2"* @_ZNSt12_Vector_baseI7latLongSaIS0_EE19_M_get_Tp_allocatorEv(%"struct.std::_Vector_base.1"* %18) - to label %invoke.cont16 unwind label %lpad - -invoke.cont16: ; preds = %invoke.cont13 - %call19 = invoke %struct.latLong* @_ZSt34__uninitialized_move_if_noexcept_aIP7latLongS1_SaIS0_EET0_T_S4_S3_RT1_(%struct.latLong* %14, %struct.latLong* %16, %struct.latLong* %17, %"class.std::allocator.2"* dereferenceable(1) %call17) - to label %invoke.cont18 unwind label %lpad - -invoke.cont18: ; preds = %invoke.cont16 - store %struct.latLong* %call19, %struct.latLong** %__new_finish, align 8 - br label %try.cont - -lpad: ; preds = %invoke.cont16, %invoke.cont13, %invoke.cont11, %invoke.cont9, %invoke.cont7, %invoke.cont, %entry - %19 = landingpad { i8*, i32 } - catch i8* null - %20 = extractvalue { i8*, i32 } %19, 0 - store i8* %20, i8** %exn.slot, align 8 - %21 = extractvalue { i8*, i32 } %19, 1 - store i32 %21, i32* %ehselector.slot, align 4 - br label %catch - -catch: ; preds = %lpad - %exn = load i8*, i8** %exn.slot, align 8 - %22 = call i8* @__cxa_begin_catch(i8* %exn) #12 - %23 = load %struct.latLong*, %struct.latLong** %__new_finish, align 8 - %tobool = icmp ne %struct.latLong* %23, null - br i1 %tobool, label %if.else, label %if.then - -if.then: ; preds = %catch - %24 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Vector_base.1"* - %_M_impl20 = getelementptr inbounds %"struct.std::_Vector_base.1", %"struct.std::_Vector_base.1"* %24, i32 0, i32 0 - %25 = bitcast %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl20 to %"class.std::allocator.2"* - %26 = load %struct.latLong*, %struct.latLong** %__new_start, align 8 - %27 = load i64, i64* %__elems_before, align 8 - %add.ptr21 = getelementptr inbounds %struct.latLong, %struct.latLong* %26, i64 %27 - invoke void @_ZN9__gnu_cxx14__alloc_traitsISaI7latLongEE7destroyERS2_PS1_(%"class.std::allocator.2"* dereferenceable(1) %25, %struct.latLong* %add.ptr21) - to label %invoke.cont23 unwind label %lpad22 - -invoke.cont23: ; preds = %if.then - br label %if.end - -lpad22: ; preds = %invoke.cont27, %if.end, %invoke.cont24, %if.else, %if.then - %28 = landingpad { i8*, i32 } - cleanup - %29 = extractvalue { i8*, i32 } %28, 0 - store i8* %29, i8** %exn.slot, align 8 - %30 = extractvalue { i8*, i32 } %28, 1 - store i32 %30, i32* %ehselector.slot, align 4 - invoke void @__cxa_end_catch() - to label %invoke.cont28 unwind label %terminate.lpad - -if.else: ; preds = %catch - %31 = load %struct.latLong*, %struct.latLong** %__new_start, align 8 - %32 = load %struct.latLong*, %struct.latLong** %__new_finish, align 8 - %33 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Vector_base.1"* - %call25 = invoke dereferenceable(1) %"class.std::allocator.2"* @_ZNSt12_Vector_baseI7latLongSaIS0_EE19_M_get_Tp_allocatorEv(%"struct.std::_Vector_base.1"* %33) - to label %invoke.cont24 unwind label %lpad22 - -invoke.cont24: ; preds = %if.else - invoke void @_ZSt8_DestroyIP7latLongS0_EvT_S2_RSaIT0_E(%struct.latLong* %31, %struct.latLong* %32, %"class.std::allocator.2"* dereferenceable(1) %call25) - to label %invoke.cont26 unwind label %lpad22 - -invoke.cont26: ; preds = %invoke.cont24 - br label %if.end - -if.end: ; preds = %invoke.cont26, %invoke.cont23 - %34 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Vector_base.1"* - %35 = load %struct.latLong*, %struct.latLong** %__new_start, align 8 - %36 = load i64, i64* %__len, align 8 - invoke void @_ZNSt12_Vector_baseI7latLongSaIS0_EE13_M_deallocateEPS0_m(%"struct.std::_Vector_base.1"* %34, %struct.latLong* %35, i64 %36) - to label %invoke.cont27 unwind label %lpad22 - -invoke.cont27: ; preds = %if.end - invoke void @__cxa_rethrow() #15 - to label %unreachable unwind label %lpad22 - -invoke.cont28: ; preds = %lpad22 - br label %eh.resume - -try.cont: ; preds = %invoke.cont18 - %37 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Vector_base.1"* - %_M_impl29 = getelementptr inbounds %"struct.std::_Vector_base.1", %"struct.std::_Vector_base.1"* %37, i32 0, i32 0 - %_M_start30 = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl29, i32 0, i32 0 - %38 = load %struct.latLong*, %struct.latLong** %_M_start30, align 8 - %39 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Vector_base.1"* - %_M_impl31 = getelementptr inbounds %"struct.std::_Vector_base.1", %"struct.std::_Vector_base.1"* %39, i32 0, i32 0 - %_M_finish32 = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl31, i32 0, i32 1 - %40 = load %struct.latLong*, %struct.latLong** %_M_finish32, align 8 - %41 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Vector_base.1"* - %call33 = call dereferenceable(1) %"class.std::allocator.2"* @_ZNSt12_Vector_baseI7latLongSaIS0_EE19_M_get_Tp_allocatorEv(%"struct.std::_Vector_base.1"* %41) - call void @_ZSt8_DestroyIP7latLongS0_EvT_S2_RSaIT0_E(%struct.latLong* %38, %struct.latLong* %40, %"class.std::allocator.2"* dereferenceable(1) %call33) - %42 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Vector_base.1"* - %43 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Vector_base.1"* - %_M_impl34 = getelementptr inbounds %"struct.std::_Vector_base.1", %"struct.std::_Vector_base.1"* %43, i32 0, i32 0 - %_M_start35 = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl34, i32 0, i32 0 - %44 = load %struct.latLong*, %struct.latLong** %_M_start35, align 8 - %45 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Vector_base.1"* - %_M_impl36 = getelementptr inbounds %"struct.std::_Vector_base.1", %"struct.std::_Vector_base.1"* %45, i32 0, i32 0 - %_M_end_of_storage = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl36, i32 0, i32 2 - %46 = load %struct.latLong*, %struct.latLong** %_M_end_of_storage, align 8 - %47 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Vector_base.1"* - %_M_impl37 = getelementptr inbounds %"struct.std::_Vector_base.1", %"struct.std::_Vector_base.1"* %47, i32 0, i32 0 - %_M_start38 = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl37, i32 0, i32 0 - %48 = load %struct.latLong*, %struct.latLong** %_M_start38, align 8 - %sub.ptr.lhs.cast = ptrtoint %struct.latLong* %46 to i64 - %sub.ptr.rhs.cast = ptrtoint %struct.latLong* %48 to i64 - %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast - %sub.ptr.div = sdiv exact i64 %sub.ptr.sub, 8 - call void @_ZNSt12_Vector_baseI7latLongSaIS0_EE13_M_deallocateEPS0_m(%"struct.std::_Vector_base.1"* %42, %struct.latLong* %44, i64 %sub.ptr.div) - %49 = load %struct.latLong*, %struct.latLong** %__new_start, align 8 - %50 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Vector_base.1"* - %_M_impl39 = getelementptr inbounds %"struct.std::_Vector_base.1", %"struct.std::_Vector_base.1"* %50, i32 0, i32 0 - %_M_start40 = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl39, i32 0, i32 0 - store %struct.latLong* %49, %struct.latLong** %_M_start40, align 8 - %51 = load %struct.latLong*, %struct.latLong** %__new_finish, align 8 - %52 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Vector_base.1"* - %_M_impl41 = getelementptr inbounds %"struct.std::_Vector_base.1", %"struct.std::_Vector_base.1"* %52, i32 0, i32 0 - %_M_finish42 = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl41, i32 0, i32 1 - store %struct.latLong* %51, %struct.latLong** %_M_finish42, align 8 - %53 = load %struct.latLong*, %struct.latLong** %__new_start, align 8 - %54 = load i64, i64* %__len, align 8 - %add.ptr43 = getelementptr inbounds %struct.latLong, %struct.latLong* %53, i64 %54 - %55 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Vector_base.1"* - %_M_impl44 = getelementptr inbounds %"struct.std::_Vector_base.1", %"struct.std::_Vector_base.1"* %55, i32 0, i32 0 - %_M_end_of_storage45 = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl44, i32 0, i32 2 - store %struct.latLong* %add.ptr43, %struct.latLong** %_M_end_of_storage45, align 8 - ret void - -eh.resume: ; preds = %invoke.cont28 - %exn46 = load i8*, i8** %exn.slot, align 8 - %sel = load i32, i32* %ehselector.slot, align 4 - %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn46, 0 - %lpad.val47 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1 - resume { i8*, i32 } %lpad.val47 - -terminate.lpad: ; preds = %lpad22 - %56 = landingpad { i8*, i32 } - catch i8* null - %57 = extractvalue { i8*, i32 } %56, 0 - call void @__clang_call_terminate(i8* %57) #13 - unreachable - -unreachable: ; preds = %invoke.cont27 - unreachable -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %struct.latLong* @_ZNSt6vectorI7latLongSaIS0_EE3endEv(%"class.std::vector.0"* %this) #0 comdat align 2 { -entry: - %retval = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %this.addr = alloca %"class.std::vector.0"*, align 8 - store %"class.std::vector.0"* %this, %"class.std::vector.0"** %this.addr, align 8 - %this1 = load %"class.std::vector.0"*, %"class.std::vector.0"** %this.addr, align 8 - %0 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Vector_base.1"* - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base.1", %"struct.std::_Vector_base.1"* %0, i32 0, i32 0 - %_M_finish = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl, i32 0, i32 1 - call void @_ZN9__gnu_cxx17__normal_iteratorIP7latLongSt6vectorIS1_SaIS1_EEEC2ERKS2_(%"class.__gnu_cxx::__normal_iterator"* %retval, %struct.latLong** dereferenceable(8) %_M_finish) - %coerce.dive = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %retval, i32 0, i32 0 - %1 = load %struct.latLong*, %struct.latLong** %coerce.dive, align 8 - ret %struct.latLong* %1 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx13new_allocatorI7latLongE9constructEPS1_RKS1_(%"class.__gnu_cxx::new_allocator.3"* %this, %struct.latLong* %__p, %struct.latLong* dereferenceable(8) %__val) #4 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::new_allocator.3"*, align 8 - %__p.addr = alloca %struct.latLong*, align 8 - %__val.addr = alloca %struct.latLong*, align 8 - store %"class.__gnu_cxx::new_allocator.3"* %this, %"class.__gnu_cxx::new_allocator.3"** %this.addr, align 8 - store %struct.latLong* %__p, %struct.latLong** %__p.addr, align 8 - store %struct.latLong* %__val, %struct.latLong** %__val.addr, align 8 - %this1 = load %"class.__gnu_cxx::new_allocator.3"*, %"class.__gnu_cxx::new_allocator.3"** %this.addr, align 8 - %0 = load %struct.latLong*, %struct.latLong** %__p.addr, align 8 - %1 = bitcast %struct.latLong* %0 to i8* - %2 = bitcast i8* %1 to %struct.latLong* - %3 = load %struct.latLong*, %struct.latLong** %__val.addr, align 8 - %4 = bitcast %struct.latLong* %2 to i8* - %5 = bitcast %struct.latLong* %3 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %4, i8* align 4 %5, i64 8, i1 false) - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local i64 @_ZNKSt6vectorI7latLongSaIS0_EE12_M_check_lenEmPKc(%"class.std::vector.0"* %this, i64 %__n, i8* %__s) #0 comdat align 2 { -entry: - %this.addr = alloca %"class.std::vector.0"*, align 8 - %__n.addr = alloca i64, align 8 - %__s.addr = alloca i8*, align 8 - %__len = alloca i64, align 8 - %ref.tmp = alloca i64, align 8 - store %"class.std::vector.0"* %this, %"class.std::vector.0"** %this.addr, align 8 - store i64 %__n, i64* %__n.addr, align 8 - store i8* %__s, i8** %__s.addr, align 8 - %this1 = load %"class.std::vector.0"*, %"class.std::vector.0"** %this.addr, align 8 - %call = call i64 @_ZNKSt6vectorI7latLongSaIS0_EE8max_sizeEv(%"class.std::vector.0"* %this1) - %call2 = call i64 @_ZNKSt6vectorI7latLongSaIS0_EE4sizeEv(%"class.std::vector.0"* %this1) - %sub = sub i64 %call, %call2 - %0 = load i64, i64* %__n.addr, align 8 - %cmp = icmp ult i64 %sub, %0 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - %1 = load i8*, i8** %__s.addr, align 8 - call void @_ZSt20__throw_length_errorPKc(i8* %1) #15 - unreachable - -if.end: ; preds = %entry - %call3 = call i64 @_ZNKSt6vectorI7latLongSaIS0_EE4sizeEv(%"class.std::vector.0"* %this1) - %call4 = call i64 @_ZNKSt6vectorI7latLongSaIS0_EE4sizeEv(%"class.std::vector.0"* %this1) - store i64 %call4, i64* %ref.tmp, align 8 - %call5 = call dereferenceable(8) i64* @_ZSt3maxImERKT_S2_S2_(i64* dereferenceable(8) %ref.tmp, i64* dereferenceable(8) %__n.addr) - %2 = load i64, i64* %call5, align 8 - %add = add i64 %call3, %2 - store i64 %add, i64* %__len, align 8 - %3 = load i64, i64* %__len, align 8 - %call6 = call i64 @_ZNKSt6vectorI7latLongSaIS0_EE4sizeEv(%"class.std::vector.0"* %this1) - %cmp7 = icmp ult i64 %3, %call6 - br i1 %cmp7, label %cond.true, label %lor.lhs.false - -lor.lhs.false: ; preds = %if.end - %4 = load i64, i64* %__len, align 8 - %call8 = call i64 @_ZNKSt6vectorI7latLongSaIS0_EE8max_sizeEv(%"class.std::vector.0"* %this1) - %cmp9 = icmp ugt i64 %4, %call8 - br i1 %cmp9, label %cond.true, label %cond.false - -cond.true: ; preds = %lor.lhs.false, %if.end - %call10 = call i64 @_ZNKSt6vectorI7latLongSaIS0_EE8max_sizeEv(%"class.std::vector.0"* %this1) - br label %cond.end - -cond.false: ; preds = %lor.lhs.false - %5 = load i64, i64* %__len, align 8 - br label %cond.end - -cond.end: ; preds = %cond.false, %cond.true - %cond = phi i64 [ %call10, %cond.true ], [ %5, %cond.false ] - ret i64 %cond -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local i64 @_ZN9__gnu_cxxmiIP7latLongSt6vectorIS1_SaIS1_EEEENS_17__normal_iteratorIT_T0_E15difference_typeERKS9_SC_(%"class.__gnu_cxx::__normal_iterator"* dereferenceable(8) %__lhs, %"class.__gnu_cxx::__normal_iterator"* dereferenceable(8) %__rhs) #0 comdat { -entry: - %__lhs.addr = alloca %"class.__gnu_cxx::__normal_iterator"*, align 8 - %__rhs.addr = alloca %"class.__gnu_cxx::__normal_iterator"*, align 8 - store %"class.__gnu_cxx::__normal_iterator"* %__lhs, %"class.__gnu_cxx::__normal_iterator"** %__lhs.addr, align 8 - store %"class.__gnu_cxx::__normal_iterator"* %__rhs, %"class.__gnu_cxx::__normal_iterator"** %__rhs.addr, align 8 - %0 = load %"class.__gnu_cxx::__normal_iterator"*, %"class.__gnu_cxx::__normal_iterator"** %__lhs.addr, align 8 - %call = call dereferenceable(8) %struct.latLong** @_ZNK9__gnu_cxx17__normal_iteratorIP7latLongSt6vectorIS1_SaIS1_EEE4baseEv(%"class.__gnu_cxx::__normal_iterator"* %0) - %1 = load %struct.latLong*, %struct.latLong** %call, align 8 - %2 = load %"class.__gnu_cxx::__normal_iterator"*, %"class.__gnu_cxx::__normal_iterator"** %__rhs.addr, align 8 - %call1 = call dereferenceable(8) %struct.latLong** @_ZNK9__gnu_cxx17__normal_iteratorIP7latLongSt6vectorIS1_SaIS1_EEE4baseEv(%"class.__gnu_cxx::__normal_iterator"* %2) - %3 = load %struct.latLong*, %struct.latLong** %call1, align 8 - %sub.ptr.lhs.cast = ptrtoint %struct.latLong* %1 to i64 - %sub.ptr.rhs.cast = ptrtoint %struct.latLong* %3 to i64 - %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast - %sub.ptr.div = sdiv exact i64 %sub.ptr.sub, 8 - ret i64 %sub.ptr.div -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %struct.latLong* @_ZNSt6vectorI7latLongSaIS0_EE5beginEv(%"class.std::vector.0"* %this) #0 comdat align 2 { -entry: - %retval = alloca %"class.__gnu_cxx::__normal_iterator", align 8 - %this.addr = alloca %"class.std::vector.0"*, align 8 - store %"class.std::vector.0"* %this, %"class.std::vector.0"** %this.addr, align 8 - %this1 = load %"class.std::vector.0"*, %"class.std::vector.0"** %this.addr, align 8 - %0 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Vector_base.1"* - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base.1", %"struct.std::_Vector_base.1"* %0, i32 0, i32 0 - %_M_start = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl, i32 0, i32 0 - call void @_ZN9__gnu_cxx17__normal_iteratorIP7latLongSt6vectorIS1_SaIS1_EEEC2ERKS2_(%"class.__gnu_cxx::__normal_iterator"* %retval, %struct.latLong** dereferenceable(8) %_M_start) - %coerce.dive = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %retval, i32 0, i32 0 - %1 = load %struct.latLong*, %struct.latLong** %coerce.dive, align 8 - ret %struct.latLong* %1 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %struct.latLong* @_ZNSt12_Vector_baseI7latLongSaIS0_EE11_M_allocateEm(%"struct.std::_Vector_base.1"* %this, i64 %__n) #0 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Vector_base.1"*, align 8 - %__n.addr = alloca i64, align 8 - store %"struct.std::_Vector_base.1"* %this, %"struct.std::_Vector_base.1"** %this.addr, align 8 - store i64 %__n, i64* %__n.addr, align 8 - %this1 = load %"struct.std::_Vector_base.1"*, %"struct.std::_Vector_base.1"** %this.addr, align 8 - %0 = load i64, i64* %__n.addr, align 8 - %cmp = icmp ne i64 %0, 0 - br i1 %cmp, label %cond.true, label %cond.false - -cond.true: ; preds = %entry - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base.1", %"struct.std::_Vector_base.1"* %this1, i32 0, i32 0 - %1 = bitcast %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl to %"class.std::allocator.2"* - %2 = load i64, i64* %__n.addr, align 8 - %call = call %struct.latLong* @_ZN9__gnu_cxx14__alloc_traitsISaI7latLongEE8allocateERS2_m(%"class.std::allocator.2"* dereferenceable(1) %1, i64 %2) - br label %cond.end - -cond.false: ; preds = %entry - br label %cond.end - -cond.end: ; preds = %cond.false, %cond.true - %cond = phi %struct.latLong* [ %call, %cond.true ], [ null, %cond.false ] - ret %struct.latLong* %cond -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %struct.latLong* @_ZSt34__uninitialized_move_if_noexcept_aIP7latLongS1_SaIS0_EET0_T_S4_S3_RT1_(%struct.latLong* %__first, %struct.latLong* %__last, %struct.latLong* %__result, %"class.std::allocator.2"* dereferenceable(1) %__alloc) #0 comdat { -entry: - %__first.addr = alloca %struct.latLong*, align 8 - %__last.addr = alloca %struct.latLong*, align 8 - %__result.addr = alloca %struct.latLong*, align 8 - %__alloc.addr = alloca %"class.std::allocator.2"*, align 8 - store %struct.latLong* %__first, %struct.latLong** %__first.addr, align 8 - store %struct.latLong* %__last, %struct.latLong** %__last.addr, align 8 - store %struct.latLong* %__result, %struct.latLong** %__result.addr, align 8 - store %"class.std::allocator.2"* %__alloc, %"class.std::allocator.2"** %__alloc.addr, align 8 - %0 = load %struct.latLong*, %struct.latLong** %__first.addr, align 8 - %1 = load %struct.latLong*, %struct.latLong** %__last.addr, align 8 - %2 = load %struct.latLong*, %struct.latLong** %__result.addr, align 8 - %3 = load %"class.std::allocator.2"*, %"class.std::allocator.2"** %__alloc.addr, align 8 - %call = call %struct.latLong* @_ZSt22__uninitialized_copy_aIP7latLongS1_S0_ET0_T_S3_S2_RSaIT1_E(%struct.latLong* %0, %struct.latLong* %1, %struct.latLong* %2, %"class.std::allocator.2"* dereferenceable(1) %3) - ret %struct.latLong* %call -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local dereferenceable(8) %struct.latLong** @_ZNK9__gnu_cxx17__normal_iteratorIP7latLongSt6vectorIS1_SaIS1_EEE4baseEv(%"class.__gnu_cxx::__normal_iterator"* %this) #4 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::__normal_iterator"*, align 8 - store %"class.__gnu_cxx::__normal_iterator"* %this, %"class.__gnu_cxx::__normal_iterator"** %this.addr, align 8 - %this1 = load %"class.__gnu_cxx::__normal_iterator"*, %"class.__gnu_cxx::__normal_iterator"** %this.addr, align 8 - %_M_current = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %this1, i32 0, i32 0 - ret %struct.latLong** %_M_current -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx14__alloc_traitsISaI7latLongEE7destroyERS2_PS1_(%"class.std::allocator.2"* dereferenceable(1) %__a, %struct.latLong* %__p) #0 comdat align 2 { -entry: - %__a.addr = alloca %"class.std::allocator.2"*, align 8 - %__p.addr = alloca %struct.latLong*, align 8 - store %"class.std::allocator.2"* %__a, %"class.std::allocator.2"** %__a.addr, align 8 - store %struct.latLong* %__p, %struct.latLong** %__p.addr, align 8 - %0 = load %"class.std::allocator.2"*, %"class.std::allocator.2"** %__a.addr, align 8 - %1 = bitcast %"class.std::allocator.2"* %0 to %"class.__gnu_cxx::new_allocator.3"* - %2 = load %struct.latLong*, %struct.latLong** %__p.addr, align 8 - call void @_ZN9__gnu_cxx13new_allocatorI7latLongE7destroyEPS1_(%"class.__gnu_cxx::new_allocator.3"* %1, %struct.latLong* %2) - ret void -} - -declare dso_local void @__cxa_rethrow() - -declare dso_local void @__cxa_end_catch() - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local i64 @_ZNKSt6vectorI7latLongSaIS0_EE8max_sizeEv(%"class.std::vector.0"* %this) #0 comdat align 2 { -entry: - %this.addr = alloca %"class.std::vector.0"*, align 8 - store %"class.std::vector.0"* %this, %"class.std::vector.0"** %this.addr, align 8 - %this1 = load %"class.std::vector.0"*, %"class.std::vector.0"** %this.addr, align 8 - %0 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Vector_base.1"* - %call = call dereferenceable(1) %"class.std::allocator.2"* @_ZNKSt12_Vector_baseI7latLongSaIS0_EE19_M_get_Tp_allocatorEv(%"struct.std::_Vector_base.1"* %0) - %call2 = call i64 @_ZN9__gnu_cxx14__alloc_traitsISaI7latLongEE8max_sizeERKS2_(%"class.std::allocator.2"* dereferenceable(1) %call) - ret i64 %call2 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local i64 @_ZNKSt6vectorI7latLongSaIS0_EE4sizeEv(%"class.std::vector.0"* %this) #4 comdat align 2 { -entry: - %this.addr = alloca %"class.std::vector.0"*, align 8 - store %"class.std::vector.0"* %this, %"class.std::vector.0"** %this.addr, align 8 - %this1 = load %"class.std::vector.0"*, %"class.std::vector.0"** %this.addr, align 8 - %0 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Vector_base.1"* - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base.1", %"struct.std::_Vector_base.1"* %0, i32 0, i32 0 - %_M_finish = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl, i32 0, i32 1 - %1 = load %struct.latLong*, %struct.latLong** %_M_finish, align 8 - %2 = bitcast %"class.std::vector.0"* %this1 to %"struct.std::_Vector_base.1"* - %_M_impl2 = getelementptr inbounds %"struct.std::_Vector_base.1", %"struct.std::_Vector_base.1"* %2, i32 0, i32 0 - %_M_start = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl2, i32 0, i32 0 - %3 = load %struct.latLong*, %struct.latLong** %_M_start, align 8 - %sub.ptr.lhs.cast = ptrtoint %struct.latLong* %1 to i64 - %sub.ptr.rhs.cast = ptrtoint %struct.latLong* %3 to i64 - %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast - %sub.ptr.div = sdiv exact i64 %sub.ptr.sub, 8 - ret i64 %sub.ptr.div -} - -; Function Attrs: noreturn -declare dso_local void @_ZSt20__throw_length_errorPKc(i8*) #10 - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local dereferenceable(8) i64* @_ZSt3maxImERKT_S2_S2_(i64* dereferenceable(8) %__a, i64* dereferenceable(8) %__b) #4 comdat { -entry: - %retval = alloca i64*, align 8 - %__a.addr = alloca i64*, align 8 - %__b.addr = alloca i64*, align 8 - store i64* %__a, i64** %__a.addr, align 8 - store i64* %__b, i64** %__b.addr, align 8 - %0 = load i64*, i64** %__a.addr, align 8 - %1 = load i64, i64* %0, align 8 - %2 = load i64*, i64** %__b.addr, align 8 - %3 = load i64, i64* %2, align 8 - %cmp = icmp ult i64 %1, %3 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - %4 = load i64*, i64** %__b.addr, align 8 - store i64* %4, i64** %retval, align 8 - br label %return - -if.end: ; preds = %entry - %5 = load i64*, i64** %__a.addr, align 8 - store i64* %5, i64** %retval, align 8 - br label %return - -return: ; preds = %if.end, %if.then - %6 = load i64*, i64** %retval, align 8 - ret i64* %6 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local i64 @_ZN9__gnu_cxx14__alloc_traitsISaI7latLongEE8max_sizeERKS2_(%"class.std::allocator.2"* dereferenceable(1) %__a) #4 comdat align 2 { -entry: - %__a.addr = alloca %"class.std::allocator.2"*, align 8 - store %"class.std::allocator.2"* %__a, %"class.std::allocator.2"** %__a.addr, align 8 - %0 = load %"class.std::allocator.2"*, %"class.std::allocator.2"** %__a.addr, align 8 - %1 = bitcast %"class.std::allocator.2"* %0 to %"class.__gnu_cxx::new_allocator.3"* - %call = call i64 @_ZNK9__gnu_cxx13new_allocatorI7latLongE8max_sizeEv(%"class.__gnu_cxx::new_allocator.3"* %1) #12 - ret i64 %call -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local dereferenceable(1) %"class.std::allocator.2"* @_ZNKSt12_Vector_baseI7latLongSaIS0_EE19_M_get_Tp_allocatorEv(%"struct.std::_Vector_base.1"* %this) #4 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Vector_base.1"*, align 8 - store %"struct.std::_Vector_base.1"* %this, %"struct.std::_Vector_base.1"** %this.addr, align 8 - %this1 = load %"struct.std::_Vector_base.1"*, %"struct.std::_Vector_base.1"** %this.addr, align 8 - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base.1", %"struct.std::_Vector_base.1"* %this1, i32 0, i32 0 - %0 = bitcast %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl to %"class.std::allocator.2"* - ret %"class.std::allocator.2"* %0 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local i64 @_ZNK9__gnu_cxx13new_allocatorI7latLongE8max_sizeEv(%"class.__gnu_cxx::new_allocator.3"* %this) #4 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::new_allocator.3"*, align 8 - store %"class.__gnu_cxx::new_allocator.3"* %this, %"class.__gnu_cxx::new_allocator.3"** %this.addr, align 8 - %this1 = load %"class.__gnu_cxx::new_allocator.3"*, %"class.__gnu_cxx::new_allocator.3"** %this.addr, align 8 - ret i64 2305843009213693951 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx17__normal_iteratorIP7latLongSt6vectorIS1_SaIS1_EEEC2ERKS2_(%"class.__gnu_cxx::__normal_iterator"* %this, %struct.latLong** dereferenceable(8) %__i) unnamed_addr #4 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::__normal_iterator"*, align 8 - %__i.addr = alloca %struct.latLong**, align 8 - store %"class.__gnu_cxx::__normal_iterator"* %this, %"class.__gnu_cxx::__normal_iterator"** %this.addr, align 8 - store %struct.latLong** %__i, %struct.latLong*** %__i.addr, align 8 - %this1 = load %"class.__gnu_cxx::__normal_iterator"*, %"class.__gnu_cxx::__normal_iterator"** %this.addr, align 8 - %_M_current = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator", %"class.__gnu_cxx::__normal_iterator"* %this1, i32 0, i32 0 - %0 = load %struct.latLong**, %struct.latLong*** %__i.addr, align 8 - %1 = load %struct.latLong*, %struct.latLong** %0, align 8 - store %struct.latLong* %1, %struct.latLong** %_M_current, align 8 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %struct.latLong* @_ZN9__gnu_cxx14__alloc_traitsISaI7latLongEE8allocateERS2_m(%"class.std::allocator.2"* dereferenceable(1) %__a, i64 %__n) #0 comdat align 2 { -entry: - %__a.addr = alloca %"class.std::allocator.2"*, align 8 - %__n.addr = alloca i64, align 8 - store %"class.std::allocator.2"* %__a, %"class.std::allocator.2"** %__a.addr, align 8 - store i64 %__n, i64* %__n.addr, align 8 - %0 = load %"class.std::allocator.2"*, %"class.std::allocator.2"** %__a.addr, align 8 - %1 = bitcast %"class.std::allocator.2"* %0 to %"class.__gnu_cxx::new_allocator.3"* - %2 = load i64, i64* %__n.addr, align 8 - %call = call %struct.latLong* @_ZN9__gnu_cxx13new_allocatorI7latLongE8allocateEmPKv(%"class.__gnu_cxx::new_allocator.3"* %1, i64 %2, i8* null) - ret %struct.latLong* %call -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %struct.latLong* @_ZN9__gnu_cxx13new_allocatorI7latLongE8allocateEmPKv(%"class.__gnu_cxx::new_allocator.3"* %this, i64 %__n, i8* %0) #0 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::new_allocator.3"*, align 8 - %__n.addr = alloca i64, align 8 - %.addr = alloca i8*, align 8 - store %"class.__gnu_cxx::new_allocator.3"* %this, %"class.__gnu_cxx::new_allocator.3"** %this.addr, align 8 - store i64 %__n, i64* %__n.addr, align 8 - store i8* %0, i8** %.addr, align 8 - %this1 = load %"class.__gnu_cxx::new_allocator.3"*, %"class.__gnu_cxx::new_allocator.3"** %this.addr, align 8 - %1 = load i64, i64* %__n.addr, align 8 - %call = call i64 @_ZNK9__gnu_cxx13new_allocatorI7latLongE8max_sizeEv(%"class.__gnu_cxx::new_allocator.3"* %this1) #12 - %cmp = icmp ugt i64 %1, %call - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - call void @_ZSt17__throw_bad_allocv() #15 - unreachable - -if.end: ; preds = %entry - %2 = load i64, i64* %__n.addr, align 8 - %mul = mul i64 %2, 8 - %call2 = call i8* @_Znwm(i64 %mul) - %3 = bitcast i8* %call2 to %struct.latLong* - ret %struct.latLong* %3 -} - -; Function Attrs: noreturn -declare dso_local void @_ZSt17__throw_bad_allocv() #10 - -; Function Attrs: nobuiltin -declare dso_local noalias i8* @_Znwm(i64) #11 - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %struct.latLong* @_ZSt22__uninitialized_copy_aIP7latLongS1_S0_ET0_T_S3_S2_RSaIT1_E(%struct.latLong* %__first, %struct.latLong* %__last, %struct.latLong* %__result, %"class.std::allocator.2"* dereferenceable(1) %0) #0 comdat { -entry: - %__first.addr = alloca %struct.latLong*, align 8 - %__last.addr = alloca %struct.latLong*, align 8 - %__result.addr = alloca %struct.latLong*, align 8 - %.addr = alloca %"class.std::allocator.2"*, align 8 - store %struct.latLong* %__first, %struct.latLong** %__first.addr, align 8 - store %struct.latLong* %__last, %struct.latLong** %__last.addr, align 8 - store %struct.latLong* %__result, %struct.latLong** %__result.addr, align 8 - store %"class.std::allocator.2"* %0, %"class.std::allocator.2"** %.addr, align 8 - %1 = load %struct.latLong*, %struct.latLong** %__first.addr, align 8 - %2 = load %struct.latLong*, %struct.latLong** %__last.addr, align 8 - %3 = load %struct.latLong*, %struct.latLong** %__result.addr, align 8 - %call = call %struct.latLong* @_ZSt18uninitialized_copyIP7latLongS1_ET0_T_S3_S2_(%struct.latLong* %1, %struct.latLong* %2, %struct.latLong* %3) - ret %struct.latLong* %call -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %struct.latLong* @_ZSt18uninitialized_copyIP7latLongS1_ET0_T_S3_S2_(%struct.latLong* %__first, %struct.latLong* %__last, %struct.latLong* %__result) #0 comdat { -entry: - %__first.addr = alloca %struct.latLong*, align 8 - %__last.addr = alloca %struct.latLong*, align 8 - %__result.addr = alloca %struct.latLong*, align 8 - %__assignable = alloca i8, align 1 - store %struct.latLong* %__first, %struct.latLong** %__first.addr, align 8 - store %struct.latLong* %__last, %struct.latLong** %__last.addr, align 8 - store %struct.latLong* %__result, %struct.latLong** %__result.addr, align 8 - store i8 1, i8* %__assignable, align 1 - %0 = load %struct.latLong*, %struct.latLong** %__first.addr, align 8 - %1 = load %struct.latLong*, %struct.latLong** %__last.addr, align 8 - %2 = load %struct.latLong*, %struct.latLong** %__result.addr, align 8 - %call = call %struct.latLong* @_ZNSt20__uninitialized_copyILb1EE13__uninit_copyIP7latLongS3_EET0_T_S5_S4_(%struct.latLong* %0, %struct.latLong* %1, %struct.latLong* %2) - ret %struct.latLong* %call -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %struct.latLong* @_ZNSt20__uninitialized_copyILb1EE13__uninit_copyIP7latLongS3_EET0_T_S5_S4_(%struct.latLong* %__first, %struct.latLong* %__last, %struct.latLong* %__result) #0 comdat align 2 { -entry: - %__first.addr = alloca %struct.latLong*, align 8 - %__last.addr = alloca %struct.latLong*, align 8 - %__result.addr = alloca %struct.latLong*, align 8 - store %struct.latLong* %__first, %struct.latLong** %__first.addr, align 8 - store %struct.latLong* %__last, %struct.latLong** %__last.addr, align 8 - store %struct.latLong* %__result, %struct.latLong** %__result.addr, align 8 - %0 = load %struct.latLong*, %struct.latLong** %__first.addr, align 8 - %1 = load %struct.latLong*, %struct.latLong** %__last.addr, align 8 - %2 = load %struct.latLong*, %struct.latLong** %__result.addr, align 8 - %call = call %struct.latLong* @_ZSt4copyIP7latLongS1_ET0_T_S3_S2_(%struct.latLong* %0, %struct.latLong* %1, %struct.latLong* %2) - ret %struct.latLong* %call -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %struct.latLong* @_ZSt4copyIP7latLongS1_ET0_T_S3_S2_(%struct.latLong* %__first, %struct.latLong* %__last, %struct.latLong* %__result) #0 comdat { -entry: - %__first.addr = alloca %struct.latLong*, align 8 - %__last.addr = alloca %struct.latLong*, align 8 - %__result.addr = alloca %struct.latLong*, align 8 - store %struct.latLong* %__first, %struct.latLong** %__first.addr, align 8 - store %struct.latLong* %__last, %struct.latLong** %__last.addr, align 8 - store %struct.latLong* %__result, %struct.latLong** %__result.addr, align 8 - %0 = load %struct.latLong*, %struct.latLong** %__first.addr, align 8 - %call = call %struct.latLong* @_ZSt12__miter_baseIP7latLongET_S2_(%struct.latLong* %0) - %1 = load %struct.latLong*, %struct.latLong** %__last.addr, align 8 - %call1 = call %struct.latLong* @_ZSt12__miter_baseIP7latLongET_S2_(%struct.latLong* %1) - %2 = load %struct.latLong*, %struct.latLong** %__result.addr, align 8 - %call2 = call %struct.latLong* @_ZSt14__copy_move_a2ILb0EP7latLongS1_ET1_T0_S3_S2_(%struct.latLong* %call, %struct.latLong* %call1, %struct.latLong* %2) - ret %struct.latLong* %call2 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %struct.latLong* @_ZSt14__copy_move_a2ILb0EP7latLongS1_ET1_T0_S3_S2_(%struct.latLong* %__first, %struct.latLong* %__last, %struct.latLong* %__result) #0 comdat { -entry: - %__first.addr = alloca %struct.latLong*, align 8 - %__last.addr = alloca %struct.latLong*, align 8 - %__result.addr = alloca %struct.latLong*, align 8 - store %struct.latLong* %__first, %struct.latLong** %__first.addr, align 8 - store %struct.latLong* %__last, %struct.latLong** %__last.addr, align 8 - store %struct.latLong* %__result, %struct.latLong** %__result.addr, align 8 - %0 = load %struct.latLong*, %struct.latLong** %__first.addr, align 8 - %call = call %struct.latLong* @_ZSt12__niter_baseIP7latLongET_S2_(%struct.latLong* %0) - %1 = load %struct.latLong*, %struct.latLong** %__last.addr, align 8 - %call1 = call %struct.latLong* @_ZSt12__niter_baseIP7latLongET_S2_(%struct.latLong* %1) - %2 = load %struct.latLong*, %struct.latLong** %__result.addr, align 8 - %call2 = call %struct.latLong* @_ZSt12__niter_baseIP7latLongET_S2_(%struct.latLong* %2) - %call3 = call %struct.latLong* @_ZSt13__copy_move_aILb0EP7latLongS1_ET1_T0_S3_S2_(%struct.latLong* %call, %struct.latLong* %call1, %struct.latLong* %call2) - ret %struct.latLong* %call3 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local %struct.latLong* @_ZSt12__miter_baseIP7latLongET_S2_(%struct.latLong* %__it) #4 comdat { -entry: - %__it.addr = alloca %struct.latLong*, align 8 - store %struct.latLong* %__it, %struct.latLong** %__it.addr, align 8 - %0 = load %struct.latLong*, %struct.latLong** %__it.addr, align 8 - ret %struct.latLong* %0 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %struct.latLong* @_ZSt13__copy_move_aILb0EP7latLongS1_ET1_T0_S3_S2_(%struct.latLong* %__first, %struct.latLong* %__last, %struct.latLong* %__result) #0 comdat { -entry: - %__first.addr = alloca %struct.latLong*, align 8 - %__last.addr = alloca %struct.latLong*, align 8 - %__result.addr = alloca %struct.latLong*, align 8 - %__simple = alloca i8, align 1 - store %struct.latLong* %__first, %struct.latLong** %__first.addr, align 8 - store %struct.latLong* %__last, %struct.latLong** %__last.addr, align 8 - store %struct.latLong* %__result, %struct.latLong** %__result.addr, align 8 - store i8 1, i8* %__simple, align 1 - %0 = load %struct.latLong*, %struct.latLong** %__first.addr, align 8 - %1 = load %struct.latLong*, %struct.latLong** %__last.addr, align 8 - %2 = load %struct.latLong*, %struct.latLong** %__result.addr, align 8 - %call = call %struct.latLong* @_ZNSt11__copy_moveILb0ELb1ESt26random_access_iterator_tagE8__copy_mI7latLongEEPT_PKS4_S7_S5_(%struct.latLong* %0, %struct.latLong* %1, %struct.latLong* %2) - ret %struct.latLong* %call -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local %struct.latLong* @_ZSt12__niter_baseIP7latLongET_S2_(%struct.latLong* %__it) #4 comdat { -entry: - %__it.addr = alloca %struct.latLong*, align 8 - store %struct.latLong* %__it, %struct.latLong** %__it.addr, align 8 - %0 = load %struct.latLong*, %struct.latLong** %__it.addr, align 8 - ret %struct.latLong* %0 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local %struct.latLong* @_ZNSt11__copy_moveILb0ELb1ESt26random_access_iterator_tagE8__copy_mI7latLongEEPT_PKS4_S7_S5_(%struct.latLong* %__first, %struct.latLong* %__last, %struct.latLong* %__result) #4 comdat align 2 { -entry: - %__first.addr = alloca %struct.latLong*, align 8 - %__last.addr = alloca %struct.latLong*, align 8 - %__result.addr = alloca %struct.latLong*, align 8 - %_Num = alloca i64, align 8 - store %struct.latLong* %__first, %struct.latLong** %__first.addr, align 8 - store %struct.latLong* %__last, %struct.latLong** %__last.addr, align 8 - store %struct.latLong* %__result, %struct.latLong** %__result.addr, align 8 - %0 = load %struct.latLong*, %struct.latLong** %__last.addr, align 8 - %1 = load %struct.latLong*, %struct.latLong** %__first.addr, align 8 - %sub.ptr.lhs.cast = ptrtoint %struct.latLong* %0 to i64 - %sub.ptr.rhs.cast = ptrtoint %struct.latLong* %1 to i64 - %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast - %sub.ptr.div = sdiv exact i64 %sub.ptr.sub, 8 - store i64 %sub.ptr.div, i64* %_Num, align 8 - %2 = load i64, i64* %_Num, align 8 - %tobool = icmp ne i64 %2, 0 - br i1 %tobool, label %if.then, label %if.end - -if.then: ; preds = %entry - %3 = load %struct.latLong*, %struct.latLong** %__result.addr, align 8 - %4 = bitcast %struct.latLong* %3 to i8* - %5 = load %struct.latLong*, %struct.latLong** %__first.addr, align 8 - %6 = bitcast %struct.latLong* %5 to i8* - %7 = load i64, i64* %_Num, align 8 - %mul = mul i64 8, %7 - call void @llvm.memmove.p0i8.p0i8.i64(i8* align 4 %4, i8* align 4 %6, i64 %mul, i1 false) - br label %if.end - -if.end: ; preds = %if.then, %entry - %8 = load %struct.latLong*, %struct.latLong** %__result.addr, align 8 - %9 = load i64, i64* %_Num, align 8 - %add.ptr = getelementptr inbounds %struct.latLong, %struct.latLong* %8, i64 %9 - ret %struct.latLong* %add.ptr -} - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1 immarg) #1 - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx13new_allocatorI7latLongE7destroyEPS1_(%"class.__gnu_cxx::new_allocator.3"* %this, %struct.latLong* %__p) #4 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::new_allocator.3"*, align 8 - %__p.addr = alloca %struct.latLong*, align 8 - store %"class.__gnu_cxx::new_allocator.3"* %this, %"class.__gnu_cxx::new_allocator.3"** %this.addr, align 8 - store %struct.latLong* %__p, %struct.latLong** %__p.addr, align 8 - %this1 = load %"class.__gnu_cxx::new_allocator.3"*, %"class.__gnu_cxx::new_allocator.3"** %this.addr, align 8 - %0 = load %struct.latLong*, %struct.latLong** %__p.addr, align 8 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx14__alloc_traitsISaI6recordEE9constructIS1_EEvRS2_PS1_RKT_(%"class.std::allocator"* dereferenceable(1) %__a, %struct.record* %__p, %struct.record* dereferenceable(60) %__arg) #0 comdat align 2 { -entry: - %__a.addr = alloca %"class.std::allocator"*, align 8 - %__p.addr = alloca %struct.record*, align 8 - %__arg.addr = alloca %struct.record*, align 8 - store %"class.std::allocator"* %__a, %"class.std::allocator"** %__a.addr, align 8 - store %struct.record* %__p, %struct.record** %__p.addr, align 8 - store %struct.record* %__arg, %struct.record** %__arg.addr, align 8 - %0 = load %"class.std::allocator"*, %"class.std::allocator"** %__a.addr, align 8 - %1 = bitcast %"class.std::allocator"* %0 to %"class.__gnu_cxx::new_allocator"* - %2 = load %struct.record*, %struct.record** %__p.addr, align 8 - %3 = load %struct.record*, %struct.record** %__arg.addr, align 8 - call void @_ZN9__gnu_cxx13new_allocatorI6recordE9constructEPS1_RKS1_(%"class.__gnu_cxx::new_allocator"* %1, %struct.record* %2, %struct.record* dereferenceable(60) %3) - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZNSt6vectorI6recordSaIS0_EE17_M_realloc_insertEN9__gnu_cxx17__normal_iteratorIPS0_S2_EERKS0_(%"class.std::vector"* %this, %struct.record* %__position.coerce, %struct.record* dereferenceable(60) %__x) #0 comdat align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -entry: - %__position = alloca %"class.__gnu_cxx::__normal_iterator.5", align 8 - %this.addr = alloca %"class.std::vector"*, align 8 - %__x.addr = alloca %struct.record*, align 8 - %__len = alloca i64, align 8 - %__elems_before = alloca i64, align 8 - %ref.tmp = alloca %"class.__gnu_cxx::__normal_iterator.5", align 8 - %__new_start = alloca %struct.record*, align 8 - %__new_finish = alloca %struct.record*, align 8 - %exn.slot = alloca i8* - %ehselector.slot = alloca i32 - %coerce.dive = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.5", %"class.__gnu_cxx::__normal_iterator.5"* %__position, i32 0, i32 0 - store %struct.record* %__position.coerce, %struct.record** %coerce.dive, align 8 - store %"class.std::vector"* %this, %"class.std::vector"** %this.addr, align 8 - store %struct.record* %__x, %struct.record** %__x.addr, align 8 - %this1 = load %"class.std::vector"*, %"class.std::vector"** %this.addr, align 8 - %call = call i64 @_ZNKSt6vectorI6recordSaIS0_EE12_M_check_lenEmPKc(%"class.std::vector"* %this1, i64 1, i8* getelementptr inbounds ([26 x i8], [26 x i8]* @.str.28, i64 0, i64 0)) - store i64 %call, i64* %__len, align 8 - %call2 = call %struct.record* @_ZNSt6vectorI6recordSaIS0_EE5beginEv(%"class.std::vector"* %this1) - %coerce.dive3 = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.5", %"class.__gnu_cxx::__normal_iterator.5"* %ref.tmp, i32 0, i32 0 - store %struct.record* %call2, %struct.record** %coerce.dive3, align 8 - %call4 = call i64 @_ZN9__gnu_cxxmiIP6recordSt6vectorIS1_SaIS1_EEEENS_17__normal_iteratorIT_T0_E15difference_typeERKS9_SC_(%"class.__gnu_cxx::__normal_iterator.5"* dereferenceable(8) %__position, %"class.__gnu_cxx::__normal_iterator.5"* dereferenceable(8) %ref.tmp) - store i64 %call4, i64* %__elems_before, align 8 - %0 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %1 = load i64, i64* %__len, align 8 - %call5 = call %struct.record* @_ZNSt12_Vector_baseI6recordSaIS0_EE11_M_allocateEm(%"struct.std::_Vector_base"* %0, i64 %1) - store %struct.record* %call5, %struct.record** %__new_start, align 8 - %2 = load %struct.record*, %struct.record** %__new_start, align 8 - store %struct.record* %2, %struct.record** %__new_finish, align 8 - %3 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %3, i32 0, i32 0 - %4 = bitcast %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl to %"class.std::allocator"* - %5 = load %struct.record*, %struct.record** %__new_start, align 8 - %6 = load i64, i64* %__elems_before, align 8 - %add.ptr = getelementptr inbounds %struct.record, %struct.record* %5, i64 %6 - %7 = load %struct.record*, %struct.record** %__x.addr, align 8 - invoke void @_ZN9__gnu_cxx14__alloc_traitsISaI6recordEE9constructIS1_EEvRS2_PS1_RKT_(%"class.std::allocator"* dereferenceable(1) %4, %struct.record* %add.ptr, %struct.record* dereferenceable(60) %7) - to label %invoke.cont unwind label %lpad - -invoke.cont: ; preds = %entry - store %struct.record* null, %struct.record** %__new_finish, align 8 - %8 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl6 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %8, i32 0, i32 0 - %_M_start = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl6, i32 0, i32 0 - %9 = load %struct.record*, %struct.record** %_M_start, align 8 - %call8 = invoke dereferenceable(8) %struct.record** @_ZNK9__gnu_cxx17__normal_iteratorIP6recordSt6vectorIS1_SaIS1_EEE4baseEv(%"class.__gnu_cxx::__normal_iterator.5"* %__position) - to label %invoke.cont7 unwind label %lpad - -invoke.cont7: ; preds = %invoke.cont - %10 = load %struct.record*, %struct.record** %call8, align 8 - %11 = load %struct.record*, %struct.record** %__new_start, align 8 - %12 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %call10 = invoke dereferenceable(1) %"class.std::allocator"* @_ZNSt12_Vector_baseI6recordSaIS0_EE19_M_get_Tp_allocatorEv(%"struct.std::_Vector_base"* %12) - to label %invoke.cont9 unwind label %lpad - -invoke.cont9: ; preds = %invoke.cont7 - %call12 = invoke %struct.record* @_ZSt34__uninitialized_move_if_noexcept_aIP6recordS1_SaIS0_EET0_T_S4_S3_RT1_(%struct.record* %9, %struct.record* %10, %struct.record* %11, %"class.std::allocator"* dereferenceable(1) %call10) - to label %invoke.cont11 unwind label %lpad - -invoke.cont11: ; preds = %invoke.cont9 - store %struct.record* %call12, %struct.record** %__new_finish, align 8 - %13 = load %struct.record*, %struct.record** %__new_finish, align 8 - %incdec.ptr = getelementptr inbounds %struct.record, %struct.record* %13, i32 1 - store %struct.record* %incdec.ptr, %struct.record** %__new_finish, align 8 - %call14 = invoke dereferenceable(8) %struct.record** @_ZNK9__gnu_cxx17__normal_iteratorIP6recordSt6vectorIS1_SaIS1_EEE4baseEv(%"class.__gnu_cxx::__normal_iterator.5"* %__position) - to label %invoke.cont13 unwind label %lpad - -invoke.cont13: ; preds = %invoke.cont11 - %14 = load %struct.record*, %struct.record** %call14, align 8 - %15 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl15 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %15, i32 0, i32 0 - %_M_finish = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl15, i32 0, i32 1 - %16 = load %struct.record*, %struct.record** %_M_finish, align 8 - %17 = load %struct.record*, %struct.record** %__new_finish, align 8 - %18 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %call17 = invoke dereferenceable(1) %"class.std::allocator"* @_ZNSt12_Vector_baseI6recordSaIS0_EE19_M_get_Tp_allocatorEv(%"struct.std::_Vector_base"* %18) - to label %invoke.cont16 unwind label %lpad - -invoke.cont16: ; preds = %invoke.cont13 - %call19 = invoke %struct.record* @_ZSt34__uninitialized_move_if_noexcept_aIP6recordS1_SaIS0_EET0_T_S4_S3_RT1_(%struct.record* %14, %struct.record* %16, %struct.record* %17, %"class.std::allocator"* dereferenceable(1) %call17) - to label %invoke.cont18 unwind label %lpad - -invoke.cont18: ; preds = %invoke.cont16 - store %struct.record* %call19, %struct.record** %__new_finish, align 8 - br label %try.cont - -lpad: ; preds = %invoke.cont16, %invoke.cont13, %invoke.cont11, %invoke.cont9, %invoke.cont7, %invoke.cont, %entry - %19 = landingpad { i8*, i32 } - catch i8* null - %20 = extractvalue { i8*, i32 } %19, 0 - store i8* %20, i8** %exn.slot, align 8 - %21 = extractvalue { i8*, i32 } %19, 1 - store i32 %21, i32* %ehselector.slot, align 4 - br label %catch - -catch: ; preds = %lpad - %exn = load i8*, i8** %exn.slot, align 8 - %22 = call i8* @__cxa_begin_catch(i8* %exn) #12 - %23 = load %struct.record*, %struct.record** %__new_finish, align 8 - %tobool = icmp ne %struct.record* %23, null - br i1 %tobool, label %if.else, label %if.then - -if.then: ; preds = %catch - %24 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl20 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %24, i32 0, i32 0 - %25 = bitcast %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl20 to %"class.std::allocator"* - %26 = load %struct.record*, %struct.record** %__new_start, align 8 - %27 = load i64, i64* %__elems_before, align 8 - %add.ptr21 = getelementptr inbounds %struct.record, %struct.record* %26, i64 %27 - invoke void @_ZN9__gnu_cxx14__alloc_traitsISaI6recordEE7destroyERS2_PS1_(%"class.std::allocator"* dereferenceable(1) %25, %struct.record* %add.ptr21) - to label %invoke.cont23 unwind label %lpad22 - -invoke.cont23: ; preds = %if.then - br label %if.end - -lpad22: ; preds = %invoke.cont27, %if.end, %invoke.cont24, %if.else, %if.then - %28 = landingpad { i8*, i32 } - cleanup - %29 = extractvalue { i8*, i32 } %28, 0 - store i8* %29, i8** %exn.slot, align 8 - %30 = extractvalue { i8*, i32 } %28, 1 - store i32 %30, i32* %ehselector.slot, align 4 - invoke void @__cxa_end_catch() - to label %invoke.cont28 unwind label %terminate.lpad - -if.else: ; preds = %catch - %31 = load %struct.record*, %struct.record** %__new_start, align 8 - %32 = load %struct.record*, %struct.record** %__new_finish, align 8 - %33 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %call25 = invoke dereferenceable(1) %"class.std::allocator"* @_ZNSt12_Vector_baseI6recordSaIS0_EE19_M_get_Tp_allocatorEv(%"struct.std::_Vector_base"* %33) - to label %invoke.cont24 unwind label %lpad22 - -invoke.cont24: ; preds = %if.else - invoke void @_ZSt8_DestroyIP6recordS0_EvT_S2_RSaIT0_E(%struct.record* %31, %struct.record* %32, %"class.std::allocator"* dereferenceable(1) %call25) - to label %invoke.cont26 unwind label %lpad22 - -invoke.cont26: ; preds = %invoke.cont24 - br label %if.end - -if.end: ; preds = %invoke.cont26, %invoke.cont23 - %34 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %35 = load %struct.record*, %struct.record** %__new_start, align 8 - %36 = load i64, i64* %__len, align 8 - invoke void @_ZNSt12_Vector_baseI6recordSaIS0_EE13_M_deallocateEPS0_m(%"struct.std::_Vector_base"* %34, %struct.record* %35, i64 %36) - to label %invoke.cont27 unwind label %lpad22 - -invoke.cont27: ; preds = %if.end - invoke void @__cxa_rethrow() #15 - to label %unreachable unwind label %lpad22 - -invoke.cont28: ; preds = %lpad22 - br label %eh.resume - -try.cont: ; preds = %invoke.cont18 - %37 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl29 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %37, i32 0, i32 0 - %_M_start30 = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl29, i32 0, i32 0 - %38 = load %struct.record*, %struct.record** %_M_start30, align 8 - %39 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl31 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %39, i32 0, i32 0 - %_M_finish32 = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl31, i32 0, i32 1 - %40 = load %struct.record*, %struct.record** %_M_finish32, align 8 - %41 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %call33 = call dereferenceable(1) %"class.std::allocator"* @_ZNSt12_Vector_baseI6recordSaIS0_EE19_M_get_Tp_allocatorEv(%"struct.std::_Vector_base"* %41) - call void @_ZSt8_DestroyIP6recordS0_EvT_S2_RSaIT0_E(%struct.record* %38, %struct.record* %40, %"class.std::allocator"* dereferenceable(1) %call33) - %42 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %43 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl34 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %43, i32 0, i32 0 - %_M_start35 = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl34, i32 0, i32 0 - %44 = load %struct.record*, %struct.record** %_M_start35, align 8 - %45 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl36 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %45, i32 0, i32 0 - %_M_end_of_storage = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl36, i32 0, i32 2 - %46 = load %struct.record*, %struct.record** %_M_end_of_storage, align 8 - %47 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl37 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %47, i32 0, i32 0 - %_M_start38 = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl37, i32 0, i32 0 - %48 = load %struct.record*, %struct.record** %_M_start38, align 8 - %sub.ptr.lhs.cast = ptrtoint %struct.record* %46 to i64 - %sub.ptr.rhs.cast = ptrtoint %struct.record* %48 to i64 - %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast - %sub.ptr.div = sdiv exact i64 %sub.ptr.sub, 60 - call void @_ZNSt12_Vector_baseI6recordSaIS0_EE13_M_deallocateEPS0_m(%"struct.std::_Vector_base"* %42, %struct.record* %44, i64 %sub.ptr.div) - %49 = load %struct.record*, %struct.record** %__new_start, align 8 - %50 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl39 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %50, i32 0, i32 0 - %_M_start40 = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl39, i32 0, i32 0 - store %struct.record* %49, %struct.record** %_M_start40, align 8 - %51 = load %struct.record*, %struct.record** %__new_finish, align 8 - %52 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl41 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %52, i32 0, i32 0 - %_M_finish42 = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl41, i32 0, i32 1 - store %struct.record* %51, %struct.record** %_M_finish42, align 8 - %53 = load %struct.record*, %struct.record** %__new_start, align 8 - %54 = load i64, i64* %__len, align 8 - %add.ptr43 = getelementptr inbounds %struct.record, %struct.record* %53, i64 %54 - %55 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl44 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %55, i32 0, i32 0 - %_M_end_of_storage45 = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl44, i32 0, i32 2 - store %struct.record* %add.ptr43, %struct.record** %_M_end_of_storage45, align 8 - ret void - -eh.resume: ; preds = %invoke.cont28 - %exn46 = load i8*, i8** %exn.slot, align 8 - %sel = load i32, i32* %ehselector.slot, align 4 - %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn46, 0 - %lpad.val47 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1 - resume { i8*, i32 } %lpad.val47 - -terminate.lpad: ; preds = %lpad22 - %56 = landingpad { i8*, i32 } - catch i8* null - %57 = extractvalue { i8*, i32 } %56, 0 - call void @__clang_call_terminate(i8* %57) #13 - unreachable - -unreachable: ; preds = %invoke.cont27 - unreachable -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %struct.record* @_ZNSt6vectorI6recordSaIS0_EE3endEv(%"class.std::vector"* %this) #0 comdat align 2 { -entry: - %retval = alloca %"class.__gnu_cxx::__normal_iterator.5", align 8 - %this.addr = alloca %"class.std::vector"*, align 8 - store %"class.std::vector"* %this, %"class.std::vector"** %this.addr, align 8 - %this1 = load %"class.std::vector"*, %"class.std::vector"** %this.addr, align 8 - %0 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %0, i32 0, i32 0 - %_M_finish = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl, i32 0, i32 1 - call void @_ZN9__gnu_cxx17__normal_iteratorIP6recordSt6vectorIS1_SaIS1_EEEC2ERKS2_(%"class.__gnu_cxx::__normal_iterator.5"* %retval, %struct.record** dereferenceable(8) %_M_finish) - %coerce.dive = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.5", %"class.__gnu_cxx::__normal_iterator.5"* %retval, i32 0, i32 0 - %1 = load %struct.record*, %struct.record** %coerce.dive, align 8 - ret %struct.record* %1 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx13new_allocatorI6recordE9constructEPS1_RKS1_(%"class.__gnu_cxx::new_allocator"* %this, %struct.record* %__p, %struct.record* dereferenceable(60) %__val) #4 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::new_allocator"*, align 8 - %__p.addr = alloca %struct.record*, align 8 - %__val.addr = alloca %struct.record*, align 8 - store %"class.__gnu_cxx::new_allocator"* %this, %"class.__gnu_cxx::new_allocator"** %this.addr, align 8 - store %struct.record* %__p, %struct.record** %__p.addr, align 8 - store %struct.record* %__val, %struct.record** %__val.addr, align 8 - %this1 = load %"class.__gnu_cxx::new_allocator"*, %"class.__gnu_cxx::new_allocator"** %this.addr, align 8 - %0 = load %struct.record*, %struct.record** %__p.addr, align 8 - %1 = bitcast %struct.record* %0 to i8* - %2 = bitcast i8* %1 to %struct.record* - %3 = load %struct.record*, %struct.record** %__val.addr, align 8 - %4 = bitcast %struct.record* %2 to i8* - %5 = bitcast %struct.record* %3 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %4, i8* align 4 %5, i64 60, i1 false) - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local i64 @_ZNKSt6vectorI6recordSaIS0_EE12_M_check_lenEmPKc(%"class.std::vector"* %this, i64 %__n, i8* %__s) #0 comdat align 2 { -entry: - %this.addr = alloca %"class.std::vector"*, align 8 - %__n.addr = alloca i64, align 8 - %__s.addr = alloca i8*, align 8 - %__len = alloca i64, align 8 - %ref.tmp = alloca i64, align 8 - store %"class.std::vector"* %this, %"class.std::vector"** %this.addr, align 8 - store i64 %__n, i64* %__n.addr, align 8 - store i8* %__s, i8** %__s.addr, align 8 - %this1 = load %"class.std::vector"*, %"class.std::vector"** %this.addr, align 8 - %call = call i64 @_ZNKSt6vectorI6recordSaIS0_EE8max_sizeEv(%"class.std::vector"* %this1) - %call2 = call i64 @_ZNKSt6vectorI6recordSaIS0_EE4sizeEv(%"class.std::vector"* %this1) - %sub = sub i64 %call, %call2 - %0 = load i64, i64* %__n.addr, align 8 - %cmp = icmp ult i64 %sub, %0 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - %1 = load i8*, i8** %__s.addr, align 8 - call void @_ZSt20__throw_length_errorPKc(i8* %1) #15 - unreachable - -if.end: ; preds = %entry - %call3 = call i64 @_ZNKSt6vectorI6recordSaIS0_EE4sizeEv(%"class.std::vector"* %this1) - %call4 = call i64 @_ZNKSt6vectorI6recordSaIS0_EE4sizeEv(%"class.std::vector"* %this1) - store i64 %call4, i64* %ref.tmp, align 8 - %call5 = call dereferenceable(8) i64* @_ZSt3maxImERKT_S2_S2_(i64* dereferenceable(8) %ref.tmp, i64* dereferenceable(8) %__n.addr) - %2 = load i64, i64* %call5, align 8 - %add = add i64 %call3, %2 - store i64 %add, i64* %__len, align 8 - %3 = load i64, i64* %__len, align 8 - %call6 = call i64 @_ZNKSt6vectorI6recordSaIS0_EE4sizeEv(%"class.std::vector"* %this1) - %cmp7 = icmp ult i64 %3, %call6 - br i1 %cmp7, label %cond.true, label %lor.lhs.false - -lor.lhs.false: ; preds = %if.end - %4 = load i64, i64* %__len, align 8 - %call8 = call i64 @_ZNKSt6vectorI6recordSaIS0_EE8max_sizeEv(%"class.std::vector"* %this1) - %cmp9 = icmp ugt i64 %4, %call8 - br i1 %cmp9, label %cond.true, label %cond.false - -cond.true: ; preds = %lor.lhs.false, %if.end - %call10 = call i64 @_ZNKSt6vectorI6recordSaIS0_EE8max_sizeEv(%"class.std::vector"* %this1) - br label %cond.end - -cond.false: ; preds = %lor.lhs.false - %5 = load i64, i64* %__len, align 8 - br label %cond.end - -cond.end: ; preds = %cond.false, %cond.true - %cond = phi i64 [ %call10, %cond.true ], [ %5, %cond.false ] - ret i64 %cond -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local i64 @_ZN9__gnu_cxxmiIP6recordSt6vectorIS1_SaIS1_EEEENS_17__normal_iteratorIT_T0_E15difference_typeERKS9_SC_(%"class.__gnu_cxx::__normal_iterator.5"* dereferenceable(8) %__lhs, %"class.__gnu_cxx::__normal_iterator.5"* dereferenceable(8) %__rhs) #0 comdat { -entry: - %__lhs.addr = alloca %"class.__gnu_cxx::__normal_iterator.5"*, align 8 - %__rhs.addr = alloca %"class.__gnu_cxx::__normal_iterator.5"*, align 8 - store %"class.__gnu_cxx::__normal_iterator.5"* %__lhs, %"class.__gnu_cxx::__normal_iterator.5"** %__lhs.addr, align 8 - store %"class.__gnu_cxx::__normal_iterator.5"* %__rhs, %"class.__gnu_cxx::__normal_iterator.5"** %__rhs.addr, align 8 - %0 = load %"class.__gnu_cxx::__normal_iterator.5"*, %"class.__gnu_cxx::__normal_iterator.5"** %__lhs.addr, align 8 - %call = call dereferenceable(8) %struct.record** @_ZNK9__gnu_cxx17__normal_iteratorIP6recordSt6vectorIS1_SaIS1_EEE4baseEv(%"class.__gnu_cxx::__normal_iterator.5"* %0) - %1 = load %struct.record*, %struct.record** %call, align 8 - %2 = load %"class.__gnu_cxx::__normal_iterator.5"*, %"class.__gnu_cxx::__normal_iterator.5"** %__rhs.addr, align 8 - %call1 = call dereferenceable(8) %struct.record** @_ZNK9__gnu_cxx17__normal_iteratorIP6recordSt6vectorIS1_SaIS1_EEE4baseEv(%"class.__gnu_cxx::__normal_iterator.5"* %2) - %3 = load %struct.record*, %struct.record** %call1, align 8 - %sub.ptr.lhs.cast = ptrtoint %struct.record* %1 to i64 - %sub.ptr.rhs.cast = ptrtoint %struct.record* %3 to i64 - %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast - %sub.ptr.div = sdiv exact i64 %sub.ptr.sub, 60 - ret i64 %sub.ptr.div -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %struct.record* @_ZNSt6vectorI6recordSaIS0_EE5beginEv(%"class.std::vector"* %this) #0 comdat align 2 { -entry: - %retval = alloca %"class.__gnu_cxx::__normal_iterator.5", align 8 - %this.addr = alloca %"class.std::vector"*, align 8 - store %"class.std::vector"* %this, %"class.std::vector"** %this.addr, align 8 - %this1 = load %"class.std::vector"*, %"class.std::vector"** %this.addr, align 8 - %0 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %0, i32 0, i32 0 - %_M_start = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl, i32 0, i32 0 - call void @_ZN9__gnu_cxx17__normal_iteratorIP6recordSt6vectorIS1_SaIS1_EEEC2ERKS2_(%"class.__gnu_cxx::__normal_iterator.5"* %retval, %struct.record** dereferenceable(8) %_M_start) - %coerce.dive = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.5", %"class.__gnu_cxx::__normal_iterator.5"* %retval, i32 0, i32 0 - %1 = load %struct.record*, %struct.record** %coerce.dive, align 8 - ret %struct.record* %1 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %struct.record* @_ZNSt12_Vector_baseI6recordSaIS0_EE11_M_allocateEm(%"struct.std::_Vector_base"* %this, i64 %__n) #0 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Vector_base"*, align 8 - %__n.addr = alloca i64, align 8 - store %"struct.std::_Vector_base"* %this, %"struct.std::_Vector_base"** %this.addr, align 8 - store i64 %__n, i64* %__n.addr, align 8 - %this1 = load %"struct.std::_Vector_base"*, %"struct.std::_Vector_base"** %this.addr, align 8 - %0 = load i64, i64* %__n.addr, align 8 - %cmp = icmp ne i64 %0, 0 - br i1 %cmp, label %cond.true, label %cond.false - -cond.true: ; preds = %entry - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %this1, i32 0, i32 0 - %1 = bitcast %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl to %"class.std::allocator"* - %2 = load i64, i64* %__n.addr, align 8 - %call = call %struct.record* @_ZN9__gnu_cxx14__alloc_traitsISaI6recordEE8allocateERS2_m(%"class.std::allocator"* dereferenceable(1) %1, i64 %2) - br label %cond.end - -cond.false: ; preds = %entry - br label %cond.end - -cond.end: ; preds = %cond.false, %cond.true - %cond = phi %struct.record* [ %call, %cond.true ], [ null, %cond.false ] - ret %struct.record* %cond -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %struct.record* @_ZSt34__uninitialized_move_if_noexcept_aIP6recordS1_SaIS0_EET0_T_S4_S3_RT1_(%struct.record* %__first, %struct.record* %__last, %struct.record* %__result, %"class.std::allocator"* dereferenceable(1) %__alloc) #0 comdat { -entry: - %__first.addr = alloca %struct.record*, align 8 - %__last.addr = alloca %struct.record*, align 8 - %__result.addr = alloca %struct.record*, align 8 - %__alloc.addr = alloca %"class.std::allocator"*, align 8 - store %struct.record* %__first, %struct.record** %__first.addr, align 8 - store %struct.record* %__last, %struct.record** %__last.addr, align 8 - store %struct.record* %__result, %struct.record** %__result.addr, align 8 - store %"class.std::allocator"* %__alloc, %"class.std::allocator"** %__alloc.addr, align 8 - %0 = load %struct.record*, %struct.record** %__first.addr, align 8 - %1 = load %struct.record*, %struct.record** %__last.addr, align 8 - %2 = load %struct.record*, %struct.record** %__result.addr, align 8 - %3 = load %"class.std::allocator"*, %"class.std::allocator"** %__alloc.addr, align 8 - %call = call %struct.record* @_ZSt22__uninitialized_copy_aIP6recordS1_S0_ET0_T_S3_S2_RSaIT1_E(%struct.record* %0, %struct.record* %1, %struct.record* %2, %"class.std::allocator"* dereferenceable(1) %3) - ret %struct.record* %call -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local dereferenceable(8) %struct.record** @_ZNK9__gnu_cxx17__normal_iteratorIP6recordSt6vectorIS1_SaIS1_EEE4baseEv(%"class.__gnu_cxx::__normal_iterator.5"* %this) #4 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::__normal_iterator.5"*, align 8 - store %"class.__gnu_cxx::__normal_iterator.5"* %this, %"class.__gnu_cxx::__normal_iterator.5"** %this.addr, align 8 - %this1 = load %"class.__gnu_cxx::__normal_iterator.5"*, %"class.__gnu_cxx::__normal_iterator.5"** %this.addr, align 8 - %_M_current = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.5", %"class.__gnu_cxx::__normal_iterator.5"* %this1, i32 0, i32 0 - ret %struct.record** %_M_current -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx14__alloc_traitsISaI6recordEE7destroyERS2_PS1_(%"class.std::allocator"* dereferenceable(1) %__a, %struct.record* %__p) #0 comdat align 2 { -entry: - %__a.addr = alloca %"class.std::allocator"*, align 8 - %__p.addr = alloca %struct.record*, align 8 - store %"class.std::allocator"* %__a, %"class.std::allocator"** %__a.addr, align 8 - store %struct.record* %__p, %struct.record** %__p.addr, align 8 - %0 = load %"class.std::allocator"*, %"class.std::allocator"** %__a.addr, align 8 - %1 = bitcast %"class.std::allocator"* %0 to %"class.__gnu_cxx::new_allocator"* - %2 = load %struct.record*, %struct.record** %__p.addr, align 8 - call void @_ZN9__gnu_cxx13new_allocatorI6recordE7destroyEPS1_(%"class.__gnu_cxx::new_allocator"* %1, %struct.record* %2) - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local i64 @_ZNKSt6vectorI6recordSaIS0_EE8max_sizeEv(%"class.std::vector"* %this) #0 comdat align 2 { -entry: - %this.addr = alloca %"class.std::vector"*, align 8 - store %"class.std::vector"* %this, %"class.std::vector"** %this.addr, align 8 - %this1 = load %"class.std::vector"*, %"class.std::vector"** %this.addr, align 8 - %0 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %call = call dereferenceable(1) %"class.std::allocator"* @_ZNKSt12_Vector_baseI6recordSaIS0_EE19_M_get_Tp_allocatorEv(%"struct.std::_Vector_base"* %0) - %call2 = call i64 @_ZN9__gnu_cxx14__alloc_traitsISaI6recordEE8max_sizeERKS2_(%"class.std::allocator"* dereferenceable(1) %call) - ret i64 %call2 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local i64 @_ZNKSt6vectorI6recordSaIS0_EE4sizeEv(%"class.std::vector"* %this) #4 comdat align 2 { -entry: - %this.addr = alloca %"class.std::vector"*, align 8 - store %"class.std::vector"* %this, %"class.std::vector"** %this.addr, align 8 - %this1 = load %"class.std::vector"*, %"class.std::vector"** %this.addr, align 8 - %0 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %0, i32 0, i32 0 - %_M_finish = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl, i32 0, i32 1 - %1 = load %struct.record*, %struct.record** %_M_finish, align 8 - %2 = bitcast %"class.std::vector"* %this1 to %"struct.std::_Vector_base"* - %_M_impl2 = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %2, i32 0, i32 0 - %_M_start = getelementptr inbounds %"struct.std::_Vector_base >::_Vector_impl", %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl2, i32 0, i32 0 - %3 = load %struct.record*, %struct.record** %_M_start, align 8 - %sub.ptr.lhs.cast = ptrtoint %struct.record* %1 to i64 - %sub.ptr.rhs.cast = ptrtoint %struct.record* %3 to i64 - %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast - %sub.ptr.div = sdiv exact i64 %sub.ptr.sub, 60 - ret i64 %sub.ptr.div -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local i64 @_ZN9__gnu_cxx14__alloc_traitsISaI6recordEE8max_sizeERKS2_(%"class.std::allocator"* dereferenceable(1) %__a) #4 comdat align 2 { -entry: - %__a.addr = alloca %"class.std::allocator"*, align 8 - store %"class.std::allocator"* %__a, %"class.std::allocator"** %__a.addr, align 8 - %0 = load %"class.std::allocator"*, %"class.std::allocator"** %__a.addr, align 8 - %1 = bitcast %"class.std::allocator"* %0 to %"class.__gnu_cxx::new_allocator"* - %call = call i64 @_ZNK9__gnu_cxx13new_allocatorI6recordE8max_sizeEv(%"class.__gnu_cxx::new_allocator"* %1) #12 - ret i64 %call -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local dereferenceable(1) %"class.std::allocator"* @_ZNKSt12_Vector_baseI6recordSaIS0_EE19_M_get_Tp_allocatorEv(%"struct.std::_Vector_base"* %this) #4 comdat align 2 { -entry: - %this.addr = alloca %"struct.std::_Vector_base"*, align 8 - store %"struct.std::_Vector_base"* %this, %"struct.std::_Vector_base"** %this.addr, align 8 - %this1 = load %"struct.std::_Vector_base"*, %"struct.std::_Vector_base"** %this.addr, align 8 - %_M_impl = getelementptr inbounds %"struct.std::_Vector_base", %"struct.std::_Vector_base"* %this1, i32 0, i32 0 - %0 = bitcast %"struct.std::_Vector_base >::_Vector_impl"* %_M_impl to %"class.std::allocator"* - ret %"class.std::allocator"* %0 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local i64 @_ZNK9__gnu_cxx13new_allocatorI6recordE8max_sizeEv(%"class.__gnu_cxx::new_allocator"* %this) #4 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::new_allocator"*, align 8 - store %"class.__gnu_cxx::new_allocator"* %this, %"class.__gnu_cxx::new_allocator"** %this.addr, align 8 - %this1 = load %"class.__gnu_cxx::new_allocator"*, %"class.__gnu_cxx::new_allocator"** %this.addr, align 8 - ret i64 307445734561825860 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx17__normal_iteratorIP6recordSt6vectorIS1_SaIS1_EEEC2ERKS2_(%"class.__gnu_cxx::__normal_iterator.5"* %this, %struct.record** dereferenceable(8) %__i) unnamed_addr #4 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::__normal_iterator.5"*, align 8 - %__i.addr = alloca %struct.record**, align 8 - store %"class.__gnu_cxx::__normal_iterator.5"* %this, %"class.__gnu_cxx::__normal_iterator.5"** %this.addr, align 8 - store %struct.record** %__i, %struct.record*** %__i.addr, align 8 - %this1 = load %"class.__gnu_cxx::__normal_iterator.5"*, %"class.__gnu_cxx::__normal_iterator.5"** %this.addr, align 8 - %_M_current = getelementptr inbounds %"class.__gnu_cxx::__normal_iterator.5", %"class.__gnu_cxx::__normal_iterator.5"* %this1, i32 0, i32 0 - %0 = load %struct.record**, %struct.record*** %__i.addr, align 8 - %1 = load %struct.record*, %struct.record** %0, align 8 - store %struct.record* %1, %struct.record** %_M_current, align 8 - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %struct.record* @_ZN9__gnu_cxx14__alloc_traitsISaI6recordEE8allocateERS2_m(%"class.std::allocator"* dereferenceable(1) %__a, i64 %__n) #0 comdat align 2 { -entry: - %__a.addr = alloca %"class.std::allocator"*, align 8 - %__n.addr = alloca i64, align 8 - store %"class.std::allocator"* %__a, %"class.std::allocator"** %__a.addr, align 8 - store i64 %__n, i64* %__n.addr, align 8 - %0 = load %"class.std::allocator"*, %"class.std::allocator"** %__a.addr, align 8 - %1 = bitcast %"class.std::allocator"* %0 to %"class.__gnu_cxx::new_allocator"* - %2 = load i64, i64* %__n.addr, align 8 - %call = call %struct.record* @_ZN9__gnu_cxx13new_allocatorI6recordE8allocateEmPKv(%"class.__gnu_cxx::new_allocator"* %1, i64 %2, i8* null) - ret %struct.record* %call -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %struct.record* @_ZN9__gnu_cxx13new_allocatorI6recordE8allocateEmPKv(%"class.__gnu_cxx::new_allocator"* %this, i64 %__n, i8* %0) #0 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::new_allocator"*, align 8 - %__n.addr = alloca i64, align 8 - %.addr = alloca i8*, align 8 - store %"class.__gnu_cxx::new_allocator"* %this, %"class.__gnu_cxx::new_allocator"** %this.addr, align 8 - store i64 %__n, i64* %__n.addr, align 8 - store i8* %0, i8** %.addr, align 8 - %this1 = load %"class.__gnu_cxx::new_allocator"*, %"class.__gnu_cxx::new_allocator"** %this.addr, align 8 - %1 = load i64, i64* %__n.addr, align 8 - %call = call i64 @_ZNK9__gnu_cxx13new_allocatorI6recordE8max_sizeEv(%"class.__gnu_cxx::new_allocator"* %this1) #12 - %cmp = icmp ugt i64 %1, %call - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - call void @_ZSt17__throw_bad_allocv() #15 - unreachable - -if.end: ; preds = %entry - %2 = load i64, i64* %__n.addr, align 8 - %mul = mul i64 %2, 60 - %call2 = call i8* @_Znwm(i64 %mul) - %3 = bitcast i8* %call2 to %struct.record* - ret %struct.record* %3 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %struct.record* @_ZSt22__uninitialized_copy_aIP6recordS1_S0_ET0_T_S3_S2_RSaIT1_E(%struct.record* %__first, %struct.record* %__last, %struct.record* %__result, %"class.std::allocator"* dereferenceable(1) %0) #0 comdat { -entry: - %__first.addr = alloca %struct.record*, align 8 - %__last.addr = alloca %struct.record*, align 8 - %__result.addr = alloca %struct.record*, align 8 - %.addr = alloca %"class.std::allocator"*, align 8 - store %struct.record* %__first, %struct.record** %__first.addr, align 8 - store %struct.record* %__last, %struct.record** %__last.addr, align 8 - store %struct.record* %__result, %struct.record** %__result.addr, align 8 - store %"class.std::allocator"* %0, %"class.std::allocator"** %.addr, align 8 - %1 = load %struct.record*, %struct.record** %__first.addr, align 8 - %2 = load %struct.record*, %struct.record** %__last.addr, align 8 - %3 = load %struct.record*, %struct.record** %__result.addr, align 8 - %call = call %struct.record* @_ZSt18uninitialized_copyIP6recordS1_ET0_T_S3_S2_(%struct.record* %1, %struct.record* %2, %struct.record* %3) - ret %struct.record* %call -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %struct.record* @_ZSt18uninitialized_copyIP6recordS1_ET0_T_S3_S2_(%struct.record* %__first, %struct.record* %__last, %struct.record* %__result) #0 comdat { -entry: - %__first.addr = alloca %struct.record*, align 8 - %__last.addr = alloca %struct.record*, align 8 - %__result.addr = alloca %struct.record*, align 8 - %__assignable = alloca i8, align 1 - store %struct.record* %__first, %struct.record** %__first.addr, align 8 - store %struct.record* %__last, %struct.record** %__last.addr, align 8 - store %struct.record* %__result, %struct.record** %__result.addr, align 8 - store i8 1, i8* %__assignable, align 1 - %0 = load %struct.record*, %struct.record** %__first.addr, align 8 - %1 = load %struct.record*, %struct.record** %__last.addr, align 8 - %2 = load %struct.record*, %struct.record** %__result.addr, align 8 - %call = call %struct.record* @_ZNSt20__uninitialized_copyILb1EE13__uninit_copyIP6recordS3_EET0_T_S5_S4_(%struct.record* %0, %struct.record* %1, %struct.record* %2) - ret %struct.record* %call -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %struct.record* @_ZNSt20__uninitialized_copyILb1EE13__uninit_copyIP6recordS3_EET0_T_S5_S4_(%struct.record* %__first, %struct.record* %__last, %struct.record* %__result) #0 comdat align 2 { -entry: - %__first.addr = alloca %struct.record*, align 8 - %__last.addr = alloca %struct.record*, align 8 - %__result.addr = alloca %struct.record*, align 8 - store %struct.record* %__first, %struct.record** %__first.addr, align 8 - store %struct.record* %__last, %struct.record** %__last.addr, align 8 - store %struct.record* %__result, %struct.record** %__result.addr, align 8 - %0 = load %struct.record*, %struct.record** %__first.addr, align 8 - %1 = load %struct.record*, %struct.record** %__last.addr, align 8 - %2 = load %struct.record*, %struct.record** %__result.addr, align 8 - %call = call %struct.record* @_ZSt4copyIP6recordS1_ET0_T_S3_S2_(%struct.record* %0, %struct.record* %1, %struct.record* %2) - ret %struct.record* %call -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %struct.record* @_ZSt4copyIP6recordS1_ET0_T_S3_S2_(%struct.record* %__first, %struct.record* %__last, %struct.record* %__result) #0 comdat { -entry: - %__first.addr = alloca %struct.record*, align 8 - %__last.addr = alloca %struct.record*, align 8 - %__result.addr = alloca %struct.record*, align 8 - store %struct.record* %__first, %struct.record** %__first.addr, align 8 - store %struct.record* %__last, %struct.record** %__last.addr, align 8 - store %struct.record* %__result, %struct.record** %__result.addr, align 8 - %0 = load %struct.record*, %struct.record** %__first.addr, align 8 - %call = call %struct.record* @_ZSt12__miter_baseIP6recordET_S2_(%struct.record* %0) - %1 = load %struct.record*, %struct.record** %__last.addr, align 8 - %call1 = call %struct.record* @_ZSt12__miter_baseIP6recordET_S2_(%struct.record* %1) - %2 = load %struct.record*, %struct.record** %__result.addr, align 8 - %call2 = call %struct.record* @_ZSt14__copy_move_a2ILb0EP6recordS1_ET1_T0_S3_S2_(%struct.record* %call, %struct.record* %call1, %struct.record* %2) - ret %struct.record* %call2 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %struct.record* @_ZSt14__copy_move_a2ILb0EP6recordS1_ET1_T0_S3_S2_(%struct.record* %__first, %struct.record* %__last, %struct.record* %__result) #0 comdat { -entry: - %__first.addr = alloca %struct.record*, align 8 - %__last.addr = alloca %struct.record*, align 8 - %__result.addr = alloca %struct.record*, align 8 - store %struct.record* %__first, %struct.record** %__first.addr, align 8 - store %struct.record* %__last, %struct.record** %__last.addr, align 8 - store %struct.record* %__result, %struct.record** %__result.addr, align 8 - %0 = load %struct.record*, %struct.record** %__first.addr, align 8 - %call = call %struct.record* @_ZSt12__niter_baseIP6recordET_S2_(%struct.record* %0) - %1 = load %struct.record*, %struct.record** %__last.addr, align 8 - %call1 = call %struct.record* @_ZSt12__niter_baseIP6recordET_S2_(%struct.record* %1) - %2 = load %struct.record*, %struct.record** %__result.addr, align 8 - %call2 = call %struct.record* @_ZSt12__niter_baseIP6recordET_S2_(%struct.record* %2) - %call3 = call %struct.record* @_ZSt13__copy_move_aILb0EP6recordS1_ET1_T0_S3_S2_(%struct.record* %call, %struct.record* %call1, %struct.record* %call2) - ret %struct.record* %call3 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local %struct.record* @_ZSt12__miter_baseIP6recordET_S2_(%struct.record* %__it) #4 comdat { -entry: - %__it.addr = alloca %struct.record*, align 8 - store %struct.record* %__it, %struct.record** %__it.addr, align 8 - %0 = load %struct.record*, %struct.record** %__it.addr, align 8 - ret %struct.record* %0 -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local %struct.record* @_ZSt13__copy_move_aILb0EP6recordS1_ET1_T0_S3_S2_(%struct.record* %__first, %struct.record* %__last, %struct.record* %__result) #0 comdat { -entry: - %__first.addr = alloca %struct.record*, align 8 - %__last.addr = alloca %struct.record*, align 8 - %__result.addr = alloca %struct.record*, align 8 - %__simple = alloca i8, align 1 - store %struct.record* %__first, %struct.record** %__first.addr, align 8 - store %struct.record* %__last, %struct.record** %__last.addr, align 8 - store %struct.record* %__result, %struct.record** %__result.addr, align 8 - store i8 1, i8* %__simple, align 1 - %0 = load %struct.record*, %struct.record** %__first.addr, align 8 - %1 = load %struct.record*, %struct.record** %__last.addr, align 8 - %2 = load %struct.record*, %struct.record** %__result.addr, align 8 - %call = call %struct.record* @_ZNSt11__copy_moveILb0ELb1ESt26random_access_iterator_tagE8__copy_mI6recordEEPT_PKS4_S7_S5_(%struct.record* %0, %struct.record* %1, %struct.record* %2) - ret %struct.record* %call -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local %struct.record* @_ZSt12__niter_baseIP6recordET_S2_(%struct.record* %__it) #4 comdat { -entry: - %__it.addr = alloca %struct.record*, align 8 - store %struct.record* %__it, %struct.record** %__it.addr, align 8 - %0 = load %struct.record*, %struct.record** %__it.addr, align 8 - ret %struct.record* %0 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local %struct.record* @_ZNSt11__copy_moveILb0ELb1ESt26random_access_iterator_tagE8__copy_mI6recordEEPT_PKS4_S7_S5_(%struct.record* %__first, %struct.record* %__last, %struct.record* %__result) #4 comdat align 2 { -entry: - %__first.addr = alloca %struct.record*, align 8 - %__last.addr = alloca %struct.record*, align 8 - %__result.addr = alloca %struct.record*, align 8 - %_Num = alloca i64, align 8 - store %struct.record* %__first, %struct.record** %__first.addr, align 8 - store %struct.record* %__last, %struct.record** %__last.addr, align 8 - store %struct.record* %__result, %struct.record** %__result.addr, align 8 - %0 = load %struct.record*, %struct.record** %__last.addr, align 8 - %1 = load %struct.record*, %struct.record** %__first.addr, align 8 - %sub.ptr.lhs.cast = ptrtoint %struct.record* %0 to i64 - %sub.ptr.rhs.cast = ptrtoint %struct.record* %1 to i64 - %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast - %sub.ptr.div = sdiv exact i64 %sub.ptr.sub, 60 - store i64 %sub.ptr.div, i64* %_Num, align 8 - %2 = load i64, i64* %_Num, align 8 - %tobool = icmp ne i64 %2, 0 - br i1 %tobool, label %if.then, label %if.end - -if.then: ; preds = %entry - %3 = load %struct.record*, %struct.record** %__result.addr, align 8 - %4 = bitcast %struct.record* %3 to i8* - %5 = load %struct.record*, %struct.record** %__first.addr, align 8 - %6 = bitcast %struct.record* %5 to i8* - %7 = load i64, i64* %_Num, align 8 - %mul = mul i64 60, %7 - call void @llvm.memmove.p0i8.p0i8.i64(i8* align 4 %4, i8* align 4 %6, i64 %mul, i1 false) - br label %if.end - -if.end: ; preds = %if.then, %entry - %8 = load %struct.record*, %struct.record** %__result.addr, align 8 - %9 = load i64, i64* %_Num, align 8 - %add.ptr = getelementptr inbounds %struct.record, %struct.record* %8, i64 %9 - ret %struct.record* %add.ptr -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN9__gnu_cxx13new_allocatorI6recordE7destroyEPS1_(%"class.__gnu_cxx::new_allocator"* %this, %struct.record* %__p) #4 comdat align 2 { -entry: - %this.addr = alloca %"class.__gnu_cxx::new_allocator"*, align 8 - %__p.addr = alloca %struct.record*, align 8 - store %"class.__gnu_cxx::new_allocator"* %this, %"class.__gnu_cxx::new_allocator"** %this.addr, align 8 - store %struct.record* %__p, %struct.record** %__p.addr, align 8 - %this1 = load %"class.__gnu_cxx::new_allocator"*, %"class.__gnu_cxx::new_allocator"** %this.addr, align 8 - %0 = load %struct.record*, %struct.record** %__p.addr, align 8 - ret void -} - -define internal void @__cuda_register_globals(i8** %0) { -entry: - %1 = call i32 @__cudaRegisterFunction(i8** %0, i8* bitcast (void (%struct.latLong*, float*, i32, float, float)* @_Z6euclidP7latLongPfiff to i8*), i8* getelementptr inbounds ([24 x i8], [24 x i8]* @0, i64 0, i64 0), i8* getelementptr inbounds ([24 x i8], [24 x i8]* @0, i64 0, i64 0), i32 -1, i8* null, i8* null, i8* null, i8* null, i32* null) - ret void -} - -declare dso_local i32 @__cudaRegisterFunction(i8**, i8*, i8*, i8*, i32, i8*, i8*, i8*, i8*, i32*) - -declare dso_local i32 @__cudaRegisterVar(i8**, i8*, i8*, i8*, i32, i32, i32, i32) - -declare dso_local i8** @__cudaRegisterFatBinary(i8*) - -define internal void @__cuda_module_ctor(i8* %0) { -entry: - %1 = call i8** @__cudaRegisterFatBinary(i8* bitcast ({ i32, i32, i8*, i8* }* @__cuda_fatbin_wrapper to i8*)) - store i8** %1, i8*** @__cuda_gpubin_handle, align 8 - call void @__cuda_register_globals(i8** %1) - call void @__cudaRegisterFatBinaryEnd(i8** %1) - %2 = call i32 @atexit(void (i8*)* @__cuda_module_dtor) - ret void -} - -declare dso_local void @__cudaRegisterFatBinaryEnd(i8**) - -declare dso_local void @__cudaUnregisterFatBinary(i8**) - -define internal void @__cuda_module_dtor(i8* %0) { -entry: - %1 = load i8**, i8*** @__cuda_gpubin_handle, align 8 - call void @__cudaUnregisterFatBinary(i8** %1) - ret void -} - -declare dso_local i32 @atexit(void (i8*)*) - -attributes #0 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { argmemonly nounwind willreturn } -attributes #2 = { noinline norecurse optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #4 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #5 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #6 = { noinline noreturn nounwind } -attributes #7 = { noreturn nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #8 = { nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #9 = { nobuiltin nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #10 = { noreturn "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #11 = { nobuiltin "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #12 = { nounwind } -attributes #13 = { noreturn nounwind } -attributes #14 = { nounwind readonly } -attributes #15 = { noreturn } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]} -!1 = !{i32 1, !"wchar_size", i32 4} -!2 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"} diff --git a/examples/nn/nn_cuda.cu b/examples/nn/nn_cuda.cu deleted file mode 100644 index 1749bea..0000000 --- a/examples/nn/nn_cuda.cu +++ /dev/null @@ -1,328 +0,0 @@ -#include "cuda.h" -#include -#include -#include -#include - -#ifdef TIMING -#include "timing.h" - -struct timeval tv; -struct timeval tv_total_start, tv_total_end; -struct timeval tv_h2d_start, tv_h2d_end; -struct timeval tv_d2h_start, tv_d2h_end; -struct timeval tv_kernel_start, tv_kernel_end; -struct timeval tv_mem_alloc_start, tv_mem_alloc_end; -struct timeval tv_close_start, tv_close_end; -float init_time = 0, mem_alloc_time = 0, h2d_time = 0, kernel_time = 0, - d2h_time = 0, close_time = 0, total_time = 0; -#endif - -#define min(a, b) a > b ? b : a -#define ceilDiv(a, b) (a + b - 1) / b -#define print(x) printf(#x ": %lu\n", (unsigned long)x) -#define DEBUG false - -#define DEFAULT_THREADS_PER_BLOCK 256 - -#define MAX_ARGS 10 -#define REC_LENGTH 53 // size of a record in db -#define LATITUDE_POS \ - 28 // character position of the latitude value in each record -#define OPEN 10000 // initial value of nearest neighbors - -typedef struct latLong { - float lat; - float lng; -} LatLong; - -typedef struct record { - char recString[REC_LENGTH]; - float distance; -} Record; - -int loadData(char *filename, std::vector &records, - std::vector &locations); -void findLowest(std::vector &records, float *distances, int numRecords, - int topN); -void printUsage(); -int parseCommandline(int argc, char *argv[], char *filename, int *r, float *lat, - float *lng, int *q, int *t, int *p, int *d); - -/** - * Kernel - * Executed on GPU - * Calculates the Euclidean distance from each record in the database to the - * target position - */ -__global__ void euclid(LatLong *d_locations, float *d_distances, int numRecords, - float lat, float lng) { - // int globalId = gridDim.x * blockDim.x * blockIdx.y + blockDim.x * - // blockIdx.x + threadIdx.x; - int globalId = blockDim.x * (gridDim.x * blockIdx.y + blockIdx.x) + - threadIdx.x; // more efficient - LatLong *latLong = d_locations + globalId; - if (globalId < numRecords) { - float *dist = d_distances + globalId; - *dist = (float)sqrt((lat - latLong->lat) * (lat - latLong->lat) + - (lng - latLong->lng) * (lng - latLong->lng)); - } -} - -/** - * This program finds the k-nearest neighbors - **/ - -int main(int argc, char *argv[]) { - cudaSetDevice(0); - int i = 0; - float lat, lng; - int quiet = 0, timing = 0, platform = 0, device = 0; - - std::vector records; - std::vector locations; - char filename[100]; - int resultsCount = 10; - - // parse command line - if (parseCommandline(argc, argv, filename, &resultsCount, &lat, &lng, &quiet, - &timing, &platform, &device)) { - printUsage(); - return 0; - } - printf("before all\n"); - int numRecords = loadData(filename, records, locations); - if (resultsCount > numRecords) - resultsCount = numRecords; - printf("after before all\n"); - - // Pointers to host memory - float *distances; - // Pointers to device memory - LatLong *d_locations; - float *d_distances; - - // Scaling calculations - added by Sam Kauffman - cudaDeviceProp deviceProp; - cudaGetDeviceProperties(&deviceProp, 0); - cudaDeviceSynchronize(); - unsigned long maxGridX = deviceProp.maxGridSize[0]; - unsigned long threadsPerBlock = 256; - size_t totalDeviceMemory; - size_t freeDeviceMemory; - unsigned long blocks = - ceilDiv(numRecords, threadsPerBlock); // extra threads will do nothing - unsigned long gridY = ceilDiv(blocks, maxGridX); - unsigned long gridX = ceilDiv(blocks, gridY); - // There will be no more than (gridY - 1) extra blocks - dim3 gridDim(gridX, gridY); - - /** - * Allocate memory on host and device - */ - distances = (float *)malloc(sizeof(float) * numRecords); - cudaMalloc((void **)&d_locations, sizeof(LatLong) * numRecords); - cudaMalloc((void **)&d_distances, sizeof(float) * numRecords); - - /** - * Transfer data from host to device - */ - cudaMemcpy(d_locations, &locations[0], sizeof(LatLong) * numRecords, - cudaMemcpyHostToDevice); - - /** - * Execute kernel - */ - printf("before call\n"); - euclid<<>>(d_locations, d_distances, numRecords, - lat, lng); - cudaDeviceSynchronize(); - printf("after call\n"); - // Copy data from device memory to host memory - cudaMemcpy(distances, d_distances, sizeof(float) * numRecords, - cudaMemcpyDeviceToHost); - - // find the resultsCount least distances - printf("before find\n"); - findLowest(records, distances, numRecords, resultsCount); - printf("after find\n"); - // print out results - if (!quiet) - for (i = 0; i < resultsCount; i++) { - printf("%s --> Distance=%f\n", records[i].recString, records[i].distance); - } - free(distances); - // Free memory - cudaFree(d_locations); - cudaFree(d_distances); - -#ifdef TIMING - printf("Exec: %f\n", kernel_time); -#endif -} - -int loadData(char *filename, std::vector &records, - std::vector &locations) { - FILE *flist, *fp; - int i = 0; - char dbname[64]; - int recNum = 0; - - /**Main processing **/ - - flist = fopen(filename, "r"); - while (!feof(flist)) { - /** - * Read in all records of length REC_LENGTH - * If this is the last file in the filelist, then done - * else open next file to be read next iteration - */ - if (fscanf(flist, "%s\n", dbname) != 1) { - fprintf(stderr, "error reading filelist\n"); - exit(0); - } - fp = fopen(dbname, "r"); - if (!fp) { - printf("error opening a db\n"); - exit(1); - } - // read each record - while (!feof(fp)) { - Record record; - LatLong latLong; - fgets(record.recString, 49, fp); - fgetc(fp); // newline - if (feof(fp)) - break; - - // parse for lat and long - char substr[6]; - - for (i = 0; i < 5; i++) - substr[i] = *(record.recString + i + 28); - substr[5] = '\0'; - latLong.lat = atof(substr); - - for (i = 0; i < 5; i++) - substr[i] = *(record.recString + i + 33); - substr[5] = '\0'; - latLong.lng = atof(substr); - - locations.push_back(latLong); - records.push_back(record); - recNum++; - } - fclose(fp); - } - fclose(flist); - // for(i=0;i &records, float *distances, int numRecords, - int topN) { - int i, j; - float val; - int minLoc; - Record *tempRec; - float tempDist; - - for (i = 0; i < topN; i++) { - minLoc = i; - for (j = i; j < numRecords; j++) { - val = distances[j]; - if (val < distances[minLoc]) - minLoc = j; - } - // swap locations and distances - tempRec = &records[i]; - records[i] = records[minLoc]; - records[minLoc] = *tempRec; - - tempDist = distances[i]; - distances[i] = distances[minLoc]; - distances[minLoc] = tempDist; - - // add distance to the min we just found - records[i].distance = distances[i]; - } -} - -int parseCommandline(int argc, char *argv[], char *filename, int *r, float *lat, - float *lng, int *q, int *t, int *p, int *d) { - int i; - if (argc < 2) - return 1; // error - strncpy(filename, argv[1], 100); - char flag; - - for (i = 1; i < argc; i++) { - if (argv[i][0] == '-') { // flag - flag = argv[i][1]; - switch (flag) { - case 'r': // number of results - i++; - *r = atoi(argv[i]); - break; - case 'l': // lat or lng - if (argv[i][2] == 'a') { // lat - *lat = atof(argv[i + 1]); - } else { // lng - *lng = atof(argv[i + 1]); - } - i++; - break; - case 'h': // help - return 1; - case 'q': // quiet - *q = 1; - break; - case 't': // timing - *t = 1; - break; - case 'p': // platform - i++; - *p = atoi(argv[i]); - break; - case 'd': // device - i++; - *d = atoi(argv[i]); - break; - } - } - } - if ((*d >= 0 && *p < 0) || - (*p >= 0 && - *d < 0)) // both p and d must be specified if either are specified - return 1; - return 0; -} - -void printUsage() { - printf("Nearest Neighbor Usage\n"); - printf("\n"); - printf("nearestNeighbor [filename] -r [int] -lat [float] -lng [float] [-hqt] " - "[-p [int] -d [int]]\n"); - printf("\n"); - printf("example:\n"); - printf("$ ./nearestNeighbor filelist.txt -r 5 -lat 30 -lng 90\n"); - printf("\n"); - printf("filename the filename that lists the data input files\n"); - printf("-r [int] the number of records to return (default: 10)\n"); - printf("-lat [float] the latitude for nearest neighbors (default: 0)\n"); - printf("-lng [float] the longitude for nearest neighbors (default: 0)\n"); - printf("\n"); - printf("-h, --help Display the help file\n"); - printf("-q Quiet mode. Suppress all text output.\n"); - printf("-t Print timing information.\n"); - printf("\n"); - printf("-p [int] Choose the platform (must choose both platform and " - "device)\n"); - printf("-d [int] Choose the device (must choose both platform and " - "device)\n"); - printf("\n"); - printf("\n"); - printf("Notes: 1. The filename is required as the first parameter.\n"); - printf(" 2. If you declare either the device or the platform,\n"); - printf(" you must declare both.\n\n"); -} diff --git a/examples/nn/run.sh b/examples/nn/run.sh deleted file mode 100644 index d99021e..0000000 --- a/examples/nn/run.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -set -e -llvm-as nn_cuda-cuda-nvptx64-nvidia-cuda-sm_61.ll -llvm-as nn_cuda-host-x86_64-unknown-linux-gnu.ll -../../build/compilation/kernelTranslator nn_cuda-cuda-nvptx64-nvidia-cuda-sm_61.bc kernel.bc -../../build/compilation/hostTranslator nn_cuda-host-x86_64-unknown-linux-gnu.bc host.bc - -llc --relocation-model=pic --filetype=obj kernel.bc -llc --relocation-model=pic --filetype=obj host.bc - -g++ -Wall -L../../build/runtime -L../../build/runtime/threadPool \ - -o nn -fPIC -no-pie host.o kernel.o -lc -lx86Runtime -lthreadPool -lpthread - -export LD_LIBRARY_PATH=../../build/runtime:../../build/runtime/threadPool:$LD_LIBRARY_PATH -./nn filelist_4 -r 3 -lat 30 -lng 90 >> res.log -if grep -q "1988 12 27 0 18 TONY 30.0 89.8 113 39 --> Distance=0.199997" res.log; then - echo "Pass" -else - echo "Error result" - exit 1 -fi diff --git a/examples/nw/needle-cuda-nvptx64-nvidia-cuda-sm_61.ll b/examples/nw/needle-cuda-nvptx64-nvidia-cuda-sm_61.ll deleted file mode 100644 index 956112e..0000000 --- a/examples/nw/needle-cuda-nvptx64-nvidia-cuda-sm_61.ll +++ /dev/null @@ -1,923 +0,0 @@ -; ModuleID = 'needle-cuda-nvptx64-nvidia-cuda-sm_61.bc' -source_filename = "needle.cu" -target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" -target triple = "nvptx64-nvidia-cuda" - -%struct.__cuda_builtin_blockIdx_t = type { i8 } -%struct.__cuda_builtin_threadIdx_t = type { i8 } -%struct.cudaFuncAttributes = type { i64, i64, i64, i32, i32, i32, i32, i32, i32, i32 } - -$_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv = comdat any - -$_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv = comdat any - -@blockIdx = extern_weak dso_local addrspace(1) global %struct.__cuda_builtin_blockIdx_t, align 1 -@threadIdx = extern_weak dso_local addrspace(1) global %struct.__cuda_builtin_threadIdx_t, align 1 -@_ZZ20needle_cuda_shared_1PiS_iiiiE4temp = internal addrspace(3) global [17 x [17 x i32]] undef, align 4 -@_ZZ20needle_cuda_shared_1PiS_iiiiE3ref = internal addrspace(3) global [16 x [16 x i32]] undef, align 4 -@_ZZ20needle_cuda_shared_2PiS_iiiiE4temp = internal addrspace(3) global [17 x [17 x i32]] undef, align 4 -@_ZZ20needle_cuda_shared_2PiS_iiiiE3ref = internal addrspace(3) global [16 x [16 x i32]] undef, align 4 - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaMalloc(i8** %p, i64 %s) #0 { -entry: - %p.addr = alloca i8**, align 8 - %s.addr = alloca i64, align 8 - store i8** %p, i8*** %p.addr, align 8 - store i64 %s, i64* %s.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaFuncGetAttributes(%struct.cudaFuncAttributes* %p, i8* %c) #0 { -entry: - %p.addr = alloca %struct.cudaFuncAttributes*, align 8 - %c.addr = alloca i8*, align 8 - store %struct.cudaFuncAttributes* %p, %struct.cudaFuncAttributes** %p.addr, align 8 - store i8* %c, i8** %c.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaDeviceGetAttribute(i32* %value, i32 %attr, i32 %device) #0 { -entry: - %value.addr = alloca i32*, align 8 - %attr.addr = alloca i32, align 4 - %device.addr = alloca i32, align 4 - store i32* %value, i32** %value.addr, align 8 - store i32 %attr, i32* %attr.addr, align 4 - store i32 %device, i32* %device.addr, align 4 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaGetDevice(i32* %device) #0 { -entry: - %device.addr = alloca i32*, align 8 - store i32* %device, i32** %device.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaOccupancyMaxActiveBlocksPerMultiprocessor(i32* %numBlocks, i8* %func, i32 %blockSize, i64 %dynamicSmemSize) #0 { -entry: - %numBlocks.addr = alloca i32*, align 8 - %func.addr = alloca i8*, align 8 - %blockSize.addr = alloca i32, align 4 - %dynamicSmemSize.addr = alloca i64, align 8 - store i32* %numBlocks, i32** %numBlocks.addr, align 8 - store i8* %func, i8** %func.addr, align 8 - store i32 %blockSize, i32* %blockSize.addr, align 4 - store i64 %dynamicSmemSize, i64* %dynamicSmemSize.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(i32* %numBlocks, i8* %func, i32 %blockSize, i64 %dynamicSmemSize, i32 %flags) #0 { -entry: - %numBlocks.addr = alloca i32*, align 8 - %func.addr = alloca i8*, align 8 - %blockSize.addr = alloca i32, align 4 - %dynamicSmemSize.addr = alloca i64, align 8 - %flags.addr = alloca i32, align 4 - store i32* %numBlocks, i32** %numBlocks.addr, align 8 - store i8* %func, i8** %func.addr, align 8 - store i32 %blockSize, i32* %blockSize.addr, align 4 - store i64 %dynamicSmemSize, i64* %dynamicSmemSize.addr, align 8 - store i32 %flags, i32* %flags.addr, align 4 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define dso_local i32 @_Z14maximum_deviceiii(i32 %a, i32 %b, i32 %c) #0 { -entry: - %retval = alloca i32, align 4 - %a.addr = alloca i32, align 4 - %b.addr = alloca i32, align 4 - %c.addr = alloca i32, align 4 - %k = alloca i32, align 4 - store i32 %a, i32* %a.addr, align 4 - store i32 %b, i32* %b.addr, align 4 - store i32 %c, i32* %c.addr, align 4 - %0 = load i32, i32* %a.addr, align 4 - %1 = load i32, i32* %b.addr, align 4 - %cmp = icmp sle i32 %0, %1 - br i1 %cmp, label %if.then, label %if.else - -if.then: ; preds = %entry - %2 = load i32, i32* %b.addr, align 4 - store i32 %2, i32* %k, align 4 - br label %if.end - -if.else: ; preds = %entry - %3 = load i32, i32* %a.addr, align 4 - store i32 %3, i32* %k, align 4 - br label %if.end - -if.end: ; preds = %if.else, %if.then - %4 = load i32, i32* %k, align 4 - %5 = load i32, i32* %c.addr, align 4 - %cmp1 = icmp sle i32 %4, %5 - br i1 %cmp1, label %if.then2, label %if.else3 - -if.then2: ; preds = %if.end - %6 = load i32, i32* %c.addr, align 4 - store i32 %6, i32* %retval, align 4 - br label %return - -if.else3: ; preds = %if.end - %7 = load i32, i32* %k, align 4 - store i32 %7, i32* %retval, align 4 - br label %return - -return: ; preds = %if.else3, %if.then2 - %8 = load i32, i32* %retval, align 4 - ret i32 %8 -} - -; Function Attrs: convergent noinline nounwind optnone -define dso_local void @_Z20needle_cuda_shared_1PiS_iiii(i32* %referrence, i32* %matrix_cuda, i32 %cols, i32 %penalty, i32 %i, i32 %block_width) #0 { -entry: - %referrence.addr = alloca i32*, align 8 - %matrix_cuda.addr = alloca i32*, align 8 - %cols.addr = alloca i32, align 4 - %penalty.addr = alloca i32, align 4 - %i.addr = alloca i32, align 4 - %block_width.addr = alloca i32, align 4 - %bx = alloca i32, align 4 - %tx = alloca i32, align 4 - %b_index_x = alloca i32, align 4 - %b_index_y = alloca i32, align 4 - %index = alloca i32, align 4 - %index_n = alloca i32, align 4 - %index_w = alloca i32, align 4 - %index_nw = alloca i32, align 4 - %ty = alloca i32, align 4 - %m = alloca i32, align 4 - %t_index_x = alloca i32, align 4 - %t_index_y = alloca i32, align 4 - %m90 = alloca i32, align 4 - %t_index_x96 = alloca i32, align 4 - %t_index_y99 = alloca i32, align 4 - %ty134 = alloca i32, align 4 - store i32* %referrence, i32** %referrence.addr, align 8 - store i32* %matrix_cuda, i32** %matrix_cuda.addr, align 8 - store i32 %cols, i32* %cols.addr, align 4 - store i32 %penalty, i32* %penalty.addr, align 4 - store i32 %i, i32* %i.addr, align 4 - store i32 %block_width, i32* %block_width.addr, align 4 - %call = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #2 - store i32 %call, i32* %bx, align 4 - %call1 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - store i32 %call1, i32* %tx, align 4 - %0 = load i32, i32* %bx, align 4 - store i32 %0, i32* %b_index_x, align 4 - %1 = load i32, i32* %i.addr, align 4 - %sub = sub nsw i32 %1, 1 - %2 = load i32, i32* %bx, align 4 - %sub2 = sub nsw i32 %sub, %2 - store i32 %sub2, i32* %b_index_y, align 4 - %3 = load i32, i32* %cols.addr, align 4 - %mul = mul nsw i32 %3, 16 - %4 = load i32, i32* %b_index_y, align 4 - %mul3 = mul nsw i32 %mul, %4 - %5 = load i32, i32* %b_index_x, align 4 - %mul4 = mul nsw i32 16, %5 - %add = add nsw i32 %mul3, %mul4 - %6 = load i32, i32* %tx, align 4 - %add5 = add nsw i32 %add, %6 - %7 = load i32, i32* %cols.addr, align 4 - %add6 = add nsw i32 %7, 1 - %add7 = add nsw i32 %add5, %add6 - store i32 %add7, i32* %index, align 4 - %8 = load i32, i32* %cols.addr, align 4 - %mul8 = mul nsw i32 %8, 16 - %9 = load i32, i32* %b_index_y, align 4 - %mul9 = mul nsw i32 %mul8, %9 - %10 = load i32, i32* %b_index_x, align 4 - %mul10 = mul nsw i32 16, %10 - %add11 = add nsw i32 %mul9, %mul10 - %11 = load i32, i32* %tx, align 4 - %add12 = add nsw i32 %add11, %11 - %add13 = add nsw i32 %add12, 1 - store i32 %add13, i32* %index_n, align 4 - %12 = load i32, i32* %cols.addr, align 4 - %mul14 = mul nsw i32 %12, 16 - %13 = load i32, i32* %b_index_y, align 4 - %mul15 = mul nsw i32 %mul14, %13 - %14 = load i32, i32* %b_index_x, align 4 - %mul16 = mul nsw i32 16, %14 - %add17 = add nsw i32 %mul15, %mul16 - %15 = load i32, i32* %cols.addr, align 4 - %add18 = add nsw i32 %add17, %15 - store i32 %add18, i32* %index_w, align 4 - %16 = load i32, i32* %cols.addr, align 4 - %mul19 = mul nsw i32 %16, 16 - %17 = load i32, i32* %b_index_y, align 4 - %mul20 = mul nsw i32 %mul19, %17 - %18 = load i32, i32* %b_index_x, align 4 - %mul21 = mul nsw i32 16, %18 - %add22 = add nsw i32 %mul20, %mul21 - store i32 %add22, i32* %index_nw, align 4 - %19 = load i32, i32* %tx, align 4 - %cmp = icmp eq i32 %19, 0 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - %20 = load i32*, i32** %matrix_cuda.addr, align 8 - %21 = load i32, i32* %index_nw, align 4 - %idxprom = sext i32 %21 to i64 - %arrayidx = getelementptr inbounds i32, i32* %20, i64 %idxprom - %22 = load i32, i32* %arrayidx, align 4 - %23 = load i32, i32* %tx, align 4 - %idxprom23 = sext i32 %23 to i64 - %arrayidx24 = getelementptr inbounds [17 x [17 x i32]], [17 x [17 x i32]]* addrspacecast ([17 x [17 x i32]] addrspace(3)* @_ZZ20needle_cuda_shared_1PiS_iiiiE4temp to [17 x [17 x i32]]*), i64 0, i64 %idxprom23 - %arrayidx25 = getelementptr inbounds [17 x i32], [17 x i32]* %arrayidx24, i64 0, i64 0 - store i32 %22, i32* %arrayidx25, align 4 - br label %if.end - -if.end: ; preds = %if.then, %entry - store i32 0, i32* %ty, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %if.end - %24 = load i32, i32* %ty, align 4 - %cmp26 = icmp slt i32 %24, 16 - br i1 %cmp26, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %25 = load i32*, i32** %referrence.addr, align 8 - %26 = load i32, i32* %index, align 4 - %27 = load i32, i32* %cols.addr, align 4 - %28 = load i32, i32* %ty, align 4 - %mul27 = mul nsw i32 %27, %28 - %add28 = add nsw i32 %26, %mul27 - %idxprom29 = sext i32 %add28 to i64 - %arrayidx30 = getelementptr inbounds i32, i32* %25, i64 %idxprom29 - %29 = load i32, i32* %arrayidx30, align 4 - %30 = load i32, i32* %ty, align 4 - %idxprom31 = sext i32 %30 to i64 - %arrayidx32 = getelementptr inbounds [16 x [16 x i32]], [16 x [16 x i32]]* addrspacecast ([16 x [16 x i32]] addrspace(3)* @_ZZ20needle_cuda_shared_1PiS_iiiiE3ref to [16 x [16 x i32]]*), i64 0, i64 %idxprom31 - %31 = load i32, i32* %tx, align 4 - %idxprom33 = sext i32 %31 to i64 - %arrayidx34 = getelementptr inbounds [16 x i32], [16 x i32]* %arrayidx32, i64 0, i64 %idxprom33 - store i32 %29, i32* %arrayidx34, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %32 = load i32, i32* %ty, align 4 - %inc = add nsw i32 %32, 1 - store i32 %inc, i32* %ty, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - call void @llvm.nvvm.barrier0() - %33 = load i32*, i32** %matrix_cuda.addr, align 8 - %34 = load i32, i32* %index_w, align 4 - %35 = load i32, i32* %cols.addr, align 4 - %36 = load i32, i32* %tx, align 4 - %mul35 = mul nsw i32 %35, %36 - %add36 = add nsw i32 %34, %mul35 - %idxprom37 = sext i32 %add36 to i64 - %arrayidx38 = getelementptr inbounds i32, i32* %33, i64 %idxprom37 - %37 = load i32, i32* %arrayidx38, align 4 - %38 = load i32, i32* %tx, align 4 - %add39 = add nsw i32 %38, 1 - %idxprom40 = sext i32 %add39 to i64 - %arrayidx41 = getelementptr inbounds [17 x [17 x i32]], [17 x [17 x i32]]* addrspacecast ([17 x [17 x i32]] addrspace(3)* @_ZZ20needle_cuda_shared_1PiS_iiiiE4temp to [17 x [17 x i32]]*), i64 0, i64 %idxprom40 - %arrayidx42 = getelementptr inbounds [17 x i32], [17 x i32]* %arrayidx41, i64 0, i64 0 - store i32 %37, i32* %arrayidx42, align 4 - call void @llvm.nvvm.barrier0() - %39 = load i32*, i32** %matrix_cuda.addr, align 8 - %40 = load i32, i32* %index_n, align 4 - %idxprom43 = sext i32 %40 to i64 - %arrayidx44 = getelementptr inbounds i32, i32* %39, i64 %idxprom43 - %41 = load i32, i32* %arrayidx44, align 4 - %42 = load i32, i32* %tx, align 4 - %add45 = add nsw i32 %42, 1 - %idxprom46 = sext i32 %add45 to i64 - %arrayidx47 = getelementptr inbounds [17 x i32], [17 x i32]* getelementptr inbounds ([17 x [17 x i32]], [17 x [17 x i32]]* addrspacecast ([17 x [17 x i32]] addrspace(3)* @_ZZ20needle_cuda_shared_1PiS_iiiiE4temp to [17 x [17 x i32]]*), i64 0, i64 0), i64 0, i64 %idxprom46 - store i32 %41, i32* %arrayidx47, align 4 - call void @llvm.nvvm.barrier0() - store i32 0, i32* %m, align 4 - br label %for.cond48 - -for.cond48: ; preds = %for.inc87, %for.end - %43 = load i32, i32* %m, align 4 - %cmp49 = icmp slt i32 %43, 16 - br i1 %cmp49, label %for.body50, label %for.end89 - -for.body50: ; preds = %for.cond48 - %44 = load i32, i32* %tx, align 4 - %45 = load i32, i32* %m, align 4 - %cmp51 = icmp sle i32 %44, %45 - br i1 %cmp51, label %if.then52, label %if.end86 - -if.then52: ; preds = %for.body50 - %46 = load i32, i32* %tx, align 4 - %add53 = add nsw i32 %46, 1 - store i32 %add53, i32* %t_index_x, align 4 - %47 = load i32, i32* %m, align 4 - %48 = load i32, i32* %tx, align 4 - %sub54 = sub nsw i32 %47, %48 - %add55 = add nsw i32 %sub54, 1 - store i32 %add55, i32* %t_index_y, align 4 - %49 = load i32, i32* %t_index_y, align 4 - %sub56 = sub nsw i32 %49, 1 - %idxprom57 = sext i32 %sub56 to i64 - %arrayidx58 = getelementptr inbounds [17 x [17 x i32]], [17 x [17 x i32]]* addrspacecast ([17 x [17 x i32]] addrspace(3)* @_ZZ20needle_cuda_shared_1PiS_iiiiE4temp to [17 x [17 x i32]]*), i64 0, i64 %idxprom57 - %50 = load i32, i32* %t_index_x, align 4 - %sub59 = sub nsw i32 %50, 1 - %idxprom60 = sext i32 %sub59 to i64 - %arrayidx61 = getelementptr inbounds [17 x i32], [17 x i32]* %arrayidx58, i64 0, i64 %idxprom60 - %51 = load i32, i32* %arrayidx61, align 4 - %52 = load i32, i32* %t_index_y, align 4 - %sub62 = sub nsw i32 %52, 1 - %idxprom63 = sext i32 %sub62 to i64 - %arrayidx64 = getelementptr inbounds [16 x [16 x i32]], [16 x [16 x i32]]* addrspacecast ([16 x [16 x i32]] addrspace(3)* @_ZZ20needle_cuda_shared_1PiS_iiiiE3ref to [16 x [16 x i32]]*), i64 0, i64 %idxprom63 - %53 = load i32, i32* %t_index_x, align 4 - %sub65 = sub nsw i32 %53, 1 - %idxprom66 = sext i32 %sub65 to i64 - %arrayidx67 = getelementptr inbounds [16 x i32], [16 x i32]* %arrayidx64, i64 0, i64 %idxprom66 - %54 = load i32, i32* %arrayidx67, align 4 - %add68 = add nsw i32 %51, %54 - %55 = load i32, i32* %t_index_y, align 4 - %idxprom69 = sext i32 %55 to i64 - %arrayidx70 = getelementptr inbounds [17 x [17 x i32]], [17 x [17 x i32]]* addrspacecast ([17 x [17 x i32]] addrspace(3)* @_ZZ20needle_cuda_shared_1PiS_iiiiE4temp to [17 x [17 x i32]]*), i64 0, i64 %idxprom69 - %56 = load i32, i32* %t_index_x, align 4 - %sub71 = sub nsw i32 %56, 1 - %idxprom72 = sext i32 %sub71 to i64 - %arrayidx73 = getelementptr inbounds [17 x i32], [17 x i32]* %arrayidx70, i64 0, i64 %idxprom72 - %57 = load i32, i32* %arrayidx73, align 4 - %58 = load i32, i32* %penalty.addr, align 4 - %sub74 = sub nsw i32 %57, %58 - %59 = load i32, i32* %t_index_y, align 4 - %sub75 = sub nsw i32 %59, 1 - %idxprom76 = sext i32 %sub75 to i64 - %arrayidx77 = getelementptr inbounds [17 x [17 x i32]], [17 x [17 x i32]]* addrspacecast ([17 x [17 x i32]] addrspace(3)* @_ZZ20needle_cuda_shared_1PiS_iiiiE4temp to [17 x [17 x i32]]*), i64 0, i64 %idxprom76 - %60 = load i32, i32* %t_index_x, align 4 - %idxprom78 = sext i32 %60 to i64 - %arrayidx79 = getelementptr inbounds [17 x i32], [17 x i32]* %arrayidx77, i64 0, i64 %idxprom78 - %61 = load i32, i32* %arrayidx79, align 4 - %62 = load i32, i32* %penalty.addr, align 4 - %sub80 = sub nsw i32 %61, %62 - %call81 = call i32 @_Z14maximum_deviceiii(i32 %add68, i32 %sub74, i32 %sub80) #2 - %63 = load i32, i32* %t_index_y, align 4 - %idxprom82 = sext i32 %63 to i64 - %arrayidx83 = getelementptr inbounds [17 x [17 x i32]], [17 x [17 x i32]]* addrspacecast ([17 x [17 x i32]] addrspace(3)* @_ZZ20needle_cuda_shared_1PiS_iiiiE4temp to [17 x [17 x i32]]*), i64 0, i64 %idxprom82 - %64 = load i32, i32* %t_index_x, align 4 - %idxprom84 = sext i32 %64 to i64 - %arrayidx85 = getelementptr inbounds [17 x i32], [17 x i32]* %arrayidx83, i64 0, i64 %idxprom84 - store i32 %call81, i32* %arrayidx85, align 4 - br label %if.end86 - -if.end86: ; preds = %if.then52, %for.body50 - call void @llvm.nvvm.barrier0() - br label %for.inc87 - -for.inc87: ; preds = %if.end86 - %65 = load i32, i32* %m, align 4 - %inc88 = add nsw i32 %65, 1 - store i32 %inc88, i32* %m, align 4 - br label %for.cond48 - -for.end89: ; preds = %for.cond48 - store i32 14, i32* %m90, align 4 - br label %for.cond91 - -for.cond91: ; preds = %for.inc132, %for.end89 - %66 = load i32, i32* %m90, align 4 - %cmp92 = icmp sge i32 %66, 0 - br i1 %cmp92, label %for.body93, label %for.end133 - -for.body93: ; preds = %for.cond91 - %67 = load i32, i32* %tx, align 4 - %68 = load i32, i32* %m90, align 4 - %cmp94 = icmp sle i32 %67, %68 - br i1 %cmp94, label %if.then95, label %if.end131 - -if.then95: ; preds = %for.body93 - %69 = load i32, i32* %tx, align 4 - %add97 = add nsw i32 %69, 16 - %70 = load i32, i32* %m90, align 4 - %sub98 = sub nsw i32 %add97, %70 - store i32 %sub98, i32* %t_index_x96, align 4 - %71 = load i32, i32* %tx, align 4 - %sub100 = sub nsw i32 16, %71 - store i32 %sub100, i32* %t_index_y99, align 4 - %72 = load i32, i32* %t_index_y99, align 4 - %sub101 = sub nsw i32 %72, 1 - %idxprom102 = sext i32 %sub101 to i64 - %arrayidx103 = getelementptr inbounds [17 x [17 x i32]], [17 x [17 x i32]]* addrspacecast ([17 x [17 x i32]] addrspace(3)* @_ZZ20needle_cuda_shared_1PiS_iiiiE4temp to [17 x [17 x i32]]*), i64 0, i64 %idxprom102 - %73 = load i32, i32* %t_index_x96, align 4 - %sub104 = sub nsw i32 %73, 1 - %idxprom105 = sext i32 %sub104 to i64 - %arrayidx106 = getelementptr inbounds [17 x i32], [17 x i32]* %arrayidx103, i64 0, i64 %idxprom105 - %74 = load i32, i32* %arrayidx106, align 4 - %75 = load i32, i32* %t_index_y99, align 4 - %sub107 = sub nsw i32 %75, 1 - %idxprom108 = sext i32 %sub107 to i64 - %arrayidx109 = getelementptr inbounds [16 x [16 x i32]], [16 x [16 x i32]]* addrspacecast ([16 x [16 x i32]] addrspace(3)* @_ZZ20needle_cuda_shared_1PiS_iiiiE3ref to [16 x [16 x i32]]*), i64 0, i64 %idxprom108 - %76 = load i32, i32* %t_index_x96, align 4 - %sub110 = sub nsw i32 %76, 1 - %idxprom111 = sext i32 %sub110 to i64 - %arrayidx112 = getelementptr inbounds [16 x i32], [16 x i32]* %arrayidx109, i64 0, i64 %idxprom111 - %77 = load i32, i32* %arrayidx112, align 4 - %add113 = add nsw i32 %74, %77 - %78 = load i32, i32* %t_index_y99, align 4 - %idxprom114 = sext i32 %78 to i64 - %arrayidx115 = getelementptr inbounds [17 x [17 x i32]], [17 x [17 x i32]]* addrspacecast ([17 x [17 x i32]] addrspace(3)* @_ZZ20needle_cuda_shared_1PiS_iiiiE4temp to [17 x [17 x i32]]*), i64 0, i64 %idxprom114 - %79 = load i32, i32* %t_index_x96, align 4 - %sub116 = sub nsw i32 %79, 1 - %idxprom117 = sext i32 %sub116 to i64 - %arrayidx118 = getelementptr inbounds [17 x i32], [17 x i32]* %arrayidx115, i64 0, i64 %idxprom117 - %80 = load i32, i32* %arrayidx118, align 4 - %81 = load i32, i32* %penalty.addr, align 4 - %sub119 = sub nsw i32 %80, %81 - %82 = load i32, i32* %t_index_y99, align 4 - %sub120 = sub nsw i32 %82, 1 - %idxprom121 = sext i32 %sub120 to i64 - %arrayidx122 = getelementptr inbounds [17 x [17 x i32]], [17 x [17 x i32]]* addrspacecast ([17 x [17 x i32]] addrspace(3)* @_ZZ20needle_cuda_shared_1PiS_iiiiE4temp to [17 x [17 x i32]]*), i64 0, i64 %idxprom121 - %83 = load i32, i32* %t_index_x96, align 4 - %idxprom123 = sext i32 %83 to i64 - %arrayidx124 = getelementptr inbounds [17 x i32], [17 x i32]* %arrayidx122, i64 0, i64 %idxprom123 - %84 = load i32, i32* %arrayidx124, align 4 - %85 = load i32, i32* %penalty.addr, align 4 - %sub125 = sub nsw i32 %84, %85 - %call126 = call i32 @_Z14maximum_deviceiii(i32 %add113, i32 %sub119, i32 %sub125) #2 - %86 = load i32, i32* %t_index_y99, align 4 - %idxprom127 = sext i32 %86 to i64 - %arrayidx128 = getelementptr inbounds [17 x [17 x i32]], [17 x [17 x i32]]* addrspacecast ([17 x [17 x i32]] addrspace(3)* @_ZZ20needle_cuda_shared_1PiS_iiiiE4temp to [17 x [17 x i32]]*), i64 0, i64 %idxprom127 - %87 = load i32, i32* %t_index_x96, align 4 - %idxprom129 = sext i32 %87 to i64 - %arrayidx130 = getelementptr inbounds [17 x i32], [17 x i32]* %arrayidx128, i64 0, i64 %idxprom129 - store i32 %call126, i32* %arrayidx130, align 4 - br label %if.end131 - -if.end131: ; preds = %if.then95, %for.body93 - call void @llvm.nvvm.barrier0() - br label %for.inc132 - -for.inc132: ; preds = %if.end131 - %88 = load i32, i32* %m90, align 4 - %dec = add nsw i32 %88, -1 - store i32 %dec, i32* %m90, align 4 - br label %for.cond91 - -for.end133: ; preds = %for.cond91 - store i32 0, i32* %ty134, align 4 - br label %for.cond135 - -for.cond135: ; preds = %for.inc148, %for.end133 - %89 = load i32, i32* %ty134, align 4 - %cmp136 = icmp slt i32 %89, 16 - br i1 %cmp136, label %for.body137, label %for.end150 - -for.body137: ; preds = %for.cond135 - %90 = load i32, i32* %ty134, align 4 - %add138 = add nsw i32 %90, 1 - %idxprom139 = sext i32 %add138 to i64 - %arrayidx140 = getelementptr inbounds [17 x [17 x i32]], [17 x [17 x i32]]* addrspacecast ([17 x [17 x i32]] addrspace(3)* @_ZZ20needle_cuda_shared_1PiS_iiiiE4temp to [17 x [17 x i32]]*), i64 0, i64 %idxprom139 - %91 = load i32, i32* %tx, align 4 - %add141 = add nsw i32 %91, 1 - %idxprom142 = sext i32 %add141 to i64 - %arrayidx143 = getelementptr inbounds [17 x i32], [17 x i32]* %arrayidx140, i64 0, i64 %idxprom142 - %92 = load i32, i32* %arrayidx143, align 4 - %93 = load i32*, i32** %matrix_cuda.addr, align 8 - %94 = load i32, i32* %index, align 4 - %95 = load i32, i32* %ty134, align 4 - %96 = load i32, i32* %cols.addr, align 4 - %mul144 = mul nsw i32 %95, %96 - %add145 = add nsw i32 %94, %mul144 - %idxprom146 = sext i32 %add145 to i64 - %arrayidx147 = getelementptr inbounds i32, i32* %93, i64 %idxprom146 - store i32 %92, i32* %arrayidx147, align 4 - br label %for.inc148 - -for.inc148: ; preds = %for.body137 - %97 = load i32, i32* %ty134, align 4 - %inc149 = add nsw i32 %97, 1 - store i32 %inc149, i32* %ty134, align 4 - br label %for.cond135 - -for.end150: ; preds = %for.cond135 - ret void -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() - ret i32 %0 -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() - ret i32 %0 -} - -; Function Attrs: convergent nounwind -declare void @llvm.nvvm.barrier0() #2 - -; Function Attrs: convergent noinline nounwind optnone -define dso_local void @_Z20needle_cuda_shared_2PiS_iiii(i32* %referrence, i32* %matrix_cuda, i32 %cols, i32 %penalty, i32 %i, i32 %block_width) #0 { -entry: - %referrence.addr = alloca i32*, align 8 - %matrix_cuda.addr = alloca i32*, align 8 - %cols.addr = alloca i32, align 4 - %penalty.addr = alloca i32, align 4 - %i.addr = alloca i32, align 4 - %block_width.addr = alloca i32, align 4 - %bx = alloca i32, align 4 - %tx = alloca i32, align 4 - %b_index_x = alloca i32, align 4 - %b_index_y = alloca i32, align 4 - %index = alloca i32, align 4 - %index_n = alloca i32, align 4 - %index_w = alloca i32, align 4 - %index_nw = alloca i32, align 4 - %ty = alloca i32, align 4 - %m = alloca i32, align 4 - %t_index_x = alloca i32, align 4 - %t_index_y = alloca i32, align 4 - %m92 = alloca i32, align 4 - %t_index_x98 = alloca i32, align 4 - %t_index_y101 = alloca i32, align 4 - %ty136 = alloca i32, align 4 - store i32* %referrence, i32** %referrence.addr, align 8 - store i32* %matrix_cuda, i32** %matrix_cuda.addr, align 8 - store i32 %cols, i32* %cols.addr, align 4 - store i32 %penalty, i32* %penalty.addr, align 4 - store i32 %i, i32* %i.addr, align 4 - store i32 %block_width, i32* %block_width.addr, align 4 - %call = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #2 - store i32 %call, i32* %bx, align 4 - %call1 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - store i32 %call1, i32* %tx, align 4 - %0 = load i32, i32* %bx, align 4 - %1 = load i32, i32* %block_width.addr, align 4 - %add = add nsw i32 %0, %1 - %2 = load i32, i32* %i.addr, align 4 - %sub = sub nsw i32 %add, %2 - store i32 %sub, i32* %b_index_x, align 4 - %3 = load i32, i32* %block_width.addr, align 4 - %4 = load i32, i32* %bx, align 4 - %sub2 = sub nsw i32 %3, %4 - %sub3 = sub nsw i32 %sub2, 1 - store i32 %sub3, i32* %b_index_y, align 4 - %5 = load i32, i32* %cols.addr, align 4 - %mul = mul nsw i32 %5, 16 - %6 = load i32, i32* %b_index_y, align 4 - %mul4 = mul nsw i32 %mul, %6 - %7 = load i32, i32* %b_index_x, align 4 - %mul5 = mul nsw i32 16, %7 - %add6 = add nsw i32 %mul4, %mul5 - %8 = load i32, i32* %tx, align 4 - %add7 = add nsw i32 %add6, %8 - %9 = load i32, i32* %cols.addr, align 4 - %add8 = add nsw i32 %9, 1 - %add9 = add nsw i32 %add7, %add8 - store i32 %add9, i32* %index, align 4 - %10 = load i32, i32* %cols.addr, align 4 - %mul10 = mul nsw i32 %10, 16 - %11 = load i32, i32* %b_index_y, align 4 - %mul11 = mul nsw i32 %mul10, %11 - %12 = load i32, i32* %b_index_x, align 4 - %mul12 = mul nsw i32 16, %12 - %add13 = add nsw i32 %mul11, %mul12 - %13 = load i32, i32* %tx, align 4 - %add14 = add nsw i32 %add13, %13 - %add15 = add nsw i32 %add14, 1 - store i32 %add15, i32* %index_n, align 4 - %14 = load i32, i32* %cols.addr, align 4 - %mul16 = mul nsw i32 %14, 16 - %15 = load i32, i32* %b_index_y, align 4 - %mul17 = mul nsw i32 %mul16, %15 - %16 = load i32, i32* %b_index_x, align 4 - %mul18 = mul nsw i32 16, %16 - %add19 = add nsw i32 %mul17, %mul18 - %17 = load i32, i32* %cols.addr, align 4 - %add20 = add nsw i32 %add19, %17 - store i32 %add20, i32* %index_w, align 4 - %18 = load i32, i32* %cols.addr, align 4 - %mul21 = mul nsw i32 %18, 16 - %19 = load i32, i32* %b_index_y, align 4 - %mul22 = mul nsw i32 %mul21, %19 - %20 = load i32, i32* %b_index_x, align 4 - %mul23 = mul nsw i32 16, %20 - %add24 = add nsw i32 %mul22, %mul23 - store i32 %add24, i32* %index_nw, align 4 - store i32 0, i32* %ty, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %21 = load i32, i32* %ty, align 4 - %cmp = icmp slt i32 %21, 16 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %22 = load i32*, i32** %referrence.addr, align 8 - %23 = load i32, i32* %index, align 4 - %24 = load i32, i32* %cols.addr, align 4 - %25 = load i32, i32* %ty, align 4 - %mul25 = mul nsw i32 %24, %25 - %add26 = add nsw i32 %23, %mul25 - %idxprom = sext i32 %add26 to i64 - %arrayidx = getelementptr inbounds i32, i32* %22, i64 %idxprom - %26 = load i32, i32* %arrayidx, align 4 - %27 = load i32, i32* %ty, align 4 - %idxprom27 = sext i32 %27 to i64 - %arrayidx28 = getelementptr inbounds [16 x [16 x i32]], [16 x [16 x i32]]* addrspacecast ([16 x [16 x i32]] addrspace(3)* @_ZZ20needle_cuda_shared_2PiS_iiiiE3ref to [16 x [16 x i32]]*), i64 0, i64 %idxprom27 - %28 = load i32, i32* %tx, align 4 - %idxprom29 = sext i32 %28 to i64 - %arrayidx30 = getelementptr inbounds [16 x i32], [16 x i32]* %arrayidx28, i64 0, i64 %idxprom29 - store i32 %26, i32* %arrayidx30, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %29 = load i32, i32* %ty, align 4 - %inc = add nsw i32 %29, 1 - store i32 %inc, i32* %ty, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - call void @llvm.nvvm.barrier0() - %30 = load i32, i32* %tx, align 4 - %cmp31 = icmp eq i32 %30, 0 - br i1 %cmp31, label %if.then, label %if.end - -if.then: ; preds = %for.end - %31 = load i32*, i32** %matrix_cuda.addr, align 8 - %32 = load i32, i32* %index_nw, align 4 - %idxprom32 = sext i32 %32 to i64 - %arrayidx33 = getelementptr inbounds i32, i32* %31, i64 %idxprom32 - %33 = load i32, i32* %arrayidx33, align 4 - %34 = load i32, i32* %tx, align 4 - %idxprom34 = sext i32 %34 to i64 - %arrayidx35 = getelementptr inbounds [17 x [17 x i32]], [17 x [17 x i32]]* addrspacecast ([17 x [17 x i32]] addrspace(3)* @_ZZ20needle_cuda_shared_2PiS_iiiiE4temp to [17 x [17 x i32]]*), i64 0, i64 %idxprom34 - %arrayidx36 = getelementptr inbounds [17 x i32], [17 x i32]* %arrayidx35, i64 0, i64 0 - store i32 %33, i32* %arrayidx36, align 4 - br label %if.end - -if.end: ; preds = %if.then, %for.end - %35 = load i32*, i32** %matrix_cuda.addr, align 8 - %36 = load i32, i32* %index_w, align 4 - %37 = load i32, i32* %cols.addr, align 4 - %38 = load i32, i32* %tx, align 4 - %mul37 = mul nsw i32 %37, %38 - %add38 = add nsw i32 %36, %mul37 - %idxprom39 = sext i32 %add38 to i64 - %arrayidx40 = getelementptr inbounds i32, i32* %35, i64 %idxprom39 - %39 = load i32, i32* %arrayidx40, align 4 - %40 = load i32, i32* %tx, align 4 - %add41 = add nsw i32 %40, 1 - %idxprom42 = sext i32 %add41 to i64 - %arrayidx43 = getelementptr inbounds [17 x [17 x i32]], [17 x [17 x i32]]* addrspacecast ([17 x [17 x i32]] addrspace(3)* @_ZZ20needle_cuda_shared_2PiS_iiiiE4temp to [17 x [17 x i32]]*), i64 0, i64 %idxprom42 - %arrayidx44 = getelementptr inbounds [17 x i32], [17 x i32]* %arrayidx43, i64 0, i64 0 - store i32 %39, i32* %arrayidx44, align 4 - call void @llvm.nvvm.barrier0() - %41 = load i32*, i32** %matrix_cuda.addr, align 8 - %42 = load i32, i32* %index_n, align 4 - %idxprom45 = sext i32 %42 to i64 - %arrayidx46 = getelementptr inbounds i32, i32* %41, i64 %idxprom45 - %43 = load i32, i32* %arrayidx46, align 4 - %44 = load i32, i32* %tx, align 4 - %add47 = add nsw i32 %44, 1 - %idxprom48 = sext i32 %add47 to i64 - %arrayidx49 = getelementptr inbounds [17 x i32], [17 x i32]* getelementptr inbounds ([17 x [17 x i32]], [17 x [17 x i32]]* addrspacecast ([17 x [17 x i32]] addrspace(3)* @_ZZ20needle_cuda_shared_2PiS_iiiiE4temp to [17 x [17 x i32]]*), i64 0, i64 0), i64 0, i64 %idxprom48 - store i32 %43, i32* %arrayidx49, align 4 - call void @llvm.nvvm.barrier0() - store i32 0, i32* %m, align 4 - br label %for.cond50 - -for.cond50: ; preds = %for.inc89, %if.end - %45 = load i32, i32* %m, align 4 - %cmp51 = icmp slt i32 %45, 16 - br i1 %cmp51, label %for.body52, label %for.end91 - -for.body52: ; preds = %for.cond50 - %46 = load i32, i32* %tx, align 4 - %47 = load i32, i32* %m, align 4 - %cmp53 = icmp sle i32 %46, %47 - br i1 %cmp53, label %if.then54, label %if.end88 - -if.then54: ; preds = %for.body52 - %48 = load i32, i32* %tx, align 4 - %add55 = add nsw i32 %48, 1 - store i32 %add55, i32* %t_index_x, align 4 - %49 = load i32, i32* %m, align 4 - %50 = load i32, i32* %tx, align 4 - %sub56 = sub nsw i32 %49, %50 - %add57 = add nsw i32 %sub56, 1 - store i32 %add57, i32* %t_index_y, align 4 - %51 = load i32, i32* %t_index_y, align 4 - %sub58 = sub nsw i32 %51, 1 - %idxprom59 = sext i32 %sub58 to i64 - %arrayidx60 = getelementptr inbounds [17 x [17 x i32]], [17 x [17 x i32]]* addrspacecast ([17 x [17 x i32]] addrspace(3)* @_ZZ20needle_cuda_shared_2PiS_iiiiE4temp to [17 x [17 x i32]]*), i64 0, i64 %idxprom59 - %52 = load i32, i32* %t_index_x, align 4 - %sub61 = sub nsw i32 %52, 1 - %idxprom62 = sext i32 %sub61 to i64 - %arrayidx63 = getelementptr inbounds [17 x i32], [17 x i32]* %arrayidx60, i64 0, i64 %idxprom62 - %53 = load i32, i32* %arrayidx63, align 4 - %54 = load i32, i32* %t_index_y, align 4 - %sub64 = sub nsw i32 %54, 1 - %idxprom65 = sext i32 %sub64 to i64 - %arrayidx66 = getelementptr inbounds [16 x [16 x i32]], [16 x [16 x i32]]* addrspacecast ([16 x [16 x i32]] addrspace(3)* @_ZZ20needle_cuda_shared_2PiS_iiiiE3ref to [16 x [16 x i32]]*), i64 0, i64 %idxprom65 - %55 = load i32, i32* %t_index_x, align 4 - %sub67 = sub nsw i32 %55, 1 - %idxprom68 = sext i32 %sub67 to i64 - %arrayidx69 = getelementptr inbounds [16 x i32], [16 x i32]* %arrayidx66, i64 0, i64 %idxprom68 - %56 = load i32, i32* %arrayidx69, align 4 - %add70 = add nsw i32 %53, %56 - %57 = load i32, i32* %t_index_y, align 4 - %idxprom71 = sext i32 %57 to i64 - %arrayidx72 = getelementptr inbounds [17 x [17 x i32]], [17 x [17 x i32]]* addrspacecast ([17 x [17 x i32]] addrspace(3)* @_ZZ20needle_cuda_shared_2PiS_iiiiE4temp to [17 x [17 x i32]]*), i64 0, i64 %idxprom71 - %58 = load i32, i32* %t_index_x, align 4 - %sub73 = sub nsw i32 %58, 1 - %idxprom74 = sext i32 %sub73 to i64 - %arrayidx75 = getelementptr inbounds [17 x i32], [17 x i32]* %arrayidx72, i64 0, i64 %idxprom74 - %59 = load i32, i32* %arrayidx75, align 4 - %60 = load i32, i32* %penalty.addr, align 4 - %sub76 = sub nsw i32 %59, %60 - %61 = load i32, i32* %t_index_y, align 4 - %sub77 = sub nsw i32 %61, 1 - %idxprom78 = sext i32 %sub77 to i64 - %arrayidx79 = getelementptr inbounds [17 x [17 x i32]], [17 x [17 x i32]]* addrspacecast ([17 x [17 x i32]] addrspace(3)* @_ZZ20needle_cuda_shared_2PiS_iiiiE4temp to [17 x [17 x i32]]*), i64 0, i64 %idxprom78 - %62 = load i32, i32* %t_index_x, align 4 - %idxprom80 = sext i32 %62 to i64 - %arrayidx81 = getelementptr inbounds [17 x i32], [17 x i32]* %arrayidx79, i64 0, i64 %idxprom80 - %63 = load i32, i32* %arrayidx81, align 4 - %64 = load i32, i32* %penalty.addr, align 4 - %sub82 = sub nsw i32 %63, %64 - %call83 = call i32 @_Z14maximum_deviceiii(i32 %add70, i32 %sub76, i32 %sub82) #2 - %65 = load i32, i32* %t_index_y, align 4 - %idxprom84 = sext i32 %65 to i64 - %arrayidx85 = getelementptr inbounds [17 x [17 x i32]], [17 x [17 x i32]]* addrspacecast ([17 x [17 x i32]] addrspace(3)* @_ZZ20needle_cuda_shared_2PiS_iiiiE4temp to [17 x [17 x i32]]*), i64 0, i64 %idxprom84 - %66 = load i32, i32* %t_index_x, align 4 - %idxprom86 = sext i32 %66 to i64 - %arrayidx87 = getelementptr inbounds [17 x i32], [17 x i32]* %arrayidx85, i64 0, i64 %idxprom86 - store i32 %call83, i32* %arrayidx87, align 4 - br label %if.end88 - -if.end88: ; preds = %if.then54, %for.body52 - call void @llvm.nvvm.barrier0() - br label %for.inc89 - -for.inc89: ; preds = %if.end88 - %67 = load i32, i32* %m, align 4 - %inc90 = add nsw i32 %67, 1 - store i32 %inc90, i32* %m, align 4 - br label %for.cond50 - -for.end91: ; preds = %for.cond50 - store i32 14, i32* %m92, align 4 - br label %for.cond93 - -for.cond93: ; preds = %for.inc134, %for.end91 - %68 = load i32, i32* %m92, align 4 - %cmp94 = icmp sge i32 %68, 0 - br i1 %cmp94, label %for.body95, label %for.end135 - -for.body95: ; preds = %for.cond93 - %69 = load i32, i32* %tx, align 4 - %70 = load i32, i32* %m92, align 4 - %cmp96 = icmp sle i32 %69, %70 - br i1 %cmp96, label %if.then97, label %if.end133 - -if.then97: ; preds = %for.body95 - %71 = load i32, i32* %tx, align 4 - %add99 = add nsw i32 %71, 16 - %72 = load i32, i32* %m92, align 4 - %sub100 = sub nsw i32 %add99, %72 - store i32 %sub100, i32* %t_index_x98, align 4 - %73 = load i32, i32* %tx, align 4 - %sub102 = sub nsw i32 16, %73 - store i32 %sub102, i32* %t_index_y101, align 4 - %74 = load i32, i32* %t_index_y101, align 4 - %sub103 = sub nsw i32 %74, 1 - %idxprom104 = sext i32 %sub103 to i64 - %arrayidx105 = getelementptr inbounds [17 x [17 x i32]], [17 x [17 x i32]]* addrspacecast ([17 x [17 x i32]] addrspace(3)* @_ZZ20needle_cuda_shared_2PiS_iiiiE4temp to [17 x [17 x i32]]*), i64 0, i64 %idxprom104 - %75 = load i32, i32* %t_index_x98, align 4 - %sub106 = sub nsw i32 %75, 1 - %idxprom107 = sext i32 %sub106 to i64 - %arrayidx108 = getelementptr inbounds [17 x i32], [17 x i32]* %arrayidx105, i64 0, i64 %idxprom107 - %76 = load i32, i32* %arrayidx108, align 4 - %77 = load i32, i32* %t_index_y101, align 4 - %sub109 = sub nsw i32 %77, 1 - %idxprom110 = sext i32 %sub109 to i64 - %arrayidx111 = getelementptr inbounds [16 x [16 x i32]], [16 x [16 x i32]]* addrspacecast ([16 x [16 x i32]] addrspace(3)* @_ZZ20needle_cuda_shared_2PiS_iiiiE3ref to [16 x [16 x i32]]*), i64 0, i64 %idxprom110 - %78 = load i32, i32* %t_index_x98, align 4 - %sub112 = sub nsw i32 %78, 1 - %idxprom113 = sext i32 %sub112 to i64 - %arrayidx114 = getelementptr inbounds [16 x i32], [16 x i32]* %arrayidx111, i64 0, i64 %idxprom113 - %79 = load i32, i32* %arrayidx114, align 4 - %add115 = add nsw i32 %76, %79 - %80 = load i32, i32* %t_index_y101, align 4 - %idxprom116 = sext i32 %80 to i64 - %arrayidx117 = getelementptr inbounds [17 x [17 x i32]], [17 x [17 x i32]]* addrspacecast ([17 x [17 x i32]] addrspace(3)* @_ZZ20needle_cuda_shared_2PiS_iiiiE4temp to [17 x [17 x i32]]*), i64 0, i64 %idxprom116 - %81 = load i32, i32* %t_index_x98, align 4 - %sub118 = sub nsw i32 %81, 1 - %idxprom119 = sext i32 %sub118 to i64 - %arrayidx120 = getelementptr inbounds [17 x i32], [17 x i32]* %arrayidx117, i64 0, i64 %idxprom119 - %82 = load i32, i32* %arrayidx120, align 4 - %83 = load i32, i32* %penalty.addr, align 4 - %sub121 = sub nsw i32 %82, %83 - %84 = load i32, i32* %t_index_y101, align 4 - %sub122 = sub nsw i32 %84, 1 - %idxprom123 = sext i32 %sub122 to i64 - %arrayidx124 = getelementptr inbounds [17 x [17 x i32]], [17 x [17 x i32]]* addrspacecast ([17 x [17 x i32]] addrspace(3)* @_ZZ20needle_cuda_shared_2PiS_iiiiE4temp to [17 x [17 x i32]]*), i64 0, i64 %idxprom123 - %85 = load i32, i32* %t_index_x98, align 4 - %idxprom125 = sext i32 %85 to i64 - %arrayidx126 = getelementptr inbounds [17 x i32], [17 x i32]* %arrayidx124, i64 0, i64 %idxprom125 - %86 = load i32, i32* %arrayidx126, align 4 - %87 = load i32, i32* %penalty.addr, align 4 - %sub127 = sub nsw i32 %86, %87 - %call128 = call i32 @_Z14maximum_deviceiii(i32 %add115, i32 %sub121, i32 %sub127) #2 - %88 = load i32, i32* %t_index_y101, align 4 - %idxprom129 = sext i32 %88 to i64 - %arrayidx130 = getelementptr inbounds [17 x [17 x i32]], [17 x [17 x i32]]* addrspacecast ([17 x [17 x i32]] addrspace(3)* @_ZZ20needle_cuda_shared_2PiS_iiiiE4temp to [17 x [17 x i32]]*), i64 0, i64 %idxprom129 - %89 = load i32, i32* %t_index_x98, align 4 - %idxprom131 = sext i32 %89 to i64 - %arrayidx132 = getelementptr inbounds [17 x i32], [17 x i32]* %arrayidx130, i64 0, i64 %idxprom131 - store i32 %call128, i32* %arrayidx132, align 4 - br label %if.end133 - -if.end133: ; preds = %if.then97, %for.body95 - call void @llvm.nvvm.barrier0() - br label %for.inc134 - -for.inc134: ; preds = %if.end133 - %90 = load i32, i32* %m92, align 4 - %dec = add nsw i32 %90, -1 - store i32 %dec, i32* %m92, align 4 - br label %for.cond93 - -for.end135: ; preds = %for.cond93 - store i32 0, i32* %ty136, align 4 - br label %for.cond137 - -for.cond137: ; preds = %for.inc150, %for.end135 - %91 = load i32, i32* %ty136, align 4 - %cmp138 = icmp slt i32 %91, 16 - br i1 %cmp138, label %for.body139, label %for.end152 - -for.body139: ; preds = %for.cond137 - %92 = load i32, i32* %ty136, align 4 - %add140 = add nsw i32 %92, 1 - %idxprom141 = sext i32 %add140 to i64 - %arrayidx142 = getelementptr inbounds [17 x [17 x i32]], [17 x [17 x i32]]* addrspacecast ([17 x [17 x i32]] addrspace(3)* @_ZZ20needle_cuda_shared_2PiS_iiiiE4temp to [17 x [17 x i32]]*), i64 0, i64 %idxprom141 - %93 = load i32, i32* %tx, align 4 - %add143 = add nsw i32 %93, 1 - %idxprom144 = sext i32 %add143 to i64 - %arrayidx145 = getelementptr inbounds [17 x i32], [17 x i32]* %arrayidx142, i64 0, i64 %idxprom144 - %94 = load i32, i32* %arrayidx145, align 4 - %95 = load i32*, i32** %matrix_cuda.addr, align 8 - %96 = load i32, i32* %index, align 4 - %97 = load i32, i32* %ty136, align 4 - %98 = load i32, i32* %cols.addr, align 4 - %mul146 = mul nsw i32 %97, %98 - %add147 = add nsw i32 %96, %mul146 - %idxprom148 = sext i32 %add147 to i64 - %arrayidx149 = getelementptr inbounds i32, i32* %95, i64 %idxprom148 - store i32 %94, i32* %arrayidx149, align 4 - br label %for.inc150 - -for.inc150: ; preds = %for.body139 - %99 = load i32, i32* %ty136, align 4 - %inc151 = add nsw i32 %99, 1 - store i32 %inc151, i32* %ty136, align 4 - br label %for.cond137 - -for.end152: ; preds = %for.cond137 - ret void -} - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() #3 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() #3 - -attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_61" "target-features"="+ptx64,+sm_61" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { alwaysinline convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_61" "target-features"="+ptx64,+sm_61" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { convergent nounwind } -attributes #3 = { nounwind readnone } - -!llvm.module.flags = !{!0, !1, !2} -!nvvm.annotations = !{!3, !4, !5, !6, !5, !7, !7, !7, !7, !8, !8, !7} -!llvm.ident = !{!9} -!nvvmir.version = !{!10} - -!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]} -!1 = !{i32 1, !"wchar_size", i32 4} -!2 = !{i32 4, !"nvvm-reflect-ftz", i32 0} -!3 = !{void (i32*, i32*, i32, i32, i32, i32)* @_Z20needle_cuda_shared_1PiS_iiii, !"kernel", i32 1} -!4 = !{void (i32*, i32*, i32, i32, i32, i32)* @_Z20needle_cuda_shared_2PiS_iiii, !"kernel", i32 1} -!5 = !{null, !"align", i32 8} -!6 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080} -!7 = !{null, !"align", i32 16} -!8 = !{null, !"align", i32 16, !"align", i32 65552, !"align", i32 131088} -!9 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"} -!10 = !{i32 1, i32 4} diff --git a/examples/nw/needle-host-x86_64-unknown-linux-gnu.ll b/examples/nw/needle-host-x86_64-unknown-linux-gnu.ll deleted file mode 100644 index 92bde07..0000000 --- a/examples/nw/needle-host-x86_64-unknown-linux-gnu.ll +++ /dev/null @@ -1,1218 +0,0 @@ -; ModuleID = 'needle-host-x86_64-unknown-linux-gnu.bc' -source_filename = "needle.cu" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } -%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } -%struct.dim3 = type { i32, i32, i32 } -%struct.CUstream_st = type opaque -%struct.timeval = type { i64, i64 } -%struct.timezone = type { i32, i32 } - -$_ZN4dim3C2Ejjj = comdat any - -@blosum62 = dso_local global [24 x [24 x i32]] [[24 x i32] [i32 4, i32 -1, i32 -2, i32 -2, i32 0, i32 -1, i32 -1, i32 0, i32 -2, i32 -1, i32 -1, i32 -1, i32 -1, i32 -2, i32 -1, i32 1, i32 0, i32 -3, i32 -2, i32 0, i32 -2, i32 -1, i32 0, i32 -4], [24 x i32] [i32 -1, i32 5, i32 0, i32 -2, i32 -3, i32 1, i32 0, i32 -2, i32 0, i32 -3, i32 -2, i32 2, i32 -1, i32 -3, i32 -2, i32 -1, i32 -1, i32 -3, i32 -2, i32 -3, i32 -1, i32 0, i32 -1, i32 -4], [24 x i32] [i32 -2, i32 0, i32 6, i32 1, i32 -3, i32 0, i32 0, i32 0, i32 1, i32 -3, i32 -3, i32 0, i32 -2, i32 -3, i32 -2, i32 1, i32 0, i32 -4, i32 -2, i32 -3, i32 3, i32 0, i32 -1, i32 -4], [24 x i32] [i32 -2, i32 -2, i32 1, i32 6, i32 -3, i32 0, i32 2, i32 -1, i32 -1, i32 -3, i32 -4, i32 -1, i32 -3, i32 -3, i32 -1, i32 0, i32 -1, i32 -4, i32 -3, i32 -3, i32 4, i32 1, i32 -1, i32 -4], [24 x i32] [i32 0, i32 -3, i32 -3, i32 -3, i32 9, i32 -3, i32 -4, i32 -3, i32 -3, i32 -1, i32 -1, i32 -3, i32 -1, i32 -2, i32 -3, i32 -1, i32 -1, i32 -2, i32 -2, i32 -1, i32 -3, i32 -3, i32 -2, i32 -4], [24 x i32] [i32 -1, i32 1, i32 0, i32 0, i32 -3, i32 5, i32 2, i32 -2, i32 0, i32 -3, i32 -2, i32 1, i32 0, i32 -3, i32 -1, i32 0, i32 -1, i32 -2, i32 -1, i32 -2, i32 0, i32 3, i32 -1, i32 -4], [24 x i32] [i32 -1, i32 0, i32 0, i32 2, i32 -4, i32 2, i32 5, i32 -2, i32 0, i32 -3, i32 -3, i32 1, i32 -2, i32 -3, i32 -1, i32 0, i32 -1, i32 -3, i32 -2, i32 -2, i32 1, i32 4, i32 -1, i32 -4], [24 x i32] [i32 0, i32 -2, i32 0, i32 -1, i32 -3, i32 -2, i32 -2, i32 6, i32 -2, i32 -4, i32 -4, i32 -2, i32 -3, i32 -3, i32 -2, i32 0, i32 -2, i32 -2, i32 -3, i32 -3, i32 -1, i32 -2, i32 -1, i32 -4], [24 x i32] [i32 -2, i32 0, i32 1, i32 -1, i32 -3, i32 0, i32 0, i32 -2, i32 8, i32 -3, i32 -3, i32 -1, i32 -2, i32 -1, i32 -2, i32 -1, i32 -2, i32 -2, i32 2, i32 -3, i32 0, i32 0, i32 -1, i32 -4], [24 x i32] [i32 -1, i32 -3, i32 -3, i32 -3, i32 -1, i32 -3, i32 -3, i32 -4, i32 -3, i32 4, i32 2, i32 -3, i32 1, i32 0, i32 -3, i32 -2, i32 -1, i32 -3, i32 -1, i32 3, i32 -3, i32 -3, i32 -1, i32 -4], [24 x i32] [i32 -1, i32 -2, i32 -3, i32 -4, i32 -1, i32 -2, i32 -3, i32 -4, i32 -3, i32 2, i32 4, i32 -2, i32 2, i32 0, i32 -3, i32 -2, i32 -1, i32 -2, i32 -1, i32 1, i32 -4, i32 -3, i32 -1, i32 -4], [24 x i32] [i32 -1, i32 2, i32 0, i32 -1, i32 -3, i32 1, i32 1, i32 -2, i32 -1, i32 -3, i32 -2, i32 5, i32 -1, i32 -3, i32 -1, i32 0, i32 -1, i32 -3, i32 -2, i32 -2, i32 0, i32 1, i32 -1, i32 -4], [24 x i32] [i32 -1, i32 -1, i32 -2, i32 -3, i32 -1, i32 0, i32 -2, i32 -3, i32 -2, i32 1, i32 2, i32 -1, i32 5, i32 0, i32 -2, i32 -1, i32 -1, i32 -1, i32 -1, i32 1, i32 -3, i32 -1, i32 -1, i32 -4], [24 x i32] [i32 -2, i32 -3, i32 -3, i32 -3, i32 -2, i32 -3, i32 -3, i32 -3, i32 -1, i32 0, i32 0, i32 -3, i32 0, i32 6, i32 -4, i32 -2, i32 -2, i32 1, i32 3, i32 -1, i32 -3, i32 -3, i32 -1, i32 -4], [24 x i32] [i32 -1, i32 -2, i32 -2, i32 -1, i32 -3, i32 -1, i32 -1, i32 -2, i32 -2, i32 -3, i32 -3, i32 -1, i32 -2, i32 -4, i32 7, i32 -1, i32 -1, i32 -4, i32 -3, i32 -2, i32 -2, i32 -1, i32 -2, i32 -4], [24 x i32] [i32 1, i32 -1, i32 1, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 -1, i32 -2, i32 -2, i32 0, i32 -1, i32 -2, i32 -1, i32 4, i32 1, i32 -3, i32 -2, i32 -2, i32 0, i32 0, i32 0, i32 -4], [24 x i32] [i32 0, i32 -1, i32 0, i32 -1, i32 -1, i32 -1, i32 -1, i32 -2, i32 -2, i32 -1, i32 -1, i32 -1, i32 -1, i32 -2, i32 -1, i32 1, i32 5, i32 -2, i32 -2, i32 0, i32 -1, i32 -1, i32 0, i32 -4], [24 x i32] [i32 -3, i32 -3, i32 -4, i32 -4, i32 -2, i32 -2, i32 -3, i32 -2, i32 -2, i32 -3, i32 -2, i32 -3, i32 -1, i32 1, i32 -4, i32 -3, i32 -2, i32 11, i32 2, i32 -3, i32 -4, i32 -3, i32 -2, i32 -4], [24 x i32] [i32 -2, i32 -2, i32 -2, i32 -3, i32 -2, i32 -1, i32 -2, i32 -3, i32 2, i32 -1, i32 -1, i32 -2, i32 -1, i32 3, i32 -3, i32 -2, i32 -2, i32 2, i32 7, i32 -1, i32 -3, i32 -2, i32 -1, i32 -4], [24 x i32] [i32 0, i32 -3, i32 -3, i32 -3, i32 -1, i32 -2, i32 -2, i32 -3, i32 -3, i32 3, i32 1, i32 -2, i32 1, i32 -1, i32 -2, i32 -2, i32 0, i32 -3, i32 -1, i32 4, i32 -3, i32 -2, i32 -1, i32 -4], [24 x i32] [i32 -2, i32 -1, i32 3, i32 4, i32 -3, i32 0, i32 1, i32 -1, i32 0, i32 -3, i32 -4, i32 0, i32 -3, i32 -3, i32 -2, i32 0, i32 -1, i32 -4, i32 -3, i32 -3, i32 4, i32 1, i32 -1, i32 -4], [24 x i32] [i32 -1, i32 0, i32 0, i32 1, i32 -3, i32 3, i32 4, i32 -2, i32 0, i32 -3, i32 -3, i32 1, i32 -1, i32 -3, i32 -1, i32 0, i32 -1, i32 -3, i32 -2, i32 -2, i32 1, i32 4, i32 -1, i32 -4], [24 x i32] [i32 0, i32 -1, i32 -1, i32 -1, i32 -2, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -2, i32 0, i32 0, i32 -2, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -4], [24 x i32] [i32 -4, i32 -4, i32 -4, i32 -4, i32 -4, i32 -4, i32 -4, i32 -4, i32 -4, i32 -4, i32 -4, i32 -4, i32 -4, i32 -4, i32 -4, i32 -4, i32 -4, i32 -4, i32 -4, i32 -4, i32 -4, i32 -4, i32 -4, i32 1]], align 16 -@.str = private unnamed_addr constant [25 x i8] c"WG size of kernel = %d \0A\00", align 1 -@stderr = external dso_local global %struct._IO_FILE*, align 8 -@.str.1 = private unnamed_addr constant [42 x i8] c"Usage: %s \0A\00", align 1 -@.str.2 = private unnamed_addr constant [36 x i8] c"\09 - x and y dimensions\0A\00", align 1 -@.str.3 = private unnamed_addr constant [40 x i8] c"\09 - penalty(positive integer)\0A\00", align 1 -@.str.4 = private unnamed_addr constant [47 x i8] c"The dimension values must be a multiple of 16\0A\00", align 1 -@.str.5 = private unnamed_addr constant [31 x i8] c"error: can not allocate memory\00", align 1 -@.str.6 = private unnamed_addr constant [24 x i8] c"Start Needleman-Wunsch\0A\00", align 1 -@.str.7 = private unnamed_addr constant [28 x i8] c"Processing top-left matrix\0A\00", align 1 -@.str.8 = private unnamed_addr constant [32 x i8] c"Processing bottom-right matrix\0A\00", align 1 -@.str.9 = private unnamed_addr constant [11 x i8] c"result.txt\00", align 1 -@.str.10 = private unnamed_addr constant [2 x i8] c"w\00", align 1 -@.str.11 = private unnamed_addr constant [28 x i8] c"print traceback value GPU:\0A\00", align 1 -@.str.12 = private unnamed_addr constant [4 x i8] c"%d \00", align 1 -@0 = private unnamed_addr constant [33 x i8] c"_Z20needle_cuda_shared_1PiS_iiii\00", align 1 -@1 = private unnamed_addr constant [33 x i8] c"_Z20needle_cuda_shared_2PiS_iiii\00", align 1 -@2 = private constant [48849 x i8] c"P\EDU\BA\01\00\10\00\C0\BE\00\00\00\00\00\00\02\00\01\01@\00\00\00\88\A8\00\00\00\00\00\00\00\00\00\00\00\00\00\00\07\00\01\00=\00\00\00\00\00\00\00\00\00\00\00\11\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\7FELF\02\01\013\07\00\00\00\00\00\00\00\02\00\BE\00e\00\00\00\00\00\00\00\00\00\00\00\E0\A7\00\00\00\00\00\00`\A4\00\00\00\00\00\00=\05=\00@\008\00\03\00@\00\0E\00\01\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00.text._Z20needle_cuda_shared_2PiS_iiii\00.nv.info._Z20needle_cuda_shared_2PiS_iiii\00.nv.shared._Z20needle_cuda_shared_2PiS_iiii\00.nv.global\00.nv.constant0._Z20needle_cuda_shared_2PiS_iiii\00.text._Z20needle_cuda_shared_1PiS_iiii\00.nv.info._Z20needle_cuda_shared_1PiS_iiii\00.nv.shared._Z20needle_cuda_shared_1PiS_iiii\00.nv.constant0._Z20needle_cuda_shared_1PiS_iiii\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00_Z20needle_cuda_shared_2PiS_iiii\00.text._Z20needle_cuda_shared_2PiS_iiii\00.nv.info._Z20needle_cuda_shared_2PiS_iiii\00.nv.shared._Z20needle_cuda_shared_2PiS_iiii\00.nv.global\00blockIdx\00threadIdx\00$_Z20needle_cuda_shared_2PiS_iiii$_Z14maximum_deviceiii\00$___ZZ20needle_cuda_shared_2PiS_iiiiE4temp__635\00$___ZZ20needle_cuda_shared_2PiS_iiiiE3ref__637\00.nv.constant0._Z20needle_cuda_shared_2PiS_iiii\00_param\00_Z20needle_cuda_shared_1PiS_iiii\00.text._Z20needle_cuda_shared_1PiS_iiii\00.nv.info._Z20needle_cuda_shared_1PiS_iiii\00.nv.shared._Z20needle_cuda_shared_1PiS_iiii\00$_Z20needle_cuda_shared_1PiS_iiii$_Z14maximum_deviceiii\00$___ZZ20needle_cuda_shared_1PiS_iiiiE4temp__240\00$___ZZ20needle_cuda_shared_1PiS_iiiiE3ref__242\00.nv.constant0._Z20needle_cuda_shared_1PiS_iiii\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00S\00\00\00\03\00\09\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\A4\00\00\00\03\00\0B\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\D0\00\00\00\03\00\0C\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\DB\00\00\00\01\00\0C\00\00\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\E4\00\00\00\01\00\0C\00\01\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\85\01\00\00\03\00\07\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\DC\01\00\00\03\00\0A\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00-\02\00\00\03\00\0D\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\F0\02\00\00\03\00\08\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\002\00\00\00\12\10\09\00\00\00\00\00\00\00\00\00@M\00\00\00\00\00\00\EE\00\00\00\12\02\09\00\B0E\00\00\00\00\00\00\90\07\00\00\00\00\00\00\BB\01\00\00\12\10\0A\00\00\00\00\00\00\00\00\00@L\00\00\00\00\00\00Y\02\00\00\12\02\0A\00\D8D\00\00\00\00\00\00h\07\00\00\00\00\00\00\04/\08\00\0C\00\00\00\13\00\00\00\04#\08\00\0D\00\00\00\00\00\00\00\04\12\08\00\0D\00\00\00\00\00\00\00\04\11\08\00\0D\00\00\00\00\00\00\00\04#\08\00\0C\00\00\00\00\00\00\00\04\12\08\00\0C\00\00\00x\00\00\00\04\11\08\00\0C\00\00\00x\00\00\00\04/\08\00\0A\00\00\00\13\00\00\00\04#\08\00\0B\00\00\00\00\00\00\00\04\12\08\00\0B\00\00\00\00\00\00\00\04\11\08\00\0B\00\00\00\00\00\00\00\04#\08\00\0A\00\00\00\00\00\00\00\04\12\08\00\0A\00\00\00x\00\00\00\04\11\08\00\0A\00\00\00x\00\00\00\010\00\00\01*\00\00\04\0A\08\00\06\00\00\00@\01 \00\03\19 \00\04\17\0C\00\00\00\00\00\05\00\1C\00\00\F0\11\00\04\17\0C\00\00\00\00\00\04\00\18\00\00\F0\11\00\04\17\0C\00\00\00\00\00\03\00\14\00\00\F0\11\00\04\17\0C\00\00\00\00\00\02\00\10\00\00\F0\11\00\04\17\0C\00\00\00\00\00\01\00\08\00\00\F0!\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0!\00\03\1B\FF\00\04\1D\04\00\88\04\00\00\04\1C\04\00\A8E\00\00\04\1E\04\00`\00\00\00\010\00\00\01*\00\00\04\0A\08\00\09\00\00\00@\01 \00\03\19 \00\04\17\0C\00\00\00\00\00\05\00\1C\00\00\F0\11\00\04\17\0C\00\00\00\00\00\04\00\18\00\00\F0\11\00\04\17\0C\00\00\00\00\00\03\00\14\00\00\F0\11\00\04\17\0C\00\00\00\00\00\02\00\10\00\00\F0\11\00\04\17\0C\00\00\00\00\00\01\00\08\00\00\F0!\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0!\00\03\1B\FF\00\04\1D\04\00\88\04\00\00\04\1C\04\00\D0D\00\00\04\1E\04\00`\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\EF\1F\E0\FD\03!\00\D3rd<3>;\0A\0Amov.u2\00\1B,e\00b;\0Acvta\8D\00\04%\00\13,\\\00\22ld\C8\00\02\18\00nrd2, [\CE\00\1E])\00\1F1)\00\01a0];\0Ast#\00\81[%SP+0],,\00\0A\16\00\128\16\00\222;\B6\00\01\D8\00a1, 999(\00\02g\00\00\EF\00\18[\9E\01\03M\00\AF1;\0Aret;\0A\0A}\D6\01\1A\FE\02FuncGetAttributes\E1\01\0D#\00\0E\EC\01\0F+\00\06\0F\F7\01\1B\1F1\F7\01Q\1F1\F7\01!\0E\D9\00\0F\02\02\0F\0E8\01\0F\0D\02\8DhDevice\B4\00\0E\0E\02\0E$\00\0F\0F\02\00/32,\00\0B\1F1,\00\18\1F2<\02\13\1F2<\02\1F\1D4<\02\1F2<\02\0C\1F2<\02\13\01_\00\04;\02\0F\D9\00\07\1D]4\00\1F14\00\06\0Fp\02\10\0E\9A\01\0Fq\02\12(32q\02\0B\15\00!12\16\00\09\86\02\1F3\86\02\15\1F3\86\02#2Get\CB\00\0E}\02\05\1B\00\04\DA\00\0F\1C\02\13?3[8W\04.\0F\1B\02\0D\1F3W\04\19\04\B3\01\0D\D0\00\0F\AA\01\06\0F\05\04W\F0\04OccupancyMaxActiveBZ\08\FE\03sPerMultiprocessor\9F\01\0F;\00\16\0EB\06\0FC\00%\0EJ\04\0FC\00\1E\1F2\86\00/\1F3\88\02\13O4[32\89\02\1C\1D3\89\02\1F4\89\02\0C\1F4\89\02\19\133\89\02\0F\F1\00\1E\0F\BC\04\00\0FK\00$/2]w\07\00\0FL\00$\0F\1F\05\01\0F\98\00%\0F\A7\07\1D\097\05\186M\05\04,\00\2224-\00\183\CF\03\1F2\CF\03\15\1F2\CF\03L\9FWithFlags\D8\03(\05D\00\0E\E1\03\0FL\00'\0F\EA\030\0CL\00\1F2\98\008\1F3\98\008\1F4H\04\13O5[40\EC\08.\0FH\04\0D\1F5\EC\08\1C\0F\F9\00+\1F]\9C\040\0D\9A\01\0F\A5\040\0D:\02\0F\AE\041\0D\DB\02\0F\B7\041\0D|\03\0F\C0\04I\08-\00\1F3$\0A6\7Fvisible\D9\04\0F\D1_Z14maximum_dA\09Iiii(\88\03\0F#\00\02\0E\AB\0C\0F+\00\0D\1F1+\00\17\0F\A9\0C\01\1E4\BD\03?6[2\BD\03\16xpred %p\06\08\02\CF\03\1F1\E6\0E\0D\1F6\BE\03\18\00i\03\0F\D9\00\0A\1E]\F1\03\0F3\00\0C\1F1{\03\00\0F3\00\0C*0]W\02\02m\02\0F\A5\0C\02\1F2\A5\0C\02\113s\00\02m\00$4,E\00\07\15\00$5,E\00\B0;\0Asetp.gt.s\1A\002p1,4\00\F2\0E%r5;\0A@%p1 bra LBB6_2;\0Abra.uni\10\0021;\0A\08\00\16:Y\00\187Y\00\0EG\03\1B7C\00\133C\00\172C\00\176\B1\00\0FC\00\00\1F6C\00\04\173C\00%8,3\00\07\F5\00%9,%\01\0C\F6\00\222,6\001%r9\F6\00\162\F6\00\1B5p\00\134p\00\174p\00)11[\00\07\B5\00\03\80\0C\1B1E\00\136E\00\185E\00\180\B6\00\0FE\00\00\1F0E\00\04\186E\00$2,4\00\0F\18\0D\11\1F2t\04\02\F0\02entry _Z20needle_~\05\E9_shared_1PiS_ie\04\00T\03\0F.\00\0D\0Ep\04\0F6\00\18\07{\04\00\E7\00\0F6\00\14\1F26\00\22\1F36\00\22\1F46\00\22\1F5\E5\08\13O7[96(\05\1D\1E8(\05\1D6\F9\08 10'\002\0A\09.\A0\00\0B\AB\05\1FZ\C5\00\0C\CFE4temp[1156]F\00(\803ref[102\F0\03\0F\C7\05\08\1F7\C7\05\18\00\F4\04\0F\83\01\15\1E]\05\06\0F>\00\17\1F4\10\06\00\0F>\00\17\0F\96\09\01\0F>\00\17\0F\80\09\01\0F}\00\18\0Fj\09\01\0F~\00\18#0]\90\01#tob\19\07\AD\0A\02G\09\01 \14\0A\1C\00\1446\09\0F;\00\03\145\98\09\0F;\00\00\116\1C\00\1F5\CA\09\02\1F6\CA\09\02\1F4\CA\09\09\04\16\00/20H\07\01\03\E0\09\1B3\16\00\02Y\00\184\DF\09\00\B3\00\7Fctaid.x\0C\0A\02\185-\00\00\C4\00\1Ft+\00\00\05\DA\06\0DH\07\1C3Q\06$40I\07\0D\EE\06/24\EE\06\01\01B\00Unot.bf\06\01\E6\06Uadd.s\BE\06\04\19\00\1B8n\00\154\C0\06\0Ec\06\0A^\07513,4\00\84;\0Amul.loa\00#4,8\00\00'\00T;\0Ashl\8E\00#5, \00\184K\00\05\03\08)0].\00#7,\1E\00\194\C0\00&8,K\00\08\19\01\151\04\01(6]1\00620,7\00\09\0B\01&21\AA\00)20\1A\00#2, \00\0BA\02\134\89\09\172y\00/23#\01\02/24#\01\06325,8\00\00'\00\08\F5\00326, \00\08#\01/27#\01\03328,\1E\00\09#\01629,K\00\178H\00/30#\01\03531,7\00\193\09\01\113\09\01\1E3\09\01\135|\0A\08\09\01\1F3\09\01\03\1F3\09\01\07335,8\00\00'\00\08\DB\00\113\BE\00\1B3\09\01\1F3\09\01\04\113\09\01\1C3\09\01639,K\00\188\1A\00640, \00\0B\F9\03\125\A1\03(40X\0B\1F1\DB\00\02/42\DB\00\06343,8\00\00'\00\08\AD\00344, \00\08\DB\00/45\DB\00\03346,\1E\00\09\DB\00647,K\00\1A6\C1\00\1266\04\184\1E\03(48\FB\01\01\10\0B)ne\06\0CJ8, 0\05\0C\1B7\05\0C\137\05\0C\147\05\0C\02\AC\05\08\06\0C\14l\A0\005d8,\8D\00\03\CF\00\02\87\08#9,\1E\00\132\CF\00\03\8D\06#0,L\00\00$\00\08\B9\00\01\D7\03\00!\00\09_\00(11\D0\00\04z\01\03L\00$2,#\00'68g\08 rd\B7\04\0F\EA\08\14\03\97\06\02\1D\00\05@\00\02\D3\06*13\BC\00(5,\1D\00\08^\06\00\1D\00\02\8D\01\1B9K\01\132K\01(2:`\06\110\85\01\09\CD\01\02\95\05+50@\00\133@\00\08\0A\0D551,5\00\0F\F5\0C\02\00!\00*15\F5\0C;7_6[\00\134[\00\184\E6\01'96l\0C\07\1F\06%8,\1A\05\09\18\00\1F98\06\04\180\A3\00\0A9\06\02\87\04\121\88\04\1A1\89\04\121\8A\04\121\F3\04\22137\08\00\82\01\04\12\02\1290\06*32o\02$8,\1D\00\0Aq\02499,\D7\00\01'\00\09\A5\00\123t\02=99]g\005100\9F\00\08Z\02?01,\FF\0A\14\0FZ\02\03\130\B5\02)01\C0\00E103,w\00\1B63\03*4,;\00\193\22\03)05\F3\03\08T\00%6,\22\00\0C\87\03*7,\\\00\1A6\CE\02\2207$\08+335\02\1355\02\09\E5\0E(34\EE\01\08\CE\01$5, \00\1F1\F0\02\02?135\F1\02\04\B06:\0Abar.sync,\03\06\A2\02*16\89\04\03\14\03\04\A3\05\195\89\02\1F5\AC\06\03/54W\04\01\02\1B\00#5,8\00\00'\00\08\CE\00356,i\00\00&\00\0E2\02\01S\01*56\84\01$8,\1C\00\0B\82\01$9,\CD\00\01'\00\08\9D\00\137\0B\05)9]\7F\00#8,\BA\00\0E\15\03\02\FD\08\1A5\06\05/21\06\05+\02\1E\09\04\22d2\B1\05\182!\07\135h\06\192>\02\1F62\09\04361,\1E\00\0Ft\01\00\01\C2\01\1A6\C8\03\00\F8\04\03\1C\00\0A\93\00(31\1A\01)30\1A\01\2231\1A\01\1A9\1A\01\07d\0C\1E21\06\02\A3\0A\1C6\BC\07\137@\03\187~\13\04\06\03\1E6\1C\14#3,!\00\031\06\1631\06\1C1\\\00\138\\\00\198\E2\12\09L\03\07\18\00\1F5u\00\07$7,:\00\01)\00\01y\00\177y\00\0C\06\07\139y\00\199y\00\1F6\B5\01\03\03\DD\04\01 \00\0Bv\04#72v\13\09R\0C(08\AA\00\07\C2\00\09k\0C5sub\\\0D$0,8\00\01'\00\0Ay\0D$1,$\00\0C~\00\03\AD\0E\0Az\0D\05{\0D\1B7\C7\00\00\C6\08\03 \00\1E-\7F\02\127\84\00)13\F4\03\1F7\F4\03,\127\A0\02\1D7\F4\03\127\F4\03\1E7\F4\03874,:\00\193\DC\00%4,y\01\0A\DC\00$5, \00\0F\DC\00\01\03\1E\00\195\\\03\127,\05\1D7\CE\06877,\89\00\196\82\00\126\F0\03:77])\01\1F8\C8\07*\127\BF\05)78\A2\00\138\DE\0A\0D\C4\07781,6\00*80\1D\00(2,$\00\0A\BF\00\03\0C\06\1C8@\01$8,\DE\00\01&\00\0DC\01\148=\02\0C\C9\01484, \00\0B\C9\01(85\C9\01*84\1D\00(6,$\00\0A\A5\00\03T\05\1A8\01\04\00 \06\04\8F\11\09?\03\03\91\0F\02\03\07>120\BD\00\137\1E\02\0A^\01\02\D6\05\1D8L\07(89\00\02\198\E4\09#22\9C\00\1B9\84\00$3,\1F\00\03\84\00F{ \0A\09]\15\00\22\03\03\16\13Ireg;&\1B\01\0B\00\1C0\8B\17\02\16\00\05\1D\18\1F84\00\00\1F14\00\02\151\B9\17\1F14\00\00\1F24\00\02\1524\00\1A34\00\03\0C\18a;\0AcallA\05\14(W\1C?, \0A\A1\1A\02R, \0A(\0A\BE\00\22, \09\00\141\09\0072\0A)R\14\03\B2\11\01\F3\10\06u\18f;\0A} \0A\09\B0\07(90\D1\04\0A\B4\0D\129\E5\02\1E9G\04(92~\02(91\02\08(93G\04\08\F7\0B$4, \00\0B\F7\0B'5,U\00)94o\07#95<\01\1C4\F8\0E\140\F9\0E\1A0~\0A\0A&\00\05\1F\0F\191\A5\19\186F\06\07\AC\03\02\A7\08\01 \00\1F1\C4\07\02?127\C5\07\04\1917\0E\106\FC\09\0B\01\15\128\08\02\1D6\CB\00\04H\0B\181:\0E\156b\09\140\94\07\14l\94\07#4,!\00\02\1F\10\174\8F\07\1C8]\00\04;\0E\1811\1B/79\09\08\02-80t\00\05\08\08#6,8\00\00'\00\01w\00\176w\00\0C\B1\0E$15w\00\08~\0C/81w\00\03\192w\00\05~\04\02\19\05\02m\05(82\A9\01\02\1A\05\00 \00+16e\01\02\BA\0A(84`\00\09w\0D\06\A5\01#86@\00\06r\00#7,\18\00\006\00\0BZ\00\02\02\02\198\FB\15\04\8F\08\1A8I\02\02\E7\04\00\1E\00\0F3\07\00\02#\12)89\E5\06/45\0E\08+\02\95\12\11\02\22\11\1F8>\11\01\02\E8\15\1A6\B0\06\1F3\B8\17,\133\8B\1E\1C3\80\02\02\88\14\01u\00\0B\80\02\026\1A\05:\00\185\AF\1D\0F\15\12\04\00d\0F\02\1E\00\0F\D7\00\00\01\DA\04\1B7\15\12\02\AF\02\1D3\AF\02\02\99\1A\05\84\00\08\B9\0C\137\AF\06839]#\13/40\FD\14\01/73\9E\17\02(74\FD\14\09\E6\14$75\9B\01\00'\00\09\DE\00#6,R\00\00&\00\0E\90\07\02\FD\00\0A\E6\14\00 \1B\03\1C\00\0A\9C\05\02\1E\1B#d4\B0\07\0A3\19\2243\F6\1E\0De\02\04`\0B\192\85&\1F7e\02\03\00\C9\0F\02\1E\00\1F1\22\03\02/78\22\03\05?22:)%\1E\1F2)%\1A\1F2)%\22\1F2)%\22\1F2)%\22\1F2)%\22\1F2)%\22\1F2)%#\1F8)%2/40)%3\1F2)%2\1F2)%\1D\1F8)%3\1F2)%*\1F2)%*\1F2)%*\1F2)%+\1F2)%+\1F2)%\FFi\0E\13%\0A\17\0F\01D\07\137\E8$\0Ey++24\BC\17\14,4\00.10\86%\0F\18%\06.28\18%\0F\89%\00\144\0F%\07\09\0A'5,/%\0F\8B%\03\1E5)%\0DS\1F\1F7\8B%\07\148C%\02,%\07]%\149\15%\09h$\1F0h$\04\02 $\1C0h$%2,K\00.21\12%\0E\8B%&4,]%\09\0C\01&25\AA\00\1A2e\00\05\12%\0F\8B%\04\1E6)%\0D\8B%\1F8\8B%\07\149C%\02,%\07T$\140\15%\09\82$\1F1\82$\04\05)%\0A\82$\02\DB$\1E0,%\1F4\8B%\04\0B\12%\07\1A\00\05\12%\0F\8B%\04\1E6)%\0D\8B%\0F\09\01\07$39C%\02,%\06\09\01\06,%\1E4\12%\0E\B0$\142\F8$\0A\B0$%3,K\00)42\1A\00\05\0F%\00\7F\00\0F\8B%\03\0E)%\0E\8B%\1F6\8B%\07\147C%\03,%\06\AD\00#8, \00\1D4s$\0E\DB\00\02^\12+49\DB\00\02\\\12$48\A7#\0F\8B%\00\195\CA -#include -#include -#include -#include -#include - -// includes, kernels -#include "needle_kernel.cu" - -#ifdef TIMING -#include "timing.h" - -struct timeval tv; -struct timeval tv_total_start, tv_total_end; -struct timeval tv_h2d_start, tv_h2d_end; -struct timeval tv_d2h_start, tv_d2h_end; -struct timeval tv_kernel_start, tv_kernel_end; -struct timeval tv_mem_alloc_start, tv_mem_alloc_end; -struct timeval tv_close_start, tv_close_end; -float init_time = 0, mem_alloc_time = 0, h2d_time = 0, kernel_time = 0, - d2h_time = 0, close_time = 0, total_time = 0; -#endif - -//////////////////////////////////////////////////////////////////////////////// -// declaration, forward -void runTest(int argc, char **argv); - -int maximum(int a, int b, int c) { - - int k; - if (a <= b) - k = b; - else - k = a; - - if (k <= c) - return (c); - else - return (k); -} - -int blosum62[24][24] = {{4, -1, -2, -2, 0, -1, -1, 0, -2, -1, -1, -1, - -1, -2, -1, 1, 0, -3, -2, 0, -2, -1, 0, -4}, - {-1, 5, 0, -2, -3, 1, 0, -2, 0, -3, -2, 2, - -1, -3, -2, -1, -1, -3, -2, -3, -1, 0, -1, -4}, - {-2, 0, 6, 1, -3, 0, 0, 0, 1, -3, -3, 0, - -2, -3, -2, 1, 0, -4, -2, -3, 3, 0, -1, -4}, - {-2, -2, 1, 6, -3, 0, 2, -1, -1, -3, -4, -1, - -3, -3, -1, 0, -1, -4, -3, -3, 4, 1, -1, -4}, - {0, -3, -3, -3, 9, -3, -4, -3, -3, -1, -1, -3, - -1, -2, -3, -1, -1, -2, -2, -1, -3, -3, -2, -4}, - {-1, 1, 0, 0, -3, 5, 2, -2, 0, -3, -2, 1, - 0, -3, -1, 0, -1, -2, -1, -2, 0, 3, -1, -4}, - {-1, 0, 0, 2, -4, 2, 5, -2, 0, -3, -3, 1, - -2, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1, -4}, - {0, -2, 0, -1, -3, -2, -2, 6, -2, -4, -4, -2, - -3, -3, -2, 0, -2, -2, -3, -3, -1, -2, -1, -4}, - {-2, 0, 1, -1, -3, 0, 0, -2, 8, -3, -3, -1, - -2, -1, -2, -1, -2, -2, 2, -3, 0, 0, -1, -4}, - {-1, -3, -3, -3, -1, -3, -3, -4, -3, 4, 2, -3, - 1, 0, -3, -2, -1, -3, -1, 3, -3, -3, -1, -4}, - {-1, -2, -3, -4, -1, -2, -3, -4, -3, 2, 4, -2, - 2, 0, -3, -2, -1, -2, -1, 1, -4, -3, -1, -4}, - {-1, 2, 0, -1, -3, 1, 1, -2, -1, -3, -2, 5, - -1, -3, -1, 0, -1, -3, -2, -2, 0, 1, -1, -4}, - {-1, -1, -2, -3, -1, 0, -2, -3, -2, 1, 2, -1, - 5, 0, -2, -1, -1, -1, -1, 1, -3, -1, -1, -4}, - {-2, -3, -3, -3, -2, -3, -3, -3, -1, 0, 0, -3, - 0, 6, -4, -2, -2, 1, 3, -1, -3, -3, -1, -4}, - {-1, -2, -2, -1, -3, -1, -1, -2, -2, -3, -3, -1, - -2, -4, 7, -1, -1, -4, -3, -2, -2, -1, -2, -4}, - {1, -1, 1, 0, -1, 0, 0, 0, -1, -2, -2, 0, - -1, -2, -1, 4, 1, -3, -2, -2, 0, 0, 0, -4}, - {0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, - -1, -2, -1, 1, 5, -2, -2, 0, -1, -1, 0, -4}, - {-3, -3, -4, -4, -2, -2, -3, -2, -2, -3, -2, -3, - -1, 1, -4, -3, -2, 11, 2, -3, -4, -3, -2, -4}, - {-2, -2, -2, -3, -2, -1, -2, -3, 2, -1, -1, -2, - -1, 3, -3, -2, -2, 2, 7, -1, -3, -2, -1, -4}, - {0, -3, -3, -3, -1, -2, -2, -3, -3, 3, 1, -2, - 1, -1, -2, -2, 0, -3, -1, 4, -3, -2, -1, -4}, - {-2, -1, 3, 4, -3, 0, 1, -1, 0, -3, -4, 0, - -3, -3, -2, 0, -1, -4, -3, -3, 4, 1, -1, -4}, - {-1, 0, 0, 1, -3, 3, 4, -2, 0, -3, -3, 1, - -1, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1, -4}, - {0, -1, -1, -1, -2, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -2, 0, 0, -2, -1, -1, -1, -1, -1, -4}, - {-4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, - -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, 1}}; - -double gettime() { - struct timeval t; - gettimeofday(&t, NULL); - return t.tv_sec + t.tv_usec * 1e-6; -} - -//////////////////////////////////////////////////////////////////////////////// -// Program main -//////////////////////////////////////////////////////////////////////////////// -int main(int argc, char **argv) { - cudaSetDevice(0); - printf("WG size of kernel = %d \n", BLOCK_SIZE); - - runTest(argc, argv); - - return EXIT_SUCCESS; -} - -void usage(int argc, char **argv) { - fprintf(stderr, "Usage: %s \n", argv[0]); - fprintf(stderr, "\t - x and y dimensions\n"); - fprintf(stderr, "\t - penalty(positive integer)\n"); - exit(1); -} - -void runTest(int argc, char **argv) { - int max_rows, max_cols, penalty; - int *input_itemsets, *output_itemsets, *referrence; - int *matrix_cuda, *referrence_cuda; - int size; - - // the lengths of the two sequences should be able to divided by 16. - // And at current stage max_rows needs to equal max_cols - if (argc == 3) { - max_rows = atoi(argv[1]); - max_cols = atoi(argv[1]); - penalty = atoi(argv[2]); - } else { - usage(argc, argv); - } - - if (atoi(argv[1]) % 16 != 0) { - fprintf(stderr, "The dimension values must be a multiple of 16\n"); - exit(1); - } - - max_rows = max_rows + 1; - max_cols = max_cols + 1; - referrence = (int *)malloc(max_rows * max_cols * sizeof(int)); - input_itemsets = (int *)malloc(max_rows * max_cols * sizeof(int)); - output_itemsets = (int *)malloc(max_rows * max_cols * sizeof(int)); - - if (!input_itemsets) - fprintf(stderr, "error: can not allocate memory"); - - srand(7); - - for (int i = 0; i < max_cols; i++) { - for (int j = 0; j < max_rows; j++) { - input_itemsets[i * max_cols + j] = 0; - } - } - - printf("Start Needleman-Wunsch\n"); - - for (int i = 1; i < max_rows; i++) { // please define your own sequence. - input_itemsets[i * max_cols] = rand() % 10 + 1; - } - for (int j = 1; j < max_cols; j++) { // please define your own sequence. - input_itemsets[j] = rand() % 10 + 1; - } - - for (int i = 1; i < max_cols; i++) { - for (int j = 1; j < max_rows; j++) { - referrence[i * max_cols + j] = - blosum62[input_itemsets[i * max_cols]][input_itemsets[j]]; - } - } - - for (int i = 1; i < max_rows; i++) - input_itemsets[i * max_cols] = -i * penalty; - for (int j = 1; j < max_cols; j++) - input_itemsets[j] = -j * penalty; - - size = max_cols * max_rows; - cudaMalloc((void **)&referrence_cuda, sizeof(int) * size); - cudaMalloc((void **)&matrix_cuda, sizeof(int) * size); - - cudaMemcpy(referrence_cuda, referrence, sizeof(int) * size, - cudaMemcpyHostToDevice); - cudaMemcpy(matrix_cuda, input_itemsets, sizeof(int) * size, - cudaMemcpyHostToDevice); - cudaDeviceSynchronize(); - - dim3 dimGrid; - dim3 dimBlock(BLOCK_SIZE, 1); - int block_width = (max_cols - 1) / BLOCK_SIZE; - -#ifdef TIMING - gettimeofday(&tv_kernel_start, NULL); -#endif - - printf("Processing top-left matrix\n"); - // process top-left matrix - for (int i = 1; i <= block_width; i++) { - dimGrid.x = i; - dimGrid.y = 1; - needle_cuda_shared_1<<>>( - referrence_cuda, matrix_cuda, max_cols, penalty, i, block_width); - cudaDeviceSynchronize(); - } - cudaMemcpy(output_itemsets, matrix_cuda, sizeof(int) * size, - cudaMemcpyDeviceToHost); - cudaDeviceSynchronize(); - - printf("Processing bottom-right matrix\n"); - // process bottom-right matrix - for (int i = block_width - 1; i >= 1; i--) { - dimGrid.x = i; - dimGrid.y = 1; - needle_cuda_shared_2<<>>( - referrence_cuda, matrix_cuda, max_cols, penalty, i, block_width); - cudaDeviceSynchronize(); - } - -#ifdef TIMING - gettimeofday(&tv_kernel_end, NULL); - tvsub(&tv_kernel_end, &tv_kernel_start, &tv); - kernel_time += tv.tv_sec * 1000.0 + (float)tv.tv_usec / 1000.0; -#endif - - cudaMemcpy(output_itemsets, matrix_cuda, sizeof(int) * size, - cudaMemcpyDeviceToHost); - - //#define TRACEBACK - - FILE *fpo = fopen("result.txt", "w"); - fprintf(fpo, "print traceback value GPU:\n"); - - for (int i = max_rows - 2, j = max_rows - 2; i >= 0, j >= 0;) { - int nw, n, w, traceback; - if (i == max_rows - 2 && j == max_rows - 2) - fprintf(fpo, "%d ", - output_itemsets[i * max_cols + j]); // print the first element - if (i == 0 && j == 0) - break; - if (i > 0 && j > 0) { - nw = output_itemsets[(i - 1) * max_cols + j - 1]; - w = output_itemsets[i * max_cols + j - 1]; - n = output_itemsets[(i - 1) * max_cols + j]; - } else if (i == 0) { - nw = n = LIMIT; - w = output_itemsets[i * max_cols + j - 1]; - } else if (j == 0) { - nw = w = LIMIT; - n = output_itemsets[(i - 1) * max_cols + j]; - } else { - } - - // traceback = maximum(nw, w, n); - int new_nw, new_w, new_n; - new_nw = nw + referrence[i * max_cols + j]; - new_w = w - penalty; - new_n = n - penalty; - - traceback = maximum(new_nw, new_w, new_n); - if (traceback == new_nw) - traceback = nw; - if (traceback == new_w) - traceback = w; - if (traceback == new_n) - traceback = n; - - fprintf(fpo, "%d ", traceback); - - if (traceback == nw) { - i--; - j--; - continue; - } - - else if (traceback == w) { - j--; - continue; - } - - else if (traceback == n) { - i--; - continue; - } - - else - ; - } - - fclose(fpo); - - cudaFree(referrence_cuda); - cudaFree(matrix_cuda); - - free(referrence); - free(input_itemsets); - free(output_itemsets); - -#ifdef TIMING - printf("Exec: %f\n", kernel_time); -#endif -} diff --git a/examples/nw/needle.h b/examples/nw/needle.h deleted file mode 100644 index a0907b3..0000000 --- a/examples/nw/needle.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifdef RD_WG_SIZE_0_0 -#define BLOCK_SIZE RD_WG_SIZE_0_0 -#elif defined(RD_WG_SIZE_0) -#define BLOCK_SIZE RD_WG_SIZE_0 -#elif defined(RD_WG_SIZE) -#define BLOCK_SIZE RD_WG_SIZE -#else -#define BLOCK_SIZE 16 -#endif -//#define TRACE diff --git a/examples/nw/needle_kernel.cu b/examples/nw/needle_kernel.cu deleted file mode 100644 index d180012..0000000 --- a/examples/nw/needle_kernel.cu +++ /dev/null @@ -1,165 +0,0 @@ -#include "needle.h" -#include - -#define SDATA(index) CUT_BANK_CHECKER(sdata, index) - -__device__ int maximum_device(int a, int b, int c) { - - int k; - if (a <= b) - k = b; - else - k = a; - - if (k <= c) - return (c); - else - return (k); -} -__global__ void needle_cuda_shared_1(int *referrence, int *matrix_cuda, - int cols, int penalty, int i, - int block_width) { - int bx = blockIdx.x; - int tx = threadIdx.x; - - int b_index_x = bx; - int b_index_y = i - 1 - bx; - - int index = - cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + tx + (cols + 1); - int index_n = - cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + tx + (1); - int index_w = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + (cols); - int index_nw = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x; - - __shared__ int temp[BLOCK_SIZE + 1][BLOCK_SIZE + 1]; - __shared__ int ref[BLOCK_SIZE][BLOCK_SIZE]; - - if (tx == 0) - temp[tx][0] = matrix_cuda[index_nw]; - - for (int ty = 0; ty < BLOCK_SIZE; ty++) - ref[ty][tx] = referrence[index + cols * ty]; - - __syncthreads(); - - temp[tx + 1][0] = matrix_cuda[index_w + cols * tx]; - - __syncthreads(); - - temp[0][tx + 1] = matrix_cuda[index_n]; - - __syncthreads(); - - for (int m = 0; m < BLOCK_SIZE; m++) { - - if (tx <= m) { - - int t_index_x = tx + 1; - int t_index_y = m - tx + 1; - - temp[t_index_y][t_index_x] = - maximum_device(temp[t_index_y - 1][t_index_x - 1] + - ref[t_index_y - 1][t_index_x - 1], - temp[t_index_y][t_index_x - 1] - penalty, - temp[t_index_y - 1][t_index_x] - penalty); - } - - __syncthreads(); - } - - for (int m = BLOCK_SIZE - 2; m >= 0; m--) { - - if (tx <= m) { - - int t_index_x = tx + BLOCK_SIZE - m; - int t_index_y = BLOCK_SIZE - tx; - - temp[t_index_y][t_index_x] = - maximum_device(temp[t_index_y - 1][t_index_x - 1] + - ref[t_index_y - 1][t_index_x - 1], - temp[t_index_y][t_index_x - 1] - penalty, - temp[t_index_y - 1][t_index_x] - penalty); - } - - __syncthreads(); - } - - for (int ty = 0; ty < BLOCK_SIZE; ty++) - matrix_cuda[index + ty * cols] = temp[ty + 1][tx + 1]; -} - -__global__ void needle_cuda_shared_2(int *referrence, int *matrix_cuda, - - int cols, int penalty, int i, - int block_width) { - - int bx = blockIdx.x; - int tx = threadIdx.x; - - int b_index_x = bx + block_width - i; - int b_index_y = block_width - bx - 1; - - int index = - cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + tx + (cols + 1); - int index_n = - cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + tx + (1); - int index_w = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x + (cols); - int index_nw = cols * BLOCK_SIZE * b_index_y + BLOCK_SIZE * b_index_x; - - __shared__ int temp[BLOCK_SIZE + 1][BLOCK_SIZE + 1]; - __shared__ int ref[BLOCK_SIZE][BLOCK_SIZE]; - - for (int ty = 0; ty < BLOCK_SIZE; ty++) - ref[ty][tx] = referrence[index + cols * ty]; - - __syncthreads(); - - if (tx == 0) - temp[tx][0] = matrix_cuda[index_nw]; - - temp[tx + 1][0] = matrix_cuda[index_w + cols * tx]; - - __syncthreads(); - - temp[0][tx + 1] = matrix_cuda[index_n]; - - __syncthreads(); - - for (int m = 0; m < BLOCK_SIZE; m++) { - - if (tx <= m) { - - int t_index_x = tx + 1; - int t_index_y = m - tx + 1; - - temp[t_index_y][t_index_x] = - maximum_device(temp[t_index_y - 1][t_index_x - 1] + - ref[t_index_y - 1][t_index_x - 1], - temp[t_index_y][t_index_x - 1] - penalty, - temp[t_index_y - 1][t_index_x] - penalty); - } - - __syncthreads(); - } - - for (int m = BLOCK_SIZE - 2; m >= 0; m--) { - - if (tx <= m) { - - int t_index_x = tx + BLOCK_SIZE - m; - int t_index_y = BLOCK_SIZE - tx; - - temp[t_index_y][t_index_x] = - maximum_device(temp[t_index_y - 1][t_index_x - 1] + - ref[t_index_y - 1][t_index_x - 1], - temp[t_index_y][t_index_x - 1] - penalty, - temp[t_index_y - 1][t_index_x] - penalty); - } - - __syncthreads(); - } - - for (int ty = 0; ty < BLOCK_SIZE; ty++) - matrix_cuda[index + ty * cols] = temp[ty + 1][tx + 1]; -} diff --git a/examples/nw/run.sh b/examples/nw/run.sh deleted file mode 100644 index 0dd3f29..0000000 --- a/examples/nw/run.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash -set -e -llvm-as needle-cuda-nvptx64-nvidia-cuda-sm_61.ll -llvm-as needle-host-x86_64-unknown-linux-gnu.ll -../../build/compilation/kernelTranslator needle-cuda-nvptx64-nvidia-cuda-sm_61.bc kernel.bc -../../build/compilation/hostTranslator needle-host-x86_64-unknown-linux-gnu.bc host.bc -llc --relocation-model=pic --filetype=obj kernel.bc -llc --relocation-model=pic --filetype=obj host.bc - -g++ -Wall -L../../build/runtime \ - -L../../build/runtime/threadPool \ - -o needle -fPIC -no-pie host.o kernel.o -lc -lx86Runtime -lthreadPool -lpthread -export LD_LIBRARY_PATH=../../build/runtime:../../build/runtime/threadPool:$LD_LIBRARY_PATH -./needle 16 10 -if grep -q -e "-11 -7 -5 -6 -7 -7 -4 -2 -2 2 -7 -9 -9 -7 -3 0" result.txt; then - echo "Pass" -else - echo "Error result" - exit 1 -fi diff --git a/examples/particlefilter/ex_particle_CUDA_naive_seq-cuda-nvptx64-nvidia-cuda-sm_61.ll b/examples/particlefilter/ex_particle_CUDA_naive_seq-cuda-nvptx64-nvidia-cuda-sm_61.ll deleted file mode 100644 index 35ef954..0000000 --- a/examples/particlefilter/ex_particle_CUDA_naive_seq-cuda-nvptx64-nvidia-cuda-sm_61.ll +++ /dev/null @@ -1,482 +0,0 @@ -; ModuleID = 'ex_particle_CUDA_naive_seq-cuda-nvptx64-nvidia-cuda-sm_61.bc' -source_filename = "ex_particle_CUDA_naive_seq.cu" -target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" -target triple = "nvptx64-nvidia-cuda" - -%struct.__cuda_builtin_blockIdx_t = type { i8 } -%struct.__cuda_builtin_blockDim_t = type { i8 } -%struct.__cuda_builtin_threadIdx_t = type { i8 } -%struct.cudaFuncAttributes = type { i64, i64, i64, i32, i32, i32, i32, i32, i32, i32 } - -$_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv = comdat any - -$_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv = comdat any - -$_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv = comdat any - -@blockIdx = extern_weak dso_local addrspace(1) global %struct.__cuda_builtin_blockIdx_t, align 1 -@blockDim = extern_weak dso_local addrspace(1) global %struct.__cuda_builtin_blockDim_t, align 1 -@threadIdx = extern_weak dso_local addrspace(1) global %struct.__cuda_builtin_threadIdx_t, align 1 - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaMalloc(i8** %p, i64 %s) #0 { -entry: - %p.addr = alloca i8**, align 8 - %s.addr = alloca i64, align 8 - store i8** %p, i8*** %p.addr, align 8 - store i64 %s, i64* %s.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaFuncGetAttributes(%struct.cudaFuncAttributes* %p, i8* %c) #0 { -entry: - %p.addr = alloca %struct.cudaFuncAttributes*, align 8 - %c.addr = alloca i8*, align 8 - store %struct.cudaFuncAttributes* %p, %struct.cudaFuncAttributes** %p.addr, align 8 - store i8* %c, i8** %c.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaDeviceGetAttribute(i32* %value, i32 %attr, i32 %device) #0 { -entry: - %value.addr = alloca i32*, align 8 - %attr.addr = alloca i32, align 4 - %device.addr = alloca i32, align 4 - store i32* %value, i32** %value.addr, align 8 - store i32 %attr, i32* %attr.addr, align 4 - store i32 %device, i32* %device.addr, align 4 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaGetDevice(i32* %device) #0 { -entry: - %device.addr = alloca i32*, align 8 - store i32* %device, i32** %device.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaOccupancyMaxActiveBlocksPerMultiprocessor(i32* %numBlocks, i8* %func, i32 %blockSize, i64 %dynamicSmemSize) #0 { -entry: - %numBlocks.addr = alloca i32*, align 8 - %func.addr = alloca i8*, align 8 - %blockSize.addr = alloca i32, align 4 - %dynamicSmemSize.addr = alloca i64, align 8 - store i32* %numBlocks, i32** %numBlocks.addr, align 8 - store i8* %func, i8** %func.addr, align 8 - store i32 %blockSize, i32* %blockSize.addr, align 4 - store i64 %dynamicSmemSize, i64* %dynamicSmemSize.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(i32* %numBlocks, i8* %func, i32 %blockSize, i64 %dynamicSmemSize, i32 %flags) #0 { -entry: - %numBlocks.addr = alloca i32*, align 8 - %func.addr = alloca i8*, align 8 - %blockSize.addr = alloca i32, align 4 - %dynamicSmemSize.addr = alloca i64, align 8 - %flags.addr = alloca i32, align 4 - store i32* %numBlocks, i32** %numBlocks.addr, align 8 - store i8* %func, i8** %func.addr, align 8 - store i32 %blockSize, i32* %blockSize.addr, align 4 - store i64 %dynamicSmemSize, i64* %dynamicSmemSize.addr, align 8 - store i32 %flags, i32* %flags.addr, align 4 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define dso_local i32 @_Z12findIndexSeqPdid(double* %CDF, i32 %lengthCDF, double %value) #0 { -entry: - %retval = alloca i32, align 4 - %CDF.addr = alloca double*, align 8 - %lengthCDF.addr = alloca i32, align 4 - %value.addr = alloca double, align 8 - %index = alloca i32, align 4 - %x = alloca i32, align 4 - store double* %CDF, double** %CDF.addr, align 8 - store i32 %lengthCDF, i32* %lengthCDF.addr, align 4 - store double %value, double* %value.addr, align 8 - store i32 -1, i32* %index, align 4 - store i32 0, i32* %x, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = load i32, i32* %x, align 4 - %1 = load i32, i32* %lengthCDF.addr, align 4 - %cmp = icmp slt i32 %0, %1 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %2 = load double*, double** %CDF.addr, align 8 - %3 = load i32, i32* %x, align 4 - %idxprom = sext i32 %3 to i64 - %arrayidx = getelementptr inbounds double, double* %2, i64 %idxprom - %4 = load double, double* %arrayidx, align 8 - %5 = load double, double* %value.addr, align 8 - %cmp1 = fcmp oge double %4, %5 - br i1 %cmp1, label %if.then, label %if.end - -if.then: ; preds = %for.body - %6 = load i32, i32* %x, align 4 - store i32 %6, i32* %index, align 4 - br label %for.end - -if.end: ; preds = %for.body - br label %for.inc - -for.inc: ; preds = %if.end - %7 = load i32, i32* %x, align 4 - %inc = add nsw i32 %7, 1 - store i32 %inc, i32* %x, align 4 - br label %for.cond - -for.end: ; preds = %if.then, %for.cond - %8 = load i32, i32* %index, align 4 - %cmp2 = icmp eq i32 %8, -1 - br i1 %cmp2, label %if.then3, label %if.end4 - -if.then3: ; preds = %for.end - %9 = load i32, i32* %lengthCDF.addr, align 4 - %sub = sub nsw i32 %9, 1 - store i32 %sub, i32* %retval, align 4 - br label %return - -if.end4: ; preds = %for.end - %10 = load i32, i32* %index, align 4 - store i32 %10, i32* %retval, align 4 - br label %return - -return: ; preds = %if.end4, %if.then3 - %11 = load i32, i32* %retval, align 4 - ret i32 %11 -} - -; Function Attrs: convergent noinline nounwind optnone -define dso_local i32 @_Z12findIndexBinPdiid(double* %CDF, i32 %beginIndex, i32 %endIndex, double %value) #0 { -entry: - %retval = alloca i32, align 4 - %CDF.addr = alloca double*, align 8 - %beginIndex.addr = alloca i32, align 4 - %endIndex.addr = alloca i32, align 4 - %value.addr = alloca double, align 8 - %middleIndex = alloca i32, align 4 - store double* %CDF, double** %CDF.addr, align 8 - store i32 %beginIndex, i32* %beginIndex.addr, align 4 - store i32 %endIndex, i32* %endIndex.addr, align 4 - store double %value, double* %value.addr, align 8 - %0 = load i32, i32* %endIndex.addr, align 4 - %1 = load i32, i32* %beginIndex.addr, align 4 - %cmp = icmp slt i32 %0, %1 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - store i32 -1, i32* %retval, align 4 - br label %return - -if.end: ; preds = %entry - br label %while.cond - -while.cond: ; preds = %if.end34, %if.end - %2 = load i32, i32* %endIndex.addr, align 4 - %3 = load i32, i32* %beginIndex.addr, align 4 - %cmp1 = icmp sgt i32 %2, %3 - br i1 %cmp1, label %while.body, label %while.end35 - -while.body: ; preds = %while.cond - %4 = load i32, i32* %beginIndex.addr, align 4 - %5 = load i32, i32* %endIndex.addr, align 4 - %6 = load i32, i32* %beginIndex.addr, align 4 - %sub = sub nsw i32 %5, %6 - %div = sdiv i32 %sub, 2 - %add = add nsw i32 %4, %div - store i32 %add, i32* %middleIndex, align 4 - %7 = load double*, double** %CDF.addr, align 8 - %8 = load i32, i32* %middleIndex, align 4 - %idxprom = sext i32 %8 to i64 - %arrayidx = getelementptr inbounds double, double* %7, i64 %idxprom - %9 = load double, double* %arrayidx, align 8 - %10 = load double, double* %value.addr, align 8 - %cmp2 = fcmp oge double %9, %10 - br i1 %cmp2, label %if.then3, label %if.end26 - -if.then3: ; preds = %while.body - %11 = load i32, i32* %middleIndex, align 4 - %cmp4 = icmp eq i32 %11, 0 - br i1 %cmp4, label %if.then5, label %if.else - -if.then5: ; preds = %if.then3 - %12 = load i32, i32* %middleIndex, align 4 - store i32 %12, i32* %retval, align 4 - br label %return - -if.else: ; preds = %if.then3 - %13 = load double*, double** %CDF.addr, align 8 - %14 = load i32, i32* %middleIndex, align 4 - %sub6 = sub nsw i32 %14, 1 - %idxprom7 = sext i32 %sub6 to i64 - %arrayidx8 = getelementptr inbounds double, double* %13, i64 %idxprom7 - %15 = load double, double* %arrayidx8, align 8 - %16 = load double, double* %value.addr, align 8 - %cmp9 = fcmp olt double %15, %16 - br i1 %cmp9, label %if.then10, label %if.else11 - -if.then10: ; preds = %if.else - %17 = load i32, i32* %middleIndex, align 4 - store i32 %17, i32* %retval, align 4 - br label %return - -if.else11: ; preds = %if.else - %18 = load double*, double** %CDF.addr, align 8 - %19 = load i32, i32* %middleIndex, align 4 - %sub12 = sub nsw i32 %19, 1 - %idxprom13 = sext i32 %sub12 to i64 - %arrayidx14 = getelementptr inbounds double, double* %18, i64 %idxprom13 - %20 = load double, double* %arrayidx14, align 8 - %21 = load double, double* %value.addr, align 8 - %cmp15 = fcmp oeq double %20, %21 - br i1 %cmp15, label %if.then16, label %if.end23 - -if.then16: ; preds = %if.else11 - br label %while.cond17 - -while.cond17: ; preds = %while.body22, %if.then16 - %22 = load double*, double** %CDF.addr, align 8 - %23 = load i32, i32* %middleIndex, align 4 - %idxprom18 = sext i32 %23 to i64 - %arrayidx19 = getelementptr inbounds double, double* %22, i64 %idxprom18 - %24 = load double, double* %arrayidx19, align 8 - %25 = load double, double* %value.addr, align 8 - %cmp20 = fcmp oeq double %24, %25 - br i1 %cmp20, label %land.rhs, label %land.end - -land.rhs: ; preds = %while.cond17 - %26 = load i32, i32* %middleIndex, align 4 - %cmp21 = icmp sge i32 %26, 0 - br label %land.end - -land.end: ; preds = %land.rhs, %while.cond17 - %27 = phi i1 [ false, %while.cond17 ], [ %cmp21, %land.rhs ] - br i1 %27, label %while.body22, label %while.end - -while.body22: ; preds = %land.end - %28 = load i32, i32* %middleIndex, align 4 - %dec = add nsw i32 %28, -1 - store i32 %dec, i32* %middleIndex, align 4 - br label %while.cond17 - -while.end: ; preds = %land.end - %29 = load i32, i32* %middleIndex, align 4 - %inc = add nsw i32 %29, 1 - store i32 %inc, i32* %middleIndex, align 4 - %30 = load i32, i32* %middleIndex, align 4 - store i32 %30, i32* %retval, align 4 - br label %return - -if.end23: ; preds = %if.else11 - br label %if.end24 - -if.end24: ; preds = %if.end23 - br label %if.end25 - -if.end25: ; preds = %if.end24 - br label %if.end26 - -if.end26: ; preds = %if.end25, %while.body - %31 = load double*, double** %CDF.addr, align 8 - %32 = load i32, i32* %middleIndex, align 4 - %idxprom27 = sext i32 %32 to i64 - %arrayidx28 = getelementptr inbounds double, double* %31, i64 %idxprom27 - %33 = load double, double* %arrayidx28, align 8 - %34 = load double, double* %value.addr, align 8 - %cmp29 = fcmp ogt double %33, %34 - br i1 %cmp29, label %if.then30, label %if.else32 - -if.then30: ; preds = %if.end26 - %35 = load i32, i32* %middleIndex, align 4 - %sub31 = sub nsw i32 %35, 1 - store i32 %sub31, i32* %endIndex.addr, align 4 - br label %if.end34 - -if.else32: ; preds = %if.end26 - %36 = load i32, i32* %middleIndex, align 4 - %add33 = add nsw i32 %36, 1 - store i32 %add33, i32* %beginIndex.addr, align 4 - br label %if.end34 - -if.end34: ; preds = %if.else32, %if.then30 - br label %while.cond - -while.end35: ; preds = %while.cond - store i32 -1, i32* %retval, align 4 - br label %return - -return: ; preds = %while.end35, %while.end, %if.then10, %if.then5, %if.then - %37 = load i32, i32* %retval, align 4 - ret i32 %37 -} - -; Function Attrs: convergent noinline nounwind optnone -define dso_local void @_Z6kernelPdS_S_S_S_S_i(double* %arrayX, double* %arrayY, double* %CDF, double* %u, double* %xj, double* %yj, i32 %Nparticles) #0 { -entry: - %arrayX.addr = alloca double*, align 8 - %arrayY.addr = alloca double*, align 8 - %CDF.addr = alloca double*, align 8 - %u.addr = alloca double*, align 8 - %xj.addr = alloca double*, align 8 - %yj.addr = alloca double*, align 8 - %Nparticles.addr = alloca i32, align 4 - %block_id = alloca i32, align 4 - %i = alloca i32, align 4 - %index = alloca i32, align 4 - %x = alloca i32, align 4 - store double* %arrayX, double** %arrayX.addr, align 8 - store double* %arrayY, double** %arrayY.addr, align 8 - store double* %CDF, double** %CDF.addr, align 8 - store double* %u, double** %u.addr, align 8 - store double* %xj, double** %xj.addr, align 8 - store double* %yj, double** %yj.addr, align 8 - store i32 %Nparticles, i32* %Nparticles.addr, align 4 - %call = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #3 - store i32 %call, i32* %block_id, align 4 - %call1 = call i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv() #3 - %0 = load i32, i32* %block_id, align 4 - %mul = mul i32 %call1, %0 - %call2 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #3 - %add = add i32 %mul, %call2 - store i32 %add, i32* %i, align 4 - %1 = load i32, i32* %i, align 4 - %2 = load i32, i32* %Nparticles.addr, align 4 - %cmp = icmp slt i32 %1, %2 - br i1 %cmp, label %if.then, label %if.end19 - -if.then: ; preds = %entry - store i32 -1, i32* %index, align 4 - store i32 0, i32* %x, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %if.then - %3 = load i32, i32* %x, align 4 - %4 = load i32, i32* %Nparticles.addr, align 4 - %cmp3 = icmp slt i32 %3, %4 - br i1 %cmp3, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %5 = load double*, double** %CDF.addr, align 8 - %6 = load i32, i32* %x, align 4 - %idxprom = sext i32 %6 to i64 - %arrayidx = getelementptr inbounds double, double* %5, i64 %idxprom - %7 = load double, double* %arrayidx, align 8 - %8 = load double*, double** %u.addr, align 8 - %9 = load i32, i32* %i, align 4 - %idxprom4 = sext i32 %9 to i64 - %arrayidx5 = getelementptr inbounds double, double* %8, i64 %idxprom4 - %10 = load double, double* %arrayidx5, align 8 - %cmp6 = fcmp oge double %7, %10 - br i1 %cmp6, label %if.then7, label %if.end - -if.then7: ; preds = %for.body - %11 = load i32, i32* %x, align 4 - store i32 %11, i32* %index, align 4 - br label %for.end - -if.end: ; preds = %for.body - br label %for.inc - -for.inc: ; preds = %if.end - %12 = load i32, i32* %x, align 4 - %inc = add nsw i32 %12, 1 - store i32 %inc, i32* %x, align 4 - br label %for.cond - -for.end: ; preds = %if.then7, %for.cond - %13 = load i32, i32* %index, align 4 - %cmp8 = icmp eq i32 %13, -1 - br i1 %cmp8, label %if.then9, label %if.end10 - -if.then9: ; preds = %for.end - %14 = load i32, i32* %Nparticles.addr, align 4 - %sub = sub nsw i32 %14, 1 - store i32 %sub, i32* %index, align 4 - br label %if.end10 - -if.end10: ; preds = %if.then9, %for.end - %15 = load double*, double** %arrayX.addr, align 8 - %16 = load i32, i32* %index, align 4 - %idxprom11 = sext i32 %16 to i64 - %arrayidx12 = getelementptr inbounds double, double* %15, i64 %idxprom11 - %17 = load double, double* %arrayidx12, align 8 - %18 = load double*, double** %xj.addr, align 8 - %19 = load i32, i32* %i, align 4 - %idxprom13 = sext i32 %19 to i64 - %arrayidx14 = getelementptr inbounds double, double* %18, i64 %idxprom13 - store double %17, double* %arrayidx14, align 8 - %20 = load double*, double** %arrayY.addr, align 8 - %21 = load i32, i32* %index, align 4 - %idxprom15 = sext i32 %21 to i64 - %arrayidx16 = getelementptr inbounds double, double* %20, i64 %idxprom15 - %22 = load double, double* %arrayidx16, align 8 - %23 = load double*, double** %yj.addr, align 8 - %24 = load i32, i32* %i, align 4 - %idxprom17 = sext i32 %24 to i64 - %arrayidx18 = getelementptr inbounds double, double* %23, i64 %idxprom17 - store double %22, double* %arrayidx18, align 8 - br label %if.end19 - -if.end19: ; preds = %if.end10, %entry - ret void -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() - ret i32 %0 -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() - ret i32 %0 -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() - ret i32 %0 -} - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() #2 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x() #2 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() #2 - -attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_61" "target-features"="+ptx64,+sm_61" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { alwaysinline convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_61" "target-features"="+ptx64,+sm_61" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind readnone } -attributes #3 = { convergent nounwind } - -!llvm.module.flags = !{!0, !1, !2} -!nvvm.annotations = !{!3, !4, !5, !4, !6, !6, !6, !6, !7, !7, !6} -!llvm.ident = !{!8} -!nvvmir.version = !{!9} - -!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]} -!1 = !{i32 1, !"wchar_size", i32 4} -!2 = !{i32 4, !"nvvm-reflect-ftz", i32 0} -!3 = !{void (double*, double*, double*, double*, double*, double*, i32)* @_Z6kernelPdS_S_S_S_S_i, !"kernel", i32 1} -!4 = !{null, !"align", i32 8} -!5 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080} -!6 = !{null, !"align", i32 16} -!7 = !{null, !"align", i32 16, !"align", i32 65552, !"align", i32 131088} -!8 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"} -!9 = !{i32 1, i32 4} diff --git a/examples/particlefilter/ex_particle_CUDA_naive_seq-host-x86_64-unknown-linux-gnu.ll b/examples/particlefilter/ex_particle_CUDA_naive_seq-host-x86_64-unknown-linux-gnu.ll deleted file mode 100644 index a5835c5..0000000 --- a/examples/particlefilter/ex_particle_CUDA_naive_seq-host-x86_64-unknown-linux-gnu.ll +++ /dev/null @@ -1,2920 +0,0 @@ -; ModuleID = 'ex_particle_CUDA_naive_seq-host-x86_64-unknown-linux-gnu.bc' -source_filename = "ex_particle_CUDA_naive_seq.cu" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -%struct.timeval = type { i64, i64 } -%struct.timezone = type { i32, i32 } -%struct.dim3 = type { i32, i32, i32 } -%struct.CUstream_st = type opaque - -$_ZSt3powdi = comdat any - -$_ZSt4fabsIiEN9__gnu_cxx11__enable_ifIXsr12__is_integerIT_EE7__valueEdE6__typeES2_ = comdat any - -$_ZN4dim3C2Ejjj = comdat any - -@M = dso_local global i64 2147483647, align 8 -@A = dso_local global i32 1103515245, align 4 -@C = dso_local global i32 12345, align 4 -@.str = private unnamed_addr constant [17 x i8] c"\0ACUDA error: %s\0A\00", align 1 -@.str.1 = private unnamed_addr constant [32 x i8] c"TIME TO GET NEIGHBORS TOOK: %f\0A\00", align 1 -@.str.2 = private unnamed_addr constant [29 x i8] c"TIME TO GET WEIGHTSTOOK: %f\0A\00", align 1 -@.str.3 = private unnamed_addr constant [28 x i8] c"TIME TO SET ERROR TOOK: %f\0A\00", align 1 -@.str.4 = private unnamed_addr constant [34 x i8] c"TIME TO GET LIKELIHOODS TOOK: %f\0A\00", align 1 -@.str.5 = private unnamed_addr constant [26 x i8] c"TIME TO GET EXP TOOK: %f\0A\00", align 1 -@.str.6 = private unnamed_addr constant [30 x i8] c"TIME TO SUM WEIGHTS TOOK: %f\0A\00", align 1 -@.str.7 = private unnamed_addr constant [36 x i8] c"TIME TO NORMALIZE WEIGHTS TOOK: %f\0A\00", align 1 -@.str.8 = private unnamed_addr constant [30 x i8] c"TIME TO MOVE OBJECT TOOK: %f\0A\00", align 1 -@.str.9 = private unnamed_addr constant [9 x i8] c"XE: %lf\0A\00", align 1 -@.str.10 = private unnamed_addr constant [9 x i8] c"YE: %lf\0A\00", align 1 -@.str.11 = private unnamed_addr constant [5 x i8] c"%lf\0A\00", align 1 -@.str.12 = private unnamed_addr constant [31 x i8] c"TIME TO CALC CUM SUM TOOK: %f\0A\00", align 1 -@.str.13 = private unnamed_addr constant [25 x i8] c"TIME TO CALC U TOOK: %f\0A\00", align 1 -@.str.14 = private unnamed_addr constant [26 x i8] c"SENDING TO GPU TOOK: %lf\0A\00", align 1 -@.str.15 = private unnamed_addr constant [21 x i8] c"CUDA EXEC TOOK: %lf\0A\00", align 1 -@.str.16 = private unnamed_addr constant [33 x i8] c"SENDING BACK FROM GPU TOOK: %lf\0A\00", align 1 -@.str.17 = private unnamed_addr constant [41 x i8] c"TIME TO CALC NEW ARRAY X AND Y TOOK: %f\0A\00", align 1 -@.str.18 = private unnamed_addr constant [32 x i8] c"TIME TO RESET WEIGHTS TOOK: %f\0A\00", align 1 -@.str.19 = private unnamed_addr constant [8 x i8] c"output\0A\00", align 1 -@.str.20 = private unnamed_addr constant [5 x i8] c"%lf \00", align 1 -@.str.21 = private unnamed_addr constant [56 x i8] c"naive.out -x -y -z -np \00", align 1 -@.str.22 = private unnamed_addr constant [4 x i8] c"%s\0A\00", align 1 -@.str.23 = private unnamed_addr constant [3 x i8] c"-x\00", align 1 -@.str.24 = private unnamed_addr constant [3 x i8] c"-y\00", align 1 -@.str.25 = private unnamed_addr constant [3 x i8] c"-z\00", align 1 -@.str.26 = private unnamed_addr constant [4 x i8] c"-np\00", align 1 -@.str.27 = private unnamed_addr constant [3 x i8] c"%d\00", align 1 -@.str.28 = private unnamed_addr constant [31 x i8] c"ERROR: dimX input is incorrect\00", align 1 -@.str.29 = private unnamed_addr constant [18 x i8] c"dimX must be > 0\0A\00", align 1 -@.str.30 = private unnamed_addr constant [31 x i8] c"ERROR: dimY input is incorrect\00", align 1 -@.str.31 = private unnamed_addr constant [18 x i8] c"dimY must be > 0\0A\00", align 1 -@.str.32 = private unnamed_addr constant [43 x i8] c"ERROR: Number of frames input is incorrect\00", align 1 -@.str.33 = private unnamed_addr constant [30 x i8] c"number of frames must be > 0\0A\00", align 1 -@.str.34 = private unnamed_addr constant [46 x i8] c"ERROR: Number of particles input is incorrect\00", align 1 -@.str.35 = private unnamed_addr constant [33 x i8] c"Number of particles must be > 0\0A\00", align 1 -@.str.36 = private unnamed_addr constant [24 x i8] c"VIDEO SEQUENCE TOOK %f\0A\00", align 1 -@.str.37 = private unnamed_addr constant [25 x i8] c"PARTICLE FILTER TOOK %f\0A\00", align 1 -@.str.38 = private unnamed_addr constant [24 x i8] c"ENTIRE PROGRAM TOOK %f\0A\00", align 1 -@0 = private unnamed_addr constant [23 x i8] c"_Z6kernelPdS_S_S_S_S_i\00", align 1 -@1 = private constant [11281 x i8] c"P\EDU\BA\01\00\10\00\00,\00\00\00\00\00\00\02\00\01\01@\00\00\00\08\1F\00\00\00\00\00\00\00\00\00\00\00\00\00\00\07\00\01\00=\00\00\00\00\00\00\00\00\00\00\00\11\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\7FELF\02\01\013\07\00\00\00\00\00\00\00\02\00\BE\00e\00\00\00\00\00\00\00\00\00\00\00`\1E\00\00\00\00\00\00 \1C\00\00\00\00\00\00=\05=\00@\008\00\03\00@\00\09\00\01\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00.text._Z6kernelPdS_S_S_S_S_i\00.nv.info._Z6kernelPdS_S_S_S_S_i\00.nv.shared._Z6kernelPdS_S_S_S_S_i\00.nv.global\00.nv.constant0._Z6kernelPdS_S_S_S_S_i\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00_Z6kernelPdS_S_S_S_S_i\00.text._Z6kernelPdS_S_S_S_S_i\00.nv.info._Z6kernelPdS_S_S_S_S_i\00.nv.shared._Z6kernelPdS_S_S_S_S_i\00.nv.global\00blockIdx\00blockDim\00threadIdx\00.nv.constant0._Z6kernelPdS_S_S_S_S_i\00_param\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00I\00\00\00\03\00\07\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\A8\00\00\00\03\00\08\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\B3\00\00\00\01\00\08\00\01\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\BC\00\00\00\01\00\08\00\02\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\C5\00\00\00\01\00\08\00\00\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\CF\00\00\00\03\00\06\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\002\00\00\00\12\10\07\00\00\00\00\00\00\00\00\00\00\17\00\00\00\00\00\00\04/\08\00\07\00\00\00\13\00\00\00\04#\08\00\07\00\00\00\00\00\00\00\04\12\08\00\07\00\00\00H\00\00\00\04\11\08\00\07\00\00\00H\00\00\00\010\00\00\01*\00\00\04\0A\08\00\06\00\00\00@\014\00\03\194\00\04\17\0C\00\00\00\00\00\06\000\00\00\F0\11\00\04\17\0C\00\00\00\00\00\05\00(\00\00\F0!\00\04\17\0C\00\00\00\00\00\04\00 \00\00\F0!\00\04\17\0C\00\00\00\00\00\03\00\18\00\00\F0!\00\04\17\0C\00\00\00\00\00\02\00\10\00\00\F0!\00\04\17\0C\00\00\00\00\00\01\00\08\00\00\F0!\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0!\00\03\1B\FF\00\04\1D\04\00\A8\06\00\00\04\1C\04\00\D0\16\00\00\04\1E\04\00`\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\EF\1F\E0\FD\03!\00\D3rd<3>;\0A\0Amov.u2\00\1B,e\00b;\0Acvta\8D\00\04%\00\13,\\\00\22ld\C8\00\02\18\00nrd2, [\CE\00\1E])\00\1F1)\00\01a0];\0Ast#\00\81[%SP+0],,\00\0A\16\00\128\16\00\222;\B6\00\01\D8\00a1, 999(\00\02g\00\00\EF\00\18[\9E\01\03M\00\AF1;\0Aret;\0A\0A}\D6\01\1A\FE\02FuncGetAttributes\E1\01\0D#\00\0E\EC\01\0F+\00\06\0F\F7\01\1B\1F1\F7\01Q\1F1\F7\01!\0E\D9\00\0F\02\02\0F\0E8\01\0F\0D\02\8DhDevice\B4\00\0E\0E\02\0E$\00\0F\0F\02\00/32,\00\0B\1F1,\00\18\1F2<\02\13\1F2<\02\1F\1D4<\02\1F2<\02\0C\1F2<\02\13\01_\00\04;\02\0F\D9\00\07\1D]4\00\1F14\00\06\0Fp\02\10\0E\9A\01\0Fq\02\12(32q\02\0B\15\00!12\16\00\09\86\02\1F3\86\02\15\1F3\86\02#2Get\CB\00\0E}\02\05\1B\00\04\DA\00\0F\1C\02\13?3[8W\04.\0F\1B\02\0D\1F3W\04\19\04\B3\01\0D\D0\00\0F\AA\01\06\0F\05\04W\F0\04OccupancyMaxActiveBV\08\FE\03sPerMultiprocessor\9F\01\0F;\00\16\0EB\06\0FC\00%\0EJ\04\0FC\00\1E\1F2\86\00/\1F3\88\02\13O4[32\89\02\1C\1D3\89\02\1F4\89\02\0C\1F4\89\02\19\133\89\02\0F\F1\00\1E\0F\BC\04\00\0FK\00$/2]w\07\00\0FL\00$\0F\1F\05\01\0F\98\00%\0F\A7\07\1D\097\05\186M\05\04,\00\2224-\00\183\CF\03\1F2\CF\03\15\1F2\CF\03L\9FWithFlags\D8\03(\05D\00\0E\E1\03\0FL\00'\0F\EA\030\0CL\00\1F2\98\008\1F3\98\008\1F4H\04\13O5[40\EC\08.\0FH\04\0D\1F5\EC\08\1C\0F\F9\00+\1F]\9C\040\0D\9A\01\0F\A5\040\0D:\02\0F\AE\041\0D\DB\02\0F\B7\041\0D|\03\0F\C0\04I\08-\00\1F3$\0A6\7Fvisible\D9\04\0F\FA\05_Z12findIndexSeqPdid\B7\04\0F\22\00\01\0F\A9\0C\03\0F*\00\08/1,T\00\0E\0F\A5\0C\1B\1F6\B9\03\18xpred %p\BA\03\02\CB\03\161\12\00\10f4\00/fd\DE\03\00\1F6\DE\03\0C\1F6\DE\03\12\03`\00\00\8C\02\0F\FD\00\01\0F\C9\0C\07\0F2\00\0C\0F\F1\02\00\0Fe\00\0D/0]\B9\02\02\1F1\B9\02\08\1Af\B9\02;fd1y\07/-1\CA\02\12\1B0&\00\146\BA\02\F2\01bra.uni LBB6_1;\0A\08\00\10:\DB\00\02=\00%4,3\00\16;\16\00%5,\AD\00\B0;\0Asetp.ge.s\1B\002p1,6\00\D2%r5;\0A@%p1 bra`\00\1B6p\00\132p\00\122p\00\06\8B\04\02\1E\01\01p\00\01U\00\02J\05\05\87\004shl&\02#4,\1E\00\823;\0Aadd.s\17\00#5,K\00\00#\00\01H\00\03\08\02\01^\00Hrd5]\15\00%3,e\01\03\CE\002ltu\1D\003p2,7\00\00(\00\01\D1\00\162\D1\00\1B4\D1\00\133\D1\00\173A\01\198\BA\00\0E\AB\01\1B8D\00\136D\00*4:\18\00\135\18\00\175\\\00\186\\\00$ad.\01\227,\1C\00\1F1\F6\01\02\1F7\F6\01\04\176Y\00%9,\A4\00\03\12\01\14n\E0\01\223, \00!-1\0E\01\163\0E\01\0C\CA\00\137Y\00\177Y\00(11:\02\06\B3\00312,\1E\00\0C\D2\02\03\0A\0E\1B2]\00\139]\00\188]\00\190\B7\00\0EE\00\1F0E\00\04\189E\00\04\0E\02/0]\A2\0E\11\0F\FE\050oBinPdi\FF\05\0A\04#\00\0F\00\06\10\0B+\00\1F1+\00\17/2,\81\00\0F\0F.\0E\1B\1F7-\06 ,13.\06\1E3.\06\1F1\DE\10\00\0F\DF\10\0D\1F70\06*\0C\01\01\1E]B\0A\0F3\00\03\0Fd\06\1A\0C\BD\01\0Fe\06\11\0C\1C\02\0Ff\06\1F\05\16\00\152\A8\0D\0F|\06\04&ldT\06\042\00\1E;-\06\0F\17\06\00\223,6\00:%r48\04*7_\DB\03\137\91\04\107\87\06\07\C5\06\1F2\1B\04\05\1C3@\00#24A\00*2:\19\00\133\18\00\08\9F\05\1F5\C9\00\02\1D6\C9\00\14l\C9\00\02\DB\06\01b\05\01\C9\00\174\C9\00\1B3\8A\00\04\89\00\1F4\10\06\00\01[\00\0Dq\05\01\87\00Usub.s\D2\04\01\1D\00\01(\067shrG\05\02\1E\00/31G\05\00\120M\05\022\00\15s\D7\04\02 \00\0A1\00\01\CF\00\148\CB\04\0F\15\0B\00?14;\9E\07\17\1F2\9E\07\\\1D5\9E\07\165\8F\01+19\8F\01\135\8F\01\08C\07/15\EB\06\07#6,!\00\110[\00\166[\00\1B7Z\00\136Z\00\08D\07/31\8E\06\0B/31\B8\02\05\187\10\09\1A6r\01\03\B6\00\186\\\00\07G\07#7,\1E\00\12-%\1A\00s\01\04\A6\01\02\1C\00\197\A2\01#8,\1A\00\0A\A2\01#9,{\00\00#\00\08\8D\01\124\A2\01\199\A2\01\1D5\A2\01$ge\A2\01#7,7\00\00(\00\01G\01\167G\01\0C\A1\01\138\01\01\08\D5\07\1F3\D5\07\0C/30G\01\05\189G\01/10H\01\02\1F8H\01\04#9,\1E\00\0FH\01\00\03}\03\199I\01\02z\03\01\1C\00\0AK\01\02y\03\12d\94\038d129\01\126N\01)13O\01\1D7O\01\15nO\01#8,8\00\00(\00\01O\01\178\F1\02\1C6J\05\140K\05\1B0\0B\05\151e\05\191&\01\1F8\E0\03\02/19\E1\03\04\132\D6\00\1C9\F6\00421,P\00\01'\00\08\E0\00\02\8B\01\00$\00\09\F7\00/11\F8\00\08\01t\01\02;\00\01+\00\02\E0\0C\034\08\00\16\00\1A0\12\00!2,\18\00\02\82\0C\171#\01\0C\A3\05$12\09\01\172u\02-24\16\04(gt\E1\0C\2224\1B\02\0Et\00\0C`\00\04u\06813:.\00\02-\00W2;\0A@!S\0CL7_15A\00\04\CF\02\181F\06\1F2\B9\02\04329,\1E\00\0F\1A\0E\05/9;\0A\02\05\08\17\05/25`\00\04#6,\1E\00\0F_\00\04\176^\03/27\BB\03\0B/27\BB\03\05,16\B0\02\147\1A\00\1C7\1A\00\045\04,18\1A\00\149\1A\00\0A\0A\04\1F4\E4\02\03\1F5\E4\02\04\00N\05\03 \00\0B\DA\03$7,P\00\01'\00\08\CD\02\138\DA\03\197\E3\02\1E9\CB\06\05\E2\02#9,8\00\00(\00\01\B8\02\179Z\08\0D,\06\04\DA\03(20\B6\01\1F2\B6\01\04\01\84\04\1E2C\09\05\F2\09\0C\17\03\142\17\03\182\0D\10/20`\00\04\02\14\03\1D0\16\02\03g\0A/21_\00\06\0F\A4\09\05(23\FE\09\1F7\FD\09\05\0FB\02\06\182.\03\1F3\D4\0D\1A\1F3\D4\0D\03\F4\04entry _Z6kernelPdS_\02\00\16i\BB\0D\00e\02\0F$\00\03\0E\BC\0D\0F,\00\0E\1F1,\00\18\1F2,\00\18\1F3,\00\18\1F4,\00\18\175,\00/32,\00\0B\1F6C\0E\13O8[72C\0E\1D\1C5B\0E-20B\0E\1E5N\18\0Fw#\0D\1F8O\18\18\02\A6\0D\0F\FF\00\09\0F\DB\0D\00\1F65\00\0D\1F55\00\00\1F55\00\0D\0F\99\18\01\1F45\00\0D\1F3\CE\18\01\0F5\00\0D\0FZ\18\02\0F5\00\06\0F\E4\0E\0E\0FE\03\09\13]\8C\01#to$(\04;\00\02\94\05\04\1C\16\0A\1C\00\03\ED\0A\0F;\00\05\02\F5\0A\1F5;\00\02\03\BF\09\1F9<\00\05$11\CE\0C\0F=\00\01\04\1C\0A\0F>\00\06\143\96\18\0F>\00\01\03`\06\1F3>\00\06\03P\0D\0F>\00\03\04\BE\06\0F>\00\06\03\C6\06\0F>\00\03\023\01/17\89\19\03\1F8k\10\03\1A6\17\00\03\05\06?d14\8D\19\03*12\18\00\03K\08:d10\18\00\134w\00\1A8\09\06\154\F7#\09\1A\17\8A%ctaid.x-\00\1F5 \17\032%nt,\00\0D\CB\10\105\D5\02cmul.lo\ED\06\185\CA\10\06F\00\00h\01\03E\00\0B\8A\15\04-\10\09\88\00\05\8C\15\0D\19\10\1F5\19\10\02/48j\17\03\01\A1\01\1A9j\17+8_\EA\14\138T\11\1A8T\11.10W\07\166+\15\07\B1#\1D1C\18464]e\10\09i\00\132i\00\08N\0B\00\00\03\045\00\1E;j\15\0F\DC\00\01&2,\CE\10\0Bw\17\1B8~\0F\1384\0898_3\E7\09\1B9i\11\03\E8\09(20\8D\00\08\CC\0C$1, \00\0B\CC\0C\152\E5\0C\01'\00\09\B5\0C\03\CB\0C)2]&\11\09\B4\10\07|\00\184\E4\01\08|\00\03\C3\03\1D4|\00$6,Q\00\01'\00\0F.\11\00.26\18\11#3,\9D\00\00'\00\0Ap\13;8_5?\01\134?\01/4:a\10\00\02&\01\0F!\02\00\1B6F\00\137F\00*5:\18\00\136\18\00\08\1B\11(14^\00\07w\0F#5,\1E\00\1F1n\02\03\1F5n\02\04\09c\18%7,\AA\00\0C\D2\11#4,!\00*-1\BD\13\1B8\8C\10\138\8C\10\1A8a\18\188\B2\02\0E/\10\0FO\03\07\0C^\00\139^\00\09\9A\0C\050\0D\1A06\02\198\D1\00\076\02\03\92\02\1C86\02\004\11\03P\00\01'\00\0DO\13\00#\00\09\B2\02\0A\AD\12\07\DA\13\1F2\B2\02\04\00S\0B\03 \00\0B|\00$4,Q\00\01'\00\07L\16\00\1D\00\14]\AC\13\07|\00\1F5U\14\03\1F6\F7\00\04437, \00\0B{\00$8,P\00\01'\00\0F\B9\12\00*38\F7\00\05_\05\0As\01/40\F7\00\04\134\E8\07\0D%\04442,Q\00\01'\00\09\F7\00\2242\F7\00\1C4\96\05\140\97\05\B00:\0Aret;\0A\0A}\0A\00\00\00\00", section ".nv_fatbin", align 8 -@__cuda_fatbin_wrapper = internal constant { i32, i32, i8*, i8* } { i32 1180844977, i32 1, i8* getelementptr inbounds ([11281 x i8], [11281 x i8]* @1, i64 0, i64 0), i8* null }, section ".nvFatBinSegment", align 8 -@__cuda_gpubin_handle = internal global i8** null, align 8 -@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* bitcast (void (i8*)* @__cuda_module_ctor to void ()*), i8* null }] - -; Function Attrs: noinline nounwind optnone uwtable -define dso_local i64 @_Z8get_timev() #0 { -entry: - %tv = alloca %struct.timeval, align 8 - %call = call i32 @gettimeofday(%struct.timeval* %tv, %struct.timezone* null) #10 - %tv_sec = getelementptr inbounds %struct.timeval, %struct.timeval* %tv, i32 0, i32 0 - %0 = load i64, i64* %tv_sec, align 8 - %mul = mul nsw i64 %0, 1000000 - %tv_usec = getelementptr inbounds %struct.timeval, %struct.timeval* %tv, i32 0, i32 1 - %1 = load i64, i64* %tv_usec, align 8 - %add = add nsw i64 %mul, %1 - ret i64 %add -} - -; Function Attrs: nounwind -declare dso_local i32 @gettimeofday(%struct.timeval*, %struct.timezone*) #1 - -; Function Attrs: noinline nounwind optnone uwtable -define dso_local float @_Z12elapsed_timexx(i64 %start_time, i64 %end_time) #0 { -entry: - %start_time.addr = alloca i64, align 8 - %end_time.addr = alloca i64, align 8 - store i64 %start_time, i64* %start_time.addr, align 8 - store i64 %end_time, i64* %end_time.addr, align 8 - %0 = load i64, i64* %end_time.addr, align 8 - %1 = load i64, i64* %start_time.addr, align 8 - %sub = sub nsw i64 %0, %1 - %conv = sitofp i64 %sub to float - %div = fdiv float %conv, 1.000000e+06 - ret float %div -} - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z11check_error9cudaError(i32 %e) #2 { -entry: - %e.addr = alloca i32, align 4 - store i32 %e, i32* %e.addr, align 4 - %0 = load i32, i32* %e.addr, align 4 - %cmp = icmp ne i32 %0, 0 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - %1 = load i32, i32* %e.addr, align 4 - %call = call i8* @cudaGetErrorString(i32 %1) - %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @.str, i64 0, i64 0), i8* %call) - call void @exit(i32 1) #11 - unreachable - -if.end: ; preds = %entry - ret void -} - -declare dso_local i32 @printf(i8*, ...) #3 - -declare dso_local i8* @cudaGetErrorString(i32) #3 - -; Function Attrs: noreturn nounwind -declare dso_local void @exit(i32) #4 - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z6kernelPdS_S_S_S_S_i(double* %arrayX, double* %arrayY, double* %CDF, double* %u, double* %xj, double* %yj, i32 %Nparticles) #2 { -entry: - %arrayX.addr = alloca double*, align 8 - %arrayY.addr = alloca double*, align 8 - %CDF.addr = alloca double*, align 8 - %u.addr = alloca double*, align 8 - %xj.addr = alloca double*, align 8 - %yj.addr = alloca double*, align 8 - %Nparticles.addr = alloca i32, align 4 - %grid_dim = alloca %struct.dim3, align 8 - %block_dim = alloca %struct.dim3, align 8 - %shmem_size = alloca i64, align 8 - %stream = alloca i8*, align 8 - %grid_dim.coerce = alloca { i64, i32 }, align 8 - %block_dim.coerce = alloca { i64, i32 }, align 8 - store double* %arrayX, double** %arrayX.addr, align 8 - store double* %arrayY, double** %arrayY.addr, align 8 - store double* %CDF, double** %CDF.addr, align 8 - store double* %u, double** %u.addr, align 8 - store double* %xj, double** %xj.addr, align 8 - store double* %yj, double** %yj.addr, align 8 - store i32 %Nparticles, i32* %Nparticles.addr, align 4 - %kernel_args = alloca i8*, i64 7, align 16 - %0 = bitcast double** %arrayX.addr to i8* - %1 = getelementptr i8*, i8** %kernel_args, i32 0 - store i8* %0, i8** %1 - %2 = bitcast double** %arrayY.addr to i8* - %3 = getelementptr i8*, i8** %kernel_args, i32 1 - store i8* %2, i8** %3 - %4 = bitcast double** %CDF.addr to i8* - %5 = getelementptr i8*, i8** %kernel_args, i32 2 - store i8* %4, i8** %5 - %6 = bitcast double** %u.addr to i8* - %7 = getelementptr i8*, i8** %kernel_args, i32 3 - store i8* %6, i8** %7 - %8 = bitcast double** %xj.addr to i8* - %9 = getelementptr i8*, i8** %kernel_args, i32 4 - store i8* %8, i8** %9 - %10 = bitcast double** %yj.addr to i8* - %11 = getelementptr i8*, i8** %kernel_args, i32 5 - store i8* %10, i8** %11 - %12 = bitcast i32* %Nparticles.addr to i8* - %13 = getelementptr i8*, i8** %kernel_args, i32 6 - store i8* %12, i8** %13 - %14 = call i32 @__cudaPopCallConfiguration(%struct.dim3* %grid_dim, %struct.dim3* %block_dim, i64* %shmem_size, i8** %stream) - %15 = load i64, i64* %shmem_size, align 8 - %16 = load i8*, i8** %stream, align 8 - %17 = bitcast { i64, i32 }* %grid_dim.coerce to i8* - %18 = bitcast %struct.dim3* %grid_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %17, i8* align 8 %18, i64 12, i1 false) - %19 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 0 - %20 = load i64, i64* %19, align 8 - %21 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 1 - %22 = load i32, i32* %21, align 8 - %23 = bitcast { i64, i32 }* %block_dim.coerce to i8* - %24 = bitcast %struct.dim3* %block_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %23, i8* align 8 %24, i64 12, i1 false) - %25 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 0 - %26 = load i64, i64* %25, align 8 - %27 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 1 - %28 = load i32, i32* %27, align 8 - %29 = bitcast i8* %16 to %struct.CUstream_st* - %call = call i32 @cudaLaunchKernel(i8* bitcast (void (double*, double*, double*, double*, double*, double*, i32)* @_Z6kernelPdS_S_S_S_S_i to i8*), i64 %20, i32 %22, i64 %26, i32 %28, i8** %kernel_args, i64 %15, %struct.CUstream_st* %29) - br label %setup.end - -setup.end: ; preds = %entry - ret void -} - -declare dso_local i32 @__cudaPopCallConfiguration(%struct.dim3*, %struct.dim3*, i64*, i8**) - -declare dso_local i32 @cudaLaunchKernel(i8*, i64, i32, i64, i32, i8**, i64, %struct.CUstream_st*) - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #5 - -; Function Attrs: noinline nounwind optnone uwtable -define dso_local double @_Z11roundDoubled(double %value) #0 { -entry: - %retval = alloca double, align 8 - %value.addr = alloca double, align 8 - %newValue = alloca i32, align 4 - store double %value, double* %value.addr, align 8 - %0 = load double, double* %value.addr, align 8 - %conv = fptosi double %0 to i32 - store i32 %conv, i32* %newValue, align 4 - %1 = load double, double* %value.addr, align 8 - %2 = load i32, i32* %newValue, align 4 - %conv1 = sitofp i32 %2 to double - %sub = fsub contract double %1, %conv1 - %cmp = fcmp olt double %sub, 5.000000e-01 - br i1 %cmp, label %if.then, label %if.else - -if.then: ; preds = %entry - %3 = load i32, i32* %newValue, align 4 - %conv2 = sitofp i32 %3 to double - store double %conv2, double* %retval, align 8 - br label %return - -if.else: ; preds = %entry - %4 = load i32, i32* %newValue, align 4 - %inc = add nsw i32 %4, 1 - store i32 %inc, i32* %newValue, align 4 - %conv3 = sitofp i32 %4 to double - store double %conv3, double* %retval, align 8 - br label %return - -return: ; preds = %if.else, %if.then - %5 = load double, double* %retval, align 8 - ret double %5 -} - -; Function Attrs: noinline nounwind optnone uwtable -define dso_local void @_Z5setIfiiPiS_S_S_(i32 %testValue, i32 %newValue, i32* %array3D, i32* %dimX, i32* %dimY, i32* %dimZ) #0 { -entry: - %testValue.addr = alloca i32, align 4 - %newValue.addr = alloca i32, align 4 - %array3D.addr = alloca i32*, align 8 - %dimX.addr = alloca i32*, align 8 - %dimY.addr = alloca i32*, align 8 - %dimZ.addr = alloca i32*, align 8 - %x = alloca i32, align 4 - %y = alloca i32, align 4 - %z = alloca i32, align 4 - store i32 %testValue, i32* %testValue.addr, align 4 - store i32 %newValue, i32* %newValue.addr, align 4 - store i32* %array3D, i32** %array3D.addr, align 8 - store i32* %dimX, i32** %dimX.addr, align 8 - store i32* %dimY, i32** %dimY.addr, align 8 - store i32* %dimZ, i32** %dimZ.addr, align 8 - store i32 0, i32* %x, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc21, %entry - %0 = load i32, i32* %x, align 4 - %1 = load i32*, i32** %dimX.addr, align 8 - %2 = load i32, i32* %1, align 4 - %cmp = icmp slt i32 %0, %2 - br i1 %cmp, label %for.body, label %for.end23 - -for.body: ; preds = %for.cond - store i32 0, i32* %y, align 4 - br label %for.cond1 - -for.cond1: ; preds = %for.inc18, %for.body - %3 = load i32, i32* %y, align 4 - %4 = load i32*, i32** %dimY.addr, align 8 - %5 = load i32, i32* %4, align 4 - %cmp2 = icmp slt i32 %3, %5 - br i1 %cmp2, label %for.body3, label %for.end20 - -for.body3: ; preds = %for.cond1 - store i32 0, i32* %z, align 4 - br label %for.cond4 - -for.cond4: ; preds = %for.inc, %for.body3 - %6 = load i32, i32* %z, align 4 - %7 = load i32*, i32** %dimZ.addr, align 8 - %8 = load i32, i32* %7, align 4 - %cmp5 = icmp slt i32 %6, %8 - br i1 %cmp5, label %for.body6, label %for.end - -for.body6: ; preds = %for.cond4 - %9 = load i32*, i32** %array3D.addr, align 8 - %10 = load i32, i32* %x, align 4 - %11 = load i32*, i32** %dimY.addr, align 8 - %12 = load i32, i32* %11, align 4 - %mul = mul nsw i32 %10, %12 - %13 = load i32*, i32** %dimZ.addr, align 8 - %14 = load i32, i32* %13, align 4 - %mul7 = mul nsw i32 %mul, %14 - %15 = load i32, i32* %y, align 4 - %16 = load i32*, i32** %dimZ.addr, align 8 - %17 = load i32, i32* %16, align 4 - %mul8 = mul nsw i32 %15, %17 - %add = add nsw i32 %mul7, %mul8 - %18 = load i32, i32* %z, align 4 - %add9 = add nsw i32 %add, %18 - %idxprom = sext i32 %add9 to i64 - %arrayidx = getelementptr inbounds i32, i32* %9, i64 %idxprom - %19 = load i32, i32* %arrayidx, align 4 - %20 = load i32, i32* %testValue.addr, align 4 - %cmp10 = icmp eq i32 %19, %20 - br i1 %cmp10, label %if.then, label %if.end - -if.then: ; preds = %for.body6 - %21 = load i32, i32* %newValue.addr, align 4 - %22 = load i32*, i32** %array3D.addr, align 8 - %23 = load i32, i32* %x, align 4 - %24 = load i32*, i32** %dimY.addr, align 8 - %25 = load i32, i32* %24, align 4 - %mul11 = mul nsw i32 %23, %25 - %26 = load i32*, i32** %dimZ.addr, align 8 - %27 = load i32, i32* %26, align 4 - %mul12 = mul nsw i32 %mul11, %27 - %28 = load i32, i32* %y, align 4 - %29 = load i32*, i32** %dimZ.addr, align 8 - %30 = load i32, i32* %29, align 4 - %mul13 = mul nsw i32 %28, %30 - %add14 = add nsw i32 %mul12, %mul13 - %31 = load i32, i32* %z, align 4 - %add15 = add nsw i32 %add14, %31 - %idxprom16 = sext i32 %add15 to i64 - %arrayidx17 = getelementptr inbounds i32, i32* %22, i64 %idxprom16 - store i32 %21, i32* %arrayidx17, align 4 - br label %if.end - -if.end: ; preds = %if.then, %for.body6 - br label %for.inc - -for.inc: ; preds = %if.end - %32 = load i32, i32* %z, align 4 - %inc = add nsw i32 %32, 1 - store i32 %inc, i32* %z, align 4 - br label %for.cond4 - -for.end: ; preds = %for.cond4 - br label %for.inc18 - -for.inc18: ; preds = %for.end - %33 = load i32, i32* %y, align 4 - %inc19 = add nsw i32 %33, 1 - store i32 %inc19, i32* %y, align 4 - br label %for.cond1 - -for.end20: ; preds = %for.cond1 - br label %for.inc21 - -for.inc21: ; preds = %for.end20 - %34 = load i32, i32* %x, align 4 - %inc22 = add nsw i32 %34, 1 - store i32 %inc22, i32* %x, align 4 - br label %for.cond - -for.end23: ; preds = %for.cond - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define dso_local double @_Z5randuPii(i32* %seed, i32 %index) #0 { -entry: - %seed.addr = alloca i32*, align 8 - %index.addr = alloca i32, align 4 - %num = alloca i32, align 4 - store i32* %seed, i32** %seed.addr, align 8 - store i32 %index, i32* %index.addr, align 4 - %0 = load i32, i32* @A, align 4 - %1 = load i32*, i32** %seed.addr, align 8 - %2 = load i32, i32* %index.addr, align 4 - %idxprom = sext i32 %2 to i64 - %arrayidx = getelementptr inbounds i32, i32* %1, i64 %idxprom - %3 = load i32, i32* %arrayidx, align 4 - %mul = mul nsw i32 %0, %3 - %4 = load i32, i32* @C, align 4 - %add = add nsw i32 %mul, %4 - store i32 %add, i32* %num, align 4 - %5 = load i32, i32* %num, align 4 - %conv = sext i32 %5 to i64 - %6 = load i64, i64* @M, align 8 - %rem = srem i64 %conv, %6 - %conv1 = trunc i64 %rem to i32 - %7 = load i32*, i32** %seed.addr, align 8 - %8 = load i32, i32* %index.addr, align 4 - %idxprom2 = sext i32 %8 to i64 - %arrayidx3 = getelementptr inbounds i32, i32* %7, i64 %idxprom2 - store i32 %conv1, i32* %arrayidx3, align 4 - %9 = load i32*, i32** %seed.addr, align 8 - %10 = load i32, i32* %index.addr, align 4 - %idxprom4 = sext i32 %10 to i64 - %arrayidx5 = getelementptr inbounds i32, i32* %9, i64 %idxprom4 - %11 = load i32, i32* %arrayidx5, align 4 - %conv6 = sitofp i32 %11 to double - %12 = load i64, i64* @M, align 8 - %conv7 = sitofp i64 %12 to double - %div = fdiv double %conv6, %conv7 - %13 = call double @llvm.fabs.f64(double %div) - ret double %13 -} - -; Function Attrs: nounwind readnone speculatable willreturn -declare double @llvm.fabs.f64(double) #6 - -; Function Attrs: noinline nounwind optnone uwtable -define dso_local double @_Z5randnPii(i32* %seed, i32 %index) #0 { -entry: - %seed.addr = alloca i32*, align 8 - %index.addr = alloca i32, align 4 - %u = alloca double, align 8 - %v = alloca double, align 8 - %cosine = alloca double, align 8 - %rt = alloca double, align 8 - store i32* %seed, i32** %seed.addr, align 8 - store i32 %index, i32* %index.addr, align 4 - %0 = load i32*, i32** %seed.addr, align 8 - %1 = load i32, i32* %index.addr, align 4 - %call = call double @_Z5randuPii(i32* %0, i32 %1) - store double %call, double* %u, align 8 - %2 = load i32*, i32** %seed.addr, align 8 - %3 = load i32, i32* %index.addr, align 4 - %call1 = call double @_Z5randuPii(i32* %2, i32 %3) - store double %call1, double* %v, align 8 - %4 = load double, double* %v, align 8 - %mul = fmul contract double 0x401921FB54442D18, %4 - %call2 = call double @cos(double %mul) #10 - store double %call2, double* %cosine, align 8 - %5 = load double, double* %u, align 8 - %call3 = call double @log(double %5) #10 - %mul4 = fmul contract double -2.000000e+00, %call3 - store double %mul4, double* %rt, align 8 - %6 = load double, double* %rt, align 8 - %call5 = call double @sqrt(double %6) #10 - %7 = load double, double* %cosine, align 8 - %mul6 = fmul contract double %call5, %7 - ret double %mul6 -} - -; Function Attrs: nounwind -declare dso_local double @cos(double) #1 - -; Function Attrs: nounwind -declare dso_local double @log(double) #1 - -; Function Attrs: nounwind -declare dso_local double @sqrt(double) #1 - -; Function Attrs: noinline nounwind optnone uwtable -define dso_local void @_Z8addNoisePiS_S_S_S_(i32* %array3D, i32* %dimX, i32* %dimY, i32* %dimZ, i32* %seed) #0 { -entry: - %array3D.addr = alloca i32*, align 8 - %dimX.addr = alloca i32*, align 8 - %dimY.addr = alloca i32*, align 8 - %dimZ.addr = alloca i32*, align 8 - %seed.addr = alloca i32*, align 8 - %x = alloca i32, align 4 - %y = alloca i32, align 4 - %z = alloca i32, align 4 - store i32* %array3D, i32** %array3D.addr, align 8 - store i32* %dimX, i32** %dimX.addr, align 8 - store i32* %dimY, i32** %dimY.addr, align 8 - store i32* %dimZ, i32** %dimZ.addr, align 8 - store i32* %seed, i32** %seed.addr, align 8 - store i32 0, i32* %x, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc22, %entry - %0 = load i32, i32* %x, align 4 - %1 = load i32*, i32** %dimX.addr, align 8 - %2 = load i32, i32* %1, align 4 - %cmp = icmp slt i32 %0, %2 - br i1 %cmp, label %for.body, label %for.end24 - -for.body: ; preds = %for.cond - store i32 0, i32* %y, align 4 - br label %for.cond1 - -for.cond1: ; preds = %for.inc19, %for.body - %3 = load i32, i32* %y, align 4 - %4 = load i32*, i32** %dimY.addr, align 8 - %5 = load i32, i32* %4, align 4 - %cmp2 = icmp slt i32 %3, %5 - br i1 %cmp2, label %for.body3, label %for.end21 - -for.body3: ; preds = %for.cond1 - store i32 0, i32* %z, align 4 - br label %for.cond4 - -for.cond4: ; preds = %for.inc, %for.body3 - %6 = load i32, i32* %z, align 4 - %7 = load i32*, i32** %dimZ.addr, align 8 - %8 = load i32, i32* %7, align 4 - %cmp5 = icmp slt i32 %6, %8 - br i1 %cmp5, label %for.body6, label %for.end - -for.body6: ; preds = %for.cond4 - %9 = load i32*, i32** %array3D.addr, align 8 - %10 = load i32, i32* %x, align 4 - %11 = load i32*, i32** %dimY.addr, align 8 - %12 = load i32, i32* %11, align 4 - %mul = mul nsw i32 %10, %12 - %13 = load i32*, i32** %dimZ.addr, align 8 - %14 = load i32, i32* %13, align 4 - %mul7 = mul nsw i32 %mul, %14 - %15 = load i32, i32* %y, align 4 - %16 = load i32*, i32** %dimZ.addr, align 8 - %17 = load i32, i32* %16, align 4 - %mul8 = mul nsw i32 %15, %17 - %add = add nsw i32 %mul7, %mul8 - %18 = load i32, i32* %z, align 4 - %add9 = add nsw i32 %add, %18 - %idxprom = sext i32 %add9 to i64 - %arrayidx = getelementptr inbounds i32, i32* %9, i64 %idxprom - %19 = load i32, i32* %arrayidx, align 4 - %20 = load i32*, i32** %seed.addr, align 8 - %call = call double @_Z5randnPii(i32* %20, i32 0) - %mul10 = fmul contract double 5.000000e+00, %call - %conv = fptosi double %mul10 to i32 - %add11 = add nsw i32 %19, %conv - %21 = load i32*, i32** %array3D.addr, align 8 - %22 = load i32, i32* %x, align 4 - %23 = load i32*, i32** %dimY.addr, align 8 - %24 = load i32, i32* %23, align 4 - %mul12 = mul nsw i32 %22, %24 - %25 = load i32*, i32** %dimZ.addr, align 8 - %26 = load i32, i32* %25, align 4 - %mul13 = mul nsw i32 %mul12, %26 - %27 = load i32, i32* %y, align 4 - %28 = load i32*, i32** %dimZ.addr, align 8 - %29 = load i32, i32* %28, align 4 - %mul14 = mul nsw i32 %27, %29 - %add15 = add nsw i32 %mul13, %mul14 - %30 = load i32, i32* %z, align 4 - %add16 = add nsw i32 %add15, %30 - %idxprom17 = sext i32 %add16 to i64 - %arrayidx18 = getelementptr inbounds i32, i32* %21, i64 %idxprom17 - store i32 %add11, i32* %arrayidx18, align 4 - br label %for.inc - -for.inc: ; preds = %for.body6 - %31 = load i32, i32* %z, align 4 - %inc = add nsw i32 %31, 1 - store i32 %inc, i32* %z, align 4 - br label %for.cond4 - -for.end: ; preds = %for.cond4 - br label %for.inc19 - -for.inc19: ; preds = %for.end - %32 = load i32, i32* %y, align 4 - %inc20 = add nsw i32 %32, 1 - store i32 %inc20, i32* %y, align 4 - br label %for.cond1 - -for.end21: ; preds = %for.cond1 - br label %for.inc22 - -for.inc22: ; preds = %for.end21 - %33 = load i32, i32* %x, align 4 - %inc23 = add nsw i32 %33, 1 - store i32 %inc23, i32* %x, align 4 - br label %for.cond - -for.end24: ; preds = %for.cond - ret void -} - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z9strelDiskPii(i32* %disk, i32 %radius) #2 { -entry: - %disk.addr = alloca i32*, align 8 - %radius.addr = alloca i32, align 4 - %diameter = alloca i32, align 4 - %x = alloca i32, align 4 - %y = alloca i32, align 4 - %distance = alloca double, align 8 - store i32* %disk, i32** %disk.addr, align 8 - store i32 %radius, i32* %radius.addr, align 4 - %0 = load i32, i32* %radius.addr, align 4 - %mul = mul nsw i32 %0, 2 - %sub = sub nsw i32 %mul, 1 - store i32 %sub, i32* %diameter, align 4 - store i32 0, i32* %x, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc15, %entry - %1 = load i32, i32* %x, align 4 - %2 = load i32, i32* %diameter, align 4 - %cmp = icmp slt i32 %1, %2 - br i1 %cmp, label %for.body, label %for.end17 - -for.body: ; preds = %for.cond - store i32 0, i32* %y, align 4 - br label %for.cond1 - -for.cond1: ; preds = %for.inc, %for.body - %3 = load i32, i32* %y, align 4 - %4 = load i32, i32* %diameter, align 4 - %cmp2 = icmp slt i32 %3, %4 - br i1 %cmp2, label %for.body3, label %for.end - -for.body3: ; preds = %for.cond1 - %5 = load i32, i32* %x, align 4 - %6 = load i32, i32* %radius.addr, align 4 - %sub4 = sub nsw i32 %5, %6 - %add = add nsw i32 %sub4, 1 - %conv = sitofp i32 %add to double - %call = call double @_ZSt3powdi(double %conv, i32 2) - %7 = load i32, i32* %y, align 4 - %8 = load i32, i32* %radius.addr, align 4 - %sub5 = sub nsw i32 %7, %8 - %add6 = add nsw i32 %sub5, 1 - %conv7 = sitofp i32 %add6 to double - %call8 = call double @_ZSt3powdi(double %conv7, i32 2) - %add9 = fadd contract double %call, %call8 - %call10 = call double @sqrt(double %add9) #10 - store double %call10, double* %distance, align 8 - %9 = load double, double* %distance, align 8 - %10 = load i32, i32* %radius.addr, align 4 - %conv11 = sitofp i32 %10 to double - %cmp12 = fcmp olt double %9, %conv11 - br i1 %cmp12, label %if.then, label %if.end - -if.then: ; preds = %for.body3 - %11 = load i32*, i32** %disk.addr, align 8 - %12 = load i32, i32* %x, align 4 - %13 = load i32, i32* %diameter, align 4 - %mul13 = mul nsw i32 %12, %13 - %14 = load i32, i32* %y, align 4 - %add14 = add nsw i32 %mul13, %14 - %idxprom = sext i32 %add14 to i64 - %arrayidx = getelementptr inbounds i32, i32* %11, i64 %idxprom - store i32 1, i32* %arrayidx, align 4 - br label %if.end - -if.end: ; preds = %if.then, %for.body3 - br label %for.inc - -for.inc: ; preds = %if.end - %15 = load i32, i32* %y, align 4 - %inc = add nsw i32 %15, 1 - store i32 %inc, i32* %y, align 4 - br label %for.cond1 - -for.end: ; preds = %for.cond1 - br label %for.inc15 - -for.inc15: ; preds = %for.end - %16 = load i32, i32* %x, align 4 - %inc16 = add nsw i32 %16, 1 - store i32 %inc16, i32* %x, align 4 - br label %for.cond - -for.end17: ; preds = %for.cond - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local double @_ZSt3powdi(double %__x, i32 %__i) #0 comdat { -entry: - %__x.addr = alloca double, align 8 - %__i.addr = alloca i32, align 4 - store double %__x, double* %__x.addr, align 8 - store i32 %__i, i32* %__i.addr, align 4 - %0 = load double, double* %__x.addr, align 8 - %1 = load i32, i32* %__i.addr, align 4 - %2 = call double @llvm.powi.f64(double %0, i32 %1) - ret double %2 -} - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z13dilate_matrixPiiiiiiii(i32* %matrix, i32 %posX, i32 %posY, i32 %posZ, i32 %dimX, i32 %dimY, i32 %dimZ, i32 %error) #2 { -entry: - %matrix.addr = alloca i32*, align 8 - %posX.addr = alloca i32, align 4 - %posY.addr = alloca i32, align 4 - %posZ.addr = alloca i32, align 4 - %dimX.addr = alloca i32, align 4 - %dimY.addr = alloca i32, align 4 - %dimZ.addr = alloca i32, align 4 - %error.addr = alloca i32, align 4 - %startX = alloca i32, align 4 - %startY = alloca i32, align 4 - %endX = alloca i32, align 4 - %endY = alloca i32, align 4 - %x = alloca i32, align 4 - %y = alloca i32, align 4 - %distance = alloca double, align 8 - store i32* %matrix, i32** %matrix.addr, align 8 - store i32 %posX, i32* %posX.addr, align 4 - store i32 %posY, i32* %posY.addr, align 4 - store i32 %posZ, i32* %posZ.addr, align 4 - store i32 %dimX, i32* %dimX.addr, align 4 - store i32 %dimY, i32* %dimY.addr, align 4 - store i32 %dimZ, i32* %dimZ.addr, align 4 - store i32 %error, i32* %error.addr, align 4 - %0 = load i32, i32* %posX.addr, align 4 - %1 = load i32, i32* %error.addr, align 4 - %sub = sub nsw i32 %0, %1 - store i32 %sub, i32* %startX, align 4 - br label %while.cond - -while.cond: ; preds = %while.body, %entry - %2 = load i32, i32* %startX, align 4 - %cmp = icmp slt i32 %2, 0 - br i1 %cmp, label %while.body, label %while.end - -while.body: ; preds = %while.cond - %3 = load i32, i32* %startX, align 4 - %inc = add nsw i32 %3, 1 - store i32 %inc, i32* %startX, align 4 - br label %while.cond - -while.end: ; preds = %while.cond - %4 = load i32, i32* %posY.addr, align 4 - %5 = load i32, i32* %error.addr, align 4 - %sub1 = sub nsw i32 %4, %5 - store i32 %sub1, i32* %startY, align 4 - br label %while.cond2 - -while.cond2: ; preds = %while.body4, %while.end - %6 = load i32, i32* %startY, align 4 - %cmp3 = icmp slt i32 %6, 0 - br i1 %cmp3, label %while.body4, label %while.end6 - -while.body4: ; preds = %while.cond2 - %7 = load i32, i32* %startY, align 4 - %inc5 = add nsw i32 %7, 1 - store i32 %inc5, i32* %startY, align 4 - br label %while.cond2 - -while.end6: ; preds = %while.cond2 - %8 = load i32, i32* %posX.addr, align 4 - %9 = load i32, i32* %error.addr, align 4 - %add = add nsw i32 %8, %9 - store i32 %add, i32* %endX, align 4 - br label %while.cond7 - -while.cond7: ; preds = %while.body9, %while.end6 - %10 = load i32, i32* %endX, align 4 - %11 = load i32, i32* %dimX.addr, align 4 - %cmp8 = icmp sgt i32 %10, %11 - br i1 %cmp8, label %while.body9, label %while.end10 - -while.body9: ; preds = %while.cond7 - %12 = load i32, i32* %endX, align 4 - %dec = add nsw i32 %12, -1 - store i32 %dec, i32* %endX, align 4 - br label %while.cond7 - -while.end10: ; preds = %while.cond7 - %13 = load i32, i32* %posY.addr, align 4 - %14 = load i32, i32* %error.addr, align 4 - %add11 = add nsw i32 %13, %14 - store i32 %add11, i32* %endY, align 4 - br label %while.cond12 - -while.cond12: ; preds = %while.body14, %while.end10 - %15 = load i32, i32* %endY, align 4 - %16 = load i32, i32* %dimY.addr, align 4 - %cmp13 = icmp sgt i32 %15, %16 - br i1 %cmp13, label %while.body14, label %while.end16 - -while.body14: ; preds = %while.cond12 - %17 = load i32, i32* %endY, align 4 - %dec15 = add nsw i32 %17, -1 - store i32 %dec15, i32* %endY, align 4 - br label %while.cond12 - -while.end16: ; preds = %while.cond12 - %18 = load i32, i32* %startX, align 4 - store i32 %18, i32* %x, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc34, %while.end16 - %19 = load i32, i32* %x, align 4 - %20 = load i32, i32* %endX, align 4 - %cmp17 = icmp slt i32 %19, %20 - br i1 %cmp17, label %for.body, label %for.end36 - -for.body: ; preds = %for.cond - %21 = load i32, i32* %startY, align 4 - store i32 %21, i32* %y, align 4 - br label %for.cond18 - -for.cond18: ; preds = %for.inc, %for.body - %22 = load i32, i32* %y, align 4 - %23 = load i32, i32* %endY, align 4 - %cmp19 = icmp slt i32 %22, %23 - br i1 %cmp19, label %for.body20, label %for.end - -for.body20: ; preds = %for.cond18 - %24 = load i32, i32* %x, align 4 - %25 = load i32, i32* %posX.addr, align 4 - %sub21 = sub nsw i32 %24, %25 - %conv = sitofp i32 %sub21 to double - %call = call double @_ZSt3powdi(double %conv, i32 2) - %26 = load i32, i32* %y, align 4 - %27 = load i32, i32* %posY.addr, align 4 - %sub22 = sub nsw i32 %26, %27 - %conv23 = sitofp i32 %sub22 to double - %call24 = call double @_ZSt3powdi(double %conv23, i32 2) - %add25 = fadd contract double %call, %call24 - %call26 = call double @sqrt(double %add25) #10 - store double %call26, double* %distance, align 8 - %28 = load double, double* %distance, align 8 - %29 = load i32, i32* %error.addr, align 4 - %conv27 = sitofp i32 %29 to double - %cmp28 = fcmp olt double %28, %conv27 - br i1 %cmp28, label %if.then, label %if.end - -if.then: ; preds = %for.body20 - %30 = load i32*, i32** %matrix.addr, align 8 - %31 = load i32, i32* %x, align 4 - %32 = load i32, i32* %dimY.addr, align 4 - %mul = mul nsw i32 %31, %32 - %33 = load i32, i32* %dimZ.addr, align 4 - %mul29 = mul nsw i32 %mul, %33 - %34 = load i32, i32* %y, align 4 - %35 = load i32, i32* %dimZ.addr, align 4 - %mul30 = mul nsw i32 %34, %35 - %add31 = add nsw i32 %mul29, %mul30 - %36 = load i32, i32* %posZ.addr, align 4 - %add32 = add nsw i32 %add31, %36 - %idxprom = sext i32 %add32 to i64 - %arrayidx = getelementptr inbounds i32, i32* %30, i64 %idxprom - store i32 1, i32* %arrayidx, align 4 - br label %if.end - -if.end: ; preds = %if.then, %for.body20 - br label %for.inc - -for.inc: ; preds = %if.end - %37 = load i32, i32* %y, align 4 - %inc33 = add nsw i32 %37, 1 - store i32 %inc33, i32* %y, align 4 - br label %for.cond18 - -for.end: ; preds = %for.cond18 - br label %for.inc34 - -for.inc34: ; preds = %for.end - %38 = load i32, i32* %x, align 4 - %inc35 = add nsw i32 %38, 1 - store i32 %inc35, i32* %x, align 4 - br label %for.cond - -for.end36: ; preds = %for.cond - ret void -} - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z13imdilate_diskPiiiiiS_(i32* %matrix, i32 %dimX, i32 %dimY, i32 %dimZ, i32 %error, i32* %newMatrix) #2 { -entry: - %matrix.addr = alloca i32*, align 8 - %dimX.addr = alloca i32, align 4 - %dimY.addr = alloca i32, align 4 - %dimZ.addr = alloca i32, align 4 - %error.addr = alloca i32, align 4 - %newMatrix.addr = alloca i32*, align 8 - %x = alloca i32, align 4 - %y = alloca i32, align 4 - %z = alloca i32, align 4 - store i32* %matrix, i32** %matrix.addr, align 8 - store i32 %dimX, i32* %dimX.addr, align 4 - store i32 %dimY, i32* %dimY.addr, align 4 - store i32 %dimZ, i32* %dimZ.addr, align 4 - store i32 %error, i32* %error.addr, align 4 - store i32* %newMatrix, i32** %newMatrix.addr, align 8 - store i32 0, i32* %z, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc14, %entry - %0 = load i32, i32* %z, align 4 - %1 = load i32, i32* %dimZ.addr, align 4 - %cmp = icmp slt i32 %0, %1 - br i1 %cmp, label %for.body, label %for.end16 - -for.body: ; preds = %for.cond - store i32 0, i32* %x, align 4 - br label %for.cond1 - -for.cond1: ; preds = %for.inc11, %for.body - %2 = load i32, i32* %x, align 4 - %3 = load i32, i32* %dimX.addr, align 4 - %cmp2 = icmp slt i32 %2, %3 - br i1 %cmp2, label %for.body3, label %for.end13 - -for.body3: ; preds = %for.cond1 - store i32 0, i32* %y, align 4 - br label %for.cond4 - -for.cond4: ; preds = %for.inc, %for.body3 - %4 = load i32, i32* %y, align 4 - %5 = load i32, i32* %dimY.addr, align 4 - %cmp5 = icmp slt i32 %4, %5 - br i1 %cmp5, label %for.body6, label %for.end - -for.body6: ; preds = %for.cond4 - %6 = load i32*, i32** %matrix.addr, align 8 - %7 = load i32, i32* %x, align 4 - %8 = load i32, i32* %dimY.addr, align 4 - %mul = mul nsw i32 %7, %8 - %9 = load i32, i32* %dimZ.addr, align 4 - %mul7 = mul nsw i32 %mul, %9 - %10 = load i32, i32* %y, align 4 - %11 = load i32, i32* %dimZ.addr, align 4 - %mul8 = mul nsw i32 %10, %11 - %add = add nsw i32 %mul7, %mul8 - %12 = load i32, i32* %z, align 4 - %add9 = add nsw i32 %add, %12 - %idxprom = sext i32 %add9 to i64 - %arrayidx = getelementptr inbounds i32, i32* %6, i64 %idxprom - %13 = load i32, i32* %arrayidx, align 4 - %cmp10 = icmp eq i32 %13, 1 - br i1 %cmp10, label %if.then, label %if.end - -if.then: ; preds = %for.body6 - %14 = load i32*, i32** %newMatrix.addr, align 8 - %15 = load i32, i32* %x, align 4 - %16 = load i32, i32* %y, align 4 - %17 = load i32, i32* %z, align 4 - %18 = load i32, i32* %dimX.addr, align 4 - %19 = load i32, i32* %dimY.addr, align 4 - %20 = load i32, i32* %dimZ.addr, align 4 - %21 = load i32, i32* %error.addr, align 4 - call void @_Z13dilate_matrixPiiiiiiii(i32* %14, i32 %15, i32 %16, i32 %17, i32 %18, i32 %19, i32 %20, i32 %21) - br label %if.end - -if.end: ; preds = %if.then, %for.body6 - br label %for.inc - -for.inc: ; preds = %if.end - %22 = load i32, i32* %y, align 4 - %inc = add nsw i32 %22, 1 - store i32 %inc, i32* %y, align 4 - br label %for.cond4 - -for.end: ; preds = %for.cond4 - br label %for.inc11 - -for.inc11: ; preds = %for.end - %23 = load i32, i32* %x, align 4 - %inc12 = add nsw i32 %23, 1 - store i32 %inc12, i32* %x, align 4 - br label %for.cond1 - -for.end13: ; preds = %for.cond1 - br label %for.inc14 - -for.inc14: ; preds = %for.end13 - %24 = load i32, i32* %z, align 4 - %inc15 = add nsw i32 %24, 1 - store i32 %inc15, i32* %z, align 4 - br label %for.cond - -for.end16: ; preds = %for.cond - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define dso_local void @_Z12getneighborsPiiPdi(i32* %se, i32 %numOnes, double* %neighbors, i32 %radius) #0 { -entry: - %se.addr = alloca i32*, align 8 - %numOnes.addr = alloca i32, align 4 - %neighbors.addr = alloca double*, align 8 - %radius.addr = alloca i32, align 4 - %x = alloca i32, align 4 - %y = alloca i32, align 4 - %neighY = alloca i32, align 4 - %center = alloca i32, align 4 - %diameter = alloca i32, align 4 - store i32* %se, i32** %se.addr, align 8 - store i32 %numOnes, i32* %numOnes.addr, align 4 - store double* %neighbors, double** %neighbors.addr, align 8 - store i32 %radius, i32* %radius.addr, align 4 - store i32 0, i32* %neighY, align 4 - %0 = load i32, i32* %radius.addr, align 4 - %sub = sub nsw i32 %0, 1 - store i32 %sub, i32* %center, align 4 - %1 = load i32, i32* %radius.addr, align 4 - %mul = mul nsw i32 %1, 2 - %sub1 = sub nsw i32 %mul, 1 - store i32 %sub1, i32* %diameter, align 4 - store i32 0, i32* %x, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc17, %entry - %2 = load i32, i32* %x, align 4 - %3 = load i32, i32* %diameter, align 4 - %cmp = icmp slt i32 %2, %3 - br i1 %cmp, label %for.body, label %for.end19 - -for.body: ; preds = %for.cond - store i32 0, i32* %y, align 4 - br label %for.cond2 - -for.cond2: ; preds = %for.inc, %for.body - %4 = load i32, i32* %y, align 4 - %5 = load i32, i32* %diameter, align 4 - %cmp3 = icmp slt i32 %4, %5 - br i1 %cmp3, label %for.body4, label %for.end - -for.body4: ; preds = %for.cond2 - %6 = load i32*, i32** %se.addr, align 8 - %7 = load i32, i32* %x, align 4 - %8 = load i32, i32* %diameter, align 4 - %mul5 = mul nsw i32 %7, %8 - %9 = load i32, i32* %y, align 4 - %add = add nsw i32 %mul5, %9 - %idxprom = sext i32 %add to i64 - %arrayidx = getelementptr inbounds i32, i32* %6, i64 %idxprom - %10 = load i32, i32* %arrayidx, align 4 - %tobool = icmp ne i32 %10, 0 - br i1 %tobool, label %if.then, label %if.end - -if.then: ; preds = %for.body4 - %11 = load i32, i32* %y, align 4 - %12 = load i32, i32* %center, align 4 - %sub6 = sub nsw i32 %11, %12 - %conv = sitofp i32 %sub6 to double - %13 = load double*, double** %neighbors.addr, align 8 - %14 = load i32, i32* %neighY, align 4 - %mul7 = mul nsw i32 %14, 2 - %idxprom8 = sext i32 %mul7 to i64 - %arrayidx9 = getelementptr inbounds double, double* %13, i64 %idxprom8 - store double %conv, double* %arrayidx9, align 8 - %15 = load i32, i32* %x, align 4 - %16 = load i32, i32* %center, align 4 - %sub10 = sub nsw i32 %15, %16 - %conv11 = sitofp i32 %sub10 to double - %17 = load double*, double** %neighbors.addr, align 8 - %18 = load i32, i32* %neighY, align 4 - %mul12 = mul nsw i32 %18, 2 - %add13 = add nsw i32 %mul12, 1 - %idxprom14 = sext i32 %add13 to i64 - %arrayidx15 = getelementptr inbounds double, double* %17, i64 %idxprom14 - store double %conv11, double* %arrayidx15, align 8 - %19 = load i32, i32* %neighY, align 4 - %inc = add nsw i32 %19, 1 - store i32 %inc, i32* %neighY, align 4 - br label %if.end - -if.end: ; preds = %if.then, %for.body4 - br label %for.inc - -for.inc: ; preds = %if.end - %20 = load i32, i32* %y, align 4 - %inc16 = add nsw i32 %20, 1 - store i32 %inc16, i32* %y, align 4 - br label %for.cond2 - -for.end: ; preds = %for.cond2 - br label %for.inc17 - -for.inc17: ; preds = %for.end - %21 = load i32, i32* %x, align 4 - %inc18 = add nsw i32 %21, 1 - store i32 %inc18, i32* %x, align 4 - br label %for.cond - -for.end19: ; preds = %for.cond - ret void -} - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z13videoSequencePiiiiS_(i32* %I, i32 %IszX, i32 %IszY, i32 %Nfr, i32* %seed) #2 { -entry: - %I.addr = alloca i32*, align 8 - %IszX.addr = alloca i32, align 4 - %IszY.addr = alloca i32, align 4 - %Nfr.addr = alloca i32, align 4 - %seed.addr = alloca i32*, align 8 - %k = alloca i32, align 4 - %max_size = alloca i32, align 4 - %x0 = alloca i32, align 4 - %y0 = alloca i32, align 4 - %xk = alloca i32, align 4 - %yk = alloca i32, align 4 - %pos = alloca i32, align 4 - %newMatrix = alloca i32*, align 8 - %x = alloca i32, align 4 - %y = alloca i32, align 4 - store i32* %I, i32** %I.addr, align 8 - store i32 %IszX, i32* %IszX.addr, align 4 - store i32 %IszY, i32* %IszY.addr, align 4 - store i32 %Nfr, i32* %Nfr.addr, align 4 - store i32* %seed, i32** %seed.addr, align 8 - %0 = load i32, i32* %IszX.addr, align 4 - %1 = load i32, i32* %IszY.addr, align 4 - %mul = mul nsw i32 %0, %1 - %2 = load i32, i32* %Nfr.addr, align 4 - %mul1 = mul nsw i32 %mul, %2 - store i32 %mul1, i32* %max_size, align 4 - %3 = load i32, i32* %IszY.addr, align 4 - %conv = sitofp i32 %3 to double - %div = fdiv double %conv, 2.000000e+00 - %call = call double @_Z11roundDoubled(double %div) - %conv2 = fptosi double %call to i32 - store i32 %conv2, i32* %x0, align 4 - %4 = load i32, i32* %IszX.addr, align 4 - %conv3 = sitofp i32 %4 to double - %div4 = fdiv double %conv3, 2.000000e+00 - %call5 = call double @_Z11roundDoubled(double %div4) - %conv6 = fptosi double %call5 to i32 - store i32 %conv6, i32* %y0, align 4 - %5 = load i32*, i32** %I.addr, align 8 - %6 = load i32, i32* %x0, align 4 - %7 = load i32, i32* %IszY.addr, align 4 - %mul7 = mul nsw i32 %6, %7 - %8 = load i32, i32* %Nfr.addr, align 4 - %mul8 = mul nsw i32 %mul7, %8 - %9 = load i32, i32* %y0, align 4 - %10 = load i32, i32* %Nfr.addr, align 4 - %mul9 = mul nsw i32 %9, %10 - %add = add nsw i32 %mul8, %mul9 - %add10 = add nsw i32 %add, 0 - %idxprom = sext i32 %add10 to i64 - %arrayidx = getelementptr inbounds i32, i32* %5, i64 %idxprom - store i32 1, i32* %arrayidx, align 4 - store i32 1, i32* %k, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %11 = load i32, i32* %k, align 4 - %12 = load i32, i32* %Nfr.addr, align 4 - %cmp = icmp slt i32 %11, %12 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %13 = load i32, i32* %x0, align 4 - %14 = load i32, i32* %k, align 4 - %sub = sub nsw i32 %14, 1 - %add11 = add nsw i32 %13, %sub - %call12 = call i32 @abs(i32 %add11) #12 - store i32 %call12, i32* %xk, align 4 - %15 = load i32, i32* %y0, align 4 - %16 = load i32, i32* %k, align 4 - %sub13 = sub nsw i32 %16, 1 - %mul14 = mul nsw i32 2, %sub13 - %sub15 = sub nsw i32 %15, %mul14 - %call16 = call i32 @abs(i32 %sub15) #12 - store i32 %call16, i32* %yk, align 4 - %17 = load i32, i32* %yk, align 4 - %18 = load i32, i32* %IszY.addr, align 4 - %mul17 = mul nsw i32 %17, %18 - %19 = load i32, i32* %Nfr.addr, align 4 - %mul18 = mul nsw i32 %mul17, %19 - %20 = load i32, i32* %xk, align 4 - %21 = load i32, i32* %Nfr.addr, align 4 - %mul19 = mul nsw i32 %20, %21 - %add20 = add nsw i32 %mul18, %mul19 - %22 = load i32, i32* %k, align 4 - %add21 = add nsw i32 %add20, %22 - store i32 %add21, i32* %pos, align 4 - %23 = load i32, i32* %pos, align 4 - %24 = load i32, i32* %max_size, align 4 - %cmp22 = icmp sge i32 %23, %24 - br i1 %cmp22, label %if.then, label %if.end - -if.then: ; preds = %for.body - store i32 0, i32* %pos, align 4 - br label %if.end - -if.end: ; preds = %if.then, %for.body - %25 = load i32*, i32** %I.addr, align 8 - %26 = load i32, i32* %pos, align 4 - %idxprom23 = sext i32 %26 to i64 - %arrayidx24 = getelementptr inbounds i32, i32* %25, i64 %idxprom23 - store i32 1, i32* %arrayidx24, align 4 - br label %for.inc - -for.inc: ; preds = %if.end - %27 = load i32, i32* %k, align 4 - %inc = add nsw i32 %27, 1 - store i32 %inc, i32* %k, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %28 = load i32, i32* %IszX.addr, align 4 - %conv25 = sext i32 %28 to i64 - %mul26 = mul i64 4, %conv25 - %29 = load i32, i32* %IszY.addr, align 4 - %conv27 = sext i32 %29 to i64 - %mul28 = mul i64 %mul26, %conv27 - %30 = load i32, i32* %Nfr.addr, align 4 - %conv29 = sext i32 %30 to i64 - %mul30 = mul i64 %mul28, %conv29 - %call31 = call noalias i8* @malloc(i64 %mul30) #10 - %31 = bitcast i8* %call31 to i32* - store i32* %31, i32** %newMatrix, align 8 - %32 = load i32*, i32** %I.addr, align 8 - %33 = load i32, i32* %IszX.addr, align 4 - %34 = load i32, i32* %IszY.addr, align 4 - %35 = load i32, i32* %Nfr.addr, align 4 - %36 = load i32*, i32** %newMatrix, align 8 - call void @_Z13imdilate_diskPiiiiiS_(i32* %32, i32 %33, i32 %34, i32 %35, i32 5, i32* %36) - store i32 0, i32* %x, align 4 - br label %for.cond32 - -for.cond32: ; preds = %for.inc61, %for.end - %37 = load i32, i32* %x, align 4 - %38 = load i32, i32* %IszX.addr, align 4 - %cmp33 = icmp slt i32 %37, %38 - br i1 %cmp33, label %for.body34, label %for.end63 - -for.body34: ; preds = %for.cond32 - store i32 0, i32* %y, align 4 - br label %for.cond35 - -for.cond35: ; preds = %for.inc58, %for.body34 - %39 = load i32, i32* %y, align 4 - %40 = load i32, i32* %IszY.addr, align 4 - %cmp36 = icmp slt i32 %39, %40 - br i1 %cmp36, label %for.body37, label %for.end60 - -for.body37: ; preds = %for.cond35 - store i32 0, i32* %k, align 4 - br label %for.cond38 - -for.cond38: ; preds = %for.inc55, %for.body37 - %41 = load i32, i32* %k, align 4 - %42 = load i32, i32* %Nfr.addr, align 4 - %cmp39 = icmp slt i32 %41, %42 - br i1 %cmp39, label %for.body40, label %for.end57 - -for.body40: ; preds = %for.cond38 - %43 = load i32*, i32** %newMatrix, align 8 - %44 = load i32, i32* %x, align 4 - %45 = load i32, i32* %IszY.addr, align 4 - %mul41 = mul nsw i32 %44, %45 - %46 = load i32, i32* %Nfr.addr, align 4 - %mul42 = mul nsw i32 %mul41, %46 - %47 = load i32, i32* %y, align 4 - %48 = load i32, i32* %Nfr.addr, align 4 - %mul43 = mul nsw i32 %47, %48 - %add44 = add nsw i32 %mul42, %mul43 - %49 = load i32, i32* %k, align 4 - %add45 = add nsw i32 %add44, %49 - %idxprom46 = sext i32 %add45 to i64 - %arrayidx47 = getelementptr inbounds i32, i32* %43, i64 %idxprom46 - %50 = load i32, i32* %arrayidx47, align 4 - %51 = load i32*, i32** %I.addr, align 8 - %52 = load i32, i32* %x, align 4 - %53 = load i32, i32* %IszY.addr, align 4 - %mul48 = mul nsw i32 %52, %53 - %54 = load i32, i32* %Nfr.addr, align 4 - %mul49 = mul nsw i32 %mul48, %54 - %55 = load i32, i32* %y, align 4 - %56 = load i32, i32* %Nfr.addr, align 4 - %mul50 = mul nsw i32 %55, %56 - %add51 = add nsw i32 %mul49, %mul50 - %57 = load i32, i32* %k, align 4 - %add52 = add nsw i32 %add51, %57 - %idxprom53 = sext i32 %add52 to i64 - %arrayidx54 = getelementptr inbounds i32, i32* %51, i64 %idxprom53 - store i32 %50, i32* %arrayidx54, align 4 - br label %for.inc55 - -for.inc55: ; preds = %for.body40 - %58 = load i32, i32* %k, align 4 - %inc56 = add nsw i32 %58, 1 - store i32 %inc56, i32* %k, align 4 - br label %for.cond38 - -for.end57: ; preds = %for.cond38 - br label %for.inc58 - -for.inc58: ; preds = %for.end57 - %59 = load i32, i32* %y, align 4 - %inc59 = add nsw i32 %59, 1 - store i32 %inc59, i32* %y, align 4 - br label %for.cond35 - -for.end60: ; preds = %for.cond35 - br label %for.inc61 - -for.inc61: ; preds = %for.end60 - %60 = load i32, i32* %x, align 4 - %inc62 = add nsw i32 %60, 1 - store i32 %inc62, i32* %x, align 4 - br label %for.cond32 - -for.end63: ; preds = %for.cond32 - %61 = load i32*, i32** %newMatrix, align 8 - %62 = bitcast i32* %61 to i8* - call void @free(i8* %62) #10 - %63 = load i32*, i32** %I.addr, align 8 - call void @_Z5setIfiiPiS_S_S_(i32 0, i32 100, i32* %63, i32* %IszX.addr, i32* %IszY.addr, i32* %Nfr.addr) - %64 = load i32*, i32** %I.addr, align 8 - call void @_Z5setIfiiPiS_S_S_(i32 1, i32 228, i32* %64, i32* %IszX.addr, i32* %IszY.addr, i32* %Nfr.addr) - %65 = load i32*, i32** %I.addr, align 8 - %66 = load i32*, i32** %seed.addr, align 8 - call void @_Z8addNoisePiS_S_S_S_(i32* %65, i32* %IszX.addr, i32* %IszY.addr, i32* %Nfr.addr, i32* %66) - ret void -} - -; Function Attrs: nounwind readnone -declare dso_local i32 @abs(i32) #7 - -; Function Attrs: nounwind -declare dso_local noalias i8* @malloc(i64) #1 - -; Function Attrs: nounwind -declare dso_local void @free(i8*) #1 - -; Function Attrs: noinline optnone uwtable -define dso_local double @_Z17calcLikelihoodSumPiS_i(i32* %I, i32* %ind, i32 %numOnes) #2 { -entry: - %I.addr = alloca i32*, align 8 - %ind.addr = alloca i32*, align 8 - %numOnes.addr = alloca i32, align 4 - %likelihoodSum = alloca double, align 8 - %y = alloca i32, align 4 - store i32* %I, i32** %I.addr, align 8 - store i32* %ind, i32** %ind.addr, align 8 - store i32 %numOnes, i32* %numOnes.addr, align 4 - store double 0.000000e+00, double* %likelihoodSum, align 8 - store i32 0, i32* %y, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = load i32, i32* %y, align 4 - %1 = load i32, i32* %numOnes.addr, align 4 - %cmp = icmp slt i32 %0, %1 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %2 = load i32*, i32** %I.addr, align 8 - %3 = load i32*, i32** %ind.addr, align 8 - %4 = load i32, i32* %y, align 4 - %idxprom = sext i32 %4 to i64 - %arrayidx = getelementptr inbounds i32, i32* %3, i64 %idxprom - %5 = load i32, i32* %arrayidx, align 4 - %idxprom1 = sext i32 %5 to i64 - %arrayidx2 = getelementptr inbounds i32, i32* %2, i64 %idxprom1 - %6 = load i32, i32* %arrayidx2, align 4 - %sub = sub nsw i32 %6, 100 - %conv = sitofp i32 %sub to double - %call = call double @_ZSt3powdi(double %conv, i32 2) - %7 = load i32*, i32** %I.addr, align 8 - %8 = load i32*, i32** %ind.addr, align 8 - %9 = load i32, i32* %y, align 4 - %idxprom3 = sext i32 %9 to i64 - %arrayidx4 = getelementptr inbounds i32, i32* %8, i64 %idxprom3 - %10 = load i32, i32* %arrayidx4, align 4 - %idxprom5 = sext i32 %10 to i64 - %arrayidx6 = getelementptr inbounds i32, i32* %7, i64 %idxprom5 - %11 = load i32, i32* %arrayidx6, align 4 - %sub7 = sub nsw i32 %11, 228 - %conv8 = sitofp i32 %sub7 to double - %call9 = call double @_ZSt3powdi(double %conv8, i32 2) - %sub10 = fsub contract double %call, %call9 - %div = fdiv double %sub10, 5.000000e+01 - %12 = load double, double* %likelihoodSum, align 8 - %add = fadd contract double %12, %div - store double %add, double* %likelihoodSum, align 8 - br label %for.inc - -for.inc: ; preds = %for.body - %13 = load i32, i32* %y, align 4 - %inc = add nsw i32 %13, 1 - store i32 %inc, i32* %y, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %14 = load double, double* %likelihoodSum, align 8 - ret double %14 -} - -; Function Attrs: noinline nounwind optnone uwtable -define dso_local i32 @_Z9findIndexPdid(double* %CDF, i32 %lengthCDF, double %value) #0 { -entry: - %retval = alloca i32, align 4 - %CDF.addr = alloca double*, align 8 - %lengthCDF.addr = alloca i32, align 4 - %value.addr = alloca double, align 8 - %index = alloca i32, align 4 - %x = alloca i32, align 4 - store double* %CDF, double** %CDF.addr, align 8 - store i32 %lengthCDF, i32* %lengthCDF.addr, align 4 - store double %value, double* %value.addr, align 8 - store i32 -1, i32* %index, align 4 - store i32 0, i32* %x, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = load i32, i32* %x, align 4 - %1 = load i32, i32* %lengthCDF.addr, align 4 - %cmp = icmp slt i32 %0, %1 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %2 = load double*, double** %CDF.addr, align 8 - %3 = load i32, i32* %x, align 4 - %idxprom = sext i32 %3 to i64 - %arrayidx = getelementptr inbounds double, double* %2, i64 %idxprom - %4 = load double, double* %arrayidx, align 8 - %5 = load double, double* %value.addr, align 8 - %cmp1 = fcmp oge double %4, %5 - br i1 %cmp1, label %if.then, label %if.end - -if.then: ; preds = %for.body - %6 = load i32, i32* %x, align 4 - store i32 %6, i32* %index, align 4 - br label %for.end - -if.end: ; preds = %for.body - br label %for.inc - -for.inc: ; preds = %if.end - %7 = load i32, i32* %x, align 4 - %inc = add nsw i32 %7, 1 - store i32 %inc, i32* %x, align 4 - br label %for.cond - -for.end: ; preds = %if.then, %for.cond - %8 = load i32, i32* %index, align 4 - %cmp2 = icmp eq i32 %8, -1 - br i1 %cmp2, label %if.then3, label %if.end4 - -if.then3: ; preds = %for.end - %9 = load i32, i32* %lengthCDF.addr, align 4 - %sub = sub nsw i32 %9, 1 - store i32 %sub, i32* %retval, align 4 - br label %return - -if.end4: ; preds = %for.end - %10 = load i32, i32* %index, align 4 - store i32 %10, i32* %retval, align 4 - br label %return - -return: ; preds = %if.end4, %if.then3 - %11 = load i32, i32* %retval, align 4 - ret i32 %11 -} - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z14particleFilterPiiiiS_i(i32* %I, i32 %IszX, i32 %IszY, i32 %Nfr, i32* %seed, i32 %Nparticles) #2 { -entry: - %I.addr = alloca i32*, align 8 - %IszX.addr = alloca i32, align 4 - %IszY.addr = alloca i32, align 4 - %Nfr.addr = alloca i32, align 4 - %seed.addr = alloca i32*, align 8 - %Nparticles.addr = alloca i32, align 4 - %max_size = alloca i32, align 4 - %start = alloca i64, align 8 - %xe = alloca double, align 8 - %ye = alloca double, align 8 - %radius = alloca i32, align 4 - %diameter = alloca i32, align 4 - %disk = alloca i32*, align 8 - %countOnes = alloca i32, align 4 - %x = alloca i32, align 4 - %y = alloca i32, align 4 - %objxy = alloca double*, align 8 - %get_neighbors = alloca i64, align 8 - %weights = alloca double*, align 8 - %get_weights = alloca i64, align 8 - %likelihood = alloca double*, align 8 - %arrayX = alloca double*, align 8 - %arrayY = alloca double*, align 8 - %xj = alloca double*, align 8 - %yj = alloca double*, align 8 - %CDF = alloca double*, align 8 - %arrayX_GPU = alloca double*, align 8 - %arrayY_GPU = alloca double*, align 8 - %xj_GPU = alloca double*, align 8 - %yj_GPU = alloca double*, align 8 - %CDF_GPU = alloca double*, align 8 - %ind = alloca i32*, align 8 - %u = alloca double*, align 8 - %u_GPU = alloca double*, align 8 - %k = alloca i32, align 4 - %indX = alloca i32, align 4 - %indY = alloca i32, align 4 - %set_arrays = alloca i64, align 8 - %error = alloca i64, align 8 - %likelihood_time = alloca i64, align 8 - %exponential = alloca i64, align 8 - %sumWeights = alloca double, align 8 - %sum_time = alloca i64, align 8 - %normalize = alloca i64, align 8 - %move_time = alloca i64, align 8 - %distance = alloca double, align 8 - %cum_sum = alloca i64, align 8 - %u1 = alloca double, align 8 - %u_time = alloca i64, align 8 - %start_copy = alloca i64, align 8 - %end_copy = alloca i64, align 8 - %num_blocks = alloca i32, align 4 - %agg.tmp = alloca %struct.dim3, align 4 - %agg.tmp335 = alloca %struct.dim3, align 4 - %agg.tmp.coerce = alloca { i64, i32 }, align 4 - %agg.tmp335.coerce = alloca { i64, i32 }, align 4 - %start_copy_back = alloca i64, align 8 - %end_copy_back = alloca i64, align 8 - %xyj_time = alloca i64, align 8 - %reset = alloca i64, align 8 - %i = alloca i32, align 4 - store i32* %I, i32** %I.addr, align 8 - store i32 %IszX, i32* %IszX.addr, align 4 - store i32 %IszY, i32* %IszY.addr, align 4 - store i32 %Nfr, i32* %Nfr.addr, align 4 - store i32* %seed, i32** %seed.addr, align 8 - store i32 %Nparticles, i32* %Nparticles.addr, align 4 - %0 = load i32, i32* %IszX.addr, align 4 - %1 = load i32, i32* %IszY.addr, align 4 - %mul = mul nsw i32 %0, %1 - %2 = load i32, i32* %Nfr.addr, align 4 - %mul1 = mul nsw i32 %mul, %2 - store i32 %mul1, i32* %max_size, align 4 - %call = call i64 @_Z8get_timev() - store i64 %call, i64* %start, align 8 - %3 = load i32, i32* %IszY.addr, align 4 - %conv = sitofp i32 %3 to double - %div = fdiv double %conv, 2.000000e+00 - %call2 = call double @_Z11roundDoubled(double %div) - store double %call2, double* %xe, align 8 - %4 = load i32, i32* %IszX.addr, align 4 - %conv3 = sitofp i32 %4 to double - %div4 = fdiv double %conv3, 2.000000e+00 - %call5 = call double @_Z11roundDoubled(double %div4) - store double %call5, double* %ye, align 8 - store i32 5, i32* %radius, align 4 - %5 = load i32, i32* %radius, align 4 - %mul6 = mul nsw i32 %5, 2 - %sub = sub nsw i32 %mul6, 1 - store i32 %sub, i32* %diameter, align 4 - %6 = load i32, i32* %diameter, align 4 - %7 = load i32, i32* %diameter, align 4 - %mul7 = mul nsw i32 %6, %7 - %conv8 = sext i32 %mul7 to i64 - %mul9 = mul i64 %conv8, 4 - %call10 = call noalias i8* @malloc(i64 %mul9) #10 - %8 = bitcast i8* %call10 to i32* - store i32* %8, i32** %disk, align 8 - %9 = load i32*, i32** %disk, align 8 - %10 = load i32, i32* %radius, align 4 - call void @_Z9strelDiskPii(i32* %9, i32 %10) - store i32 0, i32* %countOnes, align 4 - store i32 0, i32* %x, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc17, %entry - %11 = load i32, i32* %x, align 4 - %12 = load i32, i32* %diameter, align 4 - %cmp = icmp slt i32 %11, %12 - br i1 %cmp, label %for.body, label %for.end19 - -for.body: ; preds = %for.cond - store i32 0, i32* %y, align 4 - br label %for.cond11 - -for.cond11: ; preds = %for.inc, %for.body - %13 = load i32, i32* %y, align 4 - %14 = load i32, i32* %diameter, align 4 - %cmp12 = icmp slt i32 %13, %14 - br i1 %cmp12, label %for.body13, label %for.end - -for.body13: ; preds = %for.cond11 - %15 = load i32*, i32** %disk, align 8 - %16 = load i32, i32* %x, align 4 - %17 = load i32, i32* %diameter, align 4 - %mul14 = mul nsw i32 %16, %17 - %18 = load i32, i32* %y, align 4 - %add = add nsw i32 %mul14, %18 - %idxprom = sext i32 %add to i64 - %arrayidx = getelementptr inbounds i32, i32* %15, i64 %idxprom - %19 = load i32, i32* %arrayidx, align 4 - %cmp15 = icmp eq i32 %19, 1 - br i1 %cmp15, label %if.then, label %if.end - -if.then: ; preds = %for.body13 - %20 = load i32, i32* %countOnes, align 4 - %inc = add nsw i32 %20, 1 - store i32 %inc, i32* %countOnes, align 4 - br label %if.end - -if.end: ; preds = %if.then, %for.body13 - br label %for.inc - -for.inc: ; preds = %if.end - %21 = load i32, i32* %y, align 4 - %inc16 = add nsw i32 %21, 1 - store i32 %inc16, i32* %y, align 4 - br label %for.cond11 - -for.end: ; preds = %for.cond11 - br label %for.inc17 - -for.inc17: ; preds = %for.end - %22 = load i32, i32* %x, align 4 - %inc18 = add nsw i32 %22, 1 - store i32 %inc18, i32* %x, align 4 - br label %for.cond - -for.end19: ; preds = %for.cond - %23 = load i32, i32* %countOnes, align 4 - %mul20 = mul nsw i32 %23, 2 - %conv21 = sext i32 %mul20 to i64 - %mul22 = mul i64 %conv21, 8 - %call23 = call noalias i8* @malloc(i64 %mul22) #10 - %24 = bitcast i8* %call23 to double* - store double* %24, double** %objxy, align 8 - %25 = load i32*, i32** %disk, align 8 - %26 = load i32, i32* %countOnes, align 4 - %27 = load double*, double** %objxy, align 8 - %28 = load i32, i32* %radius, align 4 - call void @_Z12getneighborsPiiPdi(i32* %25, i32 %26, double* %27, i32 %28) - %call24 = call i64 @_Z8get_timev() - store i64 %call24, i64* %get_neighbors, align 8 - %29 = load i64, i64* %start, align 8 - %30 = load i64, i64* %get_neighbors, align 8 - %call25 = call float @_Z12elapsed_timexx(i64 %29, i64 %30) - %conv26 = fpext float %call25 to double - %call27 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([32 x i8], [32 x i8]* @.str.1, i64 0, i64 0), double %conv26) - %31 = load i32, i32* %Nparticles.addr, align 4 - %conv28 = sext i32 %31 to i64 - %mul29 = mul i64 8, %conv28 - %call30 = call noalias i8* @malloc(i64 %mul29) #10 - %32 = bitcast i8* %call30 to double* - store double* %32, double** %weights, align 8 - store i32 0, i32* %x, align 4 - br label %for.cond31 - -for.cond31: ; preds = %for.inc38, %for.end19 - %33 = load i32, i32* %x, align 4 - %34 = load i32, i32* %Nparticles.addr, align 4 - %cmp32 = icmp slt i32 %33, %34 - br i1 %cmp32, label %for.body33, label %for.end40 - -for.body33: ; preds = %for.cond31 - %35 = load i32, i32* %Nparticles.addr, align 4 - %conv34 = sitofp i32 %35 to double - %div35 = fdiv double 1.000000e+00, %conv34 - %36 = load double*, double** %weights, align 8 - %37 = load i32, i32* %x, align 4 - %idxprom36 = sext i32 %37 to i64 - %arrayidx37 = getelementptr inbounds double, double* %36, i64 %idxprom36 - store double %div35, double* %arrayidx37, align 8 - br label %for.inc38 - -for.inc38: ; preds = %for.body33 - %38 = load i32, i32* %x, align 4 - %inc39 = add nsw i32 %38, 1 - store i32 %inc39, i32* %x, align 4 - br label %for.cond31 - -for.end40: ; preds = %for.cond31 - %call41 = call i64 @_Z8get_timev() - store i64 %call41, i64* %get_weights, align 8 - %39 = load i64, i64* %get_neighbors, align 8 - %40 = load i64, i64* %get_weights, align 8 - %call42 = call float @_Z12elapsed_timexx(i64 %39, i64 %40) - %conv43 = fpext float %call42 to double - %call44 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([29 x i8], [29 x i8]* @.str.2, i64 0, i64 0), double %conv43) - %41 = load i32, i32* %Nparticles.addr, align 4 - %conv45 = sext i32 %41 to i64 - %mul46 = mul i64 8, %conv45 - %call47 = call noalias i8* @malloc(i64 %mul46) #10 - %42 = bitcast i8* %call47 to double* - store double* %42, double** %likelihood, align 8 - %43 = load i32, i32* %Nparticles.addr, align 4 - %conv48 = sext i32 %43 to i64 - %mul49 = mul i64 8, %conv48 - %call50 = call noalias i8* @malloc(i64 %mul49) #10 - %44 = bitcast i8* %call50 to double* - store double* %44, double** %arrayX, align 8 - %45 = load i32, i32* %Nparticles.addr, align 4 - %conv51 = sext i32 %45 to i64 - %mul52 = mul i64 8, %conv51 - %call53 = call noalias i8* @malloc(i64 %mul52) #10 - %46 = bitcast i8* %call53 to double* - store double* %46, double** %arrayY, align 8 - %47 = load i32, i32* %Nparticles.addr, align 4 - %conv54 = sext i32 %47 to i64 - %mul55 = mul i64 8, %conv54 - %call56 = call noalias i8* @malloc(i64 %mul55) #10 - %48 = bitcast i8* %call56 to double* - store double* %48, double** %xj, align 8 - %49 = load i32, i32* %Nparticles.addr, align 4 - %conv57 = sext i32 %49 to i64 - %mul58 = mul i64 8, %conv57 - %call59 = call noalias i8* @malloc(i64 %mul58) #10 - %50 = bitcast i8* %call59 to double* - store double* %50, double** %yj, align 8 - %51 = load i32, i32* %Nparticles.addr, align 4 - %conv60 = sext i32 %51 to i64 - %mul61 = mul i64 8, %conv60 - %call62 = call noalias i8* @malloc(i64 %mul61) #10 - %52 = bitcast i8* %call62 to double* - store double* %52, double** %CDF, align 8 - %53 = load i32, i32* %countOnes, align 4 - %conv63 = sext i32 %53 to i64 - %mul64 = mul i64 4, %conv63 - %call65 = call noalias i8* @malloc(i64 %mul64) #10 - %54 = bitcast i8* %call65 to i32* - store i32* %54, i32** %ind, align 8 - %55 = load i32, i32* %Nparticles.addr, align 4 - %conv66 = sext i32 %55 to i64 - %mul67 = mul i64 8, %conv66 - %call68 = call noalias i8* @malloc(i64 %mul67) #10 - %56 = bitcast i8* %call68 to double* - store double* %56, double** %u, align 8 - %57 = bitcast double** %arrayX_GPU to i8** - %58 = load i32, i32* %Nparticles.addr, align 4 - %conv69 = sext i32 %58 to i64 - %mul70 = mul i64 8, %conv69 - %call71 = call i32 @cudaMalloc(i8** %57, i64 %mul70) - call void @_Z11check_error9cudaError(i32 %call71) - %59 = bitcast double** %arrayY_GPU to i8** - %60 = load i32, i32* %Nparticles.addr, align 4 - %conv72 = sext i32 %60 to i64 - %mul73 = mul i64 8, %conv72 - %call74 = call i32 @cudaMalloc(i8** %59, i64 %mul73) - call void @_Z11check_error9cudaError(i32 %call74) - %61 = bitcast double** %xj_GPU to i8** - %62 = load i32, i32* %Nparticles.addr, align 4 - %conv75 = sext i32 %62 to i64 - %mul76 = mul i64 8, %conv75 - %call77 = call i32 @cudaMalloc(i8** %61, i64 %mul76) - call void @_Z11check_error9cudaError(i32 %call77) - %63 = bitcast double** %yj_GPU to i8** - %64 = load i32, i32* %Nparticles.addr, align 4 - %conv78 = sext i32 %64 to i64 - %mul79 = mul i64 8, %conv78 - %call80 = call i32 @cudaMalloc(i8** %63, i64 %mul79) - call void @_Z11check_error9cudaError(i32 %call80) - %65 = bitcast double** %CDF_GPU to i8** - %66 = load i32, i32* %Nparticles.addr, align 4 - %conv81 = sext i32 %66 to i64 - %mul82 = mul i64 8, %conv81 - %call83 = call i32 @cudaMalloc(i8** %65, i64 %mul82) - call void @_Z11check_error9cudaError(i32 %call83) - %67 = bitcast double** %u_GPU to i8** - %68 = load i32, i32* %Nparticles.addr, align 4 - %conv84 = sext i32 %68 to i64 - %mul85 = mul i64 8, %conv84 - %call86 = call i32 @cudaMalloc(i8** %67, i64 %mul85) - call void @_Z11check_error9cudaError(i32 %call86) - store i32 0, i32* %x, align 4 - br label %for.cond87 - -for.cond87: ; preds = %for.inc94, %for.end40 - %69 = load i32, i32* %x, align 4 - %70 = load i32, i32* %Nparticles.addr, align 4 - %cmp88 = icmp slt i32 %69, %70 - br i1 %cmp88, label %for.body89, label %for.end96 - -for.body89: ; preds = %for.cond87 - %71 = load double, double* %xe, align 8 - %72 = load double*, double** %arrayX, align 8 - %73 = load i32, i32* %x, align 4 - %idxprom90 = sext i32 %73 to i64 - %arrayidx91 = getelementptr inbounds double, double* %72, i64 %idxprom90 - store double %71, double* %arrayidx91, align 8 - %74 = load double, double* %ye, align 8 - %75 = load double*, double** %arrayY, align 8 - %76 = load i32, i32* %x, align 4 - %idxprom92 = sext i32 %76 to i64 - %arrayidx93 = getelementptr inbounds double, double* %75, i64 %idxprom92 - store double %74, double* %arrayidx93, align 8 - br label %for.inc94 - -for.inc94: ; preds = %for.body89 - %77 = load i32, i32* %x, align 4 - %inc95 = add nsw i32 %77, 1 - store i32 %inc95, i32* %x, align 4 - br label %for.cond87 - -for.end96: ; preds = %for.cond87 - store i32 1, i32* %k, align 4 - br label %for.cond97 - -for.cond97: ; preds = %for.inc381, %for.end96 - %78 = load i32, i32* %k, align 4 - %79 = load i32, i32* %Nfr.addr, align 4 - %cmp98 = icmp slt i32 %78, %79 - br i1 %cmp98, label %for.body99, label %for.end383 - -for.body99: ; preds = %for.cond97 - %call100 = call i64 @_Z8get_timev() - store i64 %call100, i64* %set_arrays, align 8 - store i32 0, i32* %x, align 4 - br label %for.cond101 - -for.cond101: ; preds = %for.inc120, %for.body99 - %80 = load i32, i32* %x, align 4 - %81 = load i32, i32* %Nparticles.addr, align 4 - %cmp102 = icmp slt i32 %80, %81 - br i1 %cmp102, label %for.body103, label %for.end122 - -for.body103: ; preds = %for.cond101 - %82 = load double*, double** %arrayX, align 8 - %83 = load i32, i32* %x, align 4 - %idxprom104 = sext i32 %83 to i64 - %arrayidx105 = getelementptr inbounds double, double* %82, i64 %idxprom104 - %84 = load double, double* %arrayidx105, align 8 - %add106 = fadd contract double %84, 1.000000e+00 - %85 = load i32*, i32** %seed.addr, align 8 - %86 = load i32, i32* %x, align 4 - %call107 = call double @_Z5randnPii(i32* %85, i32 %86) - %mul108 = fmul contract double 5.000000e+00, %call107 - %add109 = fadd contract double %add106, %mul108 - %87 = load double*, double** %arrayX, align 8 - %88 = load i32, i32* %x, align 4 - %idxprom110 = sext i32 %88 to i64 - %arrayidx111 = getelementptr inbounds double, double* %87, i64 %idxprom110 - store double %add109, double* %arrayidx111, align 8 - %89 = load double*, double** %arrayY, align 8 - %90 = load i32, i32* %x, align 4 - %idxprom112 = sext i32 %90 to i64 - %arrayidx113 = getelementptr inbounds double, double* %89, i64 %idxprom112 - %91 = load double, double* %arrayidx113, align 8 - %sub114 = fsub contract double %91, 2.000000e+00 - %92 = load i32*, i32** %seed.addr, align 8 - %93 = load i32, i32* %x, align 4 - %call115 = call double @_Z5randnPii(i32* %92, i32 %93) - %mul116 = fmul contract double 2.000000e+00, %call115 - %add117 = fadd contract double %sub114, %mul116 - %94 = load double*, double** %arrayY, align 8 - %95 = load i32, i32* %x, align 4 - %idxprom118 = sext i32 %95 to i64 - %arrayidx119 = getelementptr inbounds double, double* %94, i64 %idxprom118 - store double %add117, double* %arrayidx119, align 8 - br label %for.inc120 - -for.inc120: ; preds = %for.body103 - %96 = load i32, i32* %x, align 4 - %inc121 = add nsw i32 %96, 1 - store i32 %inc121, i32* %x, align 4 - br label %for.cond101 - -for.end122: ; preds = %for.cond101 - %call123 = call i64 @_Z8get_timev() - store i64 %call123, i64* %error, align 8 - %97 = load i64, i64* %set_arrays, align 8 - %98 = load i64, i64* %error, align 8 - %call124 = call float @_Z12elapsed_timexx(i64 %97, i64 %98) - %conv125 = fpext float %call124 to double - %call126 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([28 x i8], [28 x i8]* @.str.3, i64 0, i64 0), double %conv125) - store i32 0, i32* %x, align 4 - br label %for.cond127 - -for.cond127: ; preds = %for.inc178, %for.end122 - %99 = load i32, i32* %x, align 4 - %100 = load i32, i32* %Nparticles.addr, align 4 - %cmp128 = icmp slt i32 %99, %100 - br i1 %cmp128, label %for.body129, label %for.end180 - -for.body129: ; preds = %for.cond127 - store i32 0, i32* %y, align 4 - br label %for.cond130 - -for.cond130: ; preds = %for.inc166, %for.body129 - %101 = load i32, i32* %y, align 4 - %102 = load i32, i32* %countOnes, align 4 - %cmp131 = icmp slt i32 %101, %102 - br i1 %cmp131, label %for.body132, label %for.end168 - -for.body132: ; preds = %for.cond130 - %103 = load double*, double** %arrayX, align 8 - %104 = load i32, i32* %x, align 4 - %idxprom133 = sext i32 %104 to i64 - %arrayidx134 = getelementptr inbounds double, double* %103, i64 %idxprom133 - %105 = load double, double* %arrayidx134, align 8 - %call135 = call double @_Z11roundDoubled(double %105) - %106 = load double*, double** %objxy, align 8 - %107 = load i32, i32* %y, align 4 - %mul136 = mul nsw i32 %107, 2 - %add137 = add nsw i32 %mul136, 1 - %idxprom138 = sext i32 %add137 to i64 - %arrayidx139 = getelementptr inbounds double, double* %106, i64 %idxprom138 - %108 = load double, double* %arrayidx139, align 8 - %add140 = fadd contract double %call135, %108 - %conv141 = fptosi double %add140 to i32 - store i32 %conv141, i32* %indX, align 4 - %109 = load double*, double** %arrayY, align 8 - %110 = load i32, i32* %x, align 4 - %idxprom142 = sext i32 %110 to i64 - %arrayidx143 = getelementptr inbounds double, double* %109, i64 %idxprom142 - %111 = load double, double* %arrayidx143, align 8 - %call144 = call double @_Z11roundDoubled(double %111) - %112 = load double*, double** %objxy, align 8 - %113 = load i32, i32* %y, align 4 - %mul145 = mul nsw i32 %113, 2 - %idxprom146 = sext i32 %mul145 to i64 - %arrayidx147 = getelementptr inbounds double, double* %112, i64 %idxprom146 - %114 = load double, double* %arrayidx147, align 8 - %add148 = fadd contract double %call144, %114 - %conv149 = fptosi double %add148 to i32 - store i32 %conv149, i32* %indY, align 4 - %115 = load i32, i32* %indX, align 4 - %116 = load i32, i32* %IszY.addr, align 4 - %mul150 = mul nsw i32 %115, %116 - %117 = load i32, i32* %Nfr.addr, align 4 - %mul151 = mul nsw i32 %mul150, %117 - %118 = load i32, i32* %indY, align 4 - %119 = load i32, i32* %Nfr.addr, align 4 - %mul152 = mul nsw i32 %118, %119 - %add153 = add nsw i32 %mul151, %mul152 - %120 = load i32, i32* %k, align 4 - %add154 = add nsw i32 %add153, %120 - %call155 = call double @_ZSt4fabsIiEN9__gnu_cxx11__enable_ifIXsr12__is_integerIT_EE7__valueEdE6__typeES2_(i32 %add154) - %conv156 = fptosi double %call155 to i32 - %121 = load i32*, i32** %ind, align 8 - %122 = load i32, i32* %y, align 4 - %idxprom157 = sext i32 %122 to i64 - %arrayidx158 = getelementptr inbounds i32, i32* %121, i64 %idxprom157 - store i32 %conv156, i32* %arrayidx158, align 4 - %123 = load i32*, i32** %ind, align 8 - %124 = load i32, i32* %y, align 4 - %idxprom159 = sext i32 %124 to i64 - %arrayidx160 = getelementptr inbounds i32, i32* %123, i64 %idxprom159 - %125 = load i32, i32* %arrayidx160, align 4 - %126 = load i32, i32* %max_size, align 4 - %cmp161 = icmp sge i32 %125, %126 - br i1 %cmp161, label %if.then162, label %if.end165 - -if.then162: ; preds = %for.body132 - %127 = load i32*, i32** %ind, align 8 - %128 = load i32, i32* %y, align 4 - %idxprom163 = sext i32 %128 to i64 - %arrayidx164 = getelementptr inbounds i32, i32* %127, i64 %idxprom163 - store i32 0, i32* %arrayidx164, align 4 - br label %if.end165 - -if.end165: ; preds = %if.then162, %for.body132 - br label %for.inc166 - -for.inc166: ; preds = %if.end165 - %129 = load i32, i32* %y, align 4 - %inc167 = add nsw i32 %129, 1 - store i32 %inc167, i32* %y, align 4 - br label %for.cond130 - -for.end168: ; preds = %for.cond130 - %130 = load i32*, i32** %I.addr, align 8 - %131 = load i32*, i32** %ind, align 8 - %132 = load i32, i32* %countOnes, align 4 - %call169 = call double @_Z17calcLikelihoodSumPiS_i(i32* %130, i32* %131, i32 %132) - %133 = load double*, double** %likelihood, align 8 - %134 = load i32, i32* %x, align 4 - %idxprom170 = sext i32 %134 to i64 - %arrayidx171 = getelementptr inbounds double, double* %133, i64 %idxprom170 - store double %call169, double* %arrayidx171, align 8 - %135 = load double*, double** %likelihood, align 8 - %136 = load i32, i32* %x, align 4 - %idxprom172 = sext i32 %136 to i64 - %arrayidx173 = getelementptr inbounds double, double* %135, i64 %idxprom172 - %137 = load double, double* %arrayidx173, align 8 - %138 = load i32, i32* %countOnes, align 4 - %conv174 = sitofp i32 %138 to double - %div175 = fdiv double %137, %conv174 - %139 = load double*, double** %likelihood, align 8 - %140 = load i32, i32* %x, align 4 - %idxprom176 = sext i32 %140 to i64 - %arrayidx177 = getelementptr inbounds double, double* %139, i64 %idxprom176 - store double %div175, double* %arrayidx177, align 8 - br label %for.inc178 - -for.inc178: ; preds = %for.end168 - %141 = load i32, i32* %x, align 4 - %inc179 = add nsw i32 %141, 1 - store i32 %inc179, i32* %x, align 4 - br label %for.cond127 - -for.end180: ; preds = %for.cond127 - %call181 = call i64 @_Z8get_timev() - store i64 %call181, i64* %likelihood_time, align 8 - %142 = load i64, i64* %error, align 8 - %143 = load i64, i64* %likelihood_time, align 8 - %call182 = call float @_Z12elapsed_timexx(i64 %142, i64 %143) - %conv183 = fpext float %call182 to double - %call184 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([34 x i8], [34 x i8]* @.str.4, i64 0, i64 0), double %conv183) - store i32 0, i32* %x, align 4 - br label %for.cond185 - -for.cond185: ; preds = %for.inc196, %for.end180 - %144 = load i32, i32* %x, align 4 - %145 = load i32, i32* %Nparticles.addr, align 4 - %cmp186 = icmp slt i32 %144, %145 - br i1 %cmp186, label %for.body187, label %for.end198 - -for.body187: ; preds = %for.cond185 - %146 = load double*, double** %weights, align 8 - %147 = load i32, i32* %x, align 4 - %idxprom188 = sext i32 %147 to i64 - %arrayidx189 = getelementptr inbounds double, double* %146, i64 %idxprom188 - %148 = load double, double* %arrayidx189, align 8 - %149 = load double*, double** %likelihood, align 8 - %150 = load i32, i32* %x, align 4 - %idxprom190 = sext i32 %150 to i64 - %arrayidx191 = getelementptr inbounds double, double* %149, i64 %idxprom190 - %151 = load double, double* %arrayidx191, align 8 - %call192 = call double @exp(double %151) #10 - %mul193 = fmul contract double %148, %call192 - %152 = load double*, double** %weights, align 8 - %153 = load i32, i32* %x, align 4 - %idxprom194 = sext i32 %153 to i64 - %arrayidx195 = getelementptr inbounds double, double* %152, i64 %idxprom194 - store double %mul193, double* %arrayidx195, align 8 - br label %for.inc196 - -for.inc196: ; preds = %for.body187 - %154 = load i32, i32* %x, align 4 - %inc197 = add nsw i32 %154, 1 - store i32 %inc197, i32* %x, align 4 - br label %for.cond185 - -for.end198: ; preds = %for.cond185 - %call199 = call i64 @_Z8get_timev() - store i64 %call199, i64* %exponential, align 8 - %155 = load i64, i64* %likelihood_time, align 8 - %156 = load i64, i64* %exponential, align 8 - %call200 = call float @_Z12elapsed_timexx(i64 %155, i64 %156) - %conv201 = fpext float %call200 to double - %call202 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([26 x i8], [26 x i8]* @.str.5, i64 0, i64 0), double %conv201) - store double 0.000000e+00, double* %sumWeights, align 8 - store i32 0, i32* %x, align 4 - br label %for.cond203 - -for.cond203: ; preds = %for.inc209, %for.end198 - %157 = load i32, i32* %x, align 4 - %158 = load i32, i32* %Nparticles.addr, align 4 - %cmp204 = icmp slt i32 %157, %158 - br i1 %cmp204, label %for.body205, label %for.end211 - -for.body205: ; preds = %for.cond203 - %159 = load double*, double** %weights, align 8 - %160 = load i32, i32* %x, align 4 - %idxprom206 = sext i32 %160 to i64 - %arrayidx207 = getelementptr inbounds double, double* %159, i64 %idxprom206 - %161 = load double, double* %arrayidx207, align 8 - %162 = load double, double* %sumWeights, align 8 - %add208 = fadd contract double %162, %161 - store double %add208, double* %sumWeights, align 8 - br label %for.inc209 - -for.inc209: ; preds = %for.body205 - %163 = load i32, i32* %x, align 4 - %inc210 = add nsw i32 %163, 1 - store i32 %inc210, i32* %x, align 4 - br label %for.cond203 - -for.end211: ; preds = %for.cond203 - %call212 = call i64 @_Z8get_timev() - store i64 %call212, i64* %sum_time, align 8 - %164 = load i64, i64* %exponential, align 8 - %165 = load i64, i64* %sum_time, align 8 - %call213 = call float @_Z12elapsed_timexx(i64 %164, i64 %165) - %conv214 = fpext float %call213 to double - %call215 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([30 x i8], [30 x i8]* @.str.6, i64 0, i64 0), double %conv214) - store i32 0, i32* %x, align 4 - br label %for.cond216 - -for.cond216: ; preds = %for.inc224, %for.end211 - %166 = load i32, i32* %x, align 4 - %167 = load i32, i32* %Nparticles.addr, align 4 - %cmp217 = icmp slt i32 %166, %167 - br i1 %cmp217, label %for.body218, label %for.end226 - -for.body218: ; preds = %for.cond216 - %168 = load double*, double** %weights, align 8 - %169 = load i32, i32* %x, align 4 - %idxprom219 = sext i32 %169 to i64 - %arrayidx220 = getelementptr inbounds double, double* %168, i64 %idxprom219 - %170 = load double, double* %arrayidx220, align 8 - %171 = load double, double* %sumWeights, align 8 - %div221 = fdiv double %170, %171 - %172 = load double*, double** %weights, align 8 - %173 = load i32, i32* %x, align 4 - %idxprom222 = sext i32 %173 to i64 - %arrayidx223 = getelementptr inbounds double, double* %172, i64 %idxprom222 - store double %div221, double* %arrayidx223, align 8 - br label %for.inc224 - -for.inc224: ; preds = %for.body218 - %174 = load i32, i32* %x, align 4 - %inc225 = add nsw i32 %174, 1 - store i32 %inc225, i32* %x, align 4 - br label %for.cond216 - -for.end226: ; preds = %for.cond216 - %call227 = call i64 @_Z8get_timev() - store i64 %call227, i64* %normalize, align 8 - %175 = load i64, i64* %sum_time, align 8 - %176 = load i64, i64* %normalize, align 8 - %call228 = call float @_Z12elapsed_timexx(i64 %175, i64 %176) - %conv229 = fpext float %call228 to double - %call230 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str.7, i64 0, i64 0), double %conv229) - store double 0.000000e+00, double* %xe, align 8 - store double 0.000000e+00, double* %ye, align 8 - store i32 0, i32* %x, align 4 - br label %for.cond231 - -for.cond231: ; preds = %for.inc246, %for.end226 - %177 = load i32, i32* %x, align 4 - %178 = load i32, i32* %Nparticles.addr, align 4 - %cmp232 = icmp slt i32 %177, %178 - br i1 %cmp232, label %for.body233, label %for.end248 - -for.body233: ; preds = %for.cond231 - %179 = load double*, double** %arrayX, align 8 - %180 = load i32, i32* %x, align 4 - %idxprom234 = sext i32 %180 to i64 - %arrayidx235 = getelementptr inbounds double, double* %179, i64 %idxprom234 - %181 = load double, double* %arrayidx235, align 8 - %182 = load double*, double** %weights, align 8 - %183 = load i32, i32* %x, align 4 - %idxprom236 = sext i32 %183 to i64 - %arrayidx237 = getelementptr inbounds double, double* %182, i64 %idxprom236 - %184 = load double, double* %arrayidx237, align 8 - %mul238 = fmul contract double %181, %184 - %185 = load double, double* %xe, align 8 - %add239 = fadd contract double %185, %mul238 - store double %add239, double* %xe, align 8 - %186 = load double*, double** %arrayY, align 8 - %187 = load i32, i32* %x, align 4 - %idxprom240 = sext i32 %187 to i64 - %arrayidx241 = getelementptr inbounds double, double* %186, i64 %idxprom240 - %188 = load double, double* %arrayidx241, align 8 - %189 = load double*, double** %weights, align 8 - %190 = load i32, i32* %x, align 4 - %idxprom242 = sext i32 %190 to i64 - %arrayidx243 = getelementptr inbounds double, double* %189, i64 %idxprom242 - %191 = load double, double* %arrayidx243, align 8 - %mul244 = fmul contract double %188, %191 - %192 = load double, double* %ye, align 8 - %add245 = fadd contract double %192, %mul244 - store double %add245, double* %ye, align 8 - br label %for.inc246 - -for.inc246: ; preds = %for.body233 - %193 = load i32, i32* %x, align 4 - %inc247 = add nsw i32 %193, 1 - store i32 %inc247, i32* %x, align 4 - br label %for.cond231 - -for.end248: ; preds = %for.cond231 - %call249 = call i64 @_Z8get_timev() - store i64 %call249, i64* %move_time, align 8 - %194 = load i64, i64* %normalize, align 8 - %195 = load i64, i64* %move_time, align 8 - %call250 = call float @_Z12elapsed_timexx(i64 %194, i64 %195) - %conv251 = fpext float %call250 to double - %call252 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([30 x i8], [30 x i8]* @.str.8, i64 0, i64 0), double %conv251) - %196 = load double, double* %xe, align 8 - %call253 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.9, i64 0, i64 0), double %196) - %197 = load double, double* %ye, align 8 - %call254 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str.10, i64 0, i64 0), double %197) - %198 = load double, double* %xe, align 8 - %199 = load i32, i32* %IszY.addr, align 4 - %conv255 = sitofp i32 %199 to double - %div256 = fdiv double %conv255, 2.000000e+00 - %call257 = call double @_Z11roundDoubled(double %div256) - %conv258 = fptosi double %call257 to i32 - %conv259 = sitofp i32 %conv258 to double - %sub260 = fsub contract double %198, %conv259 - %call261 = call double @_ZSt3powdi(double %sub260, i32 2) - %200 = load double, double* %ye, align 8 - %201 = load i32, i32* %IszX.addr, align 4 - %conv262 = sitofp i32 %201 to double - %div263 = fdiv double %conv262, 2.000000e+00 - %call264 = call double @_Z11roundDoubled(double %div263) - %conv265 = fptosi double %call264 to i32 - %conv266 = sitofp i32 %conv265 to double - %sub267 = fsub contract double %200, %conv266 - %call268 = call double @_ZSt3powdi(double %sub267, i32 2) - %add269 = fadd contract double %call261, %call268 - %call270 = call double @sqrt(double %add269) #10 - store double %call270, double* %distance, align 8 - %202 = load double, double* %distance, align 8 - %call271 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.11, i64 0, i64 0), double %202) - %203 = load double*, double** %weights, align 8 - %arrayidx272 = getelementptr inbounds double, double* %203, i64 0 - %204 = load double, double* %arrayidx272, align 8 - %205 = load double*, double** %CDF, align 8 - %arrayidx273 = getelementptr inbounds double, double* %205, i64 0 - store double %204, double* %arrayidx273, align 8 - store i32 1, i32* %x, align 4 - br label %for.cond274 - -for.cond274: ; preds = %for.inc285, %for.end248 - %206 = load i32, i32* %x, align 4 - %207 = load i32, i32* %Nparticles.addr, align 4 - %cmp275 = icmp slt i32 %206, %207 - br i1 %cmp275, label %for.body276, label %for.end287 - -for.body276: ; preds = %for.cond274 - %208 = load double*, double** %weights, align 8 - %209 = load i32, i32* %x, align 4 - %idxprom277 = sext i32 %209 to i64 - %arrayidx278 = getelementptr inbounds double, double* %208, i64 %idxprom277 - %210 = load double, double* %arrayidx278, align 8 - %211 = load double*, double** %CDF, align 8 - %212 = load i32, i32* %x, align 4 - %sub279 = sub nsw i32 %212, 1 - %idxprom280 = sext i32 %sub279 to i64 - %arrayidx281 = getelementptr inbounds double, double* %211, i64 %idxprom280 - %213 = load double, double* %arrayidx281, align 8 - %add282 = fadd contract double %210, %213 - %214 = load double*, double** %CDF, align 8 - %215 = load i32, i32* %x, align 4 - %idxprom283 = sext i32 %215 to i64 - %arrayidx284 = getelementptr inbounds double, double* %214, i64 %idxprom283 - store double %add282, double* %arrayidx284, align 8 - br label %for.inc285 - -for.inc285: ; preds = %for.body276 - %216 = load i32, i32* %x, align 4 - %inc286 = add nsw i32 %216, 1 - store i32 %inc286, i32* %x, align 4 - br label %for.cond274 - -for.end287: ; preds = %for.cond274 - %call288 = call i64 @_Z8get_timev() - store i64 %call288, i64* %cum_sum, align 8 - %217 = load i64, i64* %move_time, align 8 - %218 = load i64, i64* %cum_sum, align 8 - %call289 = call float @_Z12elapsed_timexx(i64 %217, i64 %218) - %conv290 = fpext float %call289 to double - %call291 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str.12, i64 0, i64 0), double %conv290) - %219 = load i32, i32* %Nparticles.addr, align 4 - %conv292 = sitofp i32 %219 to double - %div293 = fdiv double 1.000000e+00, %conv292 - %220 = load i32*, i32** %seed.addr, align 8 - %call294 = call double @_Z5randuPii(i32* %220, i32 0) - %mul295 = fmul contract double %div293, %call294 - store double %mul295, double* %u1, align 8 - store i32 0, i32* %x, align 4 - br label %for.cond296 - -for.cond296: ; preds = %for.inc305, %for.end287 - %221 = load i32, i32* %x, align 4 - %222 = load i32, i32* %Nparticles.addr, align 4 - %cmp297 = icmp slt i32 %221, %222 - br i1 %cmp297, label %for.body298, label %for.end307 - -for.body298: ; preds = %for.cond296 - %223 = load double, double* %u1, align 8 - %224 = load i32, i32* %x, align 4 - %conv299 = sitofp i32 %224 to double - %225 = load i32, i32* %Nparticles.addr, align 4 - %conv300 = sitofp i32 %225 to double - %div301 = fdiv double %conv299, %conv300 - %add302 = fadd contract double %223, %div301 - %226 = load double*, double** %u, align 8 - %227 = load i32, i32* %x, align 4 - %idxprom303 = sext i32 %227 to i64 - %arrayidx304 = getelementptr inbounds double, double* %226, i64 %idxprom303 - store double %add302, double* %arrayidx304, align 8 - br label %for.inc305 - -for.inc305: ; preds = %for.body298 - %228 = load i32, i32* %x, align 4 - %inc306 = add nsw i32 %228, 1 - store i32 %inc306, i32* %x, align 4 - br label %for.cond296 - -for.end307: ; preds = %for.cond296 - %call308 = call i64 @_Z8get_timev() - store i64 %call308, i64* %u_time, align 8 - %229 = load i64, i64* %cum_sum, align 8 - %230 = load i64, i64* %u_time, align 8 - %call309 = call float @_Z12elapsed_timexx(i64 %229, i64 %230) - %conv310 = fpext float %call309 to double - %call311 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([25 x i8], [25 x i8]* @.str.13, i64 0, i64 0), double %conv310) - %call312 = call i64 @_Z8get_timev() - store i64 %call312, i64* %start_copy, align 8 - %231 = load double*, double** %arrayX_GPU, align 8 - %232 = bitcast double* %231 to i8* - %233 = load double*, double** %arrayX, align 8 - %234 = bitcast double* %233 to i8* - %235 = load i32, i32* %Nparticles.addr, align 4 - %conv313 = sext i32 %235 to i64 - %mul314 = mul i64 8, %conv313 - %call315 = call i32 @cudaMemcpy(i8* %232, i8* %234, i64 %mul314, i32 1) - %236 = load double*, double** %arrayY_GPU, align 8 - %237 = bitcast double* %236 to i8* - %238 = load double*, double** %arrayY, align 8 - %239 = bitcast double* %238 to i8* - %240 = load i32, i32* %Nparticles.addr, align 4 - %conv316 = sext i32 %240 to i64 - %mul317 = mul i64 8, %conv316 - %call318 = call i32 @cudaMemcpy(i8* %237, i8* %239, i64 %mul317, i32 1) - %241 = load double*, double** %xj_GPU, align 8 - %242 = bitcast double* %241 to i8* - %243 = load double*, double** %xj, align 8 - %244 = bitcast double* %243 to i8* - %245 = load i32, i32* %Nparticles.addr, align 4 - %conv319 = sext i32 %245 to i64 - %mul320 = mul i64 8, %conv319 - %call321 = call i32 @cudaMemcpy(i8* %242, i8* %244, i64 %mul320, i32 1) - %246 = load double*, double** %yj_GPU, align 8 - %247 = bitcast double* %246 to i8* - %248 = load double*, double** %yj, align 8 - %249 = bitcast double* %248 to i8* - %250 = load i32, i32* %Nparticles.addr, align 4 - %conv322 = sext i32 %250 to i64 - %mul323 = mul i64 8, %conv322 - %call324 = call i32 @cudaMemcpy(i8* %247, i8* %249, i64 %mul323, i32 1) - %251 = load double*, double** %CDF_GPU, align 8 - %252 = bitcast double* %251 to i8* - %253 = load double*, double** %CDF, align 8 - %254 = bitcast double* %253 to i8* - %255 = load i32, i32* %Nparticles.addr, align 4 - %conv325 = sext i32 %255 to i64 - %mul326 = mul i64 8, %conv325 - %call327 = call i32 @cudaMemcpy(i8* %252, i8* %254, i64 %mul326, i32 1) - %256 = load double*, double** %u_GPU, align 8 - %257 = bitcast double* %256 to i8* - %258 = load double*, double** %u, align 8 - %259 = bitcast double* %258 to i8* - %260 = load i32, i32* %Nparticles.addr, align 4 - %conv328 = sext i32 %260 to i64 - %mul329 = mul i64 8, %conv328 - %call330 = call i32 @cudaMemcpy(i8* %257, i8* %259, i64 %mul329, i32 1) - %call331 = call i64 @_Z8get_timev() - store i64 %call331, i64* %end_copy, align 8 - %261 = load i32, i32* %Nparticles.addr, align 4 - %conv332 = sitofp i32 %261 to double - %div333 = fdiv double %conv332, 1.280000e+02 - %262 = call double @llvm.ceil.f64(double %div333) - %conv334 = fptosi double %262 to i32 - store i32 %conv334, i32* %num_blocks, align 4 - %263 = load i32, i32* %num_blocks, align 4 - call void @_ZN4dim3C2Ejjj(%struct.dim3* %agg.tmp, i32 %263, i32 1, i32 1) - call void @_ZN4dim3C2Ejjj(%struct.dim3* %agg.tmp335, i32 128, i32 1, i32 1) - %264 = bitcast { i64, i32 }* %agg.tmp.coerce to i8* - %265 = bitcast %struct.dim3* %agg.tmp to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %264, i8* align 4 %265, i64 12, i1 false) - %266 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 0 - %267 = load i64, i64* %266, align 4 - %268 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 1 - %269 = load i32, i32* %268, align 4 - %270 = bitcast { i64, i32 }* %agg.tmp335.coerce to i8* - %271 = bitcast %struct.dim3* %agg.tmp335 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %270, i8* align 4 %271, i64 12, i1 false) - %272 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp335.coerce, i32 0, i32 0 - %273 = load i64, i64* %272, align 4 - %274 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp335.coerce, i32 0, i32 1 - %275 = load i32, i32* %274, align 4 - %call336 = call i32 @__cudaPushCallConfiguration(i64 %267, i32 %269, i64 %273, i32 %275, i64 0, i8* null) - %tobool = icmp ne i32 %call336, 0 - br i1 %tobool, label %kcall.end, label %kcall.configok - -kcall.configok: ; preds = %for.end307 - %276 = load double*, double** %arrayX_GPU, align 8 - %277 = load double*, double** %arrayY_GPU, align 8 - %278 = load double*, double** %CDF_GPU, align 8 - %279 = load double*, double** %u_GPU, align 8 - %280 = load double*, double** %xj_GPU, align 8 - %281 = load double*, double** %yj_GPU, align 8 - %282 = load i32, i32* %Nparticles.addr, align 4 - call void @_Z6kernelPdS_S_S_S_S_i(double* %276, double* %277, double* %278, double* %279, double* %280, double* %281, i32 %282) - br label %kcall.end - -kcall.end: ; preds = %kcall.configok, %for.end307 - %call337 = call i32 @cudaThreadSynchronize() - %call338 = call i64 @_Z8get_timev() - store i64 %call338, i64* %start_copy_back, align 8 - %283 = load double*, double** %yj, align 8 - %284 = bitcast double* %283 to i8* - %285 = load double*, double** %yj_GPU, align 8 - %286 = bitcast double* %285 to i8* - %287 = load i32, i32* %Nparticles.addr, align 4 - %conv339 = sext i32 %287 to i64 - %mul340 = mul i64 8, %conv339 - %call341 = call i32 @cudaMemcpy(i8* %284, i8* %286, i64 %mul340, i32 2) - %288 = load double*, double** %xj, align 8 - %289 = bitcast double* %288 to i8* - %290 = load double*, double** %xj_GPU, align 8 - %291 = bitcast double* %290 to i8* - %292 = load i32, i32* %Nparticles.addr, align 4 - %conv342 = sext i32 %292 to i64 - %mul343 = mul i64 8, %conv342 - %call344 = call i32 @cudaMemcpy(i8* %289, i8* %291, i64 %mul343, i32 2) - %call345 = call i64 @_Z8get_timev() - store i64 %call345, i64* %end_copy_back, align 8 - %293 = load i64, i64* %start_copy, align 8 - %294 = load i64, i64* %end_copy, align 8 - %call346 = call float @_Z12elapsed_timexx(i64 %293, i64 %294) - %conv347 = fpext float %call346 to double - %call348 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([26 x i8], [26 x i8]* @.str.14, i64 0, i64 0), double %conv347) - %295 = load i64, i64* %end_copy, align 8 - %296 = load i64, i64* %start_copy_back, align 8 - %call349 = call float @_Z12elapsed_timexx(i64 %295, i64 %296) - %conv350 = fpext float %call349 to double - %call351 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.15, i64 0, i64 0), double %conv350) - %297 = load i64, i64* %start_copy_back, align 8 - %298 = load i64, i64* %end_copy_back, align 8 - %call352 = call float @_Z12elapsed_timexx(i64 %297, i64 %298) - %conv353 = fpext float %call352 to double - %call354 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([33 x i8], [33 x i8]* @.str.16, i64 0, i64 0), double %conv353) - %call355 = call i64 @_Z8get_timev() - store i64 %call355, i64* %xyj_time, align 8 - %299 = load i64, i64* %u_time, align 8 - %300 = load i64, i64* %xyj_time, align 8 - %call356 = call float @_Z12elapsed_timexx(i64 %299, i64 %300) - %conv357 = fpext float %call356 to double - %call358 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([41 x i8], [41 x i8]* @.str.17, i64 0, i64 0), double %conv357) - store i32 0, i32* %x, align 4 - br label %for.cond359 - -for.cond359: ; preds = %for.inc374, %kcall.end - %301 = load i32, i32* %x, align 4 - %302 = load i32, i32* %Nparticles.addr, align 4 - %cmp360 = icmp slt i32 %301, %302 - br i1 %cmp360, label %for.body361, label %for.end376 - -for.body361: ; preds = %for.cond359 - %303 = load double*, double** %xj, align 8 - %304 = load i32, i32* %x, align 4 - %idxprom362 = sext i32 %304 to i64 - %arrayidx363 = getelementptr inbounds double, double* %303, i64 %idxprom362 - %305 = load double, double* %arrayidx363, align 8 - %306 = load double*, double** %arrayX, align 8 - %307 = load i32, i32* %x, align 4 - %idxprom364 = sext i32 %307 to i64 - %arrayidx365 = getelementptr inbounds double, double* %306, i64 %idxprom364 - store double %305, double* %arrayidx365, align 8 - %308 = load double*, double** %yj, align 8 - %309 = load i32, i32* %x, align 4 - %idxprom366 = sext i32 %309 to i64 - %arrayidx367 = getelementptr inbounds double, double* %308, i64 %idxprom366 - %310 = load double, double* %arrayidx367, align 8 - %311 = load double*, double** %arrayY, align 8 - %312 = load i32, i32* %x, align 4 - %idxprom368 = sext i32 %312 to i64 - %arrayidx369 = getelementptr inbounds double, double* %311, i64 %idxprom368 - store double %310, double* %arrayidx369, align 8 - %313 = load i32, i32* %Nparticles.addr, align 4 - %conv370 = sitofp i32 %313 to double - %div371 = fdiv double 1.000000e+00, %conv370 - %314 = load double*, double** %weights, align 8 - %315 = load i32, i32* %x, align 4 - %idxprom372 = sext i32 %315 to i64 - %arrayidx373 = getelementptr inbounds double, double* %314, i64 %idxprom372 - store double %div371, double* %arrayidx373, align 8 - br label %for.inc374 - -for.inc374: ; preds = %for.body361 - %316 = load i32, i32* %x, align 4 - %inc375 = add nsw i32 %316, 1 - store i32 %inc375, i32* %x, align 4 - br label %for.cond359 - -for.end376: ; preds = %for.cond359 - %call377 = call i64 @_Z8get_timev() - store i64 %call377, i64* %reset, align 8 - %317 = load i64, i64* %xyj_time, align 8 - %318 = load i64, i64* %reset, align 8 - %call378 = call float @_Z12elapsed_timexx(i64 %317, i64 %318) - %conv379 = fpext float %call378 to double - %call380 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([32 x i8], [32 x i8]* @.str.18, i64 0, i64 0), double %conv379) - br label %for.inc381 - -for.inc381: ; preds = %for.end376 - %319 = load i32, i32* %k, align 4 - %inc382 = add nsw i32 %319, 1 - store i32 %inc382, i32* %k, align 4 - br label %for.cond97 - -for.end383: ; preds = %for.cond97 - %call384 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str.19, i64 0, i64 0)) - store i32 0, i32* %i, align 4 - br label %for.cond385 - -for.cond385: ; preds = %for.inc391, %for.end383 - %320 = load i32, i32* %i, align 4 - %cmp386 = icmp slt i32 %320, 10 - br i1 %cmp386, label %for.body387, label %for.end393 - -for.body387: ; preds = %for.cond385 - %321 = load double*, double** %arrayX, align 8 - %322 = load i32, i32* %i, align 4 - %idxprom388 = sext i32 %322 to i64 - %arrayidx389 = getelementptr inbounds double, double* %321, i64 %idxprom388 - %323 = load double, double* %arrayidx389, align 8 - %call390 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.20, i64 0, i64 0), double %323) - br label %for.inc391 - -for.inc391: ; preds = %for.body387 - %324 = load i32, i32* %i, align 4 - %inc392 = add nsw i32 %324, 1 - store i32 %inc392, i32* %i, align 4 - br label %for.cond385 - -for.end393: ; preds = %for.cond385 - %325 = load double*, double** %u_GPU, align 8 - %326 = bitcast double* %325 to i8* - %call394 = call i32 @cudaFree(i8* %326) - %327 = load double*, double** %CDF_GPU, align 8 - %328 = bitcast double* %327 to i8* - %call395 = call i32 @cudaFree(i8* %328) - %329 = load double*, double** %yj_GPU, align 8 - %330 = bitcast double* %329 to i8* - %call396 = call i32 @cudaFree(i8* %330) - %331 = load double*, double** %xj_GPU, align 8 - %332 = bitcast double* %331 to i8* - %call397 = call i32 @cudaFree(i8* %332) - %333 = load double*, double** %arrayY_GPU, align 8 - %334 = bitcast double* %333 to i8* - %call398 = call i32 @cudaFree(i8* %334) - %335 = load double*, double** %arrayX_GPU, align 8 - %336 = bitcast double* %335 to i8* - %call399 = call i32 @cudaFree(i8* %336) - %337 = load i32*, i32** %disk, align 8 - %338 = bitcast i32* %337 to i8* - call void @free(i8* %338) #10 - %339 = load double*, double** %objxy, align 8 - %340 = bitcast double* %339 to i8* - call void @free(i8* %340) #10 - %341 = load double*, double** %weights, align 8 - %342 = bitcast double* %341 to i8* - call void @free(i8* %342) #10 - %343 = load double*, double** %likelihood, align 8 - %344 = bitcast double* %343 to i8* - call void @free(i8* %344) #10 - %345 = load double*, double** %arrayX, align 8 - %346 = bitcast double* %345 to i8* - call void @free(i8* %346) #10 - %347 = load double*, double** %arrayY, align 8 - %348 = bitcast double* %347 to i8* - call void @free(i8* %348) #10 - %349 = load double*, double** %xj, align 8 - %350 = bitcast double* %349 to i8* - call void @free(i8* %350) #10 - %351 = load double*, double** %yj, align 8 - %352 = bitcast double* %351 to i8* - call void @free(i8* %352) #10 - %353 = load double*, double** %CDF, align 8 - %354 = bitcast double* %353 to i8* - call void @free(i8* %354) #10 - %355 = load double*, double** %u, align 8 - %356 = bitcast double* %355 to i8* - call void @free(i8* %356) #10 - %357 = load i32*, i32** %ind, align 8 - %358 = bitcast i32* %357 to i8* - call void @free(i8* %358) #10 - ret void -} - -declare dso_local i32 @cudaMalloc(i8**, i64) #3 - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local double @_ZSt4fabsIiEN9__gnu_cxx11__enable_ifIXsr12__is_integerIT_EE7__valueEdE6__typeES2_(i32 %__x) #0 comdat { -entry: - %__x.addr = alloca i32, align 4 - store i32 %__x, i32* %__x.addr, align 4 - %0 = load i32, i32* %__x.addr, align 4 - %conv = sitofp i32 %0 to double - %1 = call double @llvm.fabs.f64(double %conv) - ret double %1 -} - -; Function Attrs: nounwind -declare dso_local double @exp(double) #1 - -declare dso_local i32 @cudaMemcpy(i8*, i8*, i64, i32) #3 - -; Function Attrs: nounwind readnone speculatable willreturn -declare double @llvm.ceil.f64(double) #6 - -declare dso_local i32 @__cudaPushCallConfiguration(i64, i32, i64, i32, i64, i8*) #3 - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN4dim3C2Ejjj(%struct.dim3* %this, i32 %vx, i32 %vy, i32 %vz) unnamed_addr #0 comdat align 2 { -entry: - %this.addr = alloca %struct.dim3*, align 8 - %vx.addr = alloca i32, align 4 - %vy.addr = alloca i32, align 4 - %vz.addr = alloca i32, align 4 - store %struct.dim3* %this, %struct.dim3** %this.addr, align 8 - store i32 %vx, i32* %vx.addr, align 4 - store i32 %vy, i32* %vy.addr, align 4 - store i32 %vz, i32* %vz.addr, align 4 - %this1 = load %struct.dim3*, %struct.dim3** %this.addr, align 8 - %x = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 0 - %0 = load i32, i32* %vx.addr, align 4 - store i32 %0, i32* %x, align 4 - %y = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 1 - %1 = load i32, i32* %vy.addr, align 4 - store i32 %1, i32* %y, align 4 - %z = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 2 - %2 = load i32, i32* %vz.addr, align 4 - store i32 %2, i32* %z, align 4 - ret void -} - -declare dso_local i32 @cudaThreadSynchronize() #3 - -declare dso_local i32 @cudaFree(i8*) #3 - -; Function Attrs: noinline norecurse optnone uwtable -define dso_local i32 @main(i32 %argc, i8** %argv) #8 { -entry: - %retval = alloca i32, align 4 - %argc.addr = alloca i32, align 4 - %argv.addr = alloca i8**, align 8 - %usage = alloca i8*, align 8 - %IszX = alloca i32, align 4 - %IszY = alloca i32, align 4 - %Nfr = alloca i32, align 4 - %Nparticles = alloca i32, align 4 - %seed = alloca i32*, align 8 - %i = alloca i32, align 4 - %I = alloca i32*, align 8 - %start = alloca i64, align 8 - %endVideoSequence = alloca i64, align 8 - %endParticleFilter = alloca i64, align 8 - store i32 0, i32* %retval, align 4 - store i32 %argc, i32* %argc.addr, align 4 - store i8** %argv, i8*** %argv.addr, align 8 - %call = call i32 @cudaSetDevice(i32 0) - store i8* getelementptr inbounds ([56 x i8], [56 x i8]* @.str.21, i64 0, i64 0), i8** %usage, align 8 - %0 = load i32, i32* %argc.addr, align 4 - %cmp = icmp ne i32 %0, 9 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - %1 = load i8*, i8** %usage, align 8 - %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.22, i64 0, i64 0), i8* %1) - store i32 0, i32* %retval, align 4 - br label %return - -if.end: ; preds = %entry - %2 = load i8**, i8*** %argv.addr, align 8 - %arrayidx = getelementptr inbounds i8*, i8** %2, i64 1 - %3 = load i8*, i8** %arrayidx, align 8 - %call2 = call i32 @strcmp(i8* %3, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.23, i64 0, i64 0)) #13 - %tobool = icmp ne i32 %call2, 0 - br i1 %tobool, label %if.then14, label %lor.lhs.false - -lor.lhs.false: ; preds = %if.end - %4 = load i8**, i8*** %argv.addr, align 8 - %arrayidx3 = getelementptr inbounds i8*, i8** %4, i64 3 - %5 = load i8*, i8** %arrayidx3, align 8 - %call4 = call i32 @strcmp(i8* %5, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.24, i64 0, i64 0)) #13 - %tobool5 = icmp ne i32 %call4, 0 - br i1 %tobool5, label %if.then14, label %lor.lhs.false6 - -lor.lhs.false6: ; preds = %lor.lhs.false - %6 = load i8**, i8*** %argv.addr, align 8 - %arrayidx7 = getelementptr inbounds i8*, i8** %6, i64 5 - %7 = load i8*, i8** %arrayidx7, align 8 - %call8 = call i32 @strcmp(i8* %7, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.25, i64 0, i64 0)) #13 - %tobool9 = icmp ne i32 %call8, 0 - br i1 %tobool9, label %if.then14, label %lor.lhs.false10 - -lor.lhs.false10: ; preds = %lor.lhs.false6 - %8 = load i8**, i8*** %argv.addr, align 8 - %arrayidx11 = getelementptr inbounds i8*, i8** %8, i64 7 - %9 = load i8*, i8** %arrayidx11, align 8 - %call12 = call i32 @strcmp(i8* %9, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.26, i64 0, i64 0)) #13 - %tobool13 = icmp ne i32 %call12, 0 - br i1 %tobool13, label %if.then14, label %if.end16 - -if.then14: ; preds = %lor.lhs.false10, %lor.lhs.false6, %lor.lhs.false, %if.end - %10 = load i8*, i8** %usage, align 8 - %call15 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.22, i64 0, i64 0), i8* %10) - store i32 0, i32* %retval, align 4 - br label %return - -if.end16: ; preds = %lor.lhs.false10 - %11 = load i8**, i8*** %argv.addr, align 8 - %arrayidx17 = getelementptr inbounds i8*, i8** %11, i64 2 - %12 = load i8*, i8** %arrayidx17, align 8 - %call18 = call i32 (i8*, i8*, ...) @sscanf(i8* %12, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.27, i64 0, i64 0), i32* %IszX) #10 - %cmp19 = icmp eq i32 %call18, -1 - br i1 %cmp19, label %if.then20, label %if.end22 - -if.then20: ; preds = %if.end16 - %call21 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str.28, i64 0, i64 0)) - store i32 0, i32* %retval, align 4 - br label %return - -if.end22: ; preds = %if.end16 - %13 = load i32, i32* %IszX, align 4 - %cmp23 = icmp sle i32 %13, 0 - br i1 %cmp23, label %if.then24, label %if.end26 - -if.then24: ; preds = %if.end22 - %call25 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str.29, i64 0, i64 0)) - store i32 0, i32* %retval, align 4 - br label %return - -if.end26: ; preds = %if.end22 - %14 = load i8**, i8*** %argv.addr, align 8 - %arrayidx27 = getelementptr inbounds i8*, i8** %14, i64 4 - %15 = load i8*, i8** %arrayidx27, align 8 - %call28 = call i32 (i8*, i8*, ...) @sscanf(i8* %15, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.27, i64 0, i64 0), i32* %IszY) #10 - %cmp29 = icmp eq i32 %call28, -1 - br i1 %cmp29, label %if.then30, label %if.end32 - -if.then30: ; preds = %if.end26 - %call31 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([31 x i8], [31 x i8]* @.str.30, i64 0, i64 0)) - store i32 0, i32* %retval, align 4 - br label %return - -if.end32: ; preds = %if.end26 - %16 = load i32, i32* %IszY, align 4 - %cmp33 = icmp sle i32 %16, 0 - br i1 %cmp33, label %if.then34, label %if.end36 - -if.then34: ; preds = %if.end32 - %call35 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str.31, i64 0, i64 0)) - store i32 0, i32* %retval, align 4 - br label %return - -if.end36: ; preds = %if.end32 - %17 = load i8**, i8*** %argv.addr, align 8 - %arrayidx37 = getelementptr inbounds i8*, i8** %17, i64 6 - %18 = load i8*, i8** %arrayidx37, align 8 - %call38 = call i32 (i8*, i8*, ...) @sscanf(i8* %18, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.27, i64 0, i64 0), i32* %Nfr) #10 - %cmp39 = icmp eq i32 %call38, -1 - br i1 %cmp39, label %if.then40, label %if.end42 - -if.then40: ; preds = %if.end36 - %call41 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([43 x i8], [43 x i8]* @.str.32, i64 0, i64 0)) - store i32 0, i32* %retval, align 4 - br label %return - -if.end42: ; preds = %if.end36 - %19 = load i32, i32* %Nfr, align 4 - %cmp43 = icmp sle i32 %19, 0 - br i1 %cmp43, label %if.then44, label %if.end46 - -if.then44: ; preds = %if.end42 - %call45 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([30 x i8], [30 x i8]* @.str.33, i64 0, i64 0)) - store i32 0, i32* %retval, align 4 - br label %return - -if.end46: ; preds = %if.end42 - %20 = load i8**, i8*** %argv.addr, align 8 - %arrayidx47 = getelementptr inbounds i8*, i8** %20, i64 8 - %21 = load i8*, i8** %arrayidx47, align 8 - %call48 = call i32 (i8*, i8*, ...) @sscanf(i8* %21, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.27, i64 0, i64 0), i32* %Nparticles) #10 - %cmp49 = icmp eq i32 %call48, -1 - br i1 %cmp49, label %if.then50, label %if.end52 - -if.then50: ; preds = %if.end46 - %call51 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([46 x i8], [46 x i8]* @.str.34, i64 0, i64 0)) - store i32 0, i32* %retval, align 4 - br label %return - -if.end52: ; preds = %if.end46 - %22 = load i32, i32* %Nparticles, align 4 - %cmp53 = icmp sle i32 %22, 0 - br i1 %cmp53, label %if.then54, label %if.end56 - -if.then54: ; preds = %if.end52 - %call55 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([33 x i8], [33 x i8]* @.str.35, i64 0, i64 0)) - store i32 0, i32* %retval, align 4 - br label %return - -if.end56: ; preds = %if.end52 - %23 = load i32, i32* %Nparticles, align 4 - %conv = sext i32 %23 to i64 - %mul = mul i64 4, %conv - %call57 = call noalias i8* @malloc(i64 %mul) #10 - %24 = bitcast i8* %call57 to i32* - store i32* %24, i32** %seed, align 8 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %if.end56 - %25 = load i32, i32* %i, align 4 - %26 = load i32, i32* %Nparticles, align 4 - %cmp58 = icmp slt i32 %25, %26 - br i1 %cmp58, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %27 = load i32, i32* %i, align 4 - %28 = load i32*, i32** %seed, align 8 - %29 = load i32, i32* %i, align 4 - %idxprom = sext i32 %29 to i64 - %arrayidx59 = getelementptr inbounds i32, i32* %28, i64 %idxprom - store i32 %27, i32* %arrayidx59, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %30 = load i32, i32* %i, align 4 - %inc = add nsw i32 %30, 1 - store i32 %inc, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %31 = load i32, i32* %IszX, align 4 - %conv60 = sext i32 %31 to i64 - %mul61 = mul i64 4, %conv60 - %32 = load i32, i32* %IszY, align 4 - %conv62 = sext i32 %32 to i64 - %mul63 = mul i64 %mul61, %conv62 - %33 = load i32, i32* %Nfr, align 4 - %conv64 = sext i32 %33 to i64 - %mul65 = mul i64 %mul63, %conv64 - %call66 = call noalias i8* @malloc(i64 %mul65) #10 - %34 = bitcast i8* %call66 to i32* - store i32* %34, i32** %I, align 8 - %call67 = call i64 @_Z8get_timev() - store i64 %call67, i64* %start, align 8 - %35 = load i32*, i32** %I, align 8 - %36 = load i32, i32* %IszX, align 4 - %37 = load i32, i32* %IszY, align 4 - %38 = load i32, i32* %Nfr, align 4 - %39 = load i32*, i32** %seed, align 8 - call void @_Z13videoSequencePiiiiS_(i32* %35, i32 %36, i32 %37, i32 %38, i32* %39) - %call68 = call i64 @_Z8get_timev() - store i64 %call68, i64* %endVideoSequence, align 8 - %40 = load i64, i64* %start, align 8 - %41 = load i64, i64* %endVideoSequence, align 8 - %call69 = call float @_Z12elapsed_timexx(i64 %40, i64 %41) - %conv70 = fpext float %call69 to double - %call71 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.36, i64 0, i64 0), double %conv70) - %42 = load i32*, i32** %I, align 8 - %43 = load i32, i32* %IszX, align 4 - %44 = load i32, i32* %IszY, align 4 - %45 = load i32, i32* %Nfr, align 4 - %46 = load i32*, i32** %seed, align 8 - %47 = load i32, i32* %Nparticles, align 4 - call void @_Z14particleFilterPiiiiS_i(i32* %42, i32 %43, i32 %44, i32 %45, i32* %46, i32 %47) - %call72 = call i64 @_Z8get_timev() - store i64 %call72, i64* %endParticleFilter, align 8 - %48 = load i64, i64* %endVideoSequence, align 8 - %49 = load i64, i64* %endParticleFilter, align 8 - %call73 = call float @_Z12elapsed_timexx(i64 %48, i64 %49) - %conv74 = fpext float %call73 to double - %call75 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([25 x i8], [25 x i8]* @.str.37, i64 0, i64 0), double %conv74) - %50 = load i64, i64* %start, align 8 - %51 = load i64, i64* %endParticleFilter, align 8 - %call76 = call float @_Z12elapsed_timexx(i64 %50, i64 %51) - %conv77 = fpext float %call76 to double - %call78 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.38, i64 0, i64 0), double %conv77) - %52 = load i32*, i32** %seed, align 8 - %53 = bitcast i32* %52 to i8* - call void @free(i8* %53) #10 - %54 = load i32*, i32** %I, align 8 - %55 = bitcast i32* %54 to i8* - call void @free(i8* %55) #10 - store i32 0, i32* %retval, align 4 - br label %return - -return: ; preds = %for.end, %if.then54, %if.then50, %if.then44, %if.then40, %if.then34, %if.then30, %if.then24, %if.then20, %if.then14, %if.then - %56 = load i32, i32* %retval, align 4 - ret i32 %56 -} - -declare dso_local i32 @cudaSetDevice(i32) #3 - -; Function Attrs: nounwind readonly -declare dso_local i32 @strcmp(i8*, i8*) #9 - -; Function Attrs: nounwind -declare dso_local i32 @sscanf(i8*, i8*, ...) #1 - -; Function Attrs: nounwind readnone speculatable willreturn -declare double @llvm.powi.f64(double, i32) #6 - -define internal void @__cuda_register_globals(i8** %0) { -entry: - %1 = call i32 @__cudaRegisterFunction(i8** %0, i8* bitcast (void (double*, double*, double*, double*, double*, double*, i32)* @_Z6kernelPdS_S_S_S_S_i to i8*), i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i64 0, i64 0), i8* getelementptr inbounds ([23 x i8], [23 x i8]* @0, i64 0, i64 0), i32 -1, i8* null, i8* null, i8* null, i8* null, i32* null) - ret void -} - -declare dso_local i32 @__cudaRegisterFunction(i8**, i8*, i8*, i8*, i32, i8*, i8*, i8*, i8*, i32*) - -declare dso_local i32 @__cudaRegisterVar(i8**, i8*, i8*, i8*, i32, i32, i32, i32) - -declare dso_local i8** @__cudaRegisterFatBinary(i8*) - -define internal void @__cuda_module_ctor(i8* %0) { -entry: - %1 = call i8** @__cudaRegisterFatBinary(i8* bitcast ({ i32, i32, i8*, i8* }* @__cuda_fatbin_wrapper to i8*)) - store i8** %1, i8*** @__cuda_gpubin_handle, align 8 - call void @__cuda_register_globals(i8** %1) - call void @__cudaRegisterFatBinaryEnd(i8** %1) - %2 = call i32 @atexit(void (i8*)* @__cuda_module_dtor) - ret void -} - -declare dso_local void @__cudaRegisterFatBinaryEnd(i8**) - -declare dso_local void @__cudaUnregisterFatBinary(i8**) - -define internal void @__cuda_module_dtor(i8* %0) { -entry: - %1 = load i8**, i8*** @__cuda_gpubin_handle, align 8 - call void @__cudaUnregisterFatBinary(i8** %1) - ret void -} - -declare dso_local i32 @atexit(void (i8*)*) - -attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #3 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #4 = { noreturn nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #5 = { argmemonly nounwind willreturn } -attributes #6 = { nounwind readnone speculatable willreturn } -attributes #7 = { nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #8 = { noinline norecurse optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #9 = { nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #10 = { nounwind } -attributes #11 = { noreturn nounwind } -attributes #12 = { nounwind readnone } -attributes #13 = { nounwind readonly } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]} -!1 = !{i32 1, !"wchar_size", i32 4} -!2 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"} diff --git a/examples/particlefilter/run.sh b/examples/particlefilter/run.sh deleted file mode 100644 index bf29c6b..0000000 --- a/examples/particlefilter/run.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash -set -e -llvm-as ex_particle_CUDA_naive_seq-cuda-nvptx64-nvidia-cuda-sm_61.ll -llvm-as ex_particle_CUDA_naive_seq-host-x86_64-unknown-linux-gnu.ll -../../build/compilation/kernelTranslator ex_particle_CUDA_naive_seq-cuda-nvptx64-nvidia-cuda-sm_61.bc kernel.bc -../../build/compilation/hostTranslator ex_particle_CUDA_naive_seq-host-x86_64-unknown-linux-gnu.bc host.bc -llc --relocation-model=pic --filetype=obj kernel.bc -llc --relocation-model=pic --filetype=obj host.bc - -g++ -Wall -L../../build/runtime -L../../build/runtime/threadPool \ - -o particlefilter_naive -fPIC -no-pie \ - host.o kernel.o -lc -lx86Runtime -lthreadPool -lpthread -export LD_LIBRARY_PATH=../../build/runtime:../../build/runtime/threadPool:$LD_LIBRARY_PATH -./particlefilter_naive -x 128 -y 128 -z 10 -np 1000 > res.log -if grep -q -e "48.550541 48.550541 48.550541 48.550541" res.log; then - echo "Pass" -else - echo "Error result" - exit 1 -fi diff --git a/examples/pathfinder/pathfinder-cuda-nvptx64-nvidia-cuda-sm_61.ll b/examples/pathfinder/pathfinder-cuda-nvptx64-nvidia-cuda-sm_61.ll deleted file mode 100644 index ba65ae1..0000000 --- a/examples/pathfinder/pathfinder-cuda-nvptx64-nvidia-cuda-sm_61.ll +++ /dev/null @@ -1,462 +0,0 @@ -; ModuleID = 'pathfinder-cuda-nvptx64-nvidia-cuda-sm_61.bc' -source_filename = "pathfinder.cu" -target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" -target triple = "nvptx64-nvidia-cuda" - -%struct.__cuda_builtin_blockIdx_t = type { i8 } -%struct.__cuda_builtin_threadIdx_t = type { i8 } -%struct.cudaFuncAttributes = type { i64, i64, i64, i32, i32, i32, i32, i32, i32, i32 } - -$_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv = comdat any - -$_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv = comdat any - -@_ZZ14dynproc_kerneliPiS_S_iiiiE4prev = internal addrspace(3) global [256 x i32] undef, align 4 -@_ZZ14dynproc_kerneliPiS_S_iiiiE6result = internal addrspace(3) global [256 x i32] undef, align 4 -@blockIdx = extern_weak dso_local addrspace(1) global %struct.__cuda_builtin_blockIdx_t, align 1 -@threadIdx = extern_weak dso_local addrspace(1) global %struct.__cuda_builtin_threadIdx_t, align 1 - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaMalloc(i8** %p, i64 %s) #0 { -entry: - %p.addr = alloca i8**, align 8 - %s.addr = alloca i64, align 8 - store i8** %p, i8*** %p.addr, align 8 - store i64 %s, i64* %s.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaFuncGetAttributes(%struct.cudaFuncAttributes* %p, i8* %c) #0 { -entry: - %p.addr = alloca %struct.cudaFuncAttributes*, align 8 - %c.addr = alloca i8*, align 8 - store %struct.cudaFuncAttributes* %p, %struct.cudaFuncAttributes** %p.addr, align 8 - store i8* %c, i8** %c.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaDeviceGetAttribute(i32* %value, i32 %attr, i32 %device) #0 { -entry: - %value.addr = alloca i32*, align 8 - %attr.addr = alloca i32, align 4 - %device.addr = alloca i32, align 4 - store i32* %value, i32** %value.addr, align 8 - store i32 %attr, i32* %attr.addr, align 4 - store i32 %device, i32* %device.addr, align 4 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaGetDevice(i32* %device) #0 { -entry: - %device.addr = alloca i32*, align 8 - store i32* %device, i32** %device.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaOccupancyMaxActiveBlocksPerMultiprocessor(i32* %numBlocks, i8* %func, i32 %blockSize, i64 %dynamicSmemSize) #0 { -entry: - %numBlocks.addr = alloca i32*, align 8 - %func.addr = alloca i8*, align 8 - %blockSize.addr = alloca i32, align 4 - %dynamicSmemSize.addr = alloca i64, align 8 - store i32* %numBlocks, i32** %numBlocks.addr, align 8 - store i8* %func, i8** %func.addr, align 8 - store i32 %blockSize, i32* %blockSize.addr, align 4 - store i64 %dynamicSmemSize, i64* %dynamicSmemSize.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(i32* %numBlocks, i8* %func, i32 %blockSize, i64 %dynamicSmemSize, i32 %flags) #0 { -entry: - %numBlocks.addr = alloca i32*, align 8 - %func.addr = alloca i8*, align 8 - %blockSize.addr = alloca i32, align 4 - %dynamicSmemSize.addr = alloca i64, align 8 - %flags.addr = alloca i32, align 4 - store i32* %numBlocks, i32** %numBlocks.addr, align 8 - store i8* %func, i8** %func.addr, align 8 - store i32 %blockSize, i32* %blockSize.addr, align 4 - store i64 %dynamicSmemSize, i64* %dynamicSmemSize.addr, align 8 - store i32 %flags, i32* %flags.addr, align 4 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define dso_local void @_Z14dynproc_kerneliPiS_S_iiii(i32 %iteration, i32* %gpuWall, i32* %gpuSrc, i32* %gpuResults, i32 %cols, i32 %rows, i32 %startStep, i32 %border) #0 { -entry: - %iteration.addr = alloca i32, align 4 - %gpuWall.addr = alloca i32*, align 8 - %gpuSrc.addr = alloca i32*, align 8 - %gpuResults.addr = alloca i32*, align 8 - %cols.addr = alloca i32, align 4 - %rows.addr = alloca i32, align 4 - %startStep.addr = alloca i32, align 4 - %border.addr = alloca i32, align 4 - %bx = alloca i32, align 4 - %tx = alloca i32, align 4 - %small_block_cols = alloca i32, align 4 - %blkX = alloca i32, align 4 - %blkXmax = alloca i32, align 4 - %xidx = alloca i32, align 4 - %validXmin = alloca i32, align 4 - %validXmax = alloca i32, align 4 - %W = alloca i32, align 4 - %E = alloca i32, align 4 - %isValid = alloca i8, align 1 - %computed = alloca i8, align 1 - %i = alloca i32, align 4 - %left = alloca i32, align 4 - %up = alloca i32, align 4 - %right = alloca i32, align 4 - %shortest = alloca i32, align 4 - %index = alloca i32, align 4 - store i32 %iteration, i32* %iteration.addr, align 4 - store i32* %gpuWall, i32** %gpuWall.addr, align 8 - store i32* %gpuSrc, i32** %gpuSrc.addr, align 8 - store i32* %gpuResults, i32** %gpuResults.addr, align 8 - store i32 %cols, i32* %cols.addr, align 4 - store i32 %rows, i32* %rows.addr, align 4 - store i32 %startStep, i32* %startStep.addr, align 4 - store i32 %border, i32* %border.addr, align 4 - %call = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #2 - store i32 %call, i32* %bx, align 4 - %call1 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - store i32 %call1, i32* %tx, align 4 - %0 = load i32, i32* %iteration.addr, align 4 - %mul = mul nsw i32 %0, 1 - %mul2 = mul nsw i32 %mul, 2 - %sub = sub nsw i32 256, %mul2 - store i32 %sub, i32* %small_block_cols, align 4 - %1 = load i32, i32* %small_block_cols, align 4 - %2 = load i32, i32* %bx, align 4 - %mul3 = mul nsw i32 %1, %2 - %3 = load i32, i32* %border.addr, align 4 - %sub4 = sub nsw i32 %mul3, %3 - store i32 %sub4, i32* %blkX, align 4 - %4 = load i32, i32* %blkX, align 4 - %add = add nsw i32 %4, 256 - %sub5 = sub nsw i32 %add, 1 - store i32 %sub5, i32* %blkXmax, align 4 - %5 = load i32, i32* %blkX, align 4 - %6 = load i32, i32* %tx, align 4 - %add6 = add nsw i32 %5, %6 - store i32 %add6, i32* %xidx, align 4 - %7 = load i32, i32* %blkX, align 4 - %cmp = icmp slt i32 %7, 0 - br i1 %cmp, label %cond.true, label %cond.false - -cond.true: ; preds = %entry - %8 = load i32, i32* %blkX, align 4 - %sub7 = sub nsw i32 0, %8 - br label %cond.end - -cond.false: ; preds = %entry - br label %cond.end - -cond.end: ; preds = %cond.false, %cond.true - %cond = phi i32 [ %sub7, %cond.true ], [ 0, %cond.false ] - store i32 %cond, i32* %validXmin, align 4 - %9 = load i32, i32* %blkXmax, align 4 - %10 = load i32, i32* %cols.addr, align 4 - %sub8 = sub nsw i32 %10, 1 - %cmp9 = icmp sgt i32 %9, %sub8 - br i1 %cmp9, label %cond.true10, label %cond.false14 - -cond.true10: ; preds = %cond.end - %11 = load i32, i32* %blkXmax, align 4 - %12 = load i32, i32* %cols.addr, align 4 - %sub11 = sub nsw i32 %11, %12 - %add12 = add nsw i32 %sub11, 1 - %sub13 = sub nsw i32 255, %add12 - br label %cond.end15 - -cond.false14: ; preds = %cond.end - br label %cond.end15 - -cond.end15: ; preds = %cond.false14, %cond.true10 - %cond16 = phi i32 [ %sub13, %cond.true10 ], [ 255, %cond.false14 ] - store i32 %cond16, i32* %validXmax, align 4 - %13 = load i32, i32* %tx, align 4 - %sub17 = sub nsw i32 %13, 1 - store i32 %sub17, i32* %W, align 4 - %14 = load i32, i32* %tx, align 4 - %add18 = add nsw i32 %14, 1 - store i32 %add18, i32* %E, align 4 - %15 = load i32, i32* %W, align 4 - %16 = load i32, i32* %validXmin, align 4 - %cmp19 = icmp slt i32 %15, %16 - br i1 %cmp19, label %cond.true20, label %cond.false21 - -cond.true20: ; preds = %cond.end15 - %17 = load i32, i32* %validXmin, align 4 - br label %cond.end22 - -cond.false21: ; preds = %cond.end15 - %18 = load i32, i32* %W, align 4 - br label %cond.end22 - -cond.end22: ; preds = %cond.false21, %cond.true20 - %cond23 = phi i32 [ %17, %cond.true20 ], [ %18, %cond.false21 ] - store i32 %cond23, i32* %W, align 4 - %19 = load i32, i32* %E, align 4 - %20 = load i32, i32* %validXmax, align 4 - %cmp24 = icmp sgt i32 %19, %20 - br i1 %cmp24, label %cond.true25, label %cond.false26 - -cond.true25: ; preds = %cond.end22 - %21 = load i32, i32* %validXmax, align 4 - br label %cond.end27 - -cond.false26: ; preds = %cond.end22 - %22 = load i32, i32* %E, align 4 - br label %cond.end27 - -cond.end27: ; preds = %cond.false26, %cond.true25 - %cond28 = phi i32 [ %21, %cond.true25 ], [ %22, %cond.false26 ] - store i32 %cond28, i32* %E, align 4 - %23 = load i32, i32* %tx, align 4 - %24 = load i32, i32* %validXmin, align 4 - %cmp29 = icmp sge i32 %23, %24 - br i1 %cmp29, label %land.rhs, label %land.end - -land.rhs: ; preds = %cond.end27 - %25 = load i32, i32* %tx, align 4 - %26 = load i32, i32* %validXmax, align 4 - %cmp30 = icmp sle i32 %25, %26 - br label %land.end - -land.end: ; preds = %land.rhs, %cond.end27 - %27 = phi i1 [ false, %cond.end27 ], [ %cmp30, %land.rhs ] - %frombool = zext i1 %27 to i8 - store i8 %frombool, i8* %isValid, align 1 - %28 = load i32, i32* %xidx, align 4 - %cmp31 = icmp sge i32 %28, 0 - br i1 %cmp31, label %land.lhs.true, label %if.end - -land.lhs.true: ; preds = %land.end - %29 = load i32, i32* %xidx, align 4 - %30 = load i32, i32* %cols.addr, align 4 - %sub32 = sub nsw i32 %30, 1 - %cmp33 = icmp sle i32 %29, %sub32 - br i1 %cmp33, label %if.then, label %if.end - -if.then: ; preds = %land.lhs.true - %31 = load i32*, i32** %gpuSrc.addr, align 8 - %32 = load i32, i32* %xidx, align 4 - %idxprom = sext i32 %32 to i64 - %arrayidx = getelementptr inbounds i32, i32* %31, i64 %idxprom - %33 = load i32, i32* %arrayidx, align 4 - %34 = load i32, i32* %tx, align 4 - %idxprom34 = sext i32 %34 to i64 - %arrayidx35 = getelementptr inbounds [256 x i32], [256 x i32]* addrspacecast ([256 x i32] addrspace(3)* @_ZZ14dynproc_kerneliPiS_S_iiiiE4prev to [256 x i32]*), i64 0, i64 %idxprom34 - store i32 %33, i32* %arrayidx35, align 4 - br label %if.end - -if.end: ; preds = %if.then, %land.lhs.true, %land.end - call void @llvm.nvvm.barrier0() - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %if.end - %35 = load i32, i32* %i, align 4 - %36 = load i32, i32* %iteration.addr, align 4 - %cmp36 = icmp slt i32 %35, %36 - br i1 %cmp36, label %for.body, label %for.end - -for.body: ; preds = %for.cond - store i8 0, i8* %computed, align 1 - %37 = load i32, i32* %tx, align 4 - %38 = load i32, i32* %i, align 4 - %add37 = add nsw i32 %38, 1 - %cmp38 = icmp sge i32 %37, %add37 - br i1 %cmp38, label %land.lhs.true39, label %if.end69 - -land.lhs.true39: ; preds = %for.body - %39 = load i32, i32* %tx, align 4 - %40 = load i32, i32* %i, align 4 - %sub40 = sub nsw i32 256, %40 - %sub41 = sub nsw i32 %sub40, 2 - %cmp42 = icmp sle i32 %39, %sub41 - br i1 %cmp42, label %land.lhs.true43, label %if.end69 - -land.lhs.true43: ; preds = %land.lhs.true39 - %41 = load i8, i8* %isValid, align 1 - %tobool = trunc i8 %41 to i1 - br i1 %tobool, label %if.then44, label %if.end69 - -if.then44: ; preds = %land.lhs.true43 - store i8 1, i8* %computed, align 1 - %42 = load i32, i32* %W, align 4 - %idxprom45 = sext i32 %42 to i64 - %arrayidx46 = getelementptr inbounds [256 x i32], [256 x i32]* addrspacecast ([256 x i32] addrspace(3)* @_ZZ14dynproc_kerneliPiS_S_iiiiE4prev to [256 x i32]*), i64 0, i64 %idxprom45 - %43 = load i32, i32* %arrayidx46, align 4 - store i32 %43, i32* %left, align 4 - %44 = load i32, i32* %tx, align 4 - %idxprom47 = sext i32 %44 to i64 - %arrayidx48 = getelementptr inbounds [256 x i32], [256 x i32]* addrspacecast ([256 x i32] addrspace(3)* @_ZZ14dynproc_kerneliPiS_S_iiiiE4prev to [256 x i32]*), i64 0, i64 %idxprom47 - %45 = load i32, i32* %arrayidx48, align 4 - store i32 %45, i32* %up, align 4 - %46 = load i32, i32* %E, align 4 - %idxprom49 = sext i32 %46 to i64 - %arrayidx50 = getelementptr inbounds [256 x i32], [256 x i32]* addrspacecast ([256 x i32] addrspace(3)* @_ZZ14dynproc_kerneliPiS_S_iiiiE4prev to [256 x i32]*), i64 0, i64 %idxprom49 - %47 = load i32, i32* %arrayidx50, align 4 - store i32 %47, i32* %right, align 4 - %48 = load i32, i32* %left, align 4 - %49 = load i32, i32* %up, align 4 - %cmp51 = icmp sle i32 %48, %49 - br i1 %cmp51, label %cond.true52, label %cond.false53 - -cond.true52: ; preds = %if.then44 - %50 = load i32, i32* %left, align 4 - br label %cond.end54 - -cond.false53: ; preds = %if.then44 - %51 = load i32, i32* %up, align 4 - br label %cond.end54 - -cond.end54: ; preds = %cond.false53, %cond.true52 - %cond55 = phi i32 [ %50, %cond.true52 ], [ %51, %cond.false53 ] - store i32 %cond55, i32* %shortest, align 4 - %52 = load i32, i32* %shortest, align 4 - %53 = load i32, i32* %right, align 4 - %cmp56 = icmp sle i32 %52, %53 - br i1 %cmp56, label %cond.true57, label %cond.false58 - -cond.true57: ; preds = %cond.end54 - %54 = load i32, i32* %shortest, align 4 - br label %cond.end59 - -cond.false58: ; preds = %cond.end54 - %55 = load i32, i32* %right, align 4 - br label %cond.end59 - -cond.end59: ; preds = %cond.false58, %cond.true57 - %cond60 = phi i32 [ %54, %cond.true57 ], [ %55, %cond.false58 ] - store i32 %cond60, i32* %shortest, align 4 - %56 = load i32, i32* %cols.addr, align 4 - %57 = load i32, i32* %startStep.addr, align 4 - %58 = load i32, i32* %i, align 4 - %add61 = add nsw i32 %57, %58 - %mul62 = mul nsw i32 %56, %add61 - %59 = load i32, i32* %xidx, align 4 - %add63 = add nsw i32 %mul62, %59 - store i32 %add63, i32* %index, align 4 - %60 = load i32, i32* %shortest, align 4 - %61 = load i32*, i32** %gpuWall.addr, align 8 - %62 = load i32, i32* %index, align 4 - %idxprom64 = sext i32 %62 to i64 - %arrayidx65 = getelementptr inbounds i32, i32* %61, i64 %idxprom64 - %63 = load i32, i32* %arrayidx65, align 4 - %add66 = add nsw i32 %60, %63 - %64 = load i32, i32* %tx, align 4 - %idxprom67 = sext i32 %64 to i64 - %arrayidx68 = getelementptr inbounds [256 x i32], [256 x i32]* addrspacecast ([256 x i32] addrspace(3)* @_ZZ14dynproc_kerneliPiS_S_iiiiE6result to [256 x i32]*), i64 0, i64 %idxprom67 - store i32 %add66, i32* %arrayidx68, align 4 - br label %if.end69 - -if.end69: ; preds = %cond.end59, %land.lhs.true43, %land.lhs.true39, %for.body - call void @llvm.nvvm.barrier0() - %65 = load i32, i32* %i, align 4 - %66 = load i32, i32* %iteration.addr, align 4 - %sub70 = sub nsw i32 %66, 1 - %cmp71 = icmp eq i32 %65, %sub70 - br i1 %cmp71, label %if.then72, label %if.end73 - -if.then72: ; preds = %if.end69 - br label %for.end - -if.end73: ; preds = %if.end69 - %67 = load i8, i8* %computed, align 1 - %tobool74 = trunc i8 %67 to i1 - br i1 %tobool74, label %if.then75, label %if.end80 - -if.then75: ; preds = %if.end73 - %68 = load i32, i32* %tx, align 4 - %idxprom76 = sext i32 %68 to i64 - %arrayidx77 = getelementptr inbounds [256 x i32], [256 x i32]* addrspacecast ([256 x i32] addrspace(3)* @_ZZ14dynproc_kerneliPiS_S_iiiiE6result to [256 x i32]*), i64 0, i64 %idxprom76 - %69 = load i32, i32* %arrayidx77, align 4 - %70 = load i32, i32* %tx, align 4 - %idxprom78 = sext i32 %70 to i64 - %arrayidx79 = getelementptr inbounds [256 x i32], [256 x i32]* addrspacecast ([256 x i32] addrspace(3)* @_ZZ14dynproc_kerneliPiS_S_iiiiE4prev to [256 x i32]*), i64 0, i64 %idxprom78 - store i32 %69, i32* %arrayidx79, align 4 - br label %if.end80 - -if.end80: ; preds = %if.then75, %if.end73 - call void @llvm.nvvm.barrier0() - br label %for.inc - -for.inc: ; preds = %if.end80 - %71 = load i32, i32* %i, align 4 - %inc = add nsw i32 %71, 1 - store i32 %inc, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %if.then72, %for.cond - %72 = load i8, i8* %computed, align 1 - %tobool81 = trunc i8 %72 to i1 - br i1 %tobool81, label %if.then82, label %if.end87 - -if.then82: ; preds = %for.end - %73 = load i32, i32* %tx, align 4 - %idxprom83 = sext i32 %73 to i64 - %arrayidx84 = getelementptr inbounds [256 x i32], [256 x i32]* addrspacecast ([256 x i32] addrspace(3)* @_ZZ14dynproc_kerneliPiS_S_iiiiE6result to [256 x i32]*), i64 0, i64 %idxprom83 - %74 = load i32, i32* %arrayidx84, align 4 - %75 = load i32*, i32** %gpuResults.addr, align 8 - %76 = load i32, i32* %xidx, align 4 - %idxprom85 = sext i32 %76 to i64 - %arrayidx86 = getelementptr inbounds i32, i32* %75, i64 %idxprom85 - store i32 %74, i32* %arrayidx86, align 4 - br label %if.end87 - -if.end87: ; preds = %if.then82, %for.end - ret void -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() - ret i32 %0 -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() - ret i32 %0 -} - -; Function Attrs: convergent nounwind -declare void @llvm.nvvm.barrier0() #2 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() #3 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() #3 - -attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_61" "target-features"="+ptx64,+sm_61" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { alwaysinline convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_61" "target-features"="+ptx64,+sm_61" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { convergent nounwind } -attributes #3 = { nounwind readnone } - -!llvm.module.flags = !{!0, !1, !2} -!nvvm.annotations = !{!3, !4, !5, !4, !6, !6, !6, !6, !7, !7, !6} -!llvm.ident = !{!8} -!nvvmir.version = !{!9} - -!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]} -!1 = !{i32 1, !"wchar_size", i32 4} -!2 = !{i32 4, !"nvvm-reflect-ftz", i32 0} -!3 = !{void (i32, i32*, i32*, i32*, i32, i32, i32, i32)* @_Z14dynproc_kerneliPiS_S_iiii, !"kernel", i32 1} -!4 = !{null, !"align", i32 8} -!5 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080} -!6 = !{null, !"align", i32 16} -!7 = !{null, !"align", i32 16, !"align", i32 65552, !"align", i32 131088} -!8 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"} -!9 = !{i32 1, i32 4} diff --git a/examples/pathfinder/pathfinder-host-x86_64-unknown-linux-gnu.ll b/examples/pathfinder/pathfinder-host-x86_64-unknown-linux-gnu.ll deleted file mode 100644 index 720c1b8..0000000 --- a/examples/pathfinder/pathfinder-host-x86_64-unknown-linux-gnu.ll +++ /dev/null @@ -1,745 +0,0 @@ -; ModuleID = 'pathfinder-host-x86_64-unknown-linux-gnu.bc' -source_filename = "pathfinder.cu" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } -%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } -%struct.dim3 = type { i32, i32, i32 } -%struct.CUstream_st = type opaque - -$_ZN4dim3C2Ejjj = comdat any - -@rows = dso_local global i32 0, align 4 -@cols = dso_local global i32 0, align 4 -@data = dso_local global i32* null, align 8 -@wall = dso_local global i32** null, align 8 -@result = dso_local global i32* null, align 8 -@pyramid_height = dso_local global i32 0, align 4 -@.str = private unnamed_addr constant [47 x i8] c"Usage: dynproc row_len col_len pyramid_height\0A\00", align 1 -@stderr = external dso_local global %struct._IO_FILE*, align 8 -@.str.1 = private unnamed_addr constant [11 x i8] c"error: %s\0A\00", align 1 -@.str.2 = private unnamed_addr constant [92 x i8] c"pyramidHeight: %d\0AgridSize: [%d]\0Aborder:[%d]\0AblockSize: %d\0AblockGrid:[%d]\0AtargetBlock:[%d]\0A\00", align 1 -@.str.3 = private unnamed_addr constant [4 x i8] c"%d \00", align 1 -@.str.4 = private unnamed_addr constant [2 x i8] c"\0A\00", align 1 -@0 = private unnamed_addr constant [30 x i8] c"_Z14dynproc_kerneliPiS_S_iiii\00", align 1 -@1 = private constant [20737 x i8] c"P\EDU\BA\01\00\10\00\F0P\00\00\00\00\00\00\02\00\01\01@\00\00\00\E8B\00\00\00\00\00\00\00\00\00\00\00\00\00\00\07\00\01\00=\00\00\00\00\00\00\00\00\00\00\00\11\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\7FELF\02\01\013\07\00\00\00\00\00\00\00\02\00\BE\00e\00\00\00\00\00\00\00\00\00\00\00@B\00\00\00\00\00\00\C0?\00\00\00\00\00\00=\05=\00@\008\00\03\00@\00\0A\00\01\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00.text._Z14dynproc_kerneliPiS_S_iiii\00.nv.info._Z14dynproc_kerneliPiS_S_iiii\00.nv.shared._Z14dynproc_kerneliPiS_S_iiii\00.nv.global\00.nv.constant0._Z14dynproc_kerneliPiS_S_iiii\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00_Z14dynproc_kerneliPiS_S_iiii\00.text._Z14dynproc_kerneliPiS_S_iiii\00.nv.info._Z14dynproc_kerneliPiS_S_iiii\00.nv.shared._Z14dynproc_kerneliPiS_S_iiii\00.nv.global\00blockIdx\00threadIdx\00$___ZZ14dynproc_kerneliPiS_S_iiiiE4prev__187\00$___ZZ14dynproc_kerneliPiS_S_iiiiE6result__189\00.nv.constant0._Z14dynproc_kerneliPiS_S_iiii\00_param\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00P\00\00\00\03\00\07\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\9B\00\00\00\03\00\08\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\C4\00\00\00\03\00\09\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\CF\00\00\00\01\00\09\00\00\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\D8\00\00\00\01\00\09\00\01\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00>\01\00\00\03\00\06\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\002\00\00\00\12\10\07\00\00\00\00\00\00\00\00\00\00:\00\00\00\00\00\00\04/\08\00\07\00\00\00\11\00\00\00\04#\08\00\07\00\00\00\00\00\00\00\04\12\08\00\07\00\00\00x\00\00\00\04\11\08\00\07\00\00\00x\00\00\00\010\00\00\01*\00\00\04\0A\08\00\06\00\00\00@\010\00\03\190\00\04\17\0C\00\00\00\00\00\07\00,\00\00\F0\11\00\04\17\0C\00\00\00\00\00\06\00(\00\00\F0\11\00\04\17\0C\00\00\00\00\00\05\00$\00\00\F0\11\00\04\17\0C\00\00\00\00\00\04\00 \00\00\F0\11\00\04\17\0C\00\00\00\00\00\03\00\18\00\00\F0!\00\04\17\0C\00\00\00\00\00\02\00\10\00\00\F0!\00\04\17\0C\00\00\00\00\00\01\00\08\00\00\F0!\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0\11\00\03\1B\FF\00\04\1D\04\00\E8\05\00\00\04\1C\04\00\D89\00\00\04\1E\04\00\80\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\EF\1F\E0\FD\03!\00\D3rd<3>;\0A\0Amov.u2\00\1B,e\00b;\0Acvta\8D\00\04%\00\13,\\\00\22ld\C8\00\02\18\00nrd2, [\CE\00\1E])\00\1F1)\00\01a0];\0Ast#\00\81[%SP+0],,\00\0A\16\00\128\16\00\222;\B6\00\01\D8\00a1, 999(\00\02g\00\00\EF\00\18[\9E\01\03M\00\AF1;\0Aret;\0A\0A}\D6\01\1A\FE\02FuncGetAttributes\E1\01\0D#\00\0E\EC\01\0F+\00\06\0F\F7\01\1B\1F1\F7\01Q\1F1\F7\01!\0E\D9\00\0F\02\02\0F\0E8\01\0F\0D\02\8DhDevice\B4\00\0E\0E\02\0E$\00\0F\0F\02\00/32,\00\0B\1F1,\00\18\1F2<\02\13\1F2<\02\1F\1D4<\02\1F2<\02\0C\1F2<\02\13\01_\00\04;\02\0F\D9\00\07\1D]4\00\1F14\00\06\0Fp\02\10\0E\9A\01\0Fq\02\12(32q\02\0B\15\00!12\16\00\09\86\02\1F3\86\02\15\1F3\86\02#2Get\CB\00\0E}\02\05\1B\00\04\DA\00\0F\1C\02\13?3[8W\04.\0F\1B\02\0D\1F3W\04\19\04\B3\01\0D\D0\00\0F\AA\01\06\0F\05\04W\F0\04OccupancyMaxActiveBV\08\FE\03sPerMultiprocessor\9F\01\0F;\00\16\0EB\06\0FC\00%\0EJ\04\0FC\00\1E\1F2\86\00/\1F3\88\02\13O4[32\89\02\1C\1D3\89\02\1F4\89\02\0C\1F4\89\02\19\133\89\02\0F\F1\00\1E\0F\BC\04\00\0FK\00$/2]w\07\00\0FL\00$\0F\1F\05\01\0F\98\00%\0F\A7\07\1D\097\05\186M\05\04,\00\2224-\00\183\CF\03\1F2\CF\03\15\1F2\CF\03L\9FWithFlags\D8\03(\05D\00\0E\E1\03\0FL\00'\0F\EA\030\0CL\00\1F2\98\008\1F3\98\008\1F4H\04\13O5[40\EC\08.\0FH\04\0D\1F5\EC\08\1C\0F\F9\00+\1F]\9C\040\0D\9A\01\0F\A5\040\0D:\02\0F\AE\041\0D\DB\02\0F\B7\041\0D|\03\0F\C0\04I\08-\00\1F3$\0A6\F0\07visible .entry _Z14dyn\E2\00\F6\03_kerneliPiS_S_iiii\A6\04\00r\00\0F+\00\0A\0E\8D\04\00\D3\00\0F3\00\11\1F13\00\1F\1F23\00\1F/3,\CC\00\1E\1F43\00\1F\1F53\00\1F\1F63\00\1F\1F7\C2\04\13O6[12\C3\04\16\A6pred %p<21\C5\04\AB16 %rs<10>\E9\04=105\EB\04 56\EC\04P\09.shaK\00\03\97\00\124\97\00\1FZ\D6\00\09\CFE4prev[1024]C\00%t6resultE\00\0Fs\05\08\1F6s\05\19\00!\04\0F\92\01\12\0E\06\05/20<\00\14\1F6B\05\00\1F9<\00\14\1F5<\00\00\1F8<\00\14\0F\0F\06\02\0F<\00\14\1F3\A2\05\01\0F<\00\14\0F\DE\05\01\0Fh\01\15\1F1\F0\00\00\1F7<\00\14#0]\FA\01#to\B6\15\04~\00\144r\05\01\1F\00\0A\1C\00\115\1C\00\1F4;\00\05\146\DA\05\0F;\00\00\117\1C\00\1F6;\00\05\148+\06\0F;\00\00\119\1C\00\1A8\04\06\03r\0E\1F7]\06\02\1A9\16\00\03]\06/d7^\06\03\1F5^\06\02\1B1q\00\133\A2\06\1A9\17\00\134,\0B\1B0\17\00\02\\\00\192I\0F\CB22, %ctaid.x/\00\02\B9\00\192\D3\06n23, %t-\00\135\FF\06\113\FF\01\03,\00$4,\18\01S;\0Ashl\9D\04325,\1D\00\0A\89\00\D26, 256;\0Asub.s\13\00#7,\19\00\006\00\0Bq\00\02\FB\00(27q\00%8,\1D\00\08\17\00%9,\D2\00\83;\0Amul.lob\00330,8\00\00'\00\074\00531,5\01\08\93\00532,7\00\1B3'\12\136\CD\07\182H\00%3,\1D\00T;\0AaddH\00#4,\1E\00+25\DA\00\126\A7\01(34G\00\185G\00\06\17\00%6,\7F\01\09^\00#7,5\00\00$\00\0B_\00\02\D7\01\1839\01(38_\00rsetp.gtL\003p3,!\00\F2\0D-1;\0A@%p3 bra LBB6_2;\0Abra.uni\10\0021;\0A\08\00\16:[\00(40[\00Tneg.s\84\0A\00\1D\00\08\02\02#99o\09\09U\00\133U\00(2:Z\09?9, <\00\00/39=\00\04*3:m\0E\001\00\0A4\03\157\C0\09\06\BB\00%1,\92\01\08\17\00%2,\95\03\08u\01343,\1E\00#-1E\01#leE\01#4,P\00\00'\00\01G\01\164G\01\1B5\B5\00\134\B5\00\184G\01\1F5\8C\00\03\186\8C\00\06\A7\02347,\1E\00\00;\00\09\1B\02\13,\1F\009254Q\13\120\D4\02\0B\8C\00\136\8C\00\185A\01#44\B7\02\0D?\00/44@\00\04\196@\00\12,2\00\0B\82\01\02\D2\03\08\F7\02/48\E0\02\03349,\1E\00\1C-\C0\15\02\84\03\184\CC\03/50F\00\03351,\1E\00\0CE\00\02\82\03(51E\00\04\F6\01\198\82\03553,@\02\04:\03\04\F5\01#5,8\00\00'\00\01\F5\01\165\F5\01\1B8)\01\137)\01\177\F5\01\185\\\00\08l\01\02\B9\00\0BB\00\139B\00\178B\00\186\B5\00\0DB\00\1F6B\00\04\189\AD\01\227,1\00\0Fh\01\02\08E\04\145\EF\05\198\A3\02\155\C6\00,6]\17\03#6,8\00\00'\00\01\22\01\166\22\01\1C1\0A\04$10`\04\170\E3\00\188_\00\08\E3\00\01y\01\0Cg\01$12D\00\08\A5\04\189\BA\00\0DD\00\1F9D\00\06\09\94\04210,4\00\0FM\02\02(10\14\01\09\B8\05\06\17\00\1D7M\02\14l\87\05#8,8\00\00'\00\02\A5\00\03\12\0C\157\0B\05\03\11\00\00\D9\0A1%p7P\01\178P\01\0C\9C\03\141\1E\05(13u\02\1F8\9C\00\03\1F9\C7\01\07#1,8\00\00'\00\0F\8B\00\00\0D\C9\01\04\E2\04914:.\00\11,3\00\00_\00Blp.u\DE\0Ca1, 1, H\00\03Z\01\138Y\01\02\C9\06\18s\A6\03\156n\06\1D8B\01#9,!\00\110\1A\01\179\1A\01\1C7\A1\00\145\A1\00\185M\03\181]\00\06\17\00\0F\10\06\04\116k\05\186\10\06\05U\07\02,\02\126\86\03\02\1C\04/10\91\00\09\04H\05\2216\91\00\03\96\0B%0,N\0A\01\92\00\02z\00)d1\AA\00\01{\09\031\00$2, \00\132\AC\00\03\19\00$3,Q\00\01'\00\08\E0\00\01\D8\03\00#\00\0Ad\00\184(\02\08d\00$5, \00\172\E9\0D_rd16,i\0E\12\03q\0B\02e\0E\05=\00\02\AD\0B*16\B7\00(8,\1D\00\08J\09\00\1D\00\01!\02\1D6\F8\02\04l\05\CA17:\0Abar.syncO\08K65, \9F\06\129!\08\1C6w\05$18N\00\09y\05\04\E2\03\199F\03(67\13\0B\09.\06\00\94\01\028\00\00'\00\02\13\02\161\13\02\1D3x\00\04\AE\05\141\AF\05\020\03\152\BA\00\04(\03\129(\03\09\D4\01\0F\E5\03\03(69\B6\00\06\CB\02370,\1E\00\1C1V\03\02R\02\116\88\01\127Z\03'12#\0A\0C=\05\142\C5\05\182\C5\05/71\90\00\02(72\90\00\07\94\06\133\C0\08\06\F2\08374,\19\00\007\00\0Dp\03#3,f\00\00*\00\02\A6\00\1F3\A6\00\09\04\CB\0A\132'\06\108B\01%3,z\04T;\0Aand\81\11#4,\1E\00\04\1E\01\22eq\1B\00\10p\05\03\02!\00\00=\0B\10!\11\00\0Fv\00\09\04Y\06)22\D3\01\155\A2\08\0B\D3\01\195\91\03\1D9\ED\07\03\F8\0F\1F0x\03(\2221A\0F\190\E4\03\016\0E\02s\00\0AH\04\01 \0E\066\00\08t\02\2275H\04;23]Z\03\02\F9\09\197\CE\00\1F2_\04\05\02?\0E\01 \00\0B{\00\196{\00\185{\00\136{\00\1B6{\00#10\C8\08\196|\00\187\10\08\08|\00$8, \00\0B|\00\199|\00\188|\00\137|\00\1D9|\00\02\1A\08\187E\09\147\99\03\199\C3\0E679,\B1\00\0D\DD\02#5,:\00\00)\00\02\DD\02\07j\0A\1C2\00\05\152S\0D\08\F8\07\054\06\1C9I\09\01\D3\01\0D\8E\09$25F\00\08\A8\0C)12\A9\00\0EG\00\1F2G\00\06\09c\0C\02\B1\03<1034\01\03\A1!\08h\10\158\06\07\190\99\07681,j\01\0D5\01#6,;\00\00)\00\025\01\07}\0A,27\A8\00\04\82\07(26\EF\00\194{\00\08\EF\00\02\E5\0C\0D|\01\04.\06\182\E9\0B)15\AA\00\0EG\00\1F5G\00\06\1986\01\226,5\00\0F6\01\04\186\1E\01\09\B7\08\06\17\00\04\E5\04\194\E8\0C/84\1A\06\03385,5\00\00$\00\0B^\11386,i\00\00)\00\08N\00\187\A0\08\07N\00&8,7\00\1B7\C5\00\03\86#\188]\03-89\FB\01\03(\15\04\14\02\198\B5\08\05\D7\11\00N\00\09\DB\03\02\D9\11-d3\1A\09\00\A4\11\03Q\00\01'\00\07\DD\11\2290\DB\03)33\C6\00391,\9B\00\00#\00\09\7F\00\09\D5\04\07\A3\05\00\AB\11\0FA\17\13\0F\A5\05\02\2236\9D\00\1A5\D3\00$7,u\00\0B\D3\00(8,6\00\1976\09\123,\02\1D9\F0\03\04p\08*296\09\06\0F\01\1F2\F4\08\02'93\F4\08\07&\01#4,\1D\00\05\1F\0B\15n\0C\09#7,P\00\00(\00\02\9D\03\177\0C\09\0D$\12\04U\08;30:\1A\00\04_\04\183\C9\07\05\17\0F\1A9\C9\07#7,\1E\00\0E\C9\07#8,!\00\04\C9\07\07Z\0D\1C36\11$32v\00(2:\FF\01\1F9\D4\06\04\00%\13\03 \00\0A3\0B/41\18\02*\124\D5\02)41\FF\01\02\A9\12\05\1D\00\09\C3\01\03\A4\07:43]\88\00\1F4C\08(\134\A3\17\1A4\86\00(6,\1D\00\190\85\02\124\13\08\1C9\F8\04\153\D1\13\1A3\85\02\0A&\00\04`\0E\183\9B\06/96\C3\04\03\129m\02\1D6\07\12\03\F2\0B/97\F2\0B\05'35T\02\1F8T\02\04#9,\1E\00\0ET\02\02\1B\00\169T\02\07\94\0E\1C3v\00$36v\00\186T\02/47T\02\05$8, \00\0BT\02\1F9T\02*\2250X\00\199\CE\01\02L\13\05\1D\00\08\A4\05\2298T\02)51\A2\05\05Q\13)24\A3\05/53\BC\0E\04\02(\12\01 \00\0A\CF\04\00b\12\03Q\00\01'\00\09J\02\2255\C5\01\0C\09\12$37O\01\B07:\0Aret;\0A\0A}\0A\00\00", section ".nv_fatbin", align 8 -@__cuda_fatbin_wrapper = internal constant { i32, i32, i8*, i8* } { i32 1180844977, i32 1, i8* getelementptr inbounds ([20737 x i8], [20737 x i8]* @1, i64 0, i64 0), i8* null }, section ".nvFatBinSegment", align 8 -@__cuda_gpubin_handle = internal global i8** null, align 8 -@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* bitcast (void (i8*)* @__cuda_module_ctor to void ()*), i8* null }] - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z4initiPPc(i32 %argc, i8** %argv) #0 { -entry: - %argc.addr = alloca i32, align 4 - %argv.addr = alloca i8**, align 8 - %n = alloca i32, align 4 - %seed = alloca i32, align 4 - %i = alloca i32, align 4 - %j = alloca i32, align 4 - store i32 %argc, i32* %argc.addr, align 4 - store i8** %argv, i8*** %argv.addr, align 8 - %0 = load i32, i32* %argc.addr, align 4 - %cmp = icmp eq i32 %0, 4 - br i1 %cmp, label %if.then, label %if.else - -if.then: ; preds = %entry - %1 = load i8**, i8*** %argv.addr, align 8 - %arrayidx = getelementptr inbounds i8*, i8** %1, i64 1 - %2 = load i8*, i8** %arrayidx, align 8 - %call = call i32 @atoi(i8* %2) #11 - store i32 %call, i32* @cols, align 4 - %3 = load i8**, i8*** %argv.addr, align 8 - %arrayidx1 = getelementptr inbounds i8*, i8** %3, i64 2 - %4 = load i8*, i8** %arrayidx1, align 8 - %call2 = call i32 @atoi(i8* %4) #11 - store i32 %call2, i32* @rows, align 4 - %5 = load i8**, i8*** %argv.addr, align 8 - %arrayidx3 = getelementptr inbounds i8*, i8** %5, i64 3 - %6 = load i8*, i8** %arrayidx3, align 8 - %call4 = call i32 @atoi(i8* %6) #11 - store i32 %call4, i32* @pyramid_height, align 4 - br label %if.end - -if.else: ; preds = %entry - %call5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([47 x i8], [47 x i8]* @.str, i64 0, i64 0)) - call void @exit(i32 0) #12 - unreachable - -if.end: ; preds = %if.then - %7 = load i32, i32* @rows, align 4 - %8 = load i32, i32* @cols, align 4 - %mul = mul nsw i32 %7, %8 - %9 = sext i32 %mul to i64 - %10 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %9, i64 4) - %11 = extractvalue { i64, i1 } %10, 1 - %12 = extractvalue { i64, i1 } %10, 0 - %13 = select i1 %11, i64 -1, i64 %12 - %call6 = call i8* @_Znam(i64 %13) #13 - %14 = bitcast i8* %call6 to i32* - store i32* %14, i32** @data, align 8 - %15 = load i32, i32* @rows, align 4 - %16 = sext i32 %15 to i64 - %17 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %16, i64 8) - %18 = extractvalue { i64, i1 } %17, 1 - %19 = extractvalue { i64, i1 } %17, 0 - %20 = select i1 %18, i64 -1, i64 %19 - %call7 = call i8* @_Znam(i64 %20) #13 - %21 = bitcast i8* %call7 to i32** - store i32** %21, i32*** @wall, align 8 - store i32 0, i32* %n, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %if.end - %22 = load i32, i32* %n, align 4 - %23 = load i32, i32* @rows, align 4 - %cmp8 = icmp slt i32 %22, %23 - br i1 %cmp8, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %24 = load i32*, i32** @data, align 8 - %25 = load i32, i32* @cols, align 4 - %26 = load i32, i32* %n, align 4 - %mul9 = mul nsw i32 %25, %26 - %idx.ext = sext i32 %mul9 to i64 - %add.ptr = getelementptr inbounds i32, i32* %24, i64 %idx.ext - %27 = load i32**, i32*** @wall, align 8 - %28 = load i32, i32* %n, align 4 - %idxprom = sext i32 %28 to i64 - %arrayidx10 = getelementptr inbounds i32*, i32** %27, i64 %idxprom - store i32* %add.ptr, i32** %arrayidx10, align 8 - br label %for.inc - -for.inc: ; preds = %for.body - %29 = load i32, i32* %n, align 4 - %inc = add nsw i32 %29, 1 - store i32 %inc, i32* %n, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %30 = load i32, i32* @cols, align 4 - %31 = sext i32 %30 to i64 - %32 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %31, i64 4) - %33 = extractvalue { i64, i1 } %32, 1 - %34 = extractvalue { i64, i1 } %32, 0 - %35 = select i1 %33, i64 -1, i64 %34 - %call11 = call i8* @_Znam(i64 %35) #13 - %36 = bitcast i8* %call11 to i32* - store i32* %36, i32** @result, align 8 - store i32 9, i32* %seed, align 4 - %37 = load i32, i32* %seed, align 4 - call void @srand(i32 %37) #14 - store i32 0, i32* %i, align 4 - br label %for.cond12 - -for.cond12: ; preds = %for.inc26, %for.end - %38 = load i32, i32* %i, align 4 - %39 = load i32, i32* @rows, align 4 - %cmp13 = icmp slt i32 %38, %39 - br i1 %cmp13, label %for.body14, label %for.end28 - -for.body14: ; preds = %for.cond12 - store i32 0, i32* %j, align 4 - br label %for.cond15 - -for.cond15: ; preds = %for.inc23, %for.body14 - %40 = load i32, i32* %j, align 4 - %41 = load i32, i32* @cols, align 4 - %cmp16 = icmp slt i32 %40, %41 - br i1 %cmp16, label %for.body17, label %for.end25 - -for.body17: ; preds = %for.cond15 - %call18 = call i32 @rand() #14 - %rem = srem i32 %call18, 10 - %42 = load i32**, i32*** @wall, align 8 - %43 = load i32, i32* %i, align 4 - %idxprom19 = sext i32 %43 to i64 - %arrayidx20 = getelementptr inbounds i32*, i32** %42, i64 %idxprom19 - %44 = load i32*, i32** %arrayidx20, align 8 - %45 = load i32, i32* %j, align 4 - %idxprom21 = sext i32 %45 to i64 - %arrayidx22 = getelementptr inbounds i32, i32* %44, i64 %idxprom21 - store i32 %rem, i32* %arrayidx22, align 4 - br label %for.inc23 - -for.inc23: ; preds = %for.body17 - %46 = load i32, i32* %j, align 4 - %inc24 = add nsw i32 %46, 1 - store i32 %inc24, i32* %j, align 4 - br label %for.cond15 - -for.end25: ; preds = %for.cond15 - br label %for.inc26 - -for.inc26: ; preds = %for.end25 - %47 = load i32, i32* %i, align 4 - %inc27 = add nsw i32 %47, 1 - store i32 %inc27, i32* %i, align 4 - br label %for.cond12 - -for.end28: ; preds = %for.cond12 - ret void -} - -; Function Attrs: nounwind readonly -declare dso_local i32 @atoi(i8*) #1 - -declare dso_local i32 @printf(i8*, ...) #2 - -; Function Attrs: noreturn nounwind -declare dso_local void @exit(i32) #3 - -; Function Attrs: nounwind readnone speculatable willreturn -declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64) #4 - -; Function Attrs: nobuiltin -declare dso_local noalias i8* @_Znam(i64) #5 - -; Function Attrs: nounwind -declare dso_local void @srand(i32) #6 - -; Function Attrs: nounwind -declare dso_local i32 @rand() #6 - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z5fatalPc(i8* %s) #0 { -entry: - %s.addr = alloca i8*, align 8 - store i8* %s, i8** %s.addr, align 8 - %0 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %1 = load i8*, i8** %s.addr, align 8 - %call = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %0, i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.1, i64 0, i64 0), i8* %1) - ret void -} - -declare dso_local i32 @fprintf(%struct._IO_FILE*, i8*, ...) #2 - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z14dynproc_kerneliPiS_S_iiii(i32 %iteration, i32* %gpuWall, i32* %gpuSrc, i32* %gpuResults, i32 %cols, i32 %rows, i32 %startStep, i32 %border) #0 { -entry: - %iteration.addr = alloca i32, align 4 - %gpuWall.addr = alloca i32*, align 8 - %gpuSrc.addr = alloca i32*, align 8 - %gpuResults.addr = alloca i32*, align 8 - %cols.addr = alloca i32, align 4 - %rows.addr = alloca i32, align 4 - %startStep.addr = alloca i32, align 4 - %border.addr = alloca i32, align 4 - %grid_dim = alloca %struct.dim3, align 8 - %block_dim = alloca %struct.dim3, align 8 - %shmem_size = alloca i64, align 8 - %stream = alloca i8*, align 8 - %grid_dim.coerce = alloca { i64, i32 }, align 8 - %block_dim.coerce = alloca { i64, i32 }, align 8 - store i32 %iteration, i32* %iteration.addr, align 4 - store i32* %gpuWall, i32** %gpuWall.addr, align 8 - store i32* %gpuSrc, i32** %gpuSrc.addr, align 8 - store i32* %gpuResults, i32** %gpuResults.addr, align 8 - store i32 %cols, i32* %cols.addr, align 4 - store i32 %rows, i32* %rows.addr, align 4 - store i32 %startStep, i32* %startStep.addr, align 4 - store i32 %border, i32* %border.addr, align 4 - %kernel_args = alloca i8*, i64 8, align 16 - %0 = bitcast i32* %iteration.addr to i8* - %1 = getelementptr i8*, i8** %kernel_args, i32 0 - store i8* %0, i8** %1 - %2 = bitcast i32** %gpuWall.addr to i8* - %3 = getelementptr i8*, i8** %kernel_args, i32 1 - store i8* %2, i8** %3 - %4 = bitcast i32** %gpuSrc.addr to i8* - %5 = getelementptr i8*, i8** %kernel_args, i32 2 - store i8* %4, i8** %5 - %6 = bitcast i32** %gpuResults.addr to i8* - %7 = getelementptr i8*, i8** %kernel_args, i32 3 - store i8* %6, i8** %7 - %8 = bitcast i32* %cols.addr to i8* - %9 = getelementptr i8*, i8** %kernel_args, i32 4 - store i8* %8, i8** %9 - %10 = bitcast i32* %rows.addr to i8* - %11 = getelementptr i8*, i8** %kernel_args, i32 5 - store i8* %10, i8** %11 - %12 = bitcast i32* %startStep.addr to i8* - %13 = getelementptr i8*, i8** %kernel_args, i32 6 - store i8* %12, i8** %13 - %14 = bitcast i32* %border.addr to i8* - %15 = getelementptr i8*, i8** %kernel_args, i32 7 - store i8* %14, i8** %15 - %16 = call i32 @__cudaPopCallConfiguration(%struct.dim3* %grid_dim, %struct.dim3* %block_dim, i64* %shmem_size, i8** %stream) - %17 = load i64, i64* %shmem_size, align 8 - %18 = load i8*, i8** %stream, align 8 - %19 = bitcast { i64, i32 }* %grid_dim.coerce to i8* - %20 = bitcast %struct.dim3* %grid_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %19, i8* align 8 %20, i64 12, i1 false) - %21 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 0 - %22 = load i64, i64* %21, align 8 - %23 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 1 - %24 = load i32, i32* %23, align 8 - %25 = bitcast { i64, i32 }* %block_dim.coerce to i8* - %26 = bitcast %struct.dim3* %block_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %25, i8* align 8 %26, i64 12, i1 false) - %27 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 0 - %28 = load i64, i64* %27, align 8 - %29 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 1 - %30 = load i32, i32* %29, align 8 - %31 = bitcast i8* %18 to %struct.CUstream_st* - %call = call i32 @cudaLaunchKernel(i8* bitcast (void (i32, i32*, i32*, i32*, i32, i32, i32, i32)* @_Z14dynproc_kerneliPiS_S_iiii to i8*), i64 %22, i32 %24, i64 %28, i32 %30, i8** %kernel_args, i64 %17, %struct.CUstream_st* %31) - br label %setup.end - -setup.end: ; preds = %entry - ret void -} - -declare dso_local i32 @__cudaPopCallConfiguration(%struct.dim3*, %struct.dim3*, i64*, i8**) - -declare dso_local i32 @cudaLaunchKernel(i8*, i64, i32, i64, i32, i8**, i64, %struct.CUstream_st*) - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #7 - -; Function Attrs: noinline optnone uwtable -define dso_local i32 @_Z9calc_pathPiPS_iiiii(i32* %gpuWall, i32** %gpuResult, i32 %rows, i32 %cols, i32 %pyramid_height, i32 %blockCols, i32 %borderCols) #0 { -entry: - %gpuWall.addr = alloca i32*, align 8 - %gpuResult.addr = alloca i32**, align 8 - %rows.addr = alloca i32, align 4 - %cols.addr = alloca i32, align 4 - %pyramid_height.addr = alloca i32, align 4 - %blockCols.addr = alloca i32, align 4 - %borderCols.addr = alloca i32, align 4 - %dimBlock = alloca %struct.dim3, align 4 - %dimGrid = alloca %struct.dim3, align 4 - %src = alloca i32, align 4 - %dst = alloca i32, align 4 - %t = alloca i32, align 4 - %temp = alloca i32, align 4 - %agg.tmp = alloca %struct.dim3, align 4 - %agg.tmp1 = alloca %struct.dim3, align 4 - %agg.tmp.coerce = alloca { i64, i32 }, align 4 - %agg.tmp1.coerce = alloca { i64, i32 }, align 4 - store i32* %gpuWall, i32** %gpuWall.addr, align 8 - store i32** %gpuResult, i32*** %gpuResult.addr, align 8 - store i32 %rows, i32* %rows.addr, align 4 - store i32 %cols, i32* %cols.addr, align 4 - store i32 %pyramid_height, i32* %pyramid_height.addr, align 4 - store i32 %blockCols, i32* %blockCols.addr, align 4 - store i32 %borderCols, i32* %borderCols.addr, align 4 - call void @_ZN4dim3C2Ejjj(%struct.dim3* %dimBlock, i32 256, i32 1, i32 1) - %0 = load i32, i32* %blockCols.addr, align 4 - call void @_ZN4dim3C2Ejjj(%struct.dim3* %dimGrid, i32 %0, i32 1, i32 1) - store i32 1, i32* %src, align 4 - store i32 0, i32* %dst, align 4 - store i32 0, i32* %t, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %1 = load i32, i32* %t, align 4 - %2 = load i32, i32* %rows.addr, align 4 - %sub = sub nsw i32 %2, 1 - %cmp = icmp slt i32 %1, %sub - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %3 = load i32, i32* %src, align 4 - store i32 %3, i32* %temp, align 4 - %4 = load i32, i32* %dst, align 4 - store i32 %4, i32* %src, align 4 - %5 = load i32, i32* %temp, align 4 - store i32 %5, i32* %dst, align 4 - %6 = bitcast %struct.dim3* %agg.tmp to i8* - %7 = bitcast %struct.dim3* %dimGrid to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %6, i8* align 4 %7, i64 12, i1 false) - %8 = bitcast %struct.dim3* %agg.tmp1 to i8* - %9 = bitcast %struct.dim3* %dimBlock to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %8, i8* align 4 %9, i64 12, i1 false) - %10 = bitcast { i64, i32 }* %agg.tmp.coerce to i8* - %11 = bitcast %struct.dim3* %agg.tmp to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %10, i8* align 4 %11, i64 12, i1 false) - %12 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 0 - %13 = load i64, i64* %12, align 4 - %14 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 1 - %15 = load i32, i32* %14, align 4 - %16 = bitcast { i64, i32 }* %agg.tmp1.coerce to i8* - %17 = bitcast %struct.dim3* %agg.tmp1 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %16, i8* align 4 %17, i64 12, i1 false) - %18 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp1.coerce, i32 0, i32 0 - %19 = load i64, i64* %18, align 4 - %20 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp1.coerce, i32 0, i32 1 - %21 = load i32, i32* %20, align 4 - %call = call i32 @__cudaPushCallConfiguration(i64 %13, i32 %15, i64 %19, i32 %21, i64 0, i8* null) - %tobool = icmp ne i32 %call, 0 - br i1 %tobool, label %kcall.end, label %kcall.configok - -kcall.configok: ; preds = %for.body - %22 = load i32, i32* %pyramid_height.addr, align 4 - %23 = load i32, i32* %rows.addr, align 4 - %24 = load i32, i32* %t, align 4 - %sub2 = sub nsw i32 %23, %24 - %sub3 = sub nsw i32 %sub2, 1 - %cmp4 = icmp sle i32 %22, %sub3 - br i1 %cmp4, label %cond.true, label %cond.false - -cond.true: ; preds = %kcall.configok - %25 = load i32, i32* %pyramid_height.addr, align 4 - br label %cond.end - -cond.false: ; preds = %kcall.configok - %26 = load i32, i32* %rows.addr, align 4 - %27 = load i32, i32* %t, align 4 - %sub5 = sub nsw i32 %26, %27 - %sub6 = sub nsw i32 %sub5, 1 - br label %cond.end - -cond.end: ; preds = %cond.false, %cond.true - %cond = phi i32 [ %25, %cond.true ], [ %sub6, %cond.false ] - %28 = load i32*, i32** %gpuWall.addr, align 8 - %29 = load i32**, i32*** %gpuResult.addr, align 8 - %30 = load i32, i32* %src, align 4 - %idxprom = sext i32 %30 to i64 - %arrayidx = getelementptr inbounds i32*, i32** %29, i64 %idxprom - %31 = load i32*, i32** %arrayidx, align 8 - %32 = load i32**, i32*** %gpuResult.addr, align 8 - %33 = load i32, i32* %dst, align 4 - %idxprom7 = sext i32 %33 to i64 - %arrayidx8 = getelementptr inbounds i32*, i32** %32, i64 %idxprom7 - %34 = load i32*, i32** %arrayidx8, align 8 - %35 = load i32, i32* %cols.addr, align 4 - %36 = load i32, i32* %rows.addr, align 4 - %37 = load i32, i32* %t, align 4 - %38 = load i32, i32* %borderCols.addr, align 4 - call void @_Z14dynproc_kerneliPiS_S_iiii(i32 %cond, i32* %28, i32* %31, i32* %34, i32 %35, i32 %36, i32 %37, i32 %38) - br label %kcall.end - -kcall.end: ; preds = %cond.end, %for.body - %call9 = call i32 @cudaDeviceSynchronize() - br label %for.inc - -for.inc: ; preds = %kcall.end - %39 = load i32, i32* %pyramid_height.addr, align 4 - %40 = load i32, i32* %t, align 4 - %add = add nsw i32 %40, %39 - store i32 %add, i32* %t, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %41 = load i32, i32* %dst, align 4 - ret i32 %41 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN4dim3C2Ejjj(%struct.dim3* %this, i32 %vx, i32 %vy, i32 %vz) unnamed_addr #8 comdat align 2 { -entry: - %this.addr = alloca %struct.dim3*, align 8 - %vx.addr = alloca i32, align 4 - %vy.addr = alloca i32, align 4 - %vz.addr = alloca i32, align 4 - store %struct.dim3* %this, %struct.dim3** %this.addr, align 8 - store i32 %vx, i32* %vx.addr, align 4 - store i32 %vy, i32* %vy.addr, align 4 - store i32 %vz, i32* %vz.addr, align 4 - %this1 = load %struct.dim3*, %struct.dim3** %this.addr, align 8 - %x = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 0 - %0 = load i32, i32* %vx.addr, align 4 - store i32 %0, i32* %x, align 4 - %y = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 1 - %1 = load i32, i32* %vy.addr, align 4 - store i32 %1, i32* %y, align 4 - %z = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 2 - %2 = load i32, i32* %vz.addr, align 4 - store i32 %2, i32* %z, align 4 - ret void -} - -declare dso_local i32 @__cudaPushCallConfiguration(i64, i32, i64, i32, i64, i8*) #2 - -declare dso_local i32 @cudaDeviceSynchronize() #2 - -; Function Attrs: noinline norecurse optnone uwtable -define dso_local i32 @main(i32 %argc, i8** %argv) #9 { -entry: - %retval = alloca i32, align 4 - %argc.addr = alloca i32, align 4 - %argv.addr = alloca i8**, align 8 - store i32 0, i32* %retval, align 4 - store i32 %argc, i32* %argc.addr, align 4 - store i8** %argv, i8*** %argv.addr, align 8 - %call = call i32 @cudaSetDevice(i32 0) - %0 = load i32, i32* %argc.addr, align 4 - %1 = load i8**, i8*** %argv.addr, align 8 - call void @_Z3runiPPc(i32 %0, i8** %1) - ret i32 0 -} - -declare dso_local i32 @cudaSetDevice(i32) #2 - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z3runiPPc(i32 %argc, i8** %argv) #0 { -entry: - %argc.addr = alloca i32, align 4 - %argv.addr = alloca i8**, align 8 - %borderCols = alloca i32, align 4 - %smallBlockCol = alloca i32, align 4 - %blockCols = alloca i32, align 4 - %gpuWall = alloca i32*, align 8 - %gpuResult = alloca [2 x i32*], align 16 - %size = alloca i32, align 4 - %final_ret = alloca i32, align 4 - %i = alloca i32, align 4 - %i32 = alloca i32, align 4 - store i32 %argc, i32* %argc.addr, align 4 - store i8** %argv, i8*** %argv.addr, align 8 - %0 = load i32, i32* %argc.addr, align 4 - %1 = load i8**, i8*** %argv.addr, align 8 - call void @_Z4initiPPc(i32 %0, i8** %1) - %2 = load i32, i32* @pyramid_height, align 4 - %mul = mul nsw i32 %2, 1 - store i32 %mul, i32* %borderCols, align 4 - %3 = load i32, i32* @pyramid_height, align 4 - %mul1 = mul nsw i32 %3, 1 - %mul2 = mul nsw i32 %mul1, 2 - %sub = sub nsw i32 256, %mul2 - store i32 %sub, i32* %smallBlockCol, align 4 - %4 = load i32, i32* @cols, align 4 - %5 = load i32, i32* %smallBlockCol, align 4 - %div = sdiv i32 %4, %5 - %6 = load i32, i32* @cols, align 4 - %7 = load i32, i32* %smallBlockCol, align 4 - %rem = srem i32 %6, %7 - %cmp = icmp eq i32 %rem, 0 - %8 = zext i1 %cmp to i64 - %cond = select i1 %cmp, i32 0, i32 1 - %add = add nsw i32 %div, %cond - store i32 %add, i32* %blockCols, align 4 - %9 = load i32, i32* @pyramid_height, align 4 - %10 = load i32, i32* @cols, align 4 - %11 = load i32, i32* %borderCols, align 4 - %12 = load i32, i32* %blockCols, align 4 - %13 = load i32, i32* %smallBlockCol, align 4 - %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([92 x i8], [92 x i8]* @.str.2, i64 0, i64 0), i32 %9, i32 %10, i32 %11, i32 256, i32 %12, i32 %13) - %14 = load i32, i32* @rows, align 4 - %15 = load i32, i32* @cols, align 4 - %mul3 = mul nsw i32 %14, %15 - store i32 %mul3, i32* %size, align 4 - %arrayidx = getelementptr inbounds [2 x i32*], [2 x i32*]* %gpuResult, i64 0, i64 0 - %16 = bitcast i32** %arrayidx to i8** - %17 = load i32, i32* @cols, align 4 - %conv = sext i32 %17 to i64 - %mul4 = mul i64 4, %conv - %call5 = call i32 @cudaMalloc(i8** %16, i64 %mul4) - %arrayidx6 = getelementptr inbounds [2 x i32*], [2 x i32*]* %gpuResult, i64 0, i64 1 - %18 = bitcast i32** %arrayidx6 to i8** - %19 = load i32, i32* @cols, align 4 - %conv7 = sext i32 %19 to i64 - %mul8 = mul i64 4, %conv7 - %call9 = call i32 @cudaMalloc(i8** %18, i64 %mul8) - %arrayidx10 = getelementptr inbounds [2 x i32*], [2 x i32*]* %gpuResult, i64 0, i64 0 - %20 = load i32*, i32** %arrayidx10, align 16 - %21 = bitcast i32* %20 to i8* - %22 = load i32*, i32** @data, align 8 - %23 = bitcast i32* %22 to i8* - %24 = load i32, i32* @cols, align 4 - %conv11 = sext i32 %24 to i64 - %mul12 = mul i64 4, %conv11 - %call13 = call i32 @cudaMemcpy(i8* %21, i8* %23, i64 %mul12, i32 1) - %25 = bitcast i32** %gpuWall to i8** - %26 = load i32, i32* %size, align 4 - %27 = load i32, i32* @cols, align 4 - %sub14 = sub nsw i32 %26, %27 - %conv15 = sext i32 %sub14 to i64 - %mul16 = mul i64 4, %conv15 - %call17 = call i32 @cudaMalloc(i8** %25, i64 %mul16) - %28 = load i32*, i32** %gpuWall, align 8 - %29 = bitcast i32* %28 to i8* - %30 = load i32*, i32** @data, align 8 - %31 = load i32, i32* @cols, align 4 - %idx.ext = sext i32 %31 to i64 - %add.ptr = getelementptr inbounds i32, i32* %30, i64 %idx.ext - %32 = bitcast i32* %add.ptr to i8* - %33 = load i32, i32* %size, align 4 - %34 = load i32, i32* @cols, align 4 - %sub18 = sub nsw i32 %33, %34 - %conv19 = sext i32 %sub18 to i64 - %mul20 = mul i64 4, %conv19 - %call21 = call i32 @cudaMemcpy(i8* %29, i8* %32, i64 %mul20, i32 1) - %35 = load i32*, i32** %gpuWall, align 8 - %arraydecay = getelementptr inbounds [2 x i32*], [2 x i32*]* %gpuResult, i64 0, i64 0 - %36 = load i32, i32* @rows, align 4 - %37 = load i32, i32* @cols, align 4 - %38 = load i32, i32* @pyramid_height, align 4 - %39 = load i32, i32* %blockCols, align 4 - %40 = load i32, i32* %borderCols, align 4 - %call22 = call i32 @_Z9calc_pathPiPS_iiiii(i32* %35, i32** %arraydecay, i32 %36, i32 %37, i32 %38, i32 %39, i32 %40) - store i32 %call22, i32* %final_ret, align 4 - %41 = load i32*, i32** @result, align 8 - %42 = bitcast i32* %41 to i8* - %43 = load i32, i32* %final_ret, align 4 - %idxprom = sext i32 %43 to i64 - %arrayidx23 = getelementptr inbounds [2 x i32*], [2 x i32*]* %gpuResult, i64 0, i64 %idxprom - %44 = load i32*, i32** %arrayidx23, align 8 - %45 = bitcast i32* %44 to i8* - %46 = load i32, i32* @cols, align 4 - %conv24 = sext i32 %46 to i64 - %mul25 = mul i64 4, %conv24 - %call26 = call i32 @cudaMemcpy(i8* %42, i8* %45, i64 %mul25, i32 2) - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %47 = load i32, i32* %i, align 4 - %48 = load i32, i32* @cols, align 4 - %cmp27 = icmp slt i32 %47, %48 - br i1 %cmp27, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %49 = load i32*, i32** @data, align 8 - %50 = load i32, i32* %i, align 4 - %idxprom28 = sext i32 %50 to i64 - %arrayidx29 = getelementptr inbounds i32, i32* %49, i64 %idxprom28 - %51 = load i32, i32* %arrayidx29, align 4 - %call30 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.3, i64 0, i64 0), i32 %51) - br label %for.inc - -for.inc: ; preds = %for.body - %52 = load i32, i32* %i, align 4 - %inc = add nsw i32 %52, 1 - store i32 %inc, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %call31 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.4, i64 0, i64 0)) - store i32 0, i32* %i32, align 4 - br label %for.cond33 - -for.cond33: ; preds = %for.inc39, %for.end - %53 = load i32, i32* %i32, align 4 - %54 = load i32, i32* @cols, align 4 - %cmp34 = icmp slt i32 %53, %54 - br i1 %cmp34, label %for.body35, label %for.end41 - -for.body35: ; preds = %for.cond33 - %55 = load i32*, i32** @result, align 8 - %56 = load i32, i32* %i32, align 4 - %idxprom36 = sext i32 %56 to i64 - %arrayidx37 = getelementptr inbounds i32, i32* %55, i64 %idxprom36 - %57 = load i32, i32* %arrayidx37, align 4 - %call38 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.3, i64 0, i64 0), i32 %57) - br label %for.inc39 - -for.inc39: ; preds = %for.body35 - %58 = load i32, i32* %i32, align 4 - %inc40 = add nsw i32 %58, 1 - store i32 %inc40, i32* %i32, align 4 - br label %for.cond33 - -for.end41: ; preds = %for.cond33 - %call42 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.4, i64 0, i64 0)) - %59 = load i32*, i32** %gpuWall, align 8 - %60 = bitcast i32* %59 to i8* - %call43 = call i32 @cudaFree(i8* %60) - %arrayidx44 = getelementptr inbounds [2 x i32*], [2 x i32*]* %gpuResult, i64 0, i64 0 - %61 = load i32*, i32** %arrayidx44, align 16 - %62 = bitcast i32* %61 to i8* - %call45 = call i32 @cudaFree(i8* %62) - %arrayidx46 = getelementptr inbounds [2 x i32*], [2 x i32*]* %gpuResult, i64 0, i64 1 - %63 = load i32*, i32** %arrayidx46, align 8 - %64 = bitcast i32* %63 to i8* - %call47 = call i32 @cudaFree(i8* %64) - %65 = load i32*, i32** @data, align 8 - %isnull = icmp eq i32* %65, null - br i1 %isnull, label %delete.end, label %delete.notnull - -delete.notnull: ; preds = %for.end41 - %66 = bitcast i32* %65 to i8* - call void @_ZdaPv(i8* %66) #15 - br label %delete.end - -delete.end: ; preds = %delete.notnull, %for.end41 - %67 = load i32**, i32*** @wall, align 8 - %isnull48 = icmp eq i32** %67, null - br i1 %isnull48, label %delete.end50, label %delete.notnull49 - -delete.notnull49: ; preds = %delete.end - %68 = bitcast i32** %67 to i8* - call void @_ZdaPv(i8* %68) #15 - br label %delete.end50 - -delete.end50: ; preds = %delete.notnull49, %delete.end - %69 = load i32*, i32** @result, align 8 - %isnull51 = icmp eq i32* %69, null - br i1 %isnull51, label %delete.end53, label %delete.notnull52 - -delete.notnull52: ; preds = %delete.end50 - %70 = bitcast i32* %69 to i8* - call void @_ZdaPv(i8* %70) #15 - br label %delete.end53 - -delete.end53: ; preds = %delete.notnull52, %delete.end50 - ret void -} - -declare dso_local i32 @cudaMalloc(i8**, i64) #2 - -declare dso_local i32 @cudaMemcpy(i8*, i8*, i64, i32) #2 - -declare dso_local i32 @cudaFree(i8*) #2 - -; Function Attrs: nobuiltin nounwind -declare dso_local void @_ZdaPv(i8*) #10 - -define internal void @__cuda_register_globals(i8** %0) { -entry: - %1 = call i32 @__cudaRegisterFunction(i8** %0, i8* bitcast (void (i32, i32*, i32*, i32*, i32, i32, i32, i32)* @_Z14dynproc_kerneliPiS_S_iiii to i8*), i8* getelementptr inbounds ([30 x i8], [30 x i8]* @0, i64 0, i64 0), i8* getelementptr inbounds ([30 x i8], [30 x i8]* @0, i64 0, i64 0), i32 -1, i8* null, i8* null, i8* null, i8* null, i32* null) - ret void -} - -declare dso_local i32 @__cudaRegisterFunction(i8**, i8*, i8*, i8*, i32, i8*, i8*, i8*, i8*, i32*) - -declare dso_local i32 @__cudaRegisterVar(i8**, i8*, i8*, i8*, i32, i32, i32, i32) - -declare dso_local i8** @__cudaRegisterFatBinary(i8*) - -define internal void @__cuda_module_ctor(i8* %0) { -entry: - %1 = call i8** @__cudaRegisterFatBinary(i8* bitcast ({ i32, i32, i8*, i8* }* @__cuda_fatbin_wrapper to i8*)) - store i8** %1, i8*** @__cuda_gpubin_handle, align 8 - call void @__cuda_register_globals(i8** %1) - call void @__cudaRegisterFatBinaryEnd(i8** %1) - %2 = call i32 @atexit(void (i8*)* @__cuda_module_dtor) - ret void -} - -declare dso_local void @__cudaRegisterFatBinaryEnd(i8**) - -declare dso_local void @__cudaUnregisterFatBinary(i8**) - -define internal void @__cuda_module_dtor(i8* %0) { -entry: - %1 = load i8**, i8*** @__cuda_gpubin_handle, align 8 - call void @__cudaUnregisterFatBinary(i8** %1) - ret void -} - -declare dso_local i32 @atexit(void (i8*)*) - -attributes #0 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #3 = { noreturn nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #4 = { nounwind readnone speculatable willreturn } -attributes #5 = { nobuiltin "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #6 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #7 = { argmemonly nounwind willreturn } -attributes #8 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #9 = { noinline norecurse optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #10 = { nobuiltin nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #11 = { nounwind readonly } -attributes #12 = { noreturn nounwind } -attributes #13 = { builtin } -attributes #14 = { nounwind } -attributes #15 = { builtin nounwind } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]} -!1 = !{i32 1, !"wchar_size", i32 4} -!2 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"} diff --git a/examples/pathfinder/pathfinder.cu b/examples/pathfinder/pathfinder.cu deleted file mode 100644 index d57c677..0000000 --- a/examples/pathfinder/pathfinder.cu +++ /dev/null @@ -1,238 +0,0 @@ -#include -#include -#include -#include - -#ifdef TIMING -#include "timing.h" - -struct timeval tv; -struct timeval tv_total_start, tv_total_end; -struct timeval tv_h2d_start, tv_h2d_end; -struct timeval tv_d2h_start, tv_d2h_end; -struct timeval tv_kernel_start, tv_kernel_end; -struct timeval tv_mem_alloc_start, tv_mem_alloc_end; -struct timeval tv_close_start, tv_close_end; -float init_time = 0, mem_alloc_time = 0, h2d_time = 0, kernel_time = 0, - d2h_time = 0, close_time = 0, total_time = 0; -#endif - -#define BLOCK_SIZE 256 -#define STR_SIZE 256 -#define DEVICE 0 -#define HALO \ - 1 // halo width along one direction when advancing to the next iteration - -//#define BENCH_PRINT - -void run(int argc, char **argv); - -int rows, cols; -int *data; -int **wall; -int *result; -#define M_SEED 9 -int pyramid_height; - -void init(int argc, char **argv) { - if (argc == 4) { - cols = atoi(argv[1]); - rows = atoi(argv[2]); - pyramid_height = atoi(argv[3]); - } else { - printf("Usage: dynproc row_len col_len pyramid_height\n"); - exit(0); - } - data = new int[rows * cols]; - wall = new int *[rows]; - for (int n = 0; n < rows; n++) - wall[n] = data + cols * n; - result = new int[cols]; - - int seed = M_SEED; - srand(seed); - - for (int i = 0; i < rows; i++) { - for (int j = 0; j < cols; j++) { - wall[i][j] = rand() % 10; - } - } -#ifdef BENCH_PRINT - for (int i = 0; i < rows; i++) { - for (int j = 0; j < cols; j++) { - printf("%d ", wall[i][j]); - } - printf("\n"); - } -#endif -} - -void fatal(char *s) { fprintf(stderr, "error: %s\n", s); } - -#define IN_RANGE(x, min, max) ((x) >= (min) && (x) <= (max)) -#define CLAMP_RANGE(x, min, max) x = (x < (min)) ? min : ((x > (max)) ? max : x) -#define MIN(a, b) ((a) <= (b) ? (a) : (b)) - -__global__ void dynproc_kernel(int iteration, int *gpuWall, int *gpuSrc, - int *gpuResults, int cols, int rows, - int startStep, int border) { - - __shared__ int prev[BLOCK_SIZE]; - __shared__ int result[BLOCK_SIZE]; - - int bx = blockIdx.x; - int tx = threadIdx.x; - - // each block finally computes result for a small block - // after N iterations. - // it is the non-overlapping small blocks that cover - // all the input data - - // calculate the small block size - int small_block_cols = BLOCK_SIZE - iteration * HALO * 2; - - // calculate the boundary for the block according to - // the boundary of its small block - int blkX = small_block_cols * bx - border; - int blkXmax = blkX + BLOCK_SIZE - 1; - - // calculate the global thread coordination - int xidx = blkX + tx; - - // effective range within this block that falls within - // the valid range of the input data - // used to rule out computation outside the boundary. - int validXmin = (blkX < 0) ? -blkX : 0; - int validXmax = (blkXmax > cols - 1) ? BLOCK_SIZE - 1 - (blkXmax - cols + 1) - : BLOCK_SIZE - 1; - - int W = tx - 1; - int E = tx + 1; - - W = (W < validXmin) ? validXmin : W; - E = (E > validXmax) ? validXmax : E; - - bool isValid = IN_RANGE(tx, validXmin, validXmax); - - if (IN_RANGE(xidx, 0, cols - 1)) { - prev[tx] = gpuSrc[xidx]; - } - __syncthreads(); // [Ronny] Added sync to avoid race on prev Aug. 14 2012 - bool computed; - for (int i = 0; i < iteration; i++) { - computed = false; - if (IN_RANGE(tx, i + 1, BLOCK_SIZE - i - 2) && isValid) { - computed = true; - int left = prev[W]; - int up = prev[tx]; - int right = prev[E]; - int shortest = MIN(left, up); - shortest = MIN(shortest, right); - int index = cols * (startStep + i) + xidx; - result[tx] = shortest + gpuWall[index]; - } - __syncthreads(); - if (i == iteration - 1) - break; - if (computed) // Assign the computation range - prev[tx] = result[tx]; - __syncthreads(); // [Ronny] Added sync to avoid race on prev Aug. 14 2012 - } - - // update the global memory - // after the last iteration, only threads coordinated within the - // small block perform the calculation and switch on ``computed'' - if (computed) { - gpuResults[xidx] = result[tx]; - } -} - -/* - compute N time steps -*/ -int calc_path(int *gpuWall, int *gpuResult[2], int rows, int cols, - int pyramid_height, int blockCols, int borderCols) { - dim3 dimBlock(BLOCK_SIZE); - dim3 dimGrid(blockCols); - - int src = 1, dst = 0; - for (int t = 0; t < rows - 1; t += pyramid_height) { - int temp = src; - src = dst; - dst = temp; - dynproc_kernel<<>>( - MIN(pyramid_height, rows - t - 1), gpuWall, gpuResult[src], - gpuResult[dst], cols, rows, t, borderCols); - - // for the measurement fairness - cudaDeviceSynchronize(); - } - return dst; -} - -int main(int argc, char **argv) { - cudaSetDevice(0); - - run(argc, argv); - - return EXIT_SUCCESS; -} - -void run(int argc, char **argv) { - init(argc, argv); - - /* --------------- pyramid parameters --------------- */ - int borderCols = (pyramid_height)*HALO; - int smallBlockCol = BLOCK_SIZE - (pyramid_height)*HALO * 2; - int blockCols = cols / smallBlockCol + ((cols % smallBlockCol == 0) ? 0 : 1); - - printf("pyramidHeight: %d\ngridSize: [%d]\nborder:[%d]\nblockSize: " - "%d\nblockGrid:[%d]\ntargetBlock:[%d]\n", - pyramid_height, cols, borderCols, BLOCK_SIZE, blockCols, - smallBlockCol); - - int *gpuWall, *gpuResult[2]; - int size = rows * cols; - - cudaMalloc((void **)&gpuResult[0], sizeof(int) * cols); - cudaMalloc((void **)&gpuResult[1], sizeof(int) * cols); - cudaMemcpy(gpuResult[0], data, sizeof(int) * cols, cudaMemcpyHostToDevice); - cudaMalloc((void **)&gpuWall, sizeof(int) * (size - cols)); - cudaMemcpy(gpuWall, data + cols, sizeof(int) * (size - cols), - cudaMemcpyHostToDevice); - -#ifdef TIMING - gettimeofday(&tv_kernel_start, NULL); -#endif - - int final_ret = calc_path(gpuWall, gpuResult, rows, cols, pyramid_height, - blockCols, borderCols); - -#ifdef TIMING - gettimeofday(&tv_kernel_end, NULL); - tvsub(&tv_kernel_end, &tv_kernel_start, &tv); - kernel_time += tv.tv_sec * 1000.0 + (float)tv.tv_usec / 1000.0; -#endif - - cudaMemcpy(result, gpuResult[final_ret], sizeof(int) * cols, - cudaMemcpyDeviceToHost); - - for (int i = 0; i < cols; i++) - printf("%d ", data[i]); - printf("\n"); - for (int i = 0; i < cols; i++) - printf("%d ", result[i]); - printf("\n"); - - cudaFree(gpuWall); - cudaFree(gpuResult[0]); - cudaFree(gpuResult[1]); - - delete[] data; - delete[] wall; - delete[] result; - -#ifdef TIMING - printf("Exec: %f\n", kernel_time); -#endif -} diff --git a/examples/pathfinder/run.sh b/examples/pathfinder/run.sh deleted file mode 100644 index 7bfd85e..0000000 --- a/examples/pathfinder/run.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash -set -e -llvm-as pathfinder-cuda-nvptx64-nvidia-cuda-sm_61.ll -llvm-as pathfinder-host-x86_64-unknown-linux-gnu.ll -../../build/compilation/kernelTranslator pathfinder-cuda-nvptx64-nvidia-cuda-sm_61.bc kernel.bc -../../build/compilation/hostTranslator pathfinder-host-x86_64-unknown-linux-gnu.bc host.bc -llc --relocation-model=pic --filetype=obj kernel.bc -llc --relocation-model=pic --filetype=obj host.bc - -g++ -Wall -L../../build/runtime \ - -L../../build/runtime/threadPool -o pathfinder \ - -fPIC -no-pie host.o kernel.o -lc -lx86Runtime -lthreadPool -lpthread -export LD_LIBRARY_PATH=../../build/runtime:../../build/runtime/threadPool:$LD_LIBRARY_PATH -./pathfinder 100000 100 20 > res.log -if grep -q "5 4 5 7 0 3 0 8 2" res.log; then - echo "Pass" -else - echo "Error result" - exit 1 -fi diff --git a/examples/srad_v2/run.sh b/examples/srad_v2/run.sh deleted file mode 100644 index fe49cfa..0000000 --- a/examples/srad_v2/run.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -set -e -llvm-as srad-cuda-nvptx64-nvidia-cuda-sm_61.ll -llvm-as srad-host-x86_64-unknown-linux-gnu.ll -../../build/compilation/kernelTranslator srad-cuda-nvptx64-nvidia-cuda-sm_61.bc kernel.bc -../../build/compilation/hostTranslator srad-host-x86_64-unknown-linux-gnu.bc host.bc - -llc --relocation-model=pic --filetype=obj kernel.bc -llc --relocation-model=pic --filetype=obj host.bc - -g++ -Wall -L../../build/runtime \ - -L../../build/runtime/threadPool \ - -o srad -fPIC -no-pie host.o kernel.o -lc -lx86Runtime -lthreadPool -lpthread -export LD_LIBRARY_PATH=../../build/runtime:../../build/runtime/threadPool:$LD_LIBRARY_PATH -./srad 2048 2048 0 127 0 127 0.5 2 > res.log -if grep -q "1.98368 2.16545 1.72989" res.log; then - echo "Pass" -else - echo "Error result" - exit 1 -fi diff --git a/examples/srad_v2/srad-cuda-nvptx64-nvidia-cuda-sm_61.ll b/examples/srad_v2/srad-cuda-nvptx64-nvidia-cuda-sm_61.ll deleted file mode 100644 index f1b895c..0000000 --- a/examples/srad_v2/srad-cuda-nvptx64-nvidia-cuda-sm_61.ll +++ /dev/null @@ -1,1551 +0,0 @@ -; ModuleID = 'srad-cuda-nvptx64-nvidia-cuda-sm_61.bc' -source_filename = "srad.cu" -target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" -target triple = "nvptx64-nvidia-cuda" - -%struct.__cuda_builtin_blockIdx_t = type { i8 } -%struct.__cuda_builtin_threadIdx_t = type { i8 } -%struct.__cuda_builtin_gridDim_t = type { i8 } -%struct.cudaFuncAttributes = type { i64, i64, i64, i32, i32, i32, i32, i32, i32, i32 } - -$_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv = comdat any - -$_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_yEv = comdat any - -$_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv = comdat any - -$_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_yEv = comdat any - -$_ZN24__cuda_builtin_gridDim_t17__fetch_builtin_yEv = comdat any - -$_ZN24__cuda_builtin_gridDim_t17__fetch_builtin_xEv = comdat any - -@blockIdx = extern_weak dso_local addrspace(1) global %struct.__cuda_builtin_blockIdx_t, align 1 -@threadIdx = extern_weak dso_local addrspace(1) global %struct.__cuda_builtin_threadIdx_t, align 1 -@_ZZ11srad_cuda_1PfS_S_S_S_S_iifE4temp = internal addrspace(3) global [16 x [16 x float]] undef, align 4 -@_ZZ11srad_cuda_1PfS_S_S_S_S_iifE11temp_result = internal addrspace(3) global [16 x [16 x float]] undef, align 4 -@_ZZ11srad_cuda_1PfS_S_S_S_S_iifE5north = internal addrspace(3) global [16 x [16 x float]] undef, align 4 -@_ZZ11srad_cuda_1PfS_S_S_S_S_iifE5south = internal addrspace(3) global [16 x [16 x float]] undef, align 4 -@_ZZ11srad_cuda_1PfS_S_S_S_S_iifE4east = internal addrspace(3) global [16 x [16 x float]] undef, align 4 -@_ZZ11srad_cuda_1PfS_S_S_S_S_iifE4west = internal addrspace(3) global [16 x [16 x float]] undef, align 4 -@gridDim = extern_weak dso_local addrspace(1) global %struct.__cuda_builtin_gridDim_t, align 1 -@_ZZ11srad_cuda_2PfS_S_S_S_S_iiffE7south_c = internal addrspace(3) global [16 x [16 x float]] undef, align 4 -@_ZZ11srad_cuda_2PfS_S_S_S_S_iiffE6east_c = internal addrspace(3) global [16 x [16 x float]] undef, align 4 -@_ZZ11srad_cuda_2PfS_S_S_S_S_iiffE11c_cuda_temp = internal addrspace(3) global [16 x [16 x float]] undef, align 4 -@_ZZ11srad_cuda_2PfS_S_S_S_S_iiffE13c_cuda_result = internal addrspace(3) global [16 x [16 x float]] undef, align 4 -@_ZZ11srad_cuda_2PfS_S_S_S_S_iiffE4temp = internal addrspace(3) global [16 x [16 x float]] undef, align 4 - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaMalloc(i8** %p, i64 %s) #0 { -entry: - %p.addr = alloca i8**, align 8 - %s.addr = alloca i64, align 8 - store i8** %p, i8*** %p.addr, align 8 - store i64 %s, i64* %s.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaFuncGetAttributes(%struct.cudaFuncAttributes* %p, i8* %c) #0 { -entry: - %p.addr = alloca %struct.cudaFuncAttributes*, align 8 - %c.addr = alloca i8*, align 8 - store %struct.cudaFuncAttributes* %p, %struct.cudaFuncAttributes** %p.addr, align 8 - store i8* %c, i8** %c.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaDeviceGetAttribute(i32* %value, i32 %attr, i32 %device) #0 { -entry: - %value.addr = alloca i32*, align 8 - %attr.addr = alloca i32, align 4 - %device.addr = alloca i32, align 4 - store i32* %value, i32** %value.addr, align 8 - store i32 %attr, i32* %attr.addr, align 4 - store i32 %device, i32* %device.addr, align 4 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaGetDevice(i32* %device) #0 { -entry: - %device.addr = alloca i32*, align 8 - store i32* %device, i32** %device.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaOccupancyMaxActiveBlocksPerMultiprocessor(i32* %numBlocks, i8* %func, i32 %blockSize, i64 %dynamicSmemSize) #0 { -entry: - %numBlocks.addr = alloca i32*, align 8 - %func.addr = alloca i8*, align 8 - %blockSize.addr = alloca i32, align 4 - %dynamicSmemSize.addr = alloca i64, align 8 - store i32* %numBlocks, i32** %numBlocks.addr, align 8 - store i8* %func, i8** %func.addr, align 8 - store i32 %blockSize, i32* %blockSize.addr, align 4 - store i64 %dynamicSmemSize, i64* %dynamicSmemSize.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(i32* %numBlocks, i8* %func, i32 %blockSize, i64 %dynamicSmemSize, i32 %flags) #0 { -entry: - %numBlocks.addr = alloca i32*, align 8 - %func.addr = alloca i8*, align 8 - %blockSize.addr = alloca i32, align 4 - %dynamicSmemSize.addr = alloca i64, align 8 - %flags.addr = alloca i32, align 4 - store i32* %numBlocks, i32** %numBlocks.addr, align 8 - store i8* %func, i8** %func.addr, align 8 - store i32 %blockSize, i32* %blockSize.addr, align 4 - store i64 %dynamicSmemSize, i64* %dynamicSmemSize.addr, align 8 - store i32 %flags, i32* %flags.addr, align 4 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define dso_local void @_Z11srad_cuda_1PfS_S_S_S_S_iif(float* %E_C, float* %W_C, float* %N_C, float* %S_C, float* %J_cuda, float* %C_cuda, i32 %cols, i32 %rows, float %q0sqr) #0 { -entry: - %E_C.addr = alloca float*, align 8 - %W_C.addr = alloca float*, align 8 - %N_C.addr = alloca float*, align 8 - %S_C.addr = alloca float*, align 8 - %J_cuda.addr = alloca float*, align 8 - %C_cuda.addr = alloca float*, align 8 - %cols.addr = alloca i32, align 4 - %rows.addr = alloca i32, align 4 - %q0sqr.addr = alloca float, align 4 - %bx = alloca i32, align 4 - %by = alloca i32, align 4 - %tx = alloca i32, align 4 - %ty = alloca i32, align 4 - %index = alloca i32, align 4 - %index_n = alloca i32, align 4 - %index_s = alloca i32, align 4 - %index_w = alloca i32, align 4 - %index_e = alloca i32, align 4 - %n = alloca float, align 4 - %w = alloca float, align 4 - %e = alloca float, align 4 - %s = alloca float, align 4 - %jc = alloca float, align 4 - %g2 = alloca float, align 4 - %l = alloca float, align 4 - %num = alloca float, align 4 - %den = alloca float, align 4 - %qsqr = alloca float, align 4 - %c = alloca float, align 4 - store float* %E_C, float** %E_C.addr, align 8 - store float* %W_C, float** %W_C.addr, align 8 - store float* %N_C, float** %N_C.addr, align 8 - store float* %S_C, float** %S_C.addr, align 8 - store float* %J_cuda, float** %J_cuda.addr, align 8 - store float* %C_cuda, float** %C_cuda.addr, align 8 - store i32 %cols, i32* %cols.addr, align 4 - store i32 %rows, i32* %rows.addr, align 4 - store float %q0sqr, float* %q0sqr.addr, align 4 - %call = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #2 - store i32 %call, i32* %bx, align 4 - %call1 = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_yEv() #2 - store i32 %call1, i32* %by, align 4 - %call2 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - store i32 %call2, i32* %tx, align 4 - %call3 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_yEv() #2 - store i32 %call3, i32* %ty, align 4 - %0 = load i32, i32* %cols.addr, align 4 - %mul = mul nsw i32 %0, 16 - %1 = load i32, i32* %by, align 4 - %mul4 = mul nsw i32 %mul, %1 - %2 = load i32, i32* %bx, align 4 - %mul5 = mul nsw i32 16, %2 - %add = add nsw i32 %mul4, %mul5 - %3 = load i32, i32* %cols.addr, align 4 - %4 = load i32, i32* %ty, align 4 - %mul6 = mul nsw i32 %3, %4 - %add7 = add nsw i32 %add, %mul6 - %5 = load i32, i32* %tx, align 4 - %add8 = add nsw i32 %add7, %5 - store i32 %add8, i32* %index, align 4 - %6 = load i32, i32* %cols.addr, align 4 - %mul9 = mul nsw i32 %6, 16 - %7 = load i32, i32* %by, align 4 - %mul10 = mul nsw i32 %mul9, %7 - %8 = load i32, i32* %bx, align 4 - %mul11 = mul nsw i32 16, %8 - %add12 = add nsw i32 %mul10, %mul11 - %9 = load i32, i32* %tx, align 4 - %add13 = add nsw i32 %add12, %9 - %10 = load i32, i32* %cols.addr, align 4 - %sub = sub nsw i32 %add13, %10 - store i32 %sub, i32* %index_n, align 4 - %11 = load i32, i32* %cols.addr, align 4 - %mul14 = mul nsw i32 %11, 16 - %12 = load i32, i32* %by, align 4 - %mul15 = mul nsw i32 %mul14, %12 - %13 = load i32, i32* %bx, align 4 - %mul16 = mul nsw i32 16, %13 - %add17 = add nsw i32 %mul15, %mul16 - %14 = load i32, i32* %cols.addr, align 4 - %mul18 = mul nsw i32 %14, 16 - %add19 = add nsw i32 %add17, %mul18 - %15 = load i32, i32* %tx, align 4 - %add20 = add nsw i32 %add19, %15 - store i32 %add20, i32* %index_s, align 4 - %16 = load i32, i32* %cols.addr, align 4 - %mul21 = mul nsw i32 %16, 16 - %17 = load i32, i32* %by, align 4 - %mul22 = mul nsw i32 %mul21, %17 - %18 = load i32, i32* %bx, align 4 - %mul23 = mul nsw i32 16, %18 - %add24 = add nsw i32 %mul22, %mul23 - %19 = load i32, i32* %cols.addr, align 4 - %20 = load i32, i32* %ty, align 4 - %mul25 = mul nsw i32 %19, %20 - %add26 = add nsw i32 %add24, %mul25 - %sub27 = sub nsw i32 %add26, 1 - store i32 %sub27, i32* %index_w, align 4 - %21 = load i32, i32* %cols.addr, align 4 - %mul28 = mul nsw i32 %21, 16 - %22 = load i32, i32* %by, align 4 - %mul29 = mul nsw i32 %mul28, %22 - %23 = load i32, i32* %bx, align 4 - %mul30 = mul nsw i32 16, %23 - %add31 = add nsw i32 %mul29, %mul30 - %24 = load i32, i32* %cols.addr, align 4 - %25 = load i32, i32* %ty, align 4 - %mul32 = mul nsw i32 %24, %25 - %add33 = add nsw i32 %add31, %mul32 - %add34 = add nsw i32 %add33, 16 - store i32 %add34, i32* %index_e, align 4 - %26 = load float*, float** %J_cuda.addr, align 8 - %27 = load i32, i32* %index_n, align 4 - %idxprom = sext i32 %27 to i64 - %arrayidx = getelementptr inbounds float, float* %26, i64 %idxprom - %28 = load float, float* %arrayidx, align 4 - %29 = load i32, i32* %ty, align 4 - %idxprom35 = sext i32 %29 to i64 - %arrayidx36 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE5north to [16 x [16 x float]]*), i64 0, i64 %idxprom35 - %30 = load i32, i32* %tx, align 4 - %idxprom37 = sext i32 %30 to i64 - %arrayidx38 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx36, i64 0, i64 %idxprom37 - store float %28, float* %arrayidx38, align 4 - %31 = load float*, float** %J_cuda.addr, align 8 - %32 = load i32, i32* %index_s, align 4 - %idxprom39 = sext i32 %32 to i64 - %arrayidx40 = getelementptr inbounds float, float* %31, i64 %idxprom39 - %33 = load float, float* %arrayidx40, align 4 - %34 = load i32, i32* %ty, align 4 - %idxprom41 = sext i32 %34 to i64 - %arrayidx42 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE5south to [16 x [16 x float]]*), i64 0, i64 %idxprom41 - %35 = load i32, i32* %tx, align 4 - %idxprom43 = sext i32 %35 to i64 - %arrayidx44 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx42, i64 0, i64 %idxprom43 - store float %33, float* %arrayidx44, align 4 - %36 = load i32, i32* %by, align 4 - %cmp = icmp eq i32 %36, 0 - br i1 %cmp, label %if.then, label %if.else - -if.then: ; preds = %entry - %37 = load float*, float** %J_cuda.addr, align 8 - %38 = load i32, i32* %bx, align 4 - %mul45 = mul nsw i32 16, %38 - %39 = load i32, i32* %tx, align 4 - %add46 = add nsw i32 %mul45, %39 - %idxprom47 = sext i32 %add46 to i64 - %arrayidx48 = getelementptr inbounds float, float* %37, i64 %idxprom47 - %40 = load float, float* %arrayidx48, align 4 - %41 = load i32, i32* %ty, align 4 - %idxprom49 = sext i32 %41 to i64 - %arrayidx50 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE5north to [16 x [16 x float]]*), i64 0, i64 %idxprom49 - %42 = load i32, i32* %tx, align 4 - %idxprom51 = sext i32 %42 to i64 - %arrayidx52 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx50, i64 0, i64 %idxprom51 - store float %40, float* %arrayidx52, align 4 - br label %if.end72 - -if.else: ; preds = %entry - %43 = load i32, i32* %by, align 4 - %call53 = call i32 @_ZN24__cuda_builtin_gridDim_t17__fetch_builtin_yEv() #2 - %sub54 = sub i32 %call53, 1 - %cmp55 = icmp eq i32 %43, %sub54 - br i1 %cmp55, label %if.then56, label %if.end - -if.then56: ; preds = %if.else - %44 = load float*, float** %J_cuda.addr, align 8 - %45 = load i32, i32* %cols.addr, align 4 - %mul57 = mul nsw i32 %45, 16 - %call58 = call i32 @_ZN24__cuda_builtin_gridDim_t17__fetch_builtin_yEv() #2 - %sub59 = sub i32 %call58, 1 - %mul60 = mul i32 %mul57, %sub59 - %46 = load i32, i32* %bx, align 4 - %mul61 = mul nsw i32 16, %46 - %add62 = add i32 %mul60, %mul61 - %47 = load i32, i32* %cols.addr, align 4 - %mul63 = mul nsw i32 %47, 15 - %add64 = add i32 %add62, %mul63 - %48 = load i32, i32* %tx, align 4 - %add65 = add i32 %add64, %48 - %idxprom66 = zext i32 %add65 to i64 - %arrayidx67 = getelementptr inbounds float, float* %44, i64 %idxprom66 - %49 = load float, float* %arrayidx67, align 4 - %50 = load i32, i32* %ty, align 4 - %idxprom68 = sext i32 %50 to i64 - %arrayidx69 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE5south to [16 x [16 x float]]*), i64 0, i64 %idxprom68 - %51 = load i32, i32* %tx, align 4 - %idxprom70 = sext i32 %51 to i64 - %arrayidx71 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx69, i64 0, i64 %idxprom70 - store float %49, float* %arrayidx71, align 4 - br label %if.end - -if.end: ; preds = %if.then56, %if.else - br label %if.end72 - -if.end72: ; preds = %if.end, %if.then - call void @llvm.nvvm.barrier0() - %52 = load float*, float** %J_cuda.addr, align 8 - %53 = load i32, i32* %index_w, align 4 - %idxprom73 = sext i32 %53 to i64 - %arrayidx74 = getelementptr inbounds float, float* %52, i64 %idxprom73 - %54 = load float, float* %arrayidx74, align 4 - %55 = load i32, i32* %ty, align 4 - %idxprom75 = sext i32 %55 to i64 - %arrayidx76 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE4west to [16 x [16 x float]]*), i64 0, i64 %idxprom75 - %56 = load i32, i32* %tx, align 4 - %idxprom77 = sext i32 %56 to i64 - %arrayidx78 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx76, i64 0, i64 %idxprom77 - store float %54, float* %arrayidx78, align 4 - %57 = load float*, float** %J_cuda.addr, align 8 - %58 = load i32, i32* %index_e, align 4 - %idxprom79 = sext i32 %58 to i64 - %arrayidx80 = getelementptr inbounds float, float* %57, i64 %idxprom79 - %59 = load float, float* %arrayidx80, align 4 - %60 = load i32, i32* %ty, align 4 - %idxprom81 = sext i32 %60 to i64 - %arrayidx82 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE4east to [16 x [16 x float]]*), i64 0, i64 %idxprom81 - %61 = load i32, i32* %tx, align 4 - %idxprom83 = sext i32 %61 to i64 - %arrayidx84 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx82, i64 0, i64 %idxprom83 - store float %59, float* %arrayidx84, align 4 - %62 = load i32, i32* %bx, align 4 - %cmp85 = icmp eq i32 %62, 0 - br i1 %cmp85, label %if.then86, label %if.else97 - -if.then86: ; preds = %if.end72 - %63 = load float*, float** %J_cuda.addr, align 8 - %64 = load i32, i32* %cols.addr, align 4 - %mul87 = mul nsw i32 %64, 16 - %65 = load i32, i32* %by, align 4 - %mul88 = mul nsw i32 %mul87, %65 - %66 = load i32, i32* %cols.addr, align 4 - %67 = load i32, i32* %ty, align 4 - %mul89 = mul nsw i32 %66, %67 - %add90 = add nsw i32 %mul88, %mul89 - %idxprom91 = sext i32 %add90 to i64 - %arrayidx92 = getelementptr inbounds float, float* %63, i64 %idxprom91 - %68 = load float, float* %arrayidx92, align 4 - %69 = load i32, i32* %ty, align 4 - %idxprom93 = sext i32 %69 to i64 - %arrayidx94 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE4west to [16 x [16 x float]]*), i64 0, i64 %idxprom93 - %70 = load i32, i32* %tx, align 4 - %idxprom95 = sext i32 %70 to i64 - %arrayidx96 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx94, i64 0, i64 %idxprom95 - store float %68, float* %arrayidx96, align 4 - br label %if.end119 - -if.else97: ; preds = %if.end72 - %71 = load i32, i32* %bx, align 4 - %call98 = call i32 @_ZN24__cuda_builtin_gridDim_t17__fetch_builtin_xEv() #2 - %sub99 = sub i32 %call98, 1 - %cmp100 = icmp eq i32 %71, %sub99 - br i1 %cmp100, label %if.then101, label %if.end118 - -if.then101: ; preds = %if.else97 - %72 = load float*, float** %J_cuda.addr, align 8 - %73 = load i32, i32* %cols.addr, align 4 - %mul102 = mul nsw i32 %73, 16 - %74 = load i32, i32* %by, align 4 - %mul103 = mul nsw i32 %mul102, %74 - %call104 = call i32 @_ZN24__cuda_builtin_gridDim_t17__fetch_builtin_xEv() #2 - %sub105 = sub i32 %call104, 1 - %mul106 = mul i32 16, %sub105 - %add107 = add i32 %mul103, %mul106 - %75 = load i32, i32* %cols.addr, align 4 - %76 = load i32, i32* %ty, align 4 - %mul108 = mul nsw i32 %75, %76 - %add109 = add i32 %add107, %mul108 - %add110 = add i32 %add109, 16 - %sub111 = sub i32 %add110, 1 - %idxprom112 = zext i32 %sub111 to i64 - %arrayidx113 = getelementptr inbounds float, float* %72, i64 %idxprom112 - %77 = load float, float* %arrayidx113, align 4 - %78 = load i32, i32* %ty, align 4 - %idxprom114 = sext i32 %78 to i64 - %arrayidx115 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE4east to [16 x [16 x float]]*), i64 0, i64 %idxprom114 - %79 = load i32, i32* %tx, align 4 - %idxprom116 = sext i32 %79 to i64 - %arrayidx117 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx115, i64 0, i64 %idxprom116 - store float %77, float* %arrayidx117, align 4 - br label %if.end118 - -if.end118: ; preds = %if.then101, %if.else97 - br label %if.end119 - -if.end119: ; preds = %if.end118, %if.then86 - call void @llvm.nvvm.barrier0() - %80 = load float*, float** %J_cuda.addr, align 8 - %81 = load i32, i32* %index, align 4 - %idxprom120 = sext i32 %81 to i64 - %arrayidx121 = getelementptr inbounds float, float* %80, i64 %idxprom120 - %82 = load float, float* %arrayidx121, align 4 - %83 = load i32, i32* %ty, align 4 - %idxprom122 = sext i32 %83 to i64 - %arrayidx123 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE4temp to [16 x [16 x float]]*), i64 0, i64 %idxprom122 - %84 = load i32, i32* %tx, align 4 - %idxprom124 = sext i32 %84 to i64 - %arrayidx125 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx123, i64 0, i64 %idxprom124 - store float %82, float* %arrayidx125, align 4 - call void @llvm.nvvm.barrier0() - %85 = load i32, i32* %ty, align 4 - %idxprom126 = sext i32 %85 to i64 - %arrayidx127 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE4temp to [16 x [16 x float]]*), i64 0, i64 %idxprom126 - %86 = load i32, i32* %tx, align 4 - %idxprom128 = sext i32 %86 to i64 - %arrayidx129 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx127, i64 0, i64 %idxprom128 - %87 = load float, float* %arrayidx129, align 4 - store float %87, float* %jc, align 4 - %88 = load i32, i32* %ty, align 4 - %cmp130 = icmp eq i32 %88, 0 - br i1 %cmp130, label %land.lhs.true, label %if.else155 - -land.lhs.true: ; preds = %if.end119 - %89 = load i32, i32* %tx, align 4 - %cmp131 = icmp eq i32 %89, 0 - br i1 %cmp131, label %if.then132, label %if.else155 - -if.then132: ; preds = %land.lhs.true - %90 = load i32, i32* %ty, align 4 - %idxprom133 = sext i32 %90 to i64 - %arrayidx134 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE5north to [16 x [16 x float]]*), i64 0, i64 %idxprom133 - %91 = load i32, i32* %tx, align 4 - %idxprom135 = sext i32 %91 to i64 - %arrayidx136 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx134, i64 0, i64 %idxprom135 - %92 = load float, float* %arrayidx136, align 4 - %93 = load float, float* %jc, align 4 - %sub137 = fsub contract float %92, %93 - store float %sub137, float* %n, align 4 - %94 = load i32, i32* %ty, align 4 - %add138 = add nsw i32 %94, 1 - %idxprom139 = sext i32 %add138 to i64 - %arrayidx140 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE4temp to [16 x [16 x float]]*), i64 0, i64 %idxprom139 - %95 = load i32, i32* %tx, align 4 - %idxprom141 = sext i32 %95 to i64 - %arrayidx142 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx140, i64 0, i64 %idxprom141 - %96 = load float, float* %arrayidx142, align 4 - %97 = load float, float* %jc, align 4 - %sub143 = fsub contract float %96, %97 - store float %sub143, float* %s, align 4 - %98 = load i32, i32* %ty, align 4 - %idxprom144 = sext i32 %98 to i64 - %arrayidx145 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE4west to [16 x [16 x float]]*), i64 0, i64 %idxprom144 - %99 = load i32, i32* %tx, align 4 - %idxprom146 = sext i32 %99 to i64 - %arrayidx147 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx145, i64 0, i64 %idxprom146 - %100 = load float, float* %arrayidx147, align 4 - %101 = load float, float* %jc, align 4 - %sub148 = fsub contract float %100, %101 - store float %sub148, float* %w, align 4 - %102 = load i32, i32* %ty, align 4 - %idxprom149 = sext i32 %102 to i64 - %arrayidx150 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE4temp to [16 x [16 x float]]*), i64 0, i64 %idxprom149 - %103 = load i32, i32* %tx, align 4 - %add151 = add nsw i32 %103, 1 - %idxprom152 = sext i32 %add151 to i64 - %arrayidx153 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx150, i64 0, i64 %idxprom152 - %104 = load float, float* %arrayidx153, align 4 - %105 = load float, float* %jc, align 4 - %sub154 = fsub contract float %104, %105 - store float %sub154, float* %e, align 4 - br label %if.end372 - -if.else155: ; preds = %land.lhs.true, %if.end119 - %106 = load i32, i32* %ty, align 4 - %cmp156 = icmp eq i32 %106, 0 - br i1 %cmp156, label %land.lhs.true157, label %if.else182 - -land.lhs.true157: ; preds = %if.else155 - %107 = load i32, i32* %tx, align 4 - %cmp158 = icmp eq i32 %107, 15 - br i1 %cmp158, label %if.then159, label %if.else182 - -if.then159: ; preds = %land.lhs.true157 - %108 = load i32, i32* %ty, align 4 - %idxprom160 = sext i32 %108 to i64 - %arrayidx161 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE5north to [16 x [16 x float]]*), i64 0, i64 %idxprom160 - %109 = load i32, i32* %tx, align 4 - %idxprom162 = sext i32 %109 to i64 - %arrayidx163 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx161, i64 0, i64 %idxprom162 - %110 = load float, float* %arrayidx163, align 4 - %111 = load float, float* %jc, align 4 - %sub164 = fsub contract float %110, %111 - store float %sub164, float* %n, align 4 - %112 = load i32, i32* %ty, align 4 - %add165 = add nsw i32 %112, 1 - %idxprom166 = sext i32 %add165 to i64 - %arrayidx167 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE4temp to [16 x [16 x float]]*), i64 0, i64 %idxprom166 - %113 = load i32, i32* %tx, align 4 - %idxprom168 = sext i32 %113 to i64 - %arrayidx169 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx167, i64 0, i64 %idxprom168 - %114 = load float, float* %arrayidx169, align 4 - %115 = load float, float* %jc, align 4 - %sub170 = fsub contract float %114, %115 - store float %sub170, float* %s, align 4 - %116 = load i32, i32* %ty, align 4 - %idxprom171 = sext i32 %116 to i64 - %arrayidx172 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE4temp to [16 x [16 x float]]*), i64 0, i64 %idxprom171 - %117 = load i32, i32* %tx, align 4 - %sub173 = sub nsw i32 %117, 1 - %idxprom174 = sext i32 %sub173 to i64 - %arrayidx175 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx172, i64 0, i64 %idxprom174 - %118 = load float, float* %arrayidx175, align 4 - %119 = load float, float* %jc, align 4 - %sub176 = fsub contract float %118, %119 - store float %sub176, float* %w, align 4 - %120 = load i32, i32* %ty, align 4 - %idxprom177 = sext i32 %120 to i64 - %arrayidx178 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE4east to [16 x [16 x float]]*), i64 0, i64 %idxprom177 - %121 = load i32, i32* %tx, align 4 - %idxprom179 = sext i32 %121 to i64 - %arrayidx180 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx178, i64 0, i64 %idxprom179 - %122 = load float, float* %arrayidx180, align 4 - %123 = load float, float* %jc, align 4 - %sub181 = fsub contract float %122, %123 - store float %sub181, float* %e, align 4 - br label %if.end371 - -if.else182: ; preds = %land.lhs.true157, %if.else155 - %124 = load i32, i32* %ty, align 4 - %cmp183 = icmp eq i32 %124, 15 - br i1 %cmp183, label %land.lhs.true184, label %if.else209 - -land.lhs.true184: ; preds = %if.else182 - %125 = load i32, i32* %tx, align 4 - %cmp185 = icmp eq i32 %125, 15 - br i1 %cmp185, label %if.then186, label %if.else209 - -if.then186: ; preds = %land.lhs.true184 - %126 = load i32, i32* %ty, align 4 - %sub187 = sub nsw i32 %126, 1 - %idxprom188 = sext i32 %sub187 to i64 - %arrayidx189 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE4temp to [16 x [16 x float]]*), i64 0, i64 %idxprom188 - %127 = load i32, i32* %tx, align 4 - %idxprom190 = sext i32 %127 to i64 - %arrayidx191 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx189, i64 0, i64 %idxprom190 - %128 = load float, float* %arrayidx191, align 4 - %129 = load float, float* %jc, align 4 - %sub192 = fsub contract float %128, %129 - store float %sub192, float* %n, align 4 - %130 = load i32, i32* %ty, align 4 - %idxprom193 = sext i32 %130 to i64 - %arrayidx194 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE5south to [16 x [16 x float]]*), i64 0, i64 %idxprom193 - %131 = load i32, i32* %tx, align 4 - %idxprom195 = sext i32 %131 to i64 - %arrayidx196 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx194, i64 0, i64 %idxprom195 - %132 = load float, float* %arrayidx196, align 4 - %133 = load float, float* %jc, align 4 - %sub197 = fsub contract float %132, %133 - store float %sub197, float* %s, align 4 - %134 = load i32, i32* %ty, align 4 - %idxprom198 = sext i32 %134 to i64 - %arrayidx199 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE4temp to [16 x [16 x float]]*), i64 0, i64 %idxprom198 - %135 = load i32, i32* %tx, align 4 - %sub200 = sub nsw i32 %135, 1 - %idxprom201 = sext i32 %sub200 to i64 - %arrayidx202 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx199, i64 0, i64 %idxprom201 - %136 = load float, float* %arrayidx202, align 4 - %137 = load float, float* %jc, align 4 - %sub203 = fsub contract float %136, %137 - store float %sub203, float* %w, align 4 - %138 = load i32, i32* %ty, align 4 - %idxprom204 = sext i32 %138 to i64 - %arrayidx205 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE4east to [16 x [16 x float]]*), i64 0, i64 %idxprom204 - %139 = load i32, i32* %tx, align 4 - %idxprom206 = sext i32 %139 to i64 - %arrayidx207 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx205, i64 0, i64 %idxprom206 - %140 = load float, float* %arrayidx207, align 4 - %141 = load float, float* %jc, align 4 - %sub208 = fsub contract float %140, %141 - store float %sub208, float* %e, align 4 - br label %if.end370 - -if.else209: ; preds = %land.lhs.true184, %if.else182 - %142 = load i32, i32* %ty, align 4 - %cmp210 = icmp eq i32 %142, 15 - br i1 %cmp210, label %land.lhs.true211, label %if.else236 - -land.lhs.true211: ; preds = %if.else209 - %143 = load i32, i32* %tx, align 4 - %cmp212 = icmp eq i32 %143, 0 - br i1 %cmp212, label %if.then213, label %if.else236 - -if.then213: ; preds = %land.lhs.true211 - %144 = load i32, i32* %ty, align 4 - %sub214 = sub nsw i32 %144, 1 - %idxprom215 = sext i32 %sub214 to i64 - %arrayidx216 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE4temp to [16 x [16 x float]]*), i64 0, i64 %idxprom215 - %145 = load i32, i32* %tx, align 4 - %idxprom217 = sext i32 %145 to i64 - %arrayidx218 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx216, i64 0, i64 %idxprom217 - %146 = load float, float* %arrayidx218, align 4 - %147 = load float, float* %jc, align 4 - %sub219 = fsub contract float %146, %147 - store float %sub219, float* %n, align 4 - %148 = load i32, i32* %ty, align 4 - %idxprom220 = sext i32 %148 to i64 - %arrayidx221 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE5south to [16 x [16 x float]]*), i64 0, i64 %idxprom220 - %149 = load i32, i32* %tx, align 4 - %idxprom222 = sext i32 %149 to i64 - %arrayidx223 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx221, i64 0, i64 %idxprom222 - %150 = load float, float* %arrayidx223, align 4 - %151 = load float, float* %jc, align 4 - %sub224 = fsub contract float %150, %151 - store float %sub224, float* %s, align 4 - %152 = load i32, i32* %ty, align 4 - %idxprom225 = sext i32 %152 to i64 - %arrayidx226 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE4west to [16 x [16 x float]]*), i64 0, i64 %idxprom225 - %153 = load i32, i32* %tx, align 4 - %idxprom227 = sext i32 %153 to i64 - %arrayidx228 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx226, i64 0, i64 %idxprom227 - %154 = load float, float* %arrayidx228, align 4 - %155 = load float, float* %jc, align 4 - %sub229 = fsub contract float %154, %155 - store float %sub229, float* %w, align 4 - %156 = load i32, i32* %ty, align 4 - %idxprom230 = sext i32 %156 to i64 - %arrayidx231 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE4temp to [16 x [16 x float]]*), i64 0, i64 %idxprom230 - %157 = load i32, i32* %tx, align 4 - %add232 = add nsw i32 %157, 1 - %idxprom233 = sext i32 %add232 to i64 - %arrayidx234 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx231, i64 0, i64 %idxprom233 - %158 = load float, float* %arrayidx234, align 4 - %159 = load float, float* %jc, align 4 - %sub235 = fsub contract float %158, %159 - store float %sub235, float* %e, align 4 - br label %if.end369 - -if.else236: ; preds = %land.lhs.true211, %if.else209 - %160 = load i32, i32* %ty, align 4 - %cmp237 = icmp eq i32 %160, 0 - br i1 %cmp237, label %if.then238, label %if.else262 - -if.then238: ; preds = %if.else236 - %161 = load i32, i32* %ty, align 4 - %idxprom239 = sext i32 %161 to i64 - %arrayidx240 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE5north to [16 x [16 x float]]*), i64 0, i64 %idxprom239 - %162 = load i32, i32* %tx, align 4 - %idxprom241 = sext i32 %162 to i64 - %arrayidx242 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx240, i64 0, i64 %idxprom241 - %163 = load float, float* %arrayidx242, align 4 - %164 = load float, float* %jc, align 4 - %sub243 = fsub contract float %163, %164 - store float %sub243, float* %n, align 4 - %165 = load i32, i32* %ty, align 4 - %add244 = add nsw i32 %165, 1 - %idxprom245 = sext i32 %add244 to i64 - %arrayidx246 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE4temp to [16 x [16 x float]]*), i64 0, i64 %idxprom245 - %166 = load i32, i32* %tx, align 4 - %idxprom247 = sext i32 %166 to i64 - %arrayidx248 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx246, i64 0, i64 %idxprom247 - %167 = load float, float* %arrayidx248, align 4 - %168 = load float, float* %jc, align 4 - %sub249 = fsub contract float %167, %168 - store float %sub249, float* %s, align 4 - %169 = load i32, i32* %ty, align 4 - %idxprom250 = sext i32 %169 to i64 - %arrayidx251 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE4temp to [16 x [16 x float]]*), i64 0, i64 %idxprom250 - %170 = load i32, i32* %tx, align 4 - %sub252 = sub nsw i32 %170, 1 - %idxprom253 = sext i32 %sub252 to i64 - %arrayidx254 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx251, i64 0, i64 %idxprom253 - %171 = load float, float* %arrayidx254, align 4 - %172 = load float, float* %jc, align 4 - %sub255 = fsub contract float %171, %172 - store float %sub255, float* %w, align 4 - %173 = load i32, i32* %ty, align 4 - %idxprom256 = sext i32 %173 to i64 - %arrayidx257 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE4temp to [16 x [16 x float]]*), i64 0, i64 %idxprom256 - %174 = load i32, i32* %tx, align 4 - %add258 = add nsw i32 %174, 1 - %idxprom259 = sext i32 %add258 to i64 - %arrayidx260 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx257, i64 0, i64 %idxprom259 - %175 = load float, float* %arrayidx260, align 4 - %176 = load float, float* %jc, align 4 - %sub261 = fsub contract float %175, %176 - store float %sub261, float* %e, align 4 - br label %if.end368 - -if.else262: ; preds = %if.else236 - %177 = load i32, i32* %tx, align 4 - %cmp263 = icmp eq i32 %177, 15 - br i1 %cmp263, label %if.then264, label %if.else288 - -if.then264: ; preds = %if.else262 - %178 = load i32, i32* %ty, align 4 - %sub265 = sub nsw i32 %178, 1 - %idxprom266 = sext i32 %sub265 to i64 - %arrayidx267 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE4temp to [16 x [16 x float]]*), i64 0, i64 %idxprom266 - %179 = load i32, i32* %tx, align 4 - %idxprom268 = sext i32 %179 to i64 - %arrayidx269 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx267, i64 0, i64 %idxprom268 - %180 = load float, float* %arrayidx269, align 4 - %181 = load float, float* %jc, align 4 - %sub270 = fsub contract float %180, %181 - store float %sub270, float* %n, align 4 - %182 = load i32, i32* %ty, align 4 - %add271 = add nsw i32 %182, 1 - %idxprom272 = sext i32 %add271 to i64 - %arrayidx273 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE4temp to [16 x [16 x float]]*), i64 0, i64 %idxprom272 - %183 = load i32, i32* %tx, align 4 - %idxprom274 = sext i32 %183 to i64 - %arrayidx275 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx273, i64 0, i64 %idxprom274 - %184 = load float, float* %arrayidx275, align 4 - %185 = load float, float* %jc, align 4 - %sub276 = fsub contract float %184, %185 - store float %sub276, float* %s, align 4 - %186 = load i32, i32* %ty, align 4 - %idxprom277 = sext i32 %186 to i64 - %arrayidx278 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE4temp to [16 x [16 x float]]*), i64 0, i64 %idxprom277 - %187 = load i32, i32* %tx, align 4 - %sub279 = sub nsw i32 %187, 1 - %idxprom280 = sext i32 %sub279 to i64 - %arrayidx281 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx278, i64 0, i64 %idxprom280 - %188 = load float, float* %arrayidx281, align 4 - %189 = load float, float* %jc, align 4 - %sub282 = fsub contract float %188, %189 - store float %sub282, float* %w, align 4 - %190 = load i32, i32* %ty, align 4 - %idxprom283 = sext i32 %190 to i64 - %arrayidx284 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE4east to [16 x [16 x float]]*), i64 0, i64 %idxprom283 - %191 = load i32, i32* %tx, align 4 - %idxprom285 = sext i32 %191 to i64 - %arrayidx286 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx284, i64 0, i64 %idxprom285 - %192 = load float, float* %arrayidx286, align 4 - %193 = load float, float* %jc, align 4 - %sub287 = fsub contract float %192, %193 - store float %sub287, float* %e, align 4 - br label %if.end367 - -if.else288: ; preds = %if.else262 - %194 = load i32, i32* %ty, align 4 - %cmp289 = icmp eq i32 %194, 15 - br i1 %cmp289, label %if.then290, label %if.else314 - -if.then290: ; preds = %if.else288 - %195 = load i32, i32* %ty, align 4 - %sub291 = sub nsw i32 %195, 1 - %idxprom292 = sext i32 %sub291 to i64 - %arrayidx293 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE4temp to [16 x [16 x float]]*), i64 0, i64 %idxprom292 - %196 = load i32, i32* %tx, align 4 - %idxprom294 = sext i32 %196 to i64 - %arrayidx295 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx293, i64 0, i64 %idxprom294 - %197 = load float, float* %arrayidx295, align 4 - %198 = load float, float* %jc, align 4 - %sub296 = fsub contract float %197, %198 - store float %sub296, float* %n, align 4 - %199 = load i32, i32* %ty, align 4 - %idxprom297 = sext i32 %199 to i64 - %arrayidx298 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE5south to [16 x [16 x float]]*), i64 0, i64 %idxprom297 - %200 = load i32, i32* %tx, align 4 - %idxprom299 = sext i32 %200 to i64 - %arrayidx300 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx298, i64 0, i64 %idxprom299 - %201 = load float, float* %arrayidx300, align 4 - %202 = load float, float* %jc, align 4 - %sub301 = fsub contract float %201, %202 - store float %sub301, float* %s, align 4 - %203 = load i32, i32* %ty, align 4 - %idxprom302 = sext i32 %203 to i64 - %arrayidx303 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE4temp to [16 x [16 x float]]*), i64 0, i64 %idxprom302 - %204 = load i32, i32* %tx, align 4 - %sub304 = sub nsw i32 %204, 1 - %idxprom305 = sext i32 %sub304 to i64 - %arrayidx306 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx303, i64 0, i64 %idxprom305 - %205 = load float, float* %arrayidx306, align 4 - %206 = load float, float* %jc, align 4 - %sub307 = fsub contract float %205, %206 - store float %sub307, float* %w, align 4 - %207 = load i32, i32* %ty, align 4 - %idxprom308 = sext i32 %207 to i64 - %arrayidx309 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE4temp to [16 x [16 x float]]*), i64 0, i64 %idxprom308 - %208 = load i32, i32* %tx, align 4 - %add310 = add nsw i32 %208, 1 - %idxprom311 = sext i32 %add310 to i64 - %arrayidx312 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx309, i64 0, i64 %idxprom311 - %209 = load float, float* %arrayidx312, align 4 - %210 = load float, float* %jc, align 4 - %sub313 = fsub contract float %209, %210 - store float %sub313, float* %e, align 4 - br label %if.end366 - -if.else314: ; preds = %if.else288 - %211 = load i32, i32* %tx, align 4 - %cmp315 = icmp eq i32 %211, 0 - br i1 %cmp315, label %if.then316, label %if.else340 - -if.then316: ; preds = %if.else314 - %212 = load i32, i32* %ty, align 4 - %sub317 = sub nsw i32 %212, 1 - %idxprom318 = sext i32 %sub317 to i64 - %arrayidx319 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE4temp to [16 x [16 x float]]*), i64 0, i64 %idxprom318 - %213 = load i32, i32* %tx, align 4 - %idxprom320 = sext i32 %213 to i64 - %arrayidx321 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx319, i64 0, i64 %idxprom320 - %214 = load float, float* %arrayidx321, align 4 - %215 = load float, float* %jc, align 4 - %sub322 = fsub contract float %214, %215 - store float %sub322, float* %n, align 4 - %216 = load i32, i32* %ty, align 4 - %add323 = add nsw i32 %216, 1 - %idxprom324 = sext i32 %add323 to i64 - %arrayidx325 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE4temp to [16 x [16 x float]]*), i64 0, i64 %idxprom324 - %217 = load i32, i32* %tx, align 4 - %idxprom326 = sext i32 %217 to i64 - %arrayidx327 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx325, i64 0, i64 %idxprom326 - %218 = load float, float* %arrayidx327, align 4 - %219 = load float, float* %jc, align 4 - %sub328 = fsub contract float %218, %219 - store float %sub328, float* %s, align 4 - %220 = load i32, i32* %ty, align 4 - %idxprom329 = sext i32 %220 to i64 - %arrayidx330 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE4west to [16 x [16 x float]]*), i64 0, i64 %idxprom329 - %221 = load i32, i32* %tx, align 4 - %idxprom331 = sext i32 %221 to i64 - %arrayidx332 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx330, i64 0, i64 %idxprom331 - %222 = load float, float* %arrayidx332, align 4 - %223 = load float, float* %jc, align 4 - %sub333 = fsub contract float %222, %223 - store float %sub333, float* %w, align 4 - %224 = load i32, i32* %ty, align 4 - %idxprom334 = sext i32 %224 to i64 - %arrayidx335 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE4temp to [16 x [16 x float]]*), i64 0, i64 %idxprom334 - %225 = load i32, i32* %tx, align 4 - %add336 = add nsw i32 %225, 1 - %idxprom337 = sext i32 %add336 to i64 - %arrayidx338 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx335, i64 0, i64 %idxprom337 - %226 = load float, float* %arrayidx338, align 4 - %227 = load float, float* %jc, align 4 - %sub339 = fsub contract float %226, %227 - store float %sub339, float* %e, align 4 - br label %if.end365 - -if.else340: ; preds = %if.else314 - %228 = load i32, i32* %ty, align 4 - %sub341 = sub nsw i32 %228, 1 - %idxprom342 = sext i32 %sub341 to i64 - %arrayidx343 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE4temp to [16 x [16 x float]]*), i64 0, i64 %idxprom342 - %229 = load i32, i32* %tx, align 4 - %idxprom344 = sext i32 %229 to i64 - %arrayidx345 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx343, i64 0, i64 %idxprom344 - %230 = load float, float* %arrayidx345, align 4 - %231 = load float, float* %jc, align 4 - %sub346 = fsub contract float %230, %231 - store float %sub346, float* %n, align 4 - %232 = load i32, i32* %ty, align 4 - %add347 = add nsw i32 %232, 1 - %idxprom348 = sext i32 %add347 to i64 - %arrayidx349 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE4temp to [16 x [16 x float]]*), i64 0, i64 %idxprom348 - %233 = load i32, i32* %tx, align 4 - %idxprom350 = sext i32 %233 to i64 - %arrayidx351 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx349, i64 0, i64 %idxprom350 - %234 = load float, float* %arrayidx351, align 4 - %235 = load float, float* %jc, align 4 - %sub352 = fsub contract float %234, %235 - store float %sub352, float* %s, align 4 - %236 = load i32, i32* %ty, align 4 - %idxprom353 = sext i32 %236 to i64 - %arrayidx354 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE4temp to [16 x [16 x float]]*), i64 0, i64 %idxprom353 - %237 = load i32, i32* %tx, align 4 - %sub355 = sub nsw i32 %237, 1 - %idxprom356 = sext i32 %sub355 to i64 - %arrayidx357 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx354, i64 0, i64 %idxprom356 - %238 = load float, float* %arrayidx357, align 4 - %239 = load float, float* %jc, align 4 - %sub358 = fsub contract float %238, %239 - store float %sub358, float* %w, align 4 - %240 = load i32, i32* %ty, align 4 - %idxprom359 = sext i32 %240 to i64 - %arrayidx360 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE4temp to [16 x [16 x float]]*), i64 0, i64 %idxprom359 - %241 = load i32, i32* %tx, align 4 - %add361 = add nsw i32 %241, 1 - %idxprom362 = sext i32 %add361 to i64 - %arrayidx363 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx360, i64 0, i64 %idxprom362 - %242 = load float, float* %arrayidx363, align 4 - %243 = load float, float* %jc, align 4 - %sub364 = fsub contract float %242, %243 - store float %sub364, float* %e, align 4 - br label %if.end365 - -if.end365: ; preds = %if.else340, %if.then316 - br label %if.end366 - -if.end366: ; preds = %if.end365, %if.then290 - br label %if.end367 - -if.end367: ; preds = %if.end366, %if.then264 - br label %if.end368 - -if.end368: ; preds = %if.end367, %if.then238 - br label %if.end369 - -if.end369: ; preds = %if.end368, %if.then213 - br label %if.end370 - -if.end370: ; preds = %if.end369, %if.then186 - br label %if.end371 - -if.end371: ; preds = %if.end370, %if.then159 - br label %if.end372 - -if.end372: ; preds = %if.end371, %if.then132 - %244 = load float, float* %n, align 4 - %245 = load float, float* %n, align 4 - %mul373 = fmul contract float %244, %245 - %246 = load float, float* %s, align 4 - %247 = load float, float* %s, align 4 - %mul374 = fmul contract float %246, %247 - %add375 = fadd contract float %mul373, %mul374 - %248 = load float, float* %w, align 4 - %249 = load float, float* %w, align 4 - %mul376 = fmul contract float %248, %249 - %add377 = fadd contract float %add375, %mul376 - %250 = load float, float* %e, align 4 - %251 = load float, float* %e, align 4 - %mul378 = fmul contract float %250, %251 - %add379 = fadd contract float %add377, %mul378 - %252 = load float, float* %jc, align 4 - %253 = load float, float* %jc, align 4 - %mul380 = fmul contract float %252, %253 - %div = fdiv float %add379, %mul380 - store float %div, float* %g2, align 4 - %254 = load float, float* %n, align 4 - %255 = load float, float* %s, align 4 - %add381 = fadd contract float %254, %255 - %256 = load float, float* %w, align 4 - %add382 = fadd contract float %add381, %256 - %257 = load float, float* %e, align 4 - %add383 = fadd contract float %add382, %257 - %258 = load float, float* %jc, align 4 - %div384 = fdiv float %add383, %258 - store float %div384, float* %l, align 4 - %259 = load float, float* %g2, align 4 - %conv = fpext float %259 to double - %mul385 = fmul contract double 5.000000e-01, %conv - %260 = load float, float* %l, align 4 - %261 = load float, float* %l, align 4 - %mul386 = fmul contract float %260, %261 - %conv387 = fpext float %mul386 to double - %mul388 = fmul contract double 6.250000e-02, %conv387 - %sub389 = fsub contract double %mul385, %mul388 - %conv390 = fptrunc double %sub389 to float - store float %conv390, float* %num, align 4 - %262 = load float, float* %l, align 4 - %conv391 = fpext float %262 to double - %mul392 = fmul contract double 2.500000e-01, %conv391 - %add393 = fadd contract double 1.000000e+00, %mul392 - %conv394 = fptrunc double %add393 to float - store float %conv394, float* %den, align 4 - %263 = load float, float* %num, align 4 - %264 = load float, float* %den, align 4 - %265 = load float, float* %den, align 4 - %mul395 = fmul contract float %264, %265 - %div396 = fdiv float %263, %mul395 - store float %div396, float* %qsqr, align 4 - %266 = load float, float* %qsqr, align 4 - %267 = load float, float* %q0sqr.addr, align 4 - %sub397 = fsub contract float %266, %267 - %268 = load float, float* %q0sqr.addr, align 4 - %269 = load float, float* %q0sqr.addr, align 4 - %add398 = fadd contract float 1.000000e+00, %269 - %mul399 = fmul contract float %268, %add398 - %div400 = fdiv float %sub397, %mul399 - store float %div400, float* %den, align 4 - %270 = load float, float* %den, align 4 - %conv401 = fpext float %270 to double - %add402 = fadd contract double 1.000000e+00, %conv401 - %div403 = fdiv double 1.000000e+00, %add402 - %conv404 = fptrunc double %div403 to float - store float %conv404, float* %c, align 4 - %271 = load float, float* %c, align 4 - %cmp405 = fcmp olt float %271, 0.000000e+00 - br i1 %cmp405, label %if.then406, label %if.else411 - -if.then406: ; preds = %if.end372 - %272 = load i32, i32* %ty, align 4 - %idxprom407 = sext i32 %272 to i64 - %arrayidx408 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE11temp_result to [16 x [16 x float]]*), i64 0, i64 %idxprom407 - %273 = load i32, i32* %tx, align 4 - %idxprom409 = sext i32 %273 to i64 - %arrayidx410 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx408, i64 0, i64 %idxprom409 - store float 0.000000e+00, float* %arrayidx410, align 4 - br label %if.end424 - -if.else411: ; preds = %if.end372 - %274 = load float, float* %c, align 4 - %cmp412 = fcmp ogt float %274, 1.000000e+00 - br i1 %cmp412, label %if.then413, label %if.else418 - -if.then413: ; preds = %if.else411 - %275 = load i32, i32* %ty, align 4 - %idxprom414 = sext i32 %275 to i64 - %arrayidx415 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE11temp_result to [16 x [16 x float]]*), i64 0, i64 %idxprom414 - %276 = load i32, i32* %tx, align 4 - %idxprom416 = sext i32 %276 to i64 - %arrayidx417 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx415, i64 0, i64 %idxprom416 - store float 1.000000e+00, float* %arrayidx417, align 4 - br label %if.end423 - -if.else418: ; preds = %if.else411 - %277 = load float, float* %c, align 4 - %278 = load i32, i32* %ty, align 4 - %idxprom419 = sext i32 %278 to i64 - %arrayidx420 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE11temp_result to [16 x [16 x float]]*), i64 0, i64 %idxprom419 - %279 = load i32, i32* %tx, align 4 - %idxprom421 = sext i32 %279 to i64 - %arrayidx422 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx420, i64 0, i64 %idxprom421 - store float %277, float* %arrayidx422, align 4 - br label %if.end423 - -if.end423: ; preds = %if.else418, %if.then413 - br label %if.end424 - -if.end424: ; preds = %if.end423, %if.then406 - call void @llvm.nvvm.barrier0() - %280 = load i32, i32* %ty, align 4 - %idxprom425 = sext i32 %280 to i64 - %arrayidx426 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_1PfS_S_S_S_S_iifE11temp_result to [16 x [16 x float]]*), i64 0, i64 %idxprom425 - %281 = load i32, i32* %tx, align 4 - %idxprom427 = sext i32 %281 to i64 - %arrayidx428 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx426, i64 0, i64 %idxprom427 - %282 = load float, float* %arrayidx428, align 4 - %283 = load float*, float** %C_cuda.addr, align 8 - %284 = load i32, i32* %index, align 4 - %idxprom429 = sext i32 %284 to i64 - %arrayidx430 = getelementptr inbounds float, float* %283, i64 %idxprom429 - store float %282, float* %arrayidx430, align 4 - %285 = load float, float* %e, align 4 - %286 = load float*, float** %E_C.addr, align 8 - %287 = load i32, i32* %index, align 4 - %idxprom431 = sext i32 %287 to i64 - %arrayidx432 = getelementptr inbounds float, float* %286, i64 %idxprom431 - store float %285, float* %arrayidx432, align 4 - %288 = load float, float* %w, align 4 - %289 = load float*, float** %W_C.addr, align 8 - %290 = load i32, i32* %index, align 4 - %idxprom433 = sext i32 %290 to i64 - %arrayidx434 = getelementptr inbounds float, float* %289, i64 %idxprom433 - store float %288, float* %arrayidx434, align 4 - %291 = load float, float* %s, align 4 - %292 = load float*, float** %S_C.addr, align 8 - %293 = load i32, i32* %index, align 4 - %idxprom435 = sext i32 %293 to i64 - %arrayidx436 = getelementptr inbounds float, float* %292, i64 %idxprom435 - store float %291, float* %arrayidx436, align 4 - %294 = load float, float* %n, align 4 - %295 = load float*, float** %N_C.addr, align 8 - %296 = load i32, i32* %index, align 4 - %idxprom437 = sext i32 %296 to i64 - %arrayidx438 = getelementptr inbounds float, float* %295, i64 %idxprom437 - store float %294, float* %arrayidx438, align 4 - ret void -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() - ret i32 %0 -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_yEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y() - ret i32 %0 -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() - ret i32 %0 -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_yEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.y() - ret i32 %0 -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN24__cuda_builtin_gridDim_t17__fetch_builtin_yEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.y() - ret i32 %0 -} - -; Function Attrs: convergent nounwind -declare void @llvm.nvvm.barrier0() #2 - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN24__cuda_builtin_gridDim_t17__fetch_builtin_xEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x() - ret i32 %0 -} - -; Function Attrs: convergent noinline nounwind optnone -define dso_local void @_Z11srad_cuda_2PfS_S_S_S_S_iiff(float* %E_C, float* %W_C, float* %N_C, float* %S_C, float* %J_cuda, float* %C_cuda, i32 %cols, i32 %rows, float %lambda, float %q0sqr) #0 { -entry: - %E_C.addr = alloca float*, align 8 - %W_C.addr = alloca float*, align 8 - %N_C.addr = alloca float*, align 8 - %S_C.addr = alloca float*, align 8 - %J_cuda.addr = alloca float*, align 8 - %C_cuda.addr = alloca float*, align 8 - %cols.addr = alloca i32, align 4 - %rows.addr = alloca i32, align 4 - %lambda.addr = alloca float, align 4 - %q0sqr.addr = alloca float, align 4 - %bx = alloca i32, align 4 - %by = alloca i32, align 4 - %tx = alloca i32, align 4 - %ty = alloca i32, align 4 - %index = alloca i32, align 4 - %index_s = alloca i32, align 4 - %index_e = alloca i32, align 4 - %cc = alloca float, align 4 - %cn = alloca float, align 4 - %cs = alloca float, align 4 - %ce = alloca float, align 4 - %cw = alloca float, align 4 - %d_sum = alloca float, align 4 - store float* %E_C, float** %E_C.addr, align 8 - store float* %W_C, float** %W_C.addr, align 8 - store float* %N_C, float** %N_C.addr, align 8 - store float* %S_C, float** %S_C.addr, align 8 - store float* %J_cuda, float** %J_cuda.addr, align 8 - store float* %C_cuda, float** %C_cuda.addr, align 8 - store i32 %cols, i32* %cols.addr, align 4 - store i32 %rows, i32* %rows.addr, align 4 - store float %lambda, float* %lambda.addr, align 4 - store float %q0sqr, float* %q0sqr.addr, align 4 - %call = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #2 - store i32 %call, i32* %bx, align 4 - %call1 = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_yEv() #2 - store i32 %call1, i32* %by, align 4 - %call2 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #2 - store i32 %call2, i32* %tx, align 4 - %call3 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_yEv() #2 - store i32 %call3, i32* %ty, align 4 - %0 = load i32, i32* %cols.addr, align 4 - %mul = mul nsw i32 %0, 16 - %1 = load i32, i32* %by, align 4 - %mul4 = mul nsw i32 %mul, %1 - %2 = load i32, i32* %bx, align 4 - %mul5 = mul nsw i32 16, %2 - %add = add nsw i32 %mul4, %mul5 - %3 = load i32, i32* %cols.addr, align 4 - %4 = load i32, i32* %ty, align 4 - %mul6 = mul nsw i32 %3, %4 - %add7 = add nsw i32 %add, %mul6 - %5 = load i32, i32* %tx, align 4 - %add8 = add nsw i32 %add7, %5 - store i32 %add8, i32* %index, align 4 - %6 = load i32, i32* %cols.addr, align 4 - %mul9 = mul nsw i32 %6, 16 - %7 = load i32, i32* %by, align 4 - %mul10 = mul nsw i32 %mul9, %7 - %8 = load i32, i32* %bx, align 4 - %mul11 = mul nsw i32 16, %8 - %add12 = add nsw i32 %mul10, %mul11 - %9 = load i32, i32* %cols.addr, align 4 - %mul13 = mul nsw i32 %9, 16 - %add14 = add nsw i32 %add12, %mul13 - %10 = load i32, i32* %tx, align 4 - %add15 = add nsw i32 %add14, %10 - store i32 %add15, i32* %index_s, align 4 - %11 = load i32, i32* %cols.addr, align 4 - %mul16 = mul nsw i32 %11, 16 - %12 = load i32, i32* %by, align 4 - %mul17 = mul nsw i32 %mul16, %12 - %13 = load i32, i32* %bx, align 4 - %mul18 = mul nsw i32 16, %13 - %add19 = add nsw i32 %mul17, %mul18 - %14 = load i32, i32* %cols.addr, align 4 - %15 = load i32, i32* %ty, align 4 - %mul20 = mul nsw i32 %14, %15 - %add21 = add nsw i32 %add19, %mul20 - %add22 = add nsw i32 %add21, 16 - store i32 %add22, i32* %index_e, align 4 - %16 = load float*, float** %J_cuda.addr, align 8 - %17 = load i32, i32* %index, align 4 - %idxprom = sext i32 %17 to i64 - %arrayidx = getelementptr inbounds float, float* %16, i64 %idxprom - %18 = load float, float* %arrayidx, align 4 - %19 = load i32, i32* %ty, align 4 - %idxprom23 = sext i32 %19 to i64 - %arrayidx24 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_2PfS_S_S_S_S_iiffE4temp to [16 x [16 x float]]*), i64 0, i64 %idxprom23 - %20 = load i32, i32* %tx, align 4 - %idxprom25 = sext i32 %20 to i64 - %arrayidx26 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx24, i64 0, i64 %idxprom25 - store float %18, float* %arrayidx26, align 4 - call void @llvm.nvvm.barrier0() - %21 = load float*, float** %C_cuda.addr, align 8 - %22 = load i32, i32* %index_s, align 4 - %idxprom27 = sext i32 %22 to i64 - %arrayidx28 = getelementptr inbounds float, float* %21, i64 %idxprom27 - %23 = load float, float* %arrayidx28, align 4 - %24 = load i32, i32* %ty, align 4 - %idxprom29 = sext i32 %24 to i64 - %arrayidx30 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_2PfS_S_S_S_S_iiffE7south_c to [16 x [16 x float]]*), i64 0, i64 %idxprom29 - %25 = load i32, i32* %tx, align 4 - %idxprom31 = sext i32 %25 to i64 - %arrayidx32 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx30, i64 0, i64 %idxprom31 - store float %23, float* %arrayidx32, align 4 - %26 = load i32, i32* %by, align 4 - %call33 = call i32 @_ZN24__cuda_builtin_gridDim_t17__fetch_builtin_yEv() #2 - %sub = sub i32 %call33, 1 - %cmp = icmp eq i32 %26, %sub - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - %27 = load float*, float** %C_cuda.addr, align 8 - %28 = load i32, i32* %cols.addr, align 4 - %mul34 = mul nsw i32 %28, 16 - %call35 = call i32 @_ZN24__cuda_builtin_gridDim_t17__fetch_builtin_yEv() #2 - %sub36 = sub i32 %call35, 1 - %mul37 = mul i32 %mul34, %sub36 - %29 = load i32, i32* %bx, align 4 - %mul38 = mul nsw i32 16, %29 - %add39 = add i32 %mul37, %mul38 - %30 = load i32, i32* %cols.addr, align 4 - %mul40 = mul nsw i32 %30, 15 - %add41 = add i32 %add39, %mul40 - %31 = load i32, i32* %tx, align 4 - %add42 = add i32 %add41, %31 - %idxprom43 = zext i32 %add42 to i64 - %arrayidx44 = getelementptr inbounds float, float* %27, i64 %idxprom43 - %32 = load float, float* %arrayidx44, align 4 - %33 = load i32, i32* %ty, align 4 - %idxprom45 = sext i32 %33 to i64 - %arrayidx46 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_2PfS_S_S_S_S_iiffE7south_c to [16 x [16 x float]]*), i64 0, i64 %idxprom45 - %34 = load i32, i32* %tx, align 4 - %idxprom47 = sext i32 %34 to i64 - %arrayidx48 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx46, i64 0, i64 %idxprom47 - store float %32, float* %arrayidx48, align 4 - br label %if.end - -if.end: ; preds = %if.then, %entry - call void @llvm.nvvm.barrier0() - %35 = load float*, float** %C_cuda.addr, align 8 - %36 = load i32, i32* %index_e, align 4 - %idxprom49 = sext i32 %36 to i64 - %arrayidx50 = getelementptr inbounds float, float* %35, i64 %idxprom49 - %37 = load float, float* %arrayidx50, align 4 - %38 = load i32, i32* %ty, align 4 - %idxprom51 = sext i32 %38 to i64 - %arrayidx52 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_2PfS_S_S_S_S_iiffE6east_c to [16 x [16 x float]]*), i64 0, i64 %idxprom51 - %39 = load i32, i32* %tx, align 4 - %idxprom53 = sext i32 %39 to i64 - %arrayidx54 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx52, i64 0, i64 %idxprom53 - store float %37, float* %arrayidx54, align 4 - %40 = load i32, i32* %bx, align 4 - %call55 = call i32 @_ZN24__cuda_builtin_gridDim_t17__fetch_builtin_xEv() #2 - %sub56 = sub i32 %call55, 1 - %cmp57 = icmp eq i32 %40, %sub56 - br i1 %cmp57, label %if.then58, label %if.end75 - -if.then58: ; preds = %if.end - %41 = load float*, float** %C_cuda.addr, align 8 - %42 = load i32, i32* %cols.addr, align 4 - %mul59 = mul nsw i32 %42, 16 - %43 = load i32, i32* %by, align 4 - %mul60 = mul nsw i32 %mul59, %43 - %call61 = call i32 @_ZN24__cuda_builtin_gridDim_t17__fetch_builtin_xEv() #2 - %sub62 = sub i32 %call61, 1 - %mul63 = mul i32 16, %sub62 - %add64 = add i32 %mul60, %mul63 - %44 = load i32, i32* %cols.addr, align 4 - %45 = load i32, i32* %ty, align 4 - %mul65 = mul nsw i32 %44, %45 - %add66 = add i32 %add64, %mul65 - %add67 = add i32 %add66, 16 - %sub68 = sub i32 %add67, 1 - %idxprom69 = zext i32 %sub68 to i64 - %arrayidx70 = getelementptr inbounds float, float* %41, i64 %idxprom69 - %46 = load float, float* %arrayidx70, align 4 - %47 = load i32, i32* %ty, align 4 - %idxprom71 = sext i32 %47 to i64 - %arrayidx72 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_2PfS_S_S_S_S_iiffE6east_c to [16 x [16 x float]]*), i64 0, i64 %idxprom71 - %48 = load i32, i32* %tx, align 4 - %idxprom73 = sext i32 %48 to i64 - %arrayidx74 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx72, i64 0, i64 %idxprom73 - store float %46, float* %arrayidx74, align 4 - br label %if.end75 - -if.end75: ; preds = %if.then58, %if.end - call void @llvm.nvvm.barrier0() - %49 = load float*, float** %C_cuda.addr, align 8 - %50 = load i32, i32* %index, align 4 - %idxprom76 = sext i32 %50 to i64 - %arrayidx77 = getelementptr inbounds float, float* %49, i64 %idxprom76 - %51 = load float, float* %arrayidx77, align 4 - %52 = load i32, i32* %ty, align 4 - %idxprom78 = sext i32 %52 to i64 - %arrayidx79 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_2PfS_S_S_S_S_iiffE11c_cuda_temp to [16 x [16 x float]]*), i64 0, i64 %idxprom78 - %53 = load i32, i32* %tx, align 4 - %idxprom80 = sext i32 %53 to i64 - %arrayidx81 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx79, i64 0, i64 %idxprom80 - store float %51, float* %arrayidx81, align 4 - call void @llvm.nvvm.barrier0() - %54 = load i32, i32* %ty, align 4 - %idxprom82 = sext i32 %54 to i64 - %arrayidx83 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_2PfS_S_S_S_S_iiffE11c_cuda_temp to [16 x [16 x float]]*), i64 0, i64 %idxprom82 - %55 = load i32, i32* %tx, align 4 - %idxprom84 = sext i32 %55 to i64 - %arrayidx85 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx83, i64 0, i64 %idxprom84 - %56 = load float, float* %arrayidx85, align 4 - store float %56, float* %cc, align 4 - %57 = load i32, i32* %ty, align 4 - %cmp86 = icmp eq i32 %57, 15 - br i1 %cmp86, label %land.lhs.true, label %if.else - -land.lhs.true: ; preds = %if.end75 - %58 = load i32, i32* %tx, align 4 - %cmp87 = icmp eq i32 %58, 15 - br i1 %cmp87, label %if.then88, label %if.else - -if.then88: ; preds = %land.lhs.true - %59 = load float, float* %cc, align 4 - store float %59, float* %cn, align 4 - %60 = load i32, i32* %ty, align 4 - %idxprom89 = sext i32 %60 to i64 - %arrayidx90 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_2PfS_S_S_S_S_iiffE7south_c to [16 x [16 x float]]*), i64 0, i64 %idxprom89 - %61 = load i32, i32* %tx, align 4 - %idxprom91 = sext i32 %61 to i64 - %arrayidx92 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx90, i64 0, i64 %idxprom91 - %62 = load float, float* %arrayidx92, align 4 - store float %62, float* %cs, align 4 - %63 = load float, float* %cc, align 4 - store float %63, float* %cw, align 4 - %64 = load i32, i32* %ty, align 4 - %idxprom93 = sext i32 %64 to i64 - %arrayidx94 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_2PfS_S_S_S_S_iiffE6east_c to [16 x [16 x float]]*), i64 0, i64 %idxprom93 - %65 = load i32, i32* %tx, align 4 - %idxprom95 = sext i32 %65 to i64 - %arrayidx96 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx94, i64 0, i64 %idxprom95 - %66 = load float, float* %arrayidx96, align 4 - store float %66, float* %ce, align 4 - br label %if.end133 - -if.else: ; preds = %land.lhs.true, %if.end75 - %67 = load i32, i32* %tx, align 4 - %cmp97 = icmp eq i32 %67, 15 - br i1 %cmp97, label %if.then98, label %if.else108 - -if.then98: ; preds = %if.else - %68 = load float, float* %cc, align 4 - store float %68, float* %cn, align 4 - %69 = load i32, i32* %ty, align 4 - %add99 = add nsw i32 %69, 1 - %idxprom100 = sext i32 %add99 to i64 - %arrayidx101 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_2PfS_S_S_S_S_iiffE11c_cuda_temp to [16 x [16 x float]]*), i64 0, i64 %idxprom100 - %70 = load i32, i32* %tx, align 4 - %idxprom102 = sext i32 %70 to i64 - %arrayidx103 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx101, i64 0, i64 %idxprom102 - %71 = load float, float* %arrayidx103, align 4 - store float %71, float* %cs, align 4 - %72 = load float, float* %cc, align 4 - store float %72, float* %cw, align 4 - %73 = load i32, i32* %ty, align 4 - %idxprom104 = sext i32 %73 to i64 - %arrayidx105 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_2PfS_S_S_S_S_iiffE6east_c to [16 x [16 x float]]*), i64 0, i64 %idxprom104 - %74 = load i32, i32* %tx, align 4 - %idxprom106 = sext i32 %74 to i64 - %arrayidx107 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx105, i64 0, i64 %idxprom106 - %75 = load float, float* %arrayidx107, align 4 - store float %75, float* %ce, align 4 - br label %if.end132 - -if.else108: ; preds = %if.else - %76 = load i32, i32* %ty, align 4 - %cmp109 = icmp eq i32 %76, 15 - br i1 %cmp109, label %if.then110, label %if.else120 - -if.then110: ; preds = %if.else108 - %77 = load float, float* %cc, align 4 - store float %77, float* %cn, align 4 - %78 = load i32, i32* %ty, align 4 - %idxprom111 = sext i32 %78 to i64 - %arrayidx112 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_2PfS_S_S_S_S_iiffE7south_c to [16 x [16 x float]]*), i64 0, i64 %idxprom111 - %79 = load i32, i32* %tx, align 4 - %idxprom113 = sext i32 %79 to i64 - %arrayidx114 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx112, i64 0, i64 %idxprom113 - %80 = load float, float* %arrayidx114, align 4 - store float %80, float* %cs, align 4 - %81 = load float, float* %cc, align 4 - store float %81, float* %cw, align 4 - %82 = load i32, i32* %ty, align 4 - %idxprom115 = sext i32 %82 to i64 - %arrayidx116 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_2PfS_S_S_S_S_iiffE11c_cuda_temp to [16 x [16 x float]]*), i64 0, i64 %idxprom115 - %83 = load i32, i32* %tx, align 4 - %add117 = add nsw i32 %83, 1 - %idxprom118 = sext i32 %add117 to i64 - %arrayidx119 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx116, i64 0, i64 %idxprom118 - %84 = load float, float* %arrayidx119, align 4 - store float %84, float* %ce, align 4 - br label %if.end131 - -if.else120: ; preds = %if.else108 - %85 = load float, float* %cc, align 4 - store float %85, float* %cn, align 4 - %86 = load i32, i32* %ty, align 4 - %add121 = add nsw i32 %86, 1 - %idxprom122 = sext i32 %add121 to i64 - %arrayidx123 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_2PfS_S_S_S_S_iiffE11c_cuda_temp to [16 x [16 x float]]*), i64 0, i64 %idxprom122 - %87 = load i32, i32* %tx, align 4 - %idxprom124 = sext i32 %87 to i64 - %arrayidx125 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx123, i64 0, i64 %idxprom124 - %88 = load float, float* %arrayidx125, align 4 - store float %88, float* %cs, align 4 - %89 = load float, float* %cc, align 4 - store float %89, float* %cw, align 4 - %90 = load i32, i32* %ty, align 4 - %idxprom126 = sext i32 %90 to i64 - %arrayidx127 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_2PfS_S_S_S_S_iiffE11c_cuda_temp to [16 x [16 x float]]*), i64 0, i64 %idxprom126 - %91 = load i32, i32* %tx, align 4 - %add128 = add nsw i32 %91, 1 - %idxprom129 = sext i32 %add128 to i64 - %arrayidx130 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx127, i64 0, i64 %idxprom129 - %92 = load float, float* %arrayidx130, align 4 - store float %92, float* %ce, align 4 - br label %if.end131 - -if.end131: ; preds = %if.else120, %if.then110 - br label %if.end132 - -if.end132: ; preds = %if.end131, %if.then98 - br label %if.end133 - -if.end133: ; preds = %if.end132, %if.then88 - %93 = load float, float* %cn, align 4 - %94 = load float*, float** %N_C.addr, align 8 - %95 = load i32, i32* %index, align 4 - %idxprom134 = sext i32 %95 to i64 - %arrayidx135 = getelementptr inbounds float, float* %94, i64 %idxprom134 - %96 = load float, float* %arrayidx135, align 4 - %mul136 = fmul contract float %93, %96 - %97 = load float, float* %cs, align 4 - %98 = load float*, float** %S_C.addr, align 8 - %99 = load i32, i32* %index, align 4 - %idxprom137 = sext i32 %99 to i64 - %arrayidx138 = getelementptr inbounds float, float* %98, i64 %idxprom137 - %100 = load float, float* %arrayidx138, align 4 - %mul139 = fmul contract float %97, %100 - %add140 = fadd contract float %mul136, %mul139 - %101 = load float, float* %cw, align 4 - %102 = load float*, float** %W_C.addr, align 8 - %103 = load i32, i32* %index, align 4 - %idxprom141 = sext i32 %103 to i64 - %arrayidx142 = getelementptr inbounds float, float* %102, i64 %idxprom141 - %104 = load float, float* %arrayidx142, align 4 - %mul143 = fmul contract float %101, %104 - %add144 = fadd contract float %add140, %mul143 - %105 = load float, float* %ce, align 4 - %106 = load float*, float** %E_C.addr, align 8 - %107 = load i32, i32* %index, align 4 - %idxprom145 = sext i32 %107 to i64 - %arrayidx146 = getelementptr inbounds float, float* %106, i64 %idxprom145 - %108 = load float, float* %arrayidx146, align 4 - %mul147 = fmul contract float %105, %108 - %add148 = fadd contract float %add144, %mul147 - store float %add148, float* %d_sum, align 4 - %109 = load i32, i32* %ty, align 4 - %idxprom149 = sext i32 %109 to i64 - %arrayidx150 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_2PfS_S_S_S_S_iiffE4temp to [16 x [16 x float]]*), i64 0, i64 %idxprom149 - %110 = load i32, i32* %tx, align 4 - %idxprom151 = sext i32 %110 to i64 - %arrayidx152 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx150, i64 0, i64 %idxprom151 - %111 = load float, float* %arrayidx152, align 4 - %conv = fpext float %111 to double - %112 = load float, float* %lambda.addr, align 4 - %conv153 = fpext float %112 to double - %mul154 = fmul contract double 2.500000e-01, %conv153 - %113 = load float, float* %d_sum, align 4 - %conv155 = fpext float %113 to double - %mul156 = fmul contract double %mul154, %conv155 - %add157 = fadd contract double %conv, %mul156 - %conv158 = fptrunc double %add157 to float - %114 = load i32, i32* %ty, align 4 - %idxprom159 = sext i32 %114 to i64 - %arrayidx160 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_2PfS_S_S_S_S_iiffE13c_cuda_result to [16 x [16 x float]]*), i64 0, i64 %idxprom159 - %115 = load i32, i32* %tx, align 4 - %idxprom161 = sext i32 %115 to i64 - %arrayidx162 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx160, i64 0, i64 %idxprom161 - store float %conv158, float* %arrayidx162, align 4 - call void @llvm.nvvm.barrier0() - %116 = load i32, i32* %ty, align 4 - %idxprom163 = sext i32 %116 to i64 - %arrayidx164 = getelementptr inbounds [16 x [16 x float]], [16 x [16 x float]]* addrspacecast ([16 x [16 x float]] addrspace(3)* @_ZZ11srad_cuda_2PfS_S_S_S_S_iiffE13c_cuda_result to [16 x [16 x float]]*), i64 0, i64 %idxprom163 - %117 = load i32, i32* %tx, align 4 - %idxprom165 = sext i32 %117 to i64 - %arrayidx166 = getelementptr inbounds [16 x float], [16 x float]* %arrayidx164, i64 0, i64 %idxprom165 - %118 = load float, float* %arrayidx166, align 4 - %119 = load float*, float** %J_cuda.addr, align 8 - %120 = load i32, i32* %index, align 4 - %idxprom167 = sext i32 %120 to i64 - %arrayidx168 = getelementptr inbounds float, float* %119, i64 %idxprom167 - store float %118, float* %arrayidx168, align 4 - ret void -} - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() #3 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.y() #3 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() #3 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.tid.y() #3 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.y() #3 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.x() #3 - -attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_61" "target-features"="+ptx64,+sm_61" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { alwaysinline convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_61" "target-features"="+ptx64,+sm_61" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { convergent nounwind } -attributes #3 = { nounwind readnone } - -!llvm.module.flags = !{!0, !1, !2} -!nvvm.annotations = !{!3, !4, !5, !6, !5, !7, !7, !7, !7, !8, !8, !7} -!llvm.ident = !{!9} -!nvvmir.version = !{!10} - -!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]} -!1 = !{i32 1, !"wchar_size", i32 4} -!2 = !{i32 4, !"nvvm-reflect-ftz", i32 0} -!3 = !{void (float*, float*, float*, float*, float*, float*, i32, i32, float)* @_Z11srad_cuda_1PfS_S_S_S_S_iif, !"kernel", i32 1} -!4 = !{void (float*, float*, float*, float*, float*, float*, i32, i32, float, float)* @_Z11srad_cuda_2PfS_S_S_S_S_iiff, !"kernel", i32 1} -!5 = !{null, !"align", i32 8} -!6 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080} -!7 = !{null, !"align", i32 16} -!8 = !{null, !"align", i32 16, !"align", i32 65552, !"align", i32 131088} -!9 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"} -!10 = !{i32 1, i32 4} diff --git a/examples/srad_v2/srad-host-x86_64-unknown-linux-gnu.ll b/examples/srad_v2/srad-host-x86_64-unknown-linux-gnu.ll deleted file mode 100644 index 7d90ac4..0000000 --- a/examples/srad_v2/srad-host-x86_64-unknown-linux-gnu.ll +++ /dev/null @@ -1,962 +0,0 @@ -; ModuleID = 'srad-host-x86_64-unknown-linux-gnu.bc' -source_filename = "srad.cu" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } -%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } -%struct.dim3 = type { i32, i32, i32 } -%struct.CUstream_st = type opaque - -$_ZSt3expf = comdat any - -$_ZN4dim3C2Ejjj = comdat any - -@stderr = external dso_local global %struct._IO_FILE*, align 8 -@.str = private unnamed_addr constant [67 x i8] c"Usage: %s \0A\00", align 1 -@.str.1 = private unnamed_addr constant [28 x i8] c"\09 - number of rows\0A\00", align 1 -@.str.2 = private unnamed_addr constant [29 x i8] c"\09 - number of cols\0A\00", align 1 -@.str.3 = private unnamed_addr constant [35 x i8] c"\09 \09 - y1 value of the speckle\0A\00", align 1 -@.str.4 = private unnamed_addr constant [38 x i8] c"\09 - y2 value of the speckle\0A\00", align 1 -@.str.5 = private unnamed_addr constant [39 x i8] c"\09 - x1 value of the speckle\0A\00", align 1 -@.str.6 = private unnamed_addr constant [39 x i8] c"\09 - x2 value of the speckle\0A\00", align 1 -@.str.7 = private unnamed_addr constant [27 x i8] c"\09 - lambda (0,1)\0A\00", align 1 -@.str.8 = private unnamed_addr constant [41 x i8] c"\09 - number of iterations\0A\00", align 1 -@.str.9 = private unnamed_addr constant [29 x i8] c"WG size of kernel = %d X %d\0A\00", align 1 -@.str.10 = private unnamed_addr constant [39 x i8] c"rows and cols must be multiples of 16\0A\00", align 1 -@.str.11 = private unnamed_addr constant [30 x i8] c"Randomizing the input matrix\0A\00", align 1 -@.str.12 = private unnamed_addr constant [26 x i8] c"Start the SRAD main loop\0A\00", align 1 -@.str.13 = private unnamed_addr constant [18 x i8] c"Printing Output:\0A\00", align 1 -@.str.14 = private unnamed_addr constant [6 x i8] c"%.5f \00", align 1 -@.str.15 = private unnamed_addr constant [2 x i8] c"\0A\00", align 1 -@.str.16 = private unnamed_addr constant [18 x i8] c"Computation Done\0A\00", align 1 -@0 = private unnamed_addr constant [31 x i8] c"_Z11srad_cuda_1PfS_S_S_S_S_iif\00", align 1 -@1 = private unnamed_addr constant [32 x i8] c"_Z11srad_cuda_2PfS_S_S_S_S_iiff\00", align 1 -@2 = private constant [94817 x i8] c"P\EDU\BA\01\00\10\00Pr\01\00\00\00\00\00\02\00\01\01@\00\00\00HG\01\00\00\00\00\00\00\00\00\00\00\00\00\00\07\00\01\00=\00\00\00\00\00\00\00\00\00\00\00\11\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\7FELF\02\01\013\07\00\00\00\00\00\00\00\02\00\BE\00e\00\00\00\00\00\00\00\00\00\00\00\A0F\01\00\00\00\00\00\E0B\01\00\00\00\00\00=\05=\00@\008\00\03\00@\00\0F\00\01\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00.text._Z11srad_cuda_2PfS_S_S_S_S_iiff\00.nv.info._Z11srad_cuda_2PfS_S_S_S_S_iiff\00.nv.shared._Z11srad_cuda_2PfS_S_S_S_S_iiff\00.nv.global\00.nv.constant0._Z11srad_cuda_2PfS_S_S_S_S_iiff\00.text._Z11srad_cuda_1PfS_S_S_S_S_iif\00.nv.info._Z11srad_cuda_1PfS_S_S_S_S_iif\00.nv.shared._Z11srad_cuda_1PfS_S_S_S_S_iif\00.nv.constant2._Z11srad_cuda_1PfS_S_S_S_S_iif\00.nv.constant0._Z11srad_cuda_1PfS_S_S_S_S_iif\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00_Z11srad_cuda_2PfS_S_S_S_S_iiff\00.text._Z11srad_cuda_2PfS_S_S_S_S_iiff\00.nv.info._Z11srad_cuda_2PfS_S_S_S_S_iiff\00.nv.shared._Z11srad_cuda_2PfS_S_S_S_S_iiff\00.nv.global\00blockIdx\00threadIdx\00gridDim\00$___ZZ11srad_cuda_2PfS_S_S_S_S_iiffE7south_c__1225\00$___ZZ11srad_cuda_2PfS_S_S_S_S_iiffE6east_c__1227\00$___ZZ11srad_cuda_2PfS_S_S_S_S_iiffE11c_cuda_temp__1229\00$___ZZ11srad_cuda_2PfS_S_S_S_S_iiffE13c_cuda_result__1231\00$___ZZ11srad_cuda_2PfS_S_S_S_S_iiffE4temp__1233\00.nv.constant0._Z11srad_cuda_2PfS_S_S_S_S_iiff\00_param\00_Z11srad_cuda_1PfS_S_S_S_S_iif\00.text._Z11srad_cuda_1PfS_S_S_S_S_iif\00.nv.info._Z11srad_cuda_1PfS_S_S_S_S_iif\00.nv.shared._Z11srad_cuda_1PfS_S_S_S_S_iif\00.nv.constant2._Z11srad_cuda_1PfS_S_S_S_S_iif\00__ocg_const\00$_Z11srad_cuda_1PfS_S_S_S_S_iif$__cuda_sm20_dblrcp_rn_slowpath_v3\00$_Z11srad_cuda_1PfS_S_S_S_S_iif$__cuda_sm3x_div_rn_noftz_f32\00$_Z11srad_cuda_1PfS_S_S_S_S_iif$__cuda_sm3x_div_rn_noftz_f32_slowpath\00$___ZZ11srad_cuda_1PfS_S_S_S_S_iifE4temp__199\00$___ZZ11srad_cuda_1PfS_S_S_S_S_iifE11temp_result__201\00$___ZZ11srad_cuda_1PfS_S_S_S_S_iifE5north__203\00$___ZZ11srad_cuda_1PfS_S_S_S_S_iifE5south__205\00$___ZZ11srad_cuda_1PfS_S_S_S_S_iifE4east__207\00$___ZZ11srad_cuda_1PfS_S_S_S_S_iifE4west__209\00.nv.constant0._Z11srad_cuda_1PfS_S_S_S_S_iif\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00R\00\00\00\03\00\0A\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\A1\00\00\00\03\00\0C\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\CC\00\00\00\03\00\0D\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\D7\00\00\00\01\00\0D\00\01\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\E0\00\00\00\01\00\0D\00\02\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\EA\00\00\00\01\00\0D\00\00\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\F9\01\00\00\03\00\07\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00M\02\00\00\03\00\0B\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\9A\02\00\00\03\00\0E\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\C4\02\00\00\03\00\08\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\FD\02\00\00\22\00\0B\00\90\CE\00\00\00\00\00\00H\05\00\00\00\00\00\00?\03\00\00\22\00\0B\00\D8\D3\00\00\00\00\00\00`\01\00\00\00\00\00\00|\03\00\00\22\00\0B\008\D5\00\00\00\00\00\00H\08\00\00\00\00\00\00\E0\04\00\00\03\00\09\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\002\00\00\00\12\10\0A\00\00\00\00\00\00\00\00\00@W\00\00\00\00\00\00.\02\00\00\12\10\0B\00\00\00\00\00\00\00\00\00\80\DD\00\00\00\00\00\00\04/\08\00\10\00\00\00\15\00\00\00\04#\08\00\0D\00\00\00\00\00\00\00\04\12\08\00\0D\00\00\00\00\00\00\00\04\11\08\00\0D\00\00\00\00\00\00\00\04#\08\00\0C\00\00\00\00\00\00\00\04\12\08\00\0C\00\00\00\00\00\00\00\04\11\08\00\0C\00\00\00\00\00\00\00\04#\08\00\0B\00\00\00\00\00\00\00\04\12\08\00\0B\00\00\00\00\00\00\00\04\11\08\00\0B\00\00\00\00\00\00\00\04#\08\00\10\00\00\00\00\00\00\00\04\12\08\00\10\00\00\00\90\00\00\00\04\11\08\00\10\00\00\00\90\00\00\00\04/\08\00\0F\00\00\00\16\00\00\00\04#\08\00\0F\00\00\00\00\00\00\00\04\12\08\00\0F\00\00\00x\00\00\00\04\11\08\00\0F\00\00\00x\00\00\00\010\00\00\01*\00\00\04\0A\08\00\07\00\00\00@\01@\00\03\19@\00\04\17\0C\00\00\00\00\00\09\00<\00\00\F0\11\00\04\17\0C\00\00\00\00\00\08\008\00\00\F0\11\00\04\17\0C\00\00\00\00\00\07\004\00\00\F0\11\00\04\17\0C\00\00\00\00\00\06\000\00\00\F0\11\00\04\17\0C\00\00\00\00\00\05\00(\00\00\F0!\00\04\17\0C\00\00\00\00\00\04\00 \00\00\F0!\00\04\17\0C\00\00\00\00\00\03\00\18\00\00\F0!\00\04\17\0C\00\00\00\00\00\02\00\10\00\00\F0!\00\04\17\0C\00\00\00\00\00\01\00\08\00\00\F0!\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0!\00\03\1B\FF\00\04\1D\08\00\10\08\00\00\88\08\00\00\04\1C\04\00\10W\00\00\04\1E\04\00@\00\00\00\010\00\00\01*\00\00\04\0A\08\00\0E\00\00\00@\01<\00\03\19<\00\04\17\0C\00\00\00\00\00\08\008\00\00\F0\11\00\04\17\0C\00\00\00\00\00\07\004\00\00\F0\11\00\04\17\0C\00\00\00\00\00\06\000\00\00\F0\11\00\04\17\0C\00\00\00\00\00\05\00(\00\00\F0!\00\04\17\0C\00\00\00\00\00\04\00 \00\00\F0!\00\04\17\0C\00\00\00\00\00\03\00\18\00\00\F0!\00\04\17\0C\00\00\00\00\00\02\00\10\00\00\F0!\00\04\17\0C\00\00\00\00\00\01\00\08\00\00\F0!\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0!\00\03\1B\FF\00\04\1D\08\00\90\07\00\00\08\08\00\00\04\1C\04\00\88\CE\00\00\04\1E\04\00\80\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\02\04@\00\01\00\00\01\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\EF\1F\E0\FD\03!\00\D3rd<3>;\0A\0Amov.u2\00\1B,e\00b;\0Acvta\8D\00\04%\00\13,\\\00\22ld\C8\00\02\18\00nrd2, [\CE\00\1E])\00\1F1)\00\01a0];\0Ast#\00\81[%SP+0],,\00\0A\16\00\128\16\00\222;\B6\00\01\D8\00a1, 999(\00\02g\00\00\EF\00\18[\9E\01\03M\00\AF1;\0Aret;\0A\0A}\D6\01\1A\FE\02FuncGetAttributes\E1\01\0D#\00\0E\EC\01\0F+\00\06\0F\F7\01\1B\1F1\F7\01Q\1F1\F7\01!\0E\D9\00\0F\02\02\0F\0E8\01\0F\0D\02\8DhDevice\B4\00\0E\0E\02\0E$\00\0F\0F\02\00/32,\00\0B\1F1,\00\18\1F2<\02\13\1F2<\02\1F\1D4<\02\1F2<\02\0C\1F2<\02\13\01_\00\04;\02\0F\D9\00\07\1D]4\00\1F14\00\06\0Fp\02\10\0E\9A\01\0Fq\02\12(32q\02\0B\15\00!12\16\00\09\86\02\1F3\86\02\15\1F3\86\02#2Get\CB\00\0E}\02\05\1B\00\04\DA\00\0F\1C\02\13?3[8W\04.\0F\1B\02\0D\1F3W\04\19\04\B3\01\0D\D0\00\0F\AA\01\06\0F\05\04W\F0\04OccupancyMaxActiveB\82\08\FE\03sPerMultiprocessor\9F\01\0F;\00\16\0EB\06\0FC\00%\0EJ\04\0FC\00\1E\1F2\86\00/\1F3\88\02\13O4[32\89\02\1C\1D3\89\02\1F4\89\02\0C\1F4\89\02\19\133\89\02\0F\F1\00\1E\0F\BC\04\00\0FK\00$/2]w\07\00\0FL\00$\0F\1F\05\01\0F\98\00%\0F\A7\07\1D\097\05\186M\05\04,\00\2224-\00\183\CF\03\1F2\CF\03\15\1F2\CF\03L\9FWithFlags\D8\03(\05D\00\0E\E1\03\0FL\00'\0F\EA\030\0CL\00\1F2\98\008\1F3\98\008\1F4H\04\13O5[40\EC\08.\0FH\04\0D\1F5\EC\08\1C\0F\F9\00+\1F]\9C\040\0D\9A\01\0F\A5\040\0D:\02\0F\AE\041\0D\DB\02\0F\B7\041\0D|\03\0F\C0\04I\08-\00\1F3$\0A6\F0\09visible .entry _Z11srad_\08\01d_1PfS_\02\006iif\A7\04\00\A1\00\0F,\00\0B\0E\8F\04\0F4\00\16\1F14\00 \1F24\00 \1F34\00 \1F44\00 \1754\00/324\00\13\1F64\00 \1674\00\1Ff4\00\15\1F8\FF\04\13_6[144\00\05\15\A5pred %p<19\02\05\00\90\00{%f<166>&\056165&\00\00I\00^fd<10;\05\114e\10P\09.sha_\00\03\AB\00\124\AB\00\1FZ\EB\00\0A\CFE4temp[1024]D\00& 11E\00\7F_resultL\00,o5northE\00-?souE\00.O4eas\CE\00-64weD\00\0F\DE\06\08\1F6\DE\06\12\02*\02\00\8B\05\0F\C0\02\13\1E]\1A\07\0F<\00\15\1E7\AD\06\0Fx\00\16\1F6@\06\00\1F6=\00\15\1F5=\00\00\1F5=\00\15\0F\B8\07\01\1F4=\00\15\1F3\F5\07\01\0F=\00\15\0F\89\07\01\0Fm\01\16\0Fq\07\01\0Fn\01\16#0]<\02#tof\17\04C\00\117\1F\07\04[\02\0A\1C\00\118\1C\00\1F7;\00\05\119\1F\00\1F5;\00\02!10\1D\00\1F9<\00\05!11 \00\1F4=\00\03\122\F7\07\1F1>\00\06\143\D5\07\0F>\00\01\124>\00\1F3>\00\06\145@\08\0F>\00\01\126>\00\1F5>\00\06\147\94\08\0F>\00\01\023\01/17\C8\08\03\1F8\C9\08\02*16\17\00\03\CA\08?d14\CC\08\03*12\18\00\03\CD\08:d10\18\00\134w\00\1A8\FC\08\1F46\13\02\155\12\09Est.f\16\00\01\8B\00+f1(\09\8A%ctaid.xC\00\156\1F\09\06+\0E\154-\00\1By-\00\02\CD\00\184-\00\00\98\01\1FtX\00\00\02\B1\00\185+\00\136+\00\0BV\00\127\C6\00\116+\03\02*\00%7,\F7\00\07\16\00%8,\87\00\A1;\0Amul.lo.s\1A\00\229,5\00 %r8\01#hlh\07\02\D1\02G9, 4F\00\00\C6\02\04\FB\00\09-\00#2,\1E\00c4;\0Aadd[\00\02\B7\02\02J\00(12H\00%4,\C0\00\0B\8F\00%15\90\00\1A1M\00&6,S\00\185M\00%7,8\01\091\00&8,7\00\1B78\01\03=\0B\188H\00\1F9:\01\02/20;\01\06321,8\00\00'\00\08\11\01322, \00\08?\01/23?\01\03324,\1E\00\09?\01625,K\00\09H\00\1F6\F2\00\03627,7\00\00e\03$ub\1A\00%8, \00+19U\16\03\BC\10\08\0C\01\192\0C\01\06\AA\00\113\CC\01\1B2\17\02/31#\01\06532,7\00(314\00\0F\0C\01\04\113\0C\01\1C3\0C\01635,N\00\0A\1A\00&6, \00\180b\00\0F\18\02\04638,7\00\0B\18\02\128\A6\03\183\0C\01\1F3\18\02\03\1F4\18\02\07341,8\00\00'\00\08\DE\00\114\C1\00\1B4\18\02\1F4\0C\01\04\114\0C\01\1C4\0C\01645,K\00\09H\00\1F6W\03\06'47\93\00\196N\00&8,T\00\197\1A\00#9, \00\1C-\88\18\02\A2\04'49}\00/50'\01\02\1F5\1C\02\07352,8\00\00'\00\08\F9\00353, \00\08'\01/54'\01\03355,\1E\00\09'\01656,K\00\081\04/57'\01\06'58\93\00\09\0D\01659,T\00\188\1A\00360, \00*16'\01\139\9E\05\03\96\02\033\07%9,\C7\06\01\18\00\02F\00\15dh\04\158\DE\00\032\09$1, \00\132x\00\03\19\00$2,Q\00\01'\00\01N\00\02\0F\0B\00f\09\01\22\00\0Ac\00\193\F9\00%ov\95\09/4,\84\0C\14\03\06\08\02\F7\0B\05?\00\03a\08\1A4\B8\00$6,u\00\1A6\B8\00(7,6\00\196\A3\00\188\B5\03\08\06\01\03\E6\00\1C8\06\01\02\\\04\05U\00\179\D1\07\00\1D\00\01\D0\07\182\81\01/31\81\01\03\00]\04\04\05\04\09{\00\00\8D\04\03 \00\0B{\00$4,Q\00\01'\00\07\81\01\223,\80\00\1A4c\00\1F5\81\01\04\00\8E\04\0F\C0\0D\13\0F\81\01\02\2237\82\00\1A6\B8\00$8,u\00\0A\81\01\02\07\04\056\00\188\A3\00/40\81\01\04\02\D5\04-d4\87\02\02\89\04\04U\00)41\81\01\114\03\03'f3\7F\03\196\12\04rsetp.ne\7F\003p1,!\00\F2\0C0;\0A@%p1 bra LBB6_2;\0Abra.uni\10\0021;\0A\08\00\17:\DB\01+55\DB\01\03\13\09\1F79\04\03378,\1E\00\08g\04/798\06\03680,4\00\02\E9\0B\00\03\01\049\01\02\1B\04)805\01\00k\04\03\1C\00\0A5\01\02i\04\22d5\04\03'57;\02\125;\02)58;\02(59;\02\07\98\01\02\7F\04\01 \00\196T\02/61\D5\03*\126\D7\01)61\B8\00863,\1D\00\1D0\07\01$64 \01\08\A4\00$5,\1C\00\0A\07\01866,V\00\195<\02\126\8D\0B\1B5\FA\01\135\FA\01'2:T\02\192f\06\067\0B\00Y\00\15n\91\0B\07\B5\05#4,\1F\00,-1\85\02#2,R\00\00'\00\01\88\02\162\88\02\1B4\8E\00\133\8E\00\183\88\02/43\88\02\02(65#\07\07\A6\00\1F6\A6\00\05#7,\1F\00+-1\AA\06368,R\00\00'\00\08\D6\02369, \00\09\D6\02\1F0\04\03\04#1,\1E\00\09=\07572,K\00\1C7|\00$73|\00)155\00%4,;\00\187\F3\03/75S\03\03676,7\00\02\82\0E\00C\01\036\00\12d\00\09\1A7\D6\04\02\AA\08-d4L\02\00\FE\08\03|\01\01'\00\07S\03\124S\03)46S\03/47S\03\04\02\13\09\01 \00\0AS\03/49\A7\05*\02X\08:d49\0B\04(1,\1D\00\1E8Z\04\142 \01\08Z\04$3,\1C\00\0BZ\04(4,V\00\193S\03\115\E0\0A\1Cf\C5\02\134\C5\02/4:k\03\03\C95:\0Abar.sync \CD\08/67L\07\03\00~\02\04*\0A\0A\0A\04\02&\06\1D6\C7\07\00\82\02\03Q\00\01'\00\07\BE\01\126\BE\01)70c\00/71L\07\04\00\80\02\0F\83\14\12\0F\A4\01\02\02\DC\029d72\B7\00\02\A3\02\01t\00\0AK\07\00\DA\02\076\00\194\A2\00\1F6K\07\04\00r\06\03 \00\0B\05\01(8,U\00\197\BC\01\117u\0B(f6\80\01\1F7M\0A\04\00\AC\06\04\83\0A\09{\00\128\17\02\1D8\C6\07482,Q\00\01'\00\07\80\01\127\80\01\1B8c\00\0FM\0A\05?84,G\16\13\0F\80\01\02\128\FC\00\1A8L\0A\128\DC\03\1D8L\0A887,6\00\09L\0A\1F8L\0A\05\138\E5\00\0D\85\02890,U\00\0AL\0A\129L\0A\187V\11\0A\10\11\09\CB\08\01\12\02\158\CB\08\163C\06\1B7~\03\136f\03\186C\06/10D\06\03/96g\0D\02/97\E1\11\07#8,8\00\00'\00\08\FC\05\119\08\01\1B9*\06?100\9E\11\07&01M\009100p\11502,U\00=101\C8\042104\1B\00\0A\\\03E105,\1F\00\0A\D3\05E106,\0D\01\02+\00\07\98\02\129\98\02O106]l\00\00\157\A3\00\07\9C\02?108\1D\04)C109,G\00\0A\C3\00\03\FF\15\01v\00\0A\A1\02\131\FA\15\05;\00(10\A4\02?112\A5\02\04\131\F5\15-11\E3\05\141\F3\15\04\\\00\1A1\E6\05\2211\E7\05\1C94\0B\130j\02\177;\09(82\C4\02\06\95\08\178;\09\19x\08\0B\01}\00\1F8;\09\02#4,R\00\00'\00\01\F8\02\164\F8\02\0C\8F\00\138\8E\00\188\F8\02/91\F7\02\02/85\D7\14\03\1F6\F7\02\06\02\C5\03\02\19\04(86\F7\02\00\F8\03\02 \00\09\8E\15)89\F1\00\07'\03#0,\1F\00\09#\09\00\AA\00\05\1D\00\081\12/92@\03\06&93\95\00)92N\00&4,T\00\193\1A\00#5, \00--1#\09\129t\03\0A\1C\08\02f\00-d99\02\02e\00\12df\00(d9\B1\0E\128M\03.94L\03\159\AF\00\07J\03/96\E6\05)\2297\82\00\0A\DB\09\02\AA\04-d9\B1\0E\02T\04\056\00\09\B1\0E\06\AF\04\0DB\03\02\AD\04\02\22\00\0CY\04'2,Y\00\01\AE\04\08A\03\120\B7\0E\1B8\B2\02\139\B2\02/9:Y\03\04/10*\09\05?115\AB\07\03%11\B6\13\1C6\02\04%7,\22\00\0C\02\04%8,V\00\02+\00\07\CC\01#10\1A\05+18k\00\1F9\B4\07\04\101^\12\0F\16\1F\12\0F\18\05\03\03\A4\12:120\C2\00\04\A3\12-19\18\05\00\18\12\09;\00\192\AB\00/24\D6\01\05\038\12.12/\06\03s\12\06\\\00\0AQ\0E#12\DF\19,0;\A6\01\05w\00\0F\14\0C\05\121C\17\02\22\00\0C\CB\00\1B9\CB\00\0A\A1\02\1F3\A1\02\06\03:\12.13\A1\02\02\EB\16\06\\\00(31\E1\01\131\E1\01\01L\02\08\C1\1A\121\0E.9f11\D2\07\09\AC\09\0A\A8\08$5,\22\00\02\A9\08\165\B1\05,13A\06\05v\11(1:_\00\0A\AC\01\09_\00$6,\22\00\02_\00\1F6_\00\09\142_\00(2:6\13/61\95\01\04\133\88\10\01\22\00\1B6R\13\1F3\FF\10*\123\22\10\00H\00\0A<\13\03\CC\10\06 \00\09\B8\02/36\F5\0B\05\133\DB\0C-36\CE\03\123\CD\0F\07\\\00\0A\CE\03\03\9C\05\113S\00\07\18\00&9,\F5\01\02\EE\19%rn\22\02$0,<\00\01+\00\0A'\02\139\09\03(10'\02\0Ad\12\08{\1B$0, \00\0E\D7\09\123\D4\0D:160\81\01\1F7S\04*\123\99\10\00G\00\0AG\01\02_\10\02v\00\0B\D7\14\03\86\0D\06;\00\0A\9B\01/74\9B\01\05\03q\0D-37S\04\123\A3\10\07\\\00\195\88\03\03\89\03\113\BC\05\07\18\00\1F2\9B\01\09$3,<\00\01+\00\0B\C2\03#08\9C\01\1A3\BE\00\1F7<\16\05\1F7\D4\0A*$37\0E\16\1B8>\16%0,y\00\0Bi\01\03F\0E\05;\00)80\AB\00/82i\01\05\01U\0A\03\22\00\0BL\17\02W\0A\06\\\00)83Q\01\03\C3\11:384i\01\1F5i\01\09$6,<\00\01+\00\0Ci\01\130i\01\1A6\BE\00\1F5n\04\05\03\86\0E.38R\17*87{\02(86Y\03/61\A3\1E\04\02\C0\04\01 \00\0FY\03\00\138\C2\1E\1B2\87\00\04v\01\0E\0C\19\03\C1\0E\05\8F\00)89-\01\03\CA\0F:390E\01\1F8E\01\09$9,<\00\01+\00\0CE\01\134E\01\0C\DE\0B$38\CD\05\193,\06\1F5\8B\06\07$7,\22\00\02,\06\177,\06\1C6,\06\04\B4\12)14_\00\1F6\8B\06\07$8,\22\00!15`\00\1F8`\00\09\04\FC\12)15\8C\06\1F3\8C\06\06\03\B2\07.33\8C\06\1F3\8C\06,\04i\1A\1B3\8C\06\02\F6\1E\07 \00\0A\F1\04\1F3\8C\06\06\04\FB\19\1E3\8C\06\046\1A\05\\\00\08\8C\06#96B\02\193\8B\06/97A\02\07\01\D5\0B\019\00\00(\00\0F\87\06\02)98\86\06\1F5\86\06\04\02b\19\01 \00\0F-\03\00\02\80\1A+15{\01\1F4\86\06+\03\F6\1A+34\86\06\04\F5\1A\1E3\86\06\03\81\16\06;\00\0A\95\01\1F4\86\06\06\03\DA\16.34\86\06\03\DC\16\06\\\00\09\08\11\149m\1C\0A\85\06/00\D7\03\08\01\1C\0D\22f9\DD\03/00\84\06\04*01\BC\00\0F\1E\0B\05\1333\17.34-\06+49\10\01\0A\EE\01\0F\BE#\05\02\22\1B\01 \00\1F-\EF\01\00\135\93\08\0B\B5\06\03\F7\16-35R\0B\123G\17\06\90\00)51-\01\033\1F:352E\01\1F3E\01\09$4,<\00\01+\00\0Fa\06\04*04\05\1E\0F:\15\05/35;\15*\133N\1C;354\15\01%6,y\00\0B\9D\01\03\A1\1C\06;\00\0A\0C\07/58\AD\02\05\02\10!\02\22\00\0C\CE\0A*0,\\\00\09\85\06$05\CE\0A\0A\85\06/06i\01\09$7,<\00\01+\00\0F\85\06\04\1C0[\15$37\C6\05\196&\06\1F7\85\06\07\03\BF\14\157&\06\179&\06\0DR\13\05`\00\197`\00\1F8\86\06\07\03\0F'\00#\00\03a\00/10b\00\09\05G\07\188b\00/51%\05\05$2, \00\0F7\03\01\02\13\11*15\CD\11?302&\05*\03\9F\15\1B3\DA\15\123\F1\15\02v\00\0By\02\03\F5\15\06;\00\0A$\03\1F0\BB\06\06\02\D9\15\02\22\00\0C\0A\22*8,\\\00\08\BB\06\148B\0A\190\BB\06\1F8A\0A\08\01\F0\09\019\00\00(\00\0F\BB\06\02\0Ar\18?309\DB\03\05/10\E2!+\03\EB\15+31\89\06\03<,.30\89\06\046,\05;\00\0A\89\06\1F1\89\06\06\03U\12.31\89\06\03/,\06\\\00\08\89\06\148`\0A)16c\01\0F_\0A\08\01\CC\0A\019\00\00(\00\0F\86\06\03*89\B8\00\0F\85\06\06\03\15\13.31\85\06*19o\02\0A\FA)/53\85\06\05$4, \00\0FN\03\01\132\85\06\0Bp\05\03\08\12.32\85\06\03\11\13\05\90\00\09\B6%#90@\01\192\84\06/91^\09\08#2,9\00\00(\00\0F\80\06\03\1A9\F8\01\0F\06&\06/32\7F\06+\03\99\13,32\0F\01\034\02\1E2\7F\06\03\B6%\06;\00\0AO\03\0F\0E&\06\133\17\13\1F3\10&\00\03\11&\06\\\00\08\7F\06$93\C1\0A\09~\06/94c\01\08#5,9\00\00(\00\0Fz\06\03,95y\06\04\D5\1B)19\19\06\1F9y\06\07\02\A7\03\01#\00\04\19\06\08\00%\0C\01%\142t\16(20b\00/10{\06\08\03\A7-\150`%\1F2a\00\09\04\ED\13\192\ED\13/47z\06\04\02\03\0A/14\B1\09\03\122\1A\0F:148z(\1F7z\06*\132\97\11\1B2\B0\1F\132\B1\1F\1E2\B2\1F\132\ED\11\06;\00\09z\06\1F2\B6\1F\06\132\B7\1F\1E2\B8\1F\132\B9\1F\06\\\00\08z\06\147\8E)\197z\06\1F7Z\0A\08\01\BF\00\019\00\00(\00\0Fz\06\02\0C\B7\00\0Fz\06\05/28z\06+\132\91\11\1B2\DB&\1322 -27z\06\132\E7\11\06;\00\1A2\E4)\0Fz\06\05\132\C8\1F-28z\06\132}\11\06\\\00\08z\06#75c\01\198z\06/76c\01\08#7,9\00\00(\00\0Fz\06\03*77\B8\00\1F7d\01\06\0F\83\13*$28\DD*\1B8\F4*%0,y\00\0Bc\01\03\C4\1B\05;\00\199\83\13?292H+\06\03@\01\0E\1E\1C\132\1F\1C\05\\\00\09!\1C#78c\01)94c\01\0F\85\16\08\01B\14\019\00\00(\00\0F\9D\06\03\1B8\B8\00\0F}\13\05\122t .d2\1E\1C:297\D2\03\199}\13/49\DD\07\05\02\BE\07\1F4\D6\16\02#29\80\0E\0B\F6\02\032\1C/29\89,\00)0,\8F\00\189y\06$81\7F\0A\09y\06/82?\01\08#3,9\00\00(\00\0Fy\06\03,83y\06\05\1F)\192\17\06\1F1y\06\08\02T3\161w+\07\AD\22,24\17\06\04\F2(823:\BA\01/43\BA\01\05\02\C7'\02\22\00\1B6x.\1F5y.+\03\18\11:245\12\02\02\EF0\07 \00\0A-\05\1F4f.\06\03\1D\10-24\DF\01\132\A1'\06\\\00\09\A4&\03\BB\09)25\DF\01\1F6\BB\09\08\01\FC\15\019\00\00(\00\0F\E4\05\02\1A655\0F\F4\0D\05\02\8D\12/14F\16\02#25y\07\0A\F3\0D/25y\07+\03\80(+25y\07\03\7F(.25y\07\03d\10\06;\00\0A\95\01\1F5y\07\06\03I\10.25y\07\03H-\06\\\00\08y\07$63\95\01\09y\07/64\95\01\08#5,9\00\00(\00\0F\16\06\03*65\B8\00\0F\9A-\05\132\09\11.25\22\07*61\0C\01)60\EA\01\0F\90\0C\05\02K\03/14\90\0C\03\132O\18\1C4\891\04e\00\0E\F3\05\03\C6\1C\05\90\00(63)\01\03\EE\159264@\01\0F\EE\15\08\01\CA\1C\019\00\00(\00\0F\F3\05\03\196\1F\1F?265@\01\06%6,\22\00\0C@\01\1B7@\01\0A\F3\05\1F5@\01\05$6, \00\0F?\01\01\03\C2\0A\1B6\87\00\03\14\04\1E6\14\04\03\B6*\05\8F\00\0A\B8*\149\F8\09\09\14\04/70\95\08\08#1,9\00\00(\00\0F\F3\05\03,71\F3\05\04\0B\19\192\0B\19/12\0A\0C\08$4,#\00\04l\0C\07\A9%,26\F4\05\05U\06\185b\00\1F3\AA\17\05\02]7\01 \00\0F\A7\02\01\02`\10+13\0D\06/16\92\04*\03.#+21\BF\01\03\16\10.21F\02\05;5\04;\00\0A\9A\02/20\92\04\05\04\945\0F\955\01)2,\\\00\09\E2\0F\144E\09\0A\E2\0F\1F4E\09\08\01w\06\019\00\00(\00\0F'\06\02\195=\04/37\96\01\05$8, \00\0F\FD\02\00\024#;138\EA\00\03!\04\0F\C75\00*25>\01*24>\01\0F\D0\05\06\04\AC5\1E2\D0\05\03f#\05\\\00\192\1A3\141>\01\09\D0\05/52>\01\08#3,9\00\00(\00\0Fj\1F\03*53\B8\00\0F\D0\05\06\04k6\1E2\D0\05*31\0C\01\1A3\93\01\0F\83\0A\05\00\9E\1A\03 \00\0F)\03\01\133\D0\05\0B\84\0A\03\916.23\D0\05\03\0A\1C\05\90\00\09\956#54@\01)34@\01\0F\03\15\08\01;\18\019\00\00(\00\0F\D0\05\03\0A4\18\1F2\E56\06/234(*$23\\\01\0B\E1\03\03\B2\1C.23\E1\03\03\956\06;\00\0A\E1\03\0F\ED6\06\132\AD\1A.24\E1\03\03\FB\1B\05\\\00(41L\01\03\03\15)24\E1\03/58c\01\08#9,9\00\00(\00\0F_\1F\03\1D5^\1F\05\86\0B\09\D9\18/13\E7\0B\08$5,#\00\04\F4\05\07\EC%,28\F4\05\04\DB\18\192\DB\18/29^\04\05\03\E7?\0F\CB\02\03\121\92 +13\90-/88x(*\03\FE\0E\1B19\0F\1319\0F\1E19\0F\131\DD\0E\05;\00\0A9\0F\1F19\0F\06\1319\0F\1E19\0F\1319\0F\05\\\00\099\0F\133F\09\1A19\0F\1F3F\09\08\01\D2\02\019\00\00(\00\0F4\1F\02\0A\CA\02/19u\03\05/19[:+\131\CE\0E\1B1\AC+\131\AD+\1E1\90\0F\141V+\05;\00\0Av\03\0F\B1+\06\132\B1+.20v\03\03\B1+\04\\\008201L\01\04j\09\1A0W\07\0Fj\09\08\01\E1\03\019\00\00(\00\0F\FF\1E\03*41\B8\00\0F\E2\0E\06\03\E8\1A.20%\07)05o\029204\19\06\0F\19$\05\03\FA\05\1F3\C7\1B\03#20a\09\0B}\0D\03\1C\1B.20Y\07\03\1C\1B\04\90\00\192\1C\1B$42@\01\09Y\07/43@\01\08#4,9\00\00(\00\0F\19\06\03\0Bv\0F\1F0Y\07\06\03?1.20Y\07*11@\01\0A\B2(/33@\01\05$4, \00\0F\81\0A\01\03^\01\0B(\0D\03\D5\1A.21X\07\03r1\04\8F\008213(\01\04~\14\191\E2\03/46?\01\08#7,9\00\00(\00\0F\CF\1E\03\1D4\CE\1E\04 +\192\0C\1E/14\E9\0B\08$6,#\00\03\DC\11\07\81+,30\F4\05\04\E1.\192\B6\18/23\F4\05\04\02\1D\0A/12\8F\0E\03\010*\01&\00\0A\92\04\1F6l.+\03\AB\0E+16l.\03\0C,\1E1n\0F\131\01\0F\06;\00\0Al.\1F6l.\06\03\91+.16l.\03\9D\0E\06\\\00\08\90\16#24\F4\05\196\90\16\1F2j\09\08\01&\1B\019\00\00(\00\0Fm\1E\02\192\E5\0E/25\96\01\05\02\F4\01\1F2\E5\0E\02\121\AB\0E;126\EA\00%8,\1F\00\0C>\01\1B9>\01\0A{\06\1F7\DF.\06\03\0D).17\DF.\03\C6+\05\\\00\197\BEB\03E\099172>\01\0FE\09\08\01\10\1C\019\00\00(\00\0F\CF\05\03*29\B8\00\0F\08\1D\05?174%6*\03\05\1A;174\0F\01%6,y\00\0Bc\01\03[\1A\06;\00\1A6\AB\00\1F8c\01\06\04@\01\1D8c\01\03\C0+\06\\\00\08\9C\10\143#2)80\95\08\0F\B0\14\08\01\93\06\019\00\00(\00\0F\F2\05\03\0A\81(/18-)\05\131=\1A-18'\14+18\AD\03)82\F6\02\0F0&\05\02:\0E/12\DE\0E\02#18\AA\04\0B(\0A\03p\1A.18\E0\03\03p\1A\05\8F\00\09p\1A$33?\01\09\E0\03/34?\01\08#5,9\00\00(\00\0FG\1E\03\1D3G\1E\04\84\1D\1A3\E5\1D\0F\FB\03\05\04\EF\05\0Fy\11\03#13\EA\17\0Ay\11\01-\07\0F\91\05(\03**\0CO\07\03\06\12\02v\00\0B\F0\02\03\90\0D\06;\00\0A\F0\02\1F3\F0\02\06\03u\0D.13\F0\02\02L\0F\07\\\00\09w&\03\E2\08*14v&\0F\DE'\09#4,9\00\00(\00\0F@1\03\0A\1DM\0Fy\11\05\03\8AL\1F1\9B\02\03\02k\0E+11\9B\02\03g\0E.14\22\03*43>\01\0Ab*/14\CF\06\06\03b*.14\EF\02\03*\19\05\\\00\09b*$15>\01\0Ab*\0F\B3'\09#7,9\00\00(\00\0F_*\04\09_\1D\1F1^*\06\03[ \02\22\00\0C\0C\01\1B9\0C\01\09\D9#/19.\04\05$0, \00\0F\BA\08\02\03\CD\0E\0B\CF6\03^*.15\D1\06\03^*\05\90\00\0A^*\03\F7\13\1B1]*\0F\F7\13\08\01\A1\00\22f1\BF3\0F<\1D\04\1A2\BB\0E/53@\01\05\03\95\19.15\8A\03*55@\01\195\D3\02/21@\01\05$2, \00\0F?\01\01\03\82\08\0B\E9=\03\C8\19\1E1\C8\19\03\EE+\06\8F\00\09\C8\19$21?\01\09\9F\0C/22?\01\08#3,9\00\00(\00\0F<\1D\03/23n\05\06/1:z\0B\04?32:\89\11\04?33:\97\17\04?34:\A4\1D\04?35:7$\04?36:\CA*\04?37:i1\04838:\F1\04\05\03B\0A\B4\03\05\D3\01\00\86\03\02\9F<\06\B5\03\01\D7\01\02$\00\01\07\00X;\0Afma \00$3,\\\00\07\07\00\192`\00&4,\A6\02\0D@\00$5,$\00\07\07\00\09\EA3\057\16\00\A7\01\0D@\00$7,$\00\07\07\00\09\AB\04\0B&\0A\0B\E0\00$9,$\00\01\07\00W;\0Adiv \00\02\DE\08\05g\00\0Cx\03\131\F7\05)30D\06\1F1j\01\03)32j\01\00\EF\02\07j\00$3,<\00\01+\00\099\00\0AC\01\0B9\00(5,@\00\1949\00\0A<\01\0B9\00(7,@\00\1969\00\0A5\01\0B\15\01(9,@\00\0C^\0B\132\E6.\0Ap\07\07|\11\04\9FA3f64\1E\00\12d\F6\00)402\00&1,R\00\0C\B9\01\02i\10#149\00\1D1R\00\03 \00\1629\00\116\1C\\#3,\1E\00h0dBFB0\01\00\06]\02\02+\00#4,\9B\00Y0d3FE+\00\01\1A\00\02aW\03\95\00\035\00\020\07\01'\00\18]\BCF\154\01\18\04>\11\05\85\00\1F4\BCF\04\134\C1!.42Q\03\03\102#42Q\03\0B\A6E\144\A6E\1A6\9D\00\192\DC\0A\08\9E\00\04@\06\0C\9D\00\1F8\9D\00\06\04\FF\00\0D>2\134\E4!#42v\01\0BQX\134RX\04C\18\04\9D\00\193\B2\0B\08\0AT\04\B9\03\0A\8BG?432\9D\00\05\03\DE!.43B\06\03\DE!#43\E2\01+33\9D\00\03v\0A\09\C6'\06\DF\18\02\88\0C\08\9D\00\04\F7\02+24\9E\00\1F6\9E\00\06%7,\22\00\0C\9E\00%8,V\00\02+\00\0B\9E\00\04\BC\03\09\B4\0C+65>\0D\05\9D\00\05\CD\06\0B\B7S/40\9D\00\05\04\A4X\0F\A5X\00\03O\13\144\A7X\0C\A8X\03\A9X\01\C7\19\0F\EEj\14\1B2\08\04\0E\EFj\0F-\00\09\0E\F0j\0F5\00\17\1F15\00!\1F25\00!\1F35\00!\1F45\00!\0E\F5j\0F5\00\10\1F65\00!\0E\F7j\0F5\00\10\1F85\00!\1F9-k\13\227[\10\0E\0F-k\1A\1C7,k,43+k-81*k\1D6)k?191)k\16\0D\E8\00!E7:\22._c\13j\0FH\00\1B\106\F7%\0FG\00/211c\1B\00\0E\C1k\0FM\00 \133M\00\0E\C4k\0FO\00!\09Ul\0F\F7j\08\1F7\F7j\18\0ENi\0F\8B\02\06\1E]4k\0F=\00\0E\0F5k\1A\0Eo\03\0F6k\18\0E\E1\03\0F7k\19\0ET\04\0F8k\19\0E\C7\04\0F9k\19\0E:\05\0F:k\19\0E\AD\05\0F;k\19\0E \06\0F\1F4,i\0C\1E6,i\0F\98\0C\16\0F/iV/72/i8\1F4hn\02\0A5f/415f\04\06dn/-1`i\02\02)%*42ci\1B7ci\137ci?7_1\DBf\02\04\12\02\0B\C3n\0B\DBf/44\A6\00\05\02qn\1F4\DBf\00$46\F7n\00'\00\09\11o#7, \00\09?o\1F8\F8\04\03\06\C0n\0A\18n&0,K\00\1C9\01n\141|\00\0A\DBf\0C\ACn\06Mn\0F\1C-\04\02s#\037\00\1F3\DBf\04/54\DBf.\1F5\DBf\0C\1F6\DBf\16\0FV\03\18\0F\DEf?/53\DEf7\1B5\C8\02\132\C8\02\1F2\9C]\05/55\E6\04\03.56\C6f\0F\D7k&\1F6\D7k\0C\0B\E6\04\0E%/\0F6\11\07\0F\C0k\03\02#\01\0B&/(62.l\0F\D9k\00\036\00\09\CB\19.64|l\0F\D8k/\08b\0B/55\C0k\03/56\85b\04$57\8Fq\0F\C0k\04\03\15r\1A7\C0k\1B7\C0k\137\C0k\1A7\C0k/67\E5\04\02/58\E3r\03\1F9\C3\09\06\03\1Cr\02Vr\199\A6k\02\EBn\1B0\85b/62\85b\04$63\96l\1F4\C8l\03\00S\00\0Ell\0F\C5\09\01&66\95\00/65pl\00\02T\00\09\DF\09\03ol\1F7\85b\02\03\1D\00\1F8\EAi.\177\EAi\0B\85b$71\AF\00\0E\EBi\0F%\03\12\0F\EEiV/72\EEi8\1B7\AF\02\134\AF\02\1F4\CC\04\05/79\CC\04\03\1E8?\0B\0F\12j'\1F8\12j\0C\1E6\12j\0F\BB\15\1B\0F\1BjV/72\1Bj8\1F8Wb\05/91\8F\07\04\03\BD\1F\1D9j\02(93\BD\00\0AZ\1F\1F9\8F\05\05\035\1F\1D9\8F\05\03/\0F\C6\02\05\01\BE'\0F#\09+\07\81/\09\A2E\09~/\0C\E1.*7,;\00\0A+1/485/\06%9,\22\00\0Cn-*0,\\\00\08\85\05$23n-/0]x.\04\1D3\EB6\0E`\01\07x5\1A4\A7.\1F1a\01\05\1F5\CF\05-\02\DB'\01J\00\0F\01/\04\1F1\01/\06\05;\00\0B\AE\00\0E\D7\05\0F\CE.-\185\CE.\0F\99.\02\0C\C2\0A\141\F7\05\08\8B?/71\D5e\0C%714\03\1654\03\1B9\\\00\138[\00\09=.\1F84\03\0C/181w\01/761w\01\0F_M\02\02c-+r8\0F@/28*\062\06\92g\0B\F05/30\C5g\06+31qg\0A]1/32\08\02\05\03\F3\10\1E1\F4\10\03\86<\07\E8g\1D3\FE1\01&\00\0Fh\03\04\191\97\03\1F0h\03\0E\0B\B2\00\0FA\11\05\01'.\0Fh\03+\05d5\1B6\0C\01\168N2\0C`\01\1B9\9F5\0AF:\0FI\11\06\03 5.d1K\11\05$5\04\\\00/4162\01/42h\03\05\1C1h\03\04\0E\0C\08\FF\05\06/12\9F\06.\06sp\0B\89l\07pp\0C\DFp*5,;\00\0B\AF\00\0E?\05\0F\DDl-\185\DDl\0F7\03\02\1E5\A46\0F7\03\08/16$m\04\1E6$m\0F\9D\04\18\0F-mQ\0B\DF|\0B\A2\18\06\DF|\0F\80\05\01\02W\09\1F8\\m*'ld\EE7\04bm\0F\9C_\04\0C\AC\09$12\0F\03,1:\BBn\0F\0F\03\09\08\9C\19/73z\06\03\03Rz\0F\FDA\02\02}*\1A7\94@\1F9x\063\04\A8p\0BV\01\03\10-\1E9u\06*01\87p\0A\C2\05\1F0t\06\06\05\10u\0Et\06\03\8CD\06\FDp\1F39n\01\1F0t\06\06\1D16;\0F=\03\0A\0A=\0B/05\CCp\05\05\CAu\0E\1A\06*07\05\01\1F6:|\01/72:|\01\0F\E3@\03/088|\00\07\BCu\0Eq\04(0,\8A\00\01/m\0B$<\01&\00\0F\DC\02\05\1F3\DC\02\06/2:b\06\04\00\16i\0F\E4\09\03(14\BE\0C\1F6\1E)\03/15\1E)\04/16\E9\0F\05\079C\0E\CDB\07 -#include -#include -#include - -// includes, project -#include - -// includes, kernels -#include "srad_kernel.cu" - -void random_matrix(float *I, int rows, int cols); -void runTest(int argc, char **argv); -void usage(int argc, char **argv) { - fprintf(stderr, - "Usage: %s \n", - argv[0]); - fprintf(stderr, "\t - number of rows\n"); - fprintf(stderr, "\t - number of cols\n"); - fprintf(stderr, "\t - y1 value of the speckle\n"); - fprintf(stderr, "\t - y2 value of the speckle\n"); - fprintf(stderr, "\t - x1 value of the speckle\n"); - fprintf(stderr, "\t - x2 value of the speckle\n"); - fprintf(stderr, "\t - lambda (0,1)\n"); - fprintf(stderr, "\t - number of iterations\n"); - - exit(1); -} -//////////////////////////////////////////////////////////////////////////////// -// Program main -//////////////////////////////////////////////////////////////////////////////// -int main(int argc, char **argv) { - cudaSetDevice(0); - printf("WG size of kernel = %d X %d\n", BLOCK_SIZE, BLOCK_SIZE); - runTest(argc, argv); - - return EXIT_SUCCESS; -} - -void runTest(int argc, char **argv) { - int rows, cols, size_I, size_R, niter = 10, iter; - float *I, *J, lambda, q0sqr, sum, sum2, tmp, meanROI, varROI; - -#ifdef CPU - float Jc, G2, L, num, den, qsqr; - int *iN, *iS, *jE, *jW, k; - float *dN, *dS, *dW, *dE; - float cN, cS, cW, cE, D; -#endif - -#ifdef GPU - - float *J_cuda; - float *C_cuda; - float *E_C, *W_C, *N_C, *S_C; - -#endif - - unsigned int r1, r2, c1, c2; - float *c; - - if (argc == 9) { - rows = atoi(argv[1]); // number of rows in the domain - cols = atoi(argv[2]); // number of cols in the domain - if ((rows % 16 != 0) || (cols % 16 != 0)) { - fprintf(stderr, "rows and cols must be multiples of 16\n"); - exit(1); - } - r1 = atoi(argv[3]); // y1 position of the speckle - r2 = atoi(argv[4]); // y2 position of the speckle - c1 = atoi(argv[5]); // x1 position of the speckle - c2 = atoi(argv[6]); // x2 position of the speckle - lambda = atof(argv[7]); // Lambda value - niter = atoi(argv[8]); // number of iterations - - } else { - usage(argc, argv); - } - - size_I = cols * rows; - size_R = (r2 - r1 + 1) * (c2 - c1 + 1); - - I = (float *)malloc(size_I * sizeof(float)); - J = (float *)malloc(size_I * sizeof(float)); - c = (float *)malloc(sizeof(float) * size_I); - -#ifdef CPU - - iN = (int *)malloc(sizeof(unsigned int *) * rows); - iS = (int *)malloc(sizeof(unsigned int *) * rows); - jW = (int *)malloc(sizeof(unsigned int *) * cols); - jE = (int *)malloc(sizeof(unsigned int *) * cols); - - dN = (float *)malloc(sizeof(float) * size_I); - dS = (float *)malloc(sizeof(float) * size_I); - dW = (float *)malloc(sizeof(float) * size_I); - dE = (float *)malloc(sizeof(float) * size_I); - - for (int i = 0; i < rows; i++) { - iN[i] = i - 1; - iS[i] = i + 1; - } - for (int j = 0; j < cols; j++) { - jW[j] = j - 1; - jE[j] = j + 1; - } - iN[0] = 0; - iS[rows - 1] = rows - 1; - jW[0] = 0; - jE[cols - 1] = cols - 1; - -#endif - -#ifdef GPU - - // Allocate device memory - cudaMalloc((void **)&J_cuda, sizeof(float) * size_I); - cudaMalloc((void **)&C_cuda, sizeof(float) * size_I); - cudaMalloc((void **)&E_C, sizeof(float) * size_I); - cudaMalloc((void **)&W_C, sizeof(float) * size_I); - cudaMalloc((void **)&S_C, sizeof(float) * size_I); - cudaMalloc((void **)&N_C, sizeof(float) * size_I); - -#endif - - printf("Randomizing the input matrix\n"); - // Generate a random matrix - random_matrix(I, rows, cols); - - for (int k = 0; k < size_I; k++) { - J[k] = (float)exp(I[k]); - } - printf("Start the SRAD main loop\n"); - for (iter = 0; iter < niter; iter++) { - sum = 0; - sum2 = 0; - for (int i = r1; i <= r2; i++) { - for (int j = c1; j <= c2; j++) { - tmp = J[i * cols + j]; - sum += tmp; - sum2 += tmp * tmp; - } - } - meanROI = sum / size_R; - varROI = (sum2 / size_R) - meanROI * meanROI; - q0sqr = varROI / (meanROI * meanROI); - -#ifdef CPU - - for (int i = 0; i < rows; i++) { - for (int j = 0; j < cols; j++) { - - k = i * cols + j; - Jc = J[k]; - - // directional derivates - dN[k] = J[iN[i] * cols + j] - Jc; - dS[k] = J[iS[i] * cols + j] - Jc; - dW[k] = J[i * cols + jW[j]] - Jc; - dE[k] = J[i * cols + jE[j]] - Jc; - - G2 = (dN[k] * dN[k] + dS[k] * dS[k] + dW[k] * dW[k] + dE[k] * dE[k]) / - (Jc * Jc); - - L = (dN[k] + dS[k] + dW[k] + dE[k]) / Jc; - - num = (0.5 * G2) - ((1.0 / 16.0) * (L * L)); - den = 1 + (.25 * L); - qsqr = num / (den * den); - - // diffusion coefficent (equ 33) - den = (qsqr - q0sqr) / (q0sqr * (1 + q0sqr)); - c[k] = 1.0 / (1.0 + den); - - // saturate diffusion coefficent - if (c[k] < 0) { - c[k] = 0; - } else if (c[k] > 1) { - c[k] = 1; - } - } - } - for (int i = 0; i < rows; i++) { - for (int j = 0; j < cols; j++) { - - // current index - k = i * cols + j; - - // diffusion coefficent - cN = c[k]; - cS = c[iS[i] * cols + j]; - cW = c[k]; - cE = c[i * cols + jE[j]]; - - // divergence (equ 58) - D = cN * dN[k] + cS * dS[k] + cW * dW[k] + cE * dE[k]; - - // image update (equ 61) - J[k] = J[k] + 0.25 * lambda * D; - } - } - -#endif // CPU - -#ifdef GPU - - // Currently the input size must be divided by 16 - the block size - int block_x = cols / BLOCK_SIZE; - int block_y = rows / BLOCK_SIZE; - - dim3 dimBlock(BLOCK_SIZE, BLOCK_SIZE); - dim3 dimGrid(block_x, block_y); - - // Copy data from main memory to device memory - cudaMemcpy(J_cuda, J, sizeof(float) * size_I, cudaMemcpyHostToDevice); - - // Run kernels - srad_cuda_1<<>>(E_C, W_C, N_C, S_C, J_cuda, C_cuda, cols, - rows, q0sqr); - cudaThreadSynchronize(); - srad_cuda_2<<>>(E_C, W_C, N_C, S_C, J_cuda, C_cuda, cols, - rows, lambda, q0sqr); - cudaThreadSynchronize(); - - // Copy data from device memory to main memory - cudaMemcpy(J, J_cuda, sizeof(float) * size_I, cudaMemcpyDeviceToHost); - -#endif - } - - cudaThreadSynchronize(); - - //#ifdef OUTPUT - // Printing output - printf("Printing Output:\n"); - for (int i = 0; i < 20; i++) { - for (int j = 0; j < 20; j++) { - printf("%.5f ", J[i * cols + j]); - } - printf("\n"); - } - //#endif - - printf("Computation Done\n"); - - free(I); - free(J); -#ifdef CPU - free(iN); - free(iS); - free(jW); - free(jE); - free(dN); - free(dS); - free(dW); - free(dE); -#endif -#ifdef GPU - cudaFree(C_cuda); - cudaFree(J_cuda); - cudaFree(E_C); - cudaFree(W_C); - cudaFree(N_C); - cudaFree(S_C); -#endif - free(c); -} - -void random_matrix(float *I, int rows, int cols) { - - srand(7); - - for (int i = 0; i < rows; i++) { - for (int j = 0; j < cols; j++) { - I[i * cols + j] = rand() / (float)RAND_MAX; - } - } -} diff --git a/examples/srad_v2/srad.h b/examples/srad_v2/srad.h deleted file mode 100644 index 499c144..0000000 --- a/examples/srad_v2/srad.h +++ /dev/null @@ -1,15 +0,0 @@ -#define STR_SIZE 256 - -#ifdef RD_WG_SIZE_0_0 -#define BLOCK_SIZE RD_WG_SIZE_0_0 -#elif defined(RD_WG_SIZE_0) -#define BLOCK_SIZE RD_WG_SIZE_0 -#elif defined(RD_WG_SIZE) -#define BLOCK_SIZE RD_WG_SIZE -#else -#define BLOCK_SIZE 16 -#endif - -#define GPU -#define TIMER -//#define OUTPUT diff --git a/examples/srad_v2/srad_kernel.cu b/examples/srad_v2/srad_kernel.cu deleted file mode 100644 index d124fd7..0000000 --- a/examples/srad_v2/srad_kernel.cu +++ /dev/null @@ -1,257 +0,0 @@ -#include "srad.h" -#include - -__global__ void -srad_cuda_1( - float *E_C, - float *W_C, - float *N_C, - float *S_C, - float * J_cuda, - float * C_cuda, - int cols, - int rows, - float q0sqr -) -{ - - //block id - int bx = blockIdx.x; - int by = blockIdx.y; - - //thread id - int tx = threadIdx.x; - int ty = threadIdx.y; - - //indices - int index = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty + tx; - int index_n = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + tx - cols; - int index_s = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * BLOCK_SIZE + tx; - int index_w = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty - 1; - int index_e = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty + BLOCK_SIZE; - - float n, w, e, s, jc, g2, l, num, den, qsqr, c; - - //shared memory allocation - __shared__ float temp[BLOCK_SIZE][BLOCK_SIZE]; - __shared__ float temp_result[BLOCK_SIZE][BLOCK_SIZE]; - - __shared__ float north[BLOCK_SIZE][BLOCK_SIZE]; - __shared__ float south[BLOCK_SIZE][BLOCK_SIZE]; - __shared__ float east[BLOCK_SIZE][BLOCK_SIZE]; - __shared__ float west[BLOCK_SIZE][BLOCK_SIZE]; - - //load data to shared memory - north[ty][tx] = J_cuda[index_n]; - south[ty][tx] = J_cuda[index_s]; - if ( by == 0 ){ - north[ty][tx] = J_cuda[BLOCK_SIZE * bx + tx]; - } - else if ( by == gridDim.y - 1 ){ - south[ty][tx] = J_cuda[cols * BLOCK_SIZE * (gridDim.y - 1) + BLOCK_SIZE * bx + cols * ( BLOCK_SIZE - 1 ) + tx]; - } - __syncthreads(); - - west[ty][tx] = J_cuda[index_w]; - east[ty][tx] = J_cuda[index_e]; - - if ( bx == 0 ){ - west[ty][tx] = J_cuda[cols * BLOCK_SIZE * by + cols * ty]; - } - else if ( bx == gridDim.x - 1 ){ - east[ty][tx] = J_cuda[cols * BLOCK_SIZE * by + BLOCK_SIZE * ( gridDim.x - 1) + cols * ty + BLOCK_SIZE-1]; - } - - __syncthreads(); - - - - temp[ty][tx] = J_cuda[index]; - - __syncthreads(); - - jc = temp[ty][tx]; - - if ( ty == 0 && tx == 0 ){ //nw - n = north[ty][tx] - jc; - s = temp[ty+1][tx] - jc; - w = west[ty][tx] - jc; - e = temp[ty][tx+1] - jc; - } - else if ( ty == 0 && tx == BLOCK_SIZE-1 ){ //ne - n = north[ty][tx] - jc; - s = temp[ty+1][tx] - jc; - w = temp[ty][tx-1] - jc; - e = east[ty][tx] - jc; - } - else if ( ty == BLOCK_SIZE -1 && tx == BLOCK_SIZE - 1){ //se - n = temp[ty-1][tx] - jc; - s = south[ty][tx] - jc; - w = temp[ty][tx-1] - jc; - e = east[ty][tx] - jc; - } - else if ( ty == BLOCK_SIZE -1 && tx == 0 ){//sw - n = temp[ty-1][tx] - jc; - s = south[ty][tx] - jc; - w = west[ty][tx] - jc; - e = temp[ty][tx+1] - jc; - } - - else if ( ty == 0 ){ //n - n = north[ty][tx] - jc; - s = temp[ty+1][tx] - jc; - w = temp[ty][tx-1] - jc; - e = temp[ty][tx+1] - jc; - } - else if ( tx == BLOCK_SIZE -1 ){ //e - n = temp[ty-1][tx] - jc; - s = temp[ty+1][tx] - jc; - w = temp[ty][tx-1] - jc; - e = east[ty][tx] - jc; - } - else if ( ty == BLOCK_SIZE -1){ //s - n = temp[ty-1][tx] - jc; - s = south[ty][tx] - jc; - w = temp[ty][tx-1] - jc; - e = temp[ty][tx+1] - jc; - } - else if ( tx == 0 ){ //w - n = temp[ty-1][tx] - jc; - s = temp[ty+1][tx] - jc; - w = west[ty][tx] - jc; - e = temp[ty][tx+1] - jc; - } - else{ //the data elements which are not on the borders - n = temp[ty-1][tx] - jc; - s = temp[ty+1][tx] - jc; - w = temp[ty][tx-1] - jc; - e = temp[ty][tx+1] - jc; - } - - - g2 = ( n * n + s * s + w * w + e * e ) / (jc * jc); - - l = ( n + s + w + e ) / jc; - - num = (0.5*g2) - ((1.0/16.0)*(l*l)) ; - den = 1 + (.25*l); - qsqr = num/(den*den); - - // diffusion coefficent (equ 33) - den = (qsqr-q0sqr) / (q0sqr * (1+q0sqr)) ; - c = 1.0 / (1.0+den) ; - - // saturate diffusion coefficent - if (c < 0){temp_result[ty][tx] = 0;} - else if (c > 1) {temp_result[ty][tx] = 1;} - else {temp_result[ty][tx] = c;} - - __syncthreads(); - - C_cuda[index] = temp_result[ty][tx]; - E_C[index] = e; - W_C[index] = w; - S_C[index] = s; - N_C[index] = n; - -} - -__global__ void -srad_cuda_2( - float *E_C, - float *W_C, - float *N_C, - float *S_C, - float * J_cuda, - float * C_cuda, - int cols, - int rows, - float lambda, - float q0sqr -) -{ - //block id - int bx = blockIdx.x; - int by = blockIdx.y; - - //thread id - int tx = threadIdx.x; - int ty = threadIdx.y; - - //indices - int index = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty + tx; - int index_s = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * BLOCK_SIZE + tx; - int index_e = cols * BLOCK_SIZE * by + BLOCK_SIZE * bx + cols * ty + BLOCK_SIZE; - float cc, cn, cs, ce, cw, d_sum; - - //shared memory allocation - __shared__ float south_c[BLOCK_SIZE][BLOCK_SIZE]; - __shared__ float east_c[BLOCK_SIZE][BLOCK_SIZE]; - - __shared__ float c_cuda_temp[BLOCK_SIZE][BLOCK_SIZE]; - __shared__ float c_cuda_result[BLOCK_SIZE][BLOCK_SIZE]; - __shared__ float temp[BLOCK_SIZE][BLOCK_SIZE]; - - //load data to shared memory - temp[ty][tx] = J_cuda[index]; - - __syncthreads(); - - south_c[ty][tx] = C_cuda[index_s]; - - if ( by == gridDim.y - 1 ){ - south_c[ty][tx] = C_cuda[cols * BLOCK_SIZE * (gridDim.y - 1) + BLOCK_SIZE * bx + cols * ( BLOCK_SIZE - 1 ) + tx]; - } - __syncthreads(); - - - east_c[ty][tx] = C_cuda[index_e]; - - if ( bx == gridDim.x - 1 ){ - east_c[ty][tx] = C_cuda[cols * BLOCK_SIZE * by + BLOCK_SIZE * ( gridDim.x - 1) + cols * ty + BLOCK_SIZE-1]; - } - - __syncthreads(); - - c_cuda_temp[ty][tx] = C_cuda[index]; - - __syncthreads(); - - cc = c_cuda_temp[ty][tx]; - - if ( ty == BLOCK_SIZE -1 && tx == BLOCK_SIZE - 1){ //se - cn = cc; - cs = south_c[ty][tx]; - cw = cc; - ce = east_c[ty][tx]; - } - else if ( tx == BLOCK_SIZE -1 ){ //e - cn = cc; - cs = c_cuda_temp[ty+1][tx]; - cw = cc; - ce = east_c[ty][tx]; - } - else if ( ty == BLOCK_SIZE -1){ //s - cn = cc; - cs = south_c[ty][tx]; - cw = cc; - ce = c_cuda_temp[ty][tx+1]; - } - else{ //the data elements which are not on the borders - cn = cc; - cs = c_cuda_temp[ty+1][tx]; - cw = cc; - ce = c_cuda_temp[ty][tx+1]; - } - - // divergence (equ 58) - d_sum = cn * N_C[index] + cs * S_C[index] + cw * W_C[index] + ce * E_C[index]; - - // image update (equ 61) - c_cuda_result[ty][tx] = temp[ty][tx] + 0.25 * lambda * d_sum; - - __syncthreads(); - - J_cuda[index] = c_cuda_result[ty][tx]; - -} diff --git a/examples/streamcluster/run.sh b/examples/streamcluster/run.sh deleted file mode 100644 index 0e4db2e..0000000 --- a/examples/streamcluster/run.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash -set -e -llvm-as streamcluster_cuda_cpu-cuda-nvptx64-nvidia-cuda-sm_61.ll -llvm-as streamcluster_cuda_cpu-host-x86_64-unknown-linux-gnu.ll -../../build/compilation/kernelTranslator streamcluster_cuda_cpu-cuda-nvptx64-nvidia-cuda-sm_61.bc kernel.bc -../../build/compilation/hostTranslator streamcluster_cuda_cpu-host-x86_64-unknown-linux-gnu.bc host.bc -llc --relocation-model=pic --filetype=obj kernel.bc -llc --relocation-model=pic --filetype=obj host.bc - -g++ -Wall -L../../build/runtime -L../../build/runtime/threadPool -o sc_gpu -fPIC -no-pie host.o kernel.o -lc -lx86Runtime -lthreadPool -lpthread -export LD_LIBRARY_PATH=../../build/runtime:../../build/runtime/threadPool:$LD_LIBRARY_PATH -./sc_gpu 10 20 256 32 32 1000 none output.txt 1 -if grep -q -e "0.966199 0.918044 0.348125" output.txt; then - echo "Pass" -else - echo "Error result" - exit 1 -fi diff --git a/examples/streamcluster/streamcluster_cuda.cu b/examples/streamcluster/streamcluster_cuda.cu deleted file mode 100644 index 42465da..0000000 --- a/examples/streamcluster/streamcluster_cuda.cu +++ /dev/null @@ -1,363 +0,0 @@ -/*********************************************** - streamcluster_cuda.cu - : parallelized code of streamcluster - - - original code from PARSEC Benchmark Suite - - parallelization with CUDA API has been applied by - - Shawn Sang-Ha Lee - sl4ge@virginia.edu - University of Virginia - Department of Electrical and Computer Engineering - Department of Computer Science - -***********************************************/ -#include "streamcluster_header.h" - -using namespace std; - -// AUTO-ERROR CHECK FOR ALL CUDA FUNCTIONS -#define CUDA_SAFE_CALL(call) \ - do { \ - cudaError err = call; \ - if (cudaSuccess != err) { \ - fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", __FILE__, \ - __LINE__, cudaGetErrorString(err)); \ - exit(EXIT_FAILURE); \ - } \ - } while (0) - -#define THREADS_PER_BLOCK 512 -#define MAXBLOCKS 65536 - -// host memory -float *work_mem_h; -float *coord_h; - -// device memory -float *work_mem_d; -float *coord_d; -int *center_table_d; -bool *switch_membership_d; -Point *p; - -static int iter = 0; // counter for total# of iteration - -//======================================= -// Euclidean Distance -//======================================= -__device__ float d_dist(int p1, int p2, int num, int dim, float *coord_d) { - float retval = 0.0; - for (int i = 0; i < dim; i++) { - float tmp = coord_d[(i * num) + p1] - coord_d[(i * num) + p2]; - retval += tmp * tmp; - } - return retval; -} - -//======================================= -// Kernel - Compute Cost -//======================================= -__global__ void kernel_compute_cost(int num, int dim, long x, Point *p, int K, - int stride, float *coord_d, - float *work_mem_d, int *center_table_d, - bool *switch_membership_d) { - // block ID and global thread ID - const int bid = blockIdx.x + gridDim.x * blockIdx.y; - const int tid = blockDim.x * bid + threadIdx.x; - - if (tid < num) { - float *lower = &work_mem_d[tid * stride]; - - // cost between this point and point[x]: euclidean distance multiplied by - // weight - float x_cost = d_dist(tid, x, num, dim, coord_d) * p[tid].weight; - - // if computed cost is less then original (it saves), mark it as to reassign - if (x_cost < p[tid].cost) { - switch_membership_d[tid] = 1; - lower[K] += x_cost - p[tid].cost; - } - // if computed cost is larger, save the difference - else { - lower[center_table_d[p[tid].assign]] += p[tid].cost - x_cost; - } - } -} - -//======================================= -// Allocate Device Memory -//======================================= -void allocDevMem(int num, int dim) { - CUDA_SAFE_CALL(cudaMalloc((void **)¢er_table_d, num * sizeof(int))); - CUDA_SAFE_CALL(cudaMalloc((void **)&switch_membership_d, num * sizeof(bool))); - CUDA_SAFE_CALL(cudaMalloc((void **)&p, num * sizeof(Point))); - CUDA_SAFE_CALL(cudaMalloc((void **)&coord_d, num * dim * sizeof(float))); -} - -//======================================= -// Allocate Host Memory -//======================================= -void allocHostMem(int num, int dim) { - coord_h = (float *)malloc(num * dim * sizeof(float)); -} - -//======================================= -// Free Device Memory -//======================================= -void freeDevMem() { - CUDA_SAFE_CALL(cudaFree(center_table_d)); - CUDA_SAFE_CALL(cudaFree(switch_membership_d)); - CUDA_SAFE_CALL(cudaFree(p)); - CUDA_SAFE_CALL(cudaFree(coord_d)); -} - -//======================================= -// Free Host Memory -//======================================= -void freeHostMem() { free(coord_h); } - -//======================================= -// pgain Entry - CUDA SETUP + CUDA CALL -//======================================= -float pgain(long x, Points *points, float z, long int *numcenters, int kmax, - bool *is_center, int *center_table, bool *switch_membership, - bool isCoordChanged, double *serial_t, double *cpu_to_gpu_t, - double *gpu_to_cpu_t, double *alloc_t, double *kernel_t, - double *free_t) { -#ifdef CUDATIME - float tmp_t; - cudaEvent_t start, stop; - cudaEventCreate(&start); - cudaEventCreate(&stop); - - cudaEventRecord(start, 0); -#endif - - cudaError_t error; - - int stride = *numcenters + 1; // size of each work_mem segment - int K = *numcenters; // number of centers - int num = points->num; // number of points - int dim = points->dim; // number of dimension - int nThread = num; // number of threads == number of data points - - //========================================= - // ALLOCATE HOST MEMORY + DATA PREPARATION - //========================================= - work_mem_h = (float *)malloc(stride * (nThread + 1) * sizeof(float)); - // Only on the first iteration - if (iter == 0) { - allocHostMem(num, dim); - } - - // build center-index table - int count = 0; - for (int i = 0; i < num; i++) { - if (is_center[i]) { - center_table[i] = count++; - } - } - - // Extract 'coord' - // Only if first iteration OR coord has changed - if (isCoordChanged || iter == 0) { - for (int i = 0; i < dim; i++) { - for (int j = 0; j < num; j++) { - coord_h[(num * i) + j] = points->p[j].coord[i]; - } - } - } - -#ifdef CUDATIME - cudaEventRecord(stop, 0); - cudaEventSynchronize(stop); - cudaEventElapsedTime(&tmp_t, start, stop); - *serial_t += (double)tmp_t; - - cudaEventRecord(start, 0); -#endif - - //======================================= - // ALLOCATE GPU MEMORY - //======================================= - CUDA_SAFE_CALL( - cudaMalloc((void **)&work_mem_d, stride * (nThread + 1) * sizeof(float))); - // Only on the first iteration - if (iter == 0) { - allocDevMem(num, dim); - } - -#ifdef CUDATIME - cudaEventRecord(stop, 0); - cudaEventSynchronize(stop); - cudaEventElapsedTime(&tmp_t, start, stop); - *alloc_t += (double)tmp_t; - - cudaEventRecord(start, 0); -#endif - - //======================================= - // CPU-TO-GPU MEMORY COPY - //======================================= - // Only if first iteration OR coord has changed - if (isCoordChanged || iter == 0) { - CUDA_SAFE_CALL(cudaMemcpy(coord_d, coord_h, num * dim * sizeof(float), - cudaMemcpyHostToDevice)); - } - CUDA_SAFE_CALL(cudaMemcpy(center_table_d, center_table, num * sizeof(int), - cudaMemcpyHostToDevice)); - CUDA_SAFE_CALL( - cudaMemcpy(p, points->p, num * sizeof(Point), cudaMemcpyHostToDevice)); - - CUDA_SAFE_CALL( - cudaMemset((void *)switch_membership_d, 0, num * sizeof(bool))); - CUDA_SAFE_CALL(cudaMemset((void *)work_mem_d, 0, - stride * (nThread + 1) * sizeof(float))); - -#ifdef CUDATIME - cudaEventRecord(stop, 0); - cudaEventSynchronize(stop); - cudaEventElapsedTime(&tmp_t, start, stop); - *cpu_to_gpu_t += (double)tmp_t; - - cudaEventRecord(start, 0); -#endif - - //======================================= - // KERNEL: CALCULATE COST - //======================================= - // Determine the number of thread blocks in the x- and y-dimension - int num_blocks = - (int)((float)(num + THREADS_PER_BLOCK - 1) / (float)THREADS_PER_BLOCK); - int num_blocks_y = - (int)((float)(num_blocks + MAXBLOCKS - 1) / (float)MAXBLOCKS); - int num_blocks_x = - (int)((float)(num_blocks + num_blocks_y - 1) / (float)num_blocks_y); - dim3 grid_size(num_blocks_x, num_blocks_y, 1); - - kernel_compute_cost<<>>( - num, // in: # of data - dim, // in: dimension of point coordinates - x, // in: point to open a center at - p, // in: data point array - K, // in: number of centers - stride, // in: size of each work_mem segment - coord_d, // in: array of point coordinates - work_mem_d, // out: cost and lower field array - center_table_d, // in: center index table - switch_membership_d // out: changes in membership - ); - cudaThreadSynchronize(); - - // error check - error = cudaGetLastError(); - if (error != cudaSuccess) { - printf("kernel error: %s\n", cudaGetErrorString(error)); - exit(EXIT_FAILURE); - } - -#ifdef CUDATIME - cudaEventRecord(stop, 0); - cudaEventSynchronize(stop); - cudaEventElapsedTime(&tmp_t, start, stop); - *kernel_t += (double)tmp_t; - - cudaEventRecord(start, 0); -#endif - - //======================================= - // GPU-TO-CPU MEMORY COPY - //======================================= - CUDA_SAFE_CALL(cudaMemcpy(work_mem_h, work_mem_d, - stride * (nThread + 1) * sizeof(float), - cudaMemcpyDeviceToHost)); - CUDA_SAFE_CALL(cudaMemcpy(switch_membership, switch_membership_d, - num * sizeof(bool), cudaMemcpyDeviceToHost)); - -#ifdef CUDATIME - cudaEventRecord(stop, 0); - cudaEventSynchronize(stop); - cudaEventElapsedTime(&tmp_t, start, stop); - *gpu_to_cpu_t += (double)tmp_t; - - cudaEventRecord(start, 0); -#endif - - //======================================= - // CPU (SERIAL) WORK - //======================================= - int number_of_centers_to_close = 0; - float gl_cost_of_opening_x = z; - float *gl_lower = &work_mem_h[stride * nThread]; - // compute the number of centers to close if we are to open i - for (int i = 0; i < num; i++) { - if (is_center[i]) { - float low = z; - for (int j = 0; j < num; j++) { - low += work_mem_h[j * stride + center_table[i]]; - } - - gl_lower[center_table[i]] = low; - - if (low > 0) { - ++number_of_centers_to_close; - work_mem_h[i * stride + K] -= low; - } - } - gl_cost_of_opening_x += work_mem_h[i * stride + K]; - } - - // if opening a center at x saves cost (i.e. cost is negative) do so; - // otherwise, do nothing - if (gl_cost_of_opening_x < 0) { - for (int i = 0; i < num; i++) { - bool close_center = gl_lower[center_table[points->p[i].assign]] > 0; - if (switch_membership[i] || close_center) { - points->p[i].cost = - dist(points->p[i], points->p[x], dim) * points->p[i].weight; - points->p[i].assign = x; - } - } - - for (int i = 0; i < num; i++) { - if (is_center[i] && gl_lower[center_table[i]] > 0) { - is_center[i] = false; - } - } - - if (x >= 0 && x < num) { - is_center[x] = true; - } - *numcenters = *numcenters + 1 - number_of_centers_to_close; - } else { - gl_cost_of_opening_x = 0; - } - - //======================================= - // DEALLOCATE HOST MEMORY - //======================================= - free(work_mem_h); - -#ifdef CUDATIME - cudaEventRecord(stop, 0); - cudaEventSynchronize(stop); - cudaEventElapsedTime(&tmp_t, start, stop); - *serial_t += (double)tmp_t; - - cudaEventRecord(start, 0); -#endif - - //======================================= - // DEALLOCATE GPU MEMORY - //======================================= - CUDA_SAFE_CALL(cudaFree(work_mem_d)); - -#ifdef CUDATIME - cudaEventRecord(stop, 0); - cudaEventSynchronize(stop); - cudaEventElapsedTime(&tmp_t, start, stop); - *free_t += (double)tmp_t; -#endif - iter++; - return -gl_cost_of_opening_x; -} diff --git a/examples/streamcluster/streamcluster_cuda_cpu-cuda-nvptx64-nvidia-cuda-sm_61.ll b/examples/streamcluster/streamcluster_cuda_cpu-cuda-nvptx64-nvidia-cuda-sm_61.ll deleted file mode 100644 index 6c0306b..0000000 --- a/examples/streamcluster/streamcluster_cuda_cpu-cuda-nvptx64-nvidia-cuda-sm_61.ll +++ /dev/null @@ -1,366 +0,0 @@ -; ModuleID = 'streamcluster_cuda_cpu-cuda-nvptx64-nvidia-cuda-sm_61.bc' -source_filename = "streamcluster_cuda_cpu.cu" -target datalayout = "e-i64:64-i128:128-v16:16-v32:32-n16:32:64" -target triple = "nvptx64-nvidia-cuda" - -%struct.__cuda_builtin_blockIdx_t = type { i8 } -%struct.__cuda_builtin_gridDim_t = type { i8 } -%struct.__cuda_builtin_blockDim_t = type { i8 } -%struct.__cuda_builtin_threadIdx_t = type { i8 } -%struct.cudaFuncAttributes = type { i64, i64, i64, i32, i32, i32, i32, i32, i32, i32 } -%struct.Point = type { float, float*, i64, float } - -$_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv = comdat any - -$_ZN24__cuda_builtin_gridDim_t17__fetch_builtin_xEv = comdat any - -$_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_yEv = comdat any - -$_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv = comdat any - -$_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv = comdat any - -@blockIdx = extern_weak dso_local addrspace(1) global %struct.__cuda_builtin_blockIdx_t, align 1 -@gridDim = extern_weak dso_local addrspace(1) global %struct.__cuda_builtin_gridDim_t, align 1 -@blockDim = extern_weak dso_local addrspace(1) global %struct.__cuda_builtin_blockDim_t, align 1 -@threadIdx = extern_weak dso_local addrspace(1) global %struct.__cuda_builtin_threadIdx_t, align 1 - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaMalloc(i8** %p, i64 %s) #0 { -entry: - %p.addr = alloca i8**, align 8 - %s.addr = alloca i64, align 8 - store i8** %p, i8*** %p.addr, align 8 - store i64 %s, i64* %s.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaFuncGetAttributes(%struct.cudaFuncAttributes* %p, i8* %c) #0 { -entry: - %p.addr = alloca %struct.cudaFuncAttributes*, align 8 - %c.addr = alloca i8*, align 8 - store %struct.cudaFuncAttributes* %p, %struct.cudaFuncAttributes** %p.addr, align 8 - store i8* %c, i8** %c.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaDeviceGetAttribute(i32* %value, i32 %attr, i32 %device) #0 { -entry: - %value.addr = alloca i32*, align 8 - %attr.addr = alloca i32, align 4 - %device.addr = alloca i32, align 4 - store i32* %value, i32** %value.addr, align 8 - store i32 %attr, i32* %attr.addr, align 4 - store i32 %device, i32* %device.addr, align 4 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaGetDevice(i32* %device) #0 { -entry: - %device.addr = alloca i32*, align 8 - store i32* %device, i32** %device.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaOccupancyMaxActiveBlocksPerMultiprocessor(i32* %numBlocks, i8* %func, i32 %blockSize, i64 %dynamicSmemSize) #0 { -entry: - %numBlocks.addr = alloca i32*, align 8 - %func.addr = alloca i8*, align 8 - %blockSize.addr = alloca i32, align 4 - %dynamicSmemSize.addr = alloca i64, align 8 - store i32* %numBlocks, i32** %numBlocks.addr, align 8 - store i8* %func, i8** %func.addr, align 8 - store i32 %blockSize, i32* %blockSize.addr, align 4 - store i64 %dynamicSmemSize, i64* %dynamicSmemSize.addr, align 8 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define weak dso_local i32 @cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(i32* %numBlocks, i8* %func, i32 %blockSize, i64 %dynamicSmemSize, i32 %flags) #0 { -entry: - %numBlocks.addr = alloca i32*, align 8 - %func.addr = alloca i8*, align 8 - %blockSize.addr = alloca i32, align 4 - %dynamicSmemSize.addr = alloca i64, align 8 - %flags.addr = alloca i32, align 4 - store i32* %numBlocks, i32** %numBlocks.addr, align 8 - store i8* %func, i8** %func.addr, align 8 - store i32 %blockSize, i32* %blockSize.addr, align 4 - store i64 %dynamicSmemSize, i64* %dynamicSmemSize.addr, align 8 - store i32 %flags, i32* %flags.addr, align 4 - ret i32 999 -} - -; Function Attrs: convergent noinline nounwind optnone -define dso_local float @_Z6d_distiiiiPf(i32 %p1, i32 %p2, i32 %num, i32 %dim, float* %coord_d) #0 { -entry: - %p1.addr = alloca i32, align 4 - %p2.addr = alloca i32, align 4 - %num.addr = alloca i32, align 4 - %dim.addr = alloca i32, align 4 - %coord_d.addr = alloca float*, align 8 - %retval1 = alloca float, align 4 - %i = alloca i32, align 4 - %tmp = alloca float, align 4 - store i32 %p1, i32* %p1.addr, align 4 - store i32 %p2, i32* %p2.addr, align 4 - store i32 %num, i32* %num.addr, align 4 - store i32 %dim, i32* %dim.addr, align 4 - store float* %coord_d, float** %coord_d.addr, align 8 - store float 0.000000e+00, float* %retval1, align 4 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = load i32, i32* %i, align 4 - %1 = load i32, i32* %dim.addr, align 4 - %cmp = icmp slt i32 %0, %1 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %2 = load float*, float** %coord_d.addr, align 8 - %3 = load i32, i32* %i, align 4 - %4 = load i32, i32* %num.addr, align 4 - %mul = mul nsw i32 %3, %4 - %5 = load i32, i32* %p1.addr, align 4 - %add = add nsw i32 %mul, %5 - %idxprom = sext i32 %add to i64 - %arrayidx = getelementptr inbounds float, float* %2, i64 %idxprom - %6 = load float, float* %arrayidx, align 4 - %7 = load float*, float** %coord_d.addr, align 8 - %8 = load i32, i32* %i, align 4 - %9 = load i32, i32* %num.addr, align 4 - %mul2 = mul nsw i32 %8, %9 - %10 = load i32, i32* %p2.addr, align 4 - %add3 = add nsw i32 %mul2, %10 - %idxprom4 = sext i32 %add3 to i64 - %arrayidx5 = getelementptr inbounds float, float* %7, i64 %idxprom4 - %11 = load float, float* %arrayidx5, align 4 - %sub = fsub contract float %6, %11 - store float %sub, float* %tmp, align 4 - %12 = load float, float* %tmp, align 4 - %13 = load float, float* %tmp, align 4 - %mul6 = fmul contract float %12, %13 - %14 = load float, float* %retval1, align 4 - %add7 = fadd contract float %14, %mul6 - store float %add7, float* %retval1, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %15 = load i32, i32* %i, align 4 - %inc = add nsw i32 %15, 1 - store i32 %inc, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %16 = load float, float* %retval1, align 4 - ret float %16 -} - -; Function Attrs: convergent noinline nounwind optnone -define dso_local void @_Z19kernel_compute_costiilP5PointiiPfS1_PiPb(i32 %num, i32 %dim, i64 %x, %struct.Point* %p, i32 %K, i32 %stride, float* %coord_d, float* %work_mem_d, i32* %center_table_d, i8* %switch_membership_d) #0 { -entry: - %num.addr = alloca i32, align 4 - %dim.addr = alloca i32, align 4 - %x.addr = alloca i64, align 8 - %p.addr = alloca %struct.Point*, align 8 - %K.addr = alloca i32, align 4 - %stride.addr = alloca i32, align 4 - %coord_d.addr = alloca float*, align 8 - %work_mem_d.addr = alloca float*, align 8 - %center_table_d.addr = alloca i32*, align 8 - %switch_membership_d.addr = alloca i8*, align 8 - %bid = alloca i32, align 4 - %tid = alloca i32, align 4 - %lower = alloca float*, align 8 - %x_cost = alloca float, align 4 - store i32 %num, i32* %num.addr, align 4 - store i32 %dim, i32* %dim.addr, align 4 - store i64 %x, i64* %x.addr, align 8 - store %struct.Point* %p, %struct.Point** %p.addr, align 8 - store i32 %K, i32* %K.addr, align 4 - store i32 %stride, i32* %stride.addr, align 4 - store float* %coord_d, float** %coord_d.addr, align 8 - store float* %work_mem_d, float** %work_mem_d.addr, align 8 - store i32* %center_table_d, i32** %center_table_d.addr, align 8 - store i8* %switch_membership_d, i8** %switch_membership_d.addr, align 8 - %call = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #3 - %call1 = call i32 @_ZN24__cuda_builtin_gridDim_t17__fetch_builtin_xEv() #3 - %call2 = call i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_yEv() #3 - %mul = mul i32 %call1, %call2 - %add = add i32 %call, %mul - store i32 %add, i32* %bid, align 4 - %call3 = call i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv() #3 - %0 = load i32, i32* %bid, align 4 - %mul4 = mul i32 %call3, %0 - %call5 = call i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #3 - %add6 = add i32 %mul4, %call5 - store i32 %add6, i32* %tid, align 4 - %1 = load i32, i32* %tid, align 4 - %2 = load i32, i32* %num.addr, align 4 - %cmp = icmp slt i32 %1, %2 - br i1 %cmp, label %if.then, label %if.end34 - -if.then: ; preds = %entry - %3 = load float*, float** %work_mem_d.addr, align 8 - %4 = load i32, i32* %tid, align 4 - %5 = load i32, i32* %stride.addr, align 4 - %mul7 = mul nsw i32 %4, %5 - %idxprom = sext i32 %mul7 to i64 - %arrayidx = getelementptr inbounds float, float* %3, i64 %idxprom - store float* %arrayidx, float** %lower, align 8 - %6 = load i32, i32* %tid, align 4 - %7 = load i64, i64* %x.addr, align 8 - %conv = trunc i64 %7 to i32 - %8 = load i32, i32* %num.addr, align 4 - %9 = load i32, i32* %dim.addr, align 4 - %10 = load float*, float** %coord_d.addr, align 8 - %call8 = call float @_Z6d_distiiiiPf(i32 %6, i32 %conv, i32 %8, i32 %9, float* %10) #3 - %11 = load %struct.Point*, %struct.Point** %p.addr, align 8 - %12 = load i32, i32* %tid, align 4 - %idxprom9 = sext i32 %12 to i64 - %arrayidx10 = getelementptr inbounds %struct.Point, %struct.Point* %11, i64 %idxprom9 - %weight = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx10, i32 0, i32 0 - %13 = load float, float* %weight, align 8 - %mul11 = fmul contract float %call8, %13 - store float %mul11, float* %x_cost, align 4 - %14 = load float, float* %x_cost, align 4 - %15 = load %struct.Point*, %struct.Point** %p.addr, align 8 - %16 = load i32, i32* %tid, align 4 - %idxprom12 = sext i32 %16 to i64 - %arrayidx13 = getelementptr inbounds %struct.Point, %struct.Point* %15, i64 %idxprom12 - %cost = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx13, i32 0, i32 3 - %17 = load float, float* %cost, align 8 - %cmp14 = fcmp olt float %14, %17 - br i1 %cmp14, label %if.then15, label %if.else - -if.then15: ; preds = %if.then - %18 = load i8*, i8** %switch_membership_d.addr, align 8 - %19 = load i32, i32* %tid, align 4 - %idxprom16 = sext i32 %19 to i64 - %arrayidx17 = getelementptr inbounds i8, i8* %18, i64 %idxprom16 - store i8 1, i8* %arrayidx17, align 1 - %20 = load float, float* %x_cost, align 4 - %21 = load %struct.Point*, %struct.Point** %p.addr, align 8 - %22 = load i32, i32* %tid, align 4 - %idxprom18 = sext i32 %22 to i64 - %arrayidx19 = getelementptr inbounds %struct.Point, %struct.Point* %21, i64 %idxprom18 - %cost20 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx19, i32 0, i32 3 - %23 = load float, float* %cost20, align 8 - %sub = fsub contract float %20, %23 - %24 = load float*, float** %lower, align 8 - %25 = load i32, i32* %K.addr, align 4 - %idxprom21 = sext i32 %25 to i64 - %arrayidx22 = getelementptr inbounds float, float* %24, i64 %idxprom21 - %26 = load float, float* %arrayidx22, align 4 - %add23 = fadd contract float %26, %sub - store float %add23, float* %arrayidx22, align 4 - br label %if.end - -if.else: ; preds = %if.then - %27 = load %struct.Point*, %struct.Point** %p.addr, align 8 - %28 = load i32, i32* %tid, align 4 - %idxprom24 = sext i32 %28 to i64 - %arrayidx25 = getelementptr inbounds %struct.Point, %struct.Point* %27, i64 %idxprom24 - %cost26 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx25, i32 0, i32 3 - %29 = load float, float* %cost26, align 8 - %30 = load float, float* %x_cost, align 4 - %sub27 = fsub contract float %29, %30 - %31 = load float*, float** %lower, align 8 - %32 = load i32*, i32** %center_table_d.addr, align 8 - %33 = load %struct.Point*, %struct.Point** %p.addr, align 8 - %34 = load i32, i32* %tid, align 4 - %idxprom28 = sext i32 %34 to i64 - %arrayidx29 = getelementptr inbounds %struct.Point, %struct.Point* %33, i64 %idxprom28 - %assign = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx29, i32 0, i32 2 - %35 = load i64, i64* %assign, align 8 - %arrayidx30 = getelementptr inbounds i32, i32* %32, i64 %35 - %36 = load i32, i32* %arrayidx30, align 4 - %idxprom31 = sext i32 %36 to i64 - %arrayidx32 = getelementptr inbounds float, float* %31, i64 %idxprom31 - %37 = load float, float* %arrayidx32, align 4 - %add33 = fadd contract float %37, %sub27 - store float %add33, float* %arrayidx32, align 4 - br label %if.end - -if.end: ; preds = %if.else, %if.then15 - br label %if.end34 - -if.end34: ; preds = %if.end, %entry - ret void -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_xEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() - ret i32 %0 -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN24__cuda_builtin_gridDim_t17__fetch_builtin_xEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.x() - ret i32 %0 -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN25__cuda_builtin_blockIdx_t17__fetch_builtin_yEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.y() - ret i32 %0 -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN25__cuda_builtin_blockDim_t17__fetch_builtin_xEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() - ret i32 %0 -} - -; Function Attrs: alwaysinline convergent nounwind -define linkonce_odr dso_local i32 @_ZN26__cuda_builtin_threadIdx_t17__fetch_builtin_xEv() #1 comdat align 2 { -entry: - %0 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() - ret i32 %0 -} - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() #2 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.nctaid.x() #2 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.ctaid.y() #2 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.ntid.x() #2 - -; Function Attrs: nounwind readnone -declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() #2 - -attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_61" "target-features"="+ptx64,+sm_61" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { alwaysinline convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="sm_61" "target-features"="+ptx64,+sm_61" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind readnone } -attributes #3 = { convergent nounwind } - -!llvm.module.flags = !{!0, !1, !2} -!nvvm.annotations = !{!3, !4, !5, !4, !6, !6, !6, !6, !7, !7, !6} -!llvm.ident = !{!8} -!nvvmir.version = !{!9} - -!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]} -!1 = !{i32 1, !"wchar_size", i32 4} -!2 = !{i32 4, !"nvvm-reflect-ftz", i32 0} -!3 = !{void (i32, i32, i64, %struct.Point*, i32, i32, float*, float*, i32*, i8*)* @_Z19kernel_compute_costiilP5PointiiPfS1_PiPb, !"kernel", i32 1} -!4 = !{null, !"align", i32 8} -!5 = !{null, !"align", i32 8, !"align", i32 65544, !"align", i32 131080} -!6 = !{null, !"align", i32 16} -!7 = !{null, !"align", i32 16, !"align", i32 65552, !"align", i32 131088} -!8 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"} -!9 = !{i32 1, i32 4} diff --git a/examples/streamcluster/streamcluster_cuda_cpu-host-x86_64-unknown-linux-gnu.ll b/examples/streamcluster/streamcluster_cuda_cpu-host-x86_64-unknown-linux-gnu.ll deleted file mode 100644 index e7bc5f9..0000000 --- a/examples/streamcluster/streamcluster_cuda_cpu-host-x86_64-unknown-linux-gnu.ll +++ /dev/null @@ -1,5115 +0,0 @@ -; ModuleID = 'streamcluster_cuda_cpu-host-x86_64-unknown-linux-gnu.bc' -source_filename = "streamcluster_cuda_cpu.cu" -target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -%"class.std::ios_base::Init" = type { i8 } -%struct.Point = type { float, float*, i64, float } -%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } -%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } -%struct.dim3 = type { i32, i32, i32 } -%struct.CUstream_st = type opaque -%struct.Points = type { i64, i32, %struct.Point* } -%struct.timeval = type { i64, i64 } -%struct.timezone = type { i32, i32 } -%union.pthread_barrier_t = type { i64, [24 x i8] } -%struct.pkmedian_arg_t = type { %struct.Points*, i64, i64, i64*, i32, %union.pthread_barrier_t* } -%class.PStream = type { i32 (...)** } -%class.SimStream = type { %class.PStream, i64 } -%class.FileStream = type { %class.PStream, %struct._IO_FILE* } - -$_ZN4dim3C2Ejjj = comdat any - -$_ZSt3logf = comdat any - -$_ZN9SimStreamC2El = comdat any - -$_ZN10FileStreamC2EPc = comdat any - -$_ZN7PStreamC2Ev = comdat any - -$_ZN9SimStream4readEPfii = comdat any - -$_ZN9SimStream6ferrorEv = comdat any - -$_ZN9SimStream4feofEv = comdat any - -$_ZN9SimStreamD2Ev = comdat any - -$_ZN9SimStreamD0Ev = comdat any - -$_ZN7PStreamD2Ev = comdat any - -$_ZN7PStreamD0Ev = comdat any - -$__clang_call_terminate = comdat any - -$_ZN10FileStream4readEPfii = comdat any - -$_ZN10FileStream6ferrorEv = comdat any - -$_ZN10FileStream4feofEv = comdat any - -$_ZN10FileStreamD2Ev = comdat any - -$_ZN10FileStreamD0Ev = comdat any - -$_ZTV9SimStream = comdat any - -$_ZTS9SimStream = comdat any - -$_ZTS7PStream = comdat any - -$_ZTI7PStream = comdat any - -$_ZTI9SimStream = comdat any - -$_ZTV7PStream = comdat any - -$_ZTV10FileStream = comdat any - -$_ZTS10FileStream = comdat any - -$_ZTI10FileStream = comdat any - -@_ZStL8__ioinit = internal global %"class.std::ios_base::Init" zeroinitializer, align 1 -@__dso_handle = external hidden global i8 -@work_mem_h = dso_local global float* null, align 8 -@coord_h = dso_local global float* null, align 8 -@work_mem_d = dso_local global float* null, align 8 -@coord_d = dso_local global float* null, align 8 -@center_table_d = dso_local global i32* null, align 8 -@switch_membership_d = dso_local global i8* null, align 8 -@p = dso_local global %struct.Point* null, align 8 -@stderr = external dso_local global %struct._IO_FILE*, align 8 -@.str = private unnamed_addr constant [42 x i8] c"Cuda error in file '%s' in line %i : %s.\0A\00", align 1 -@.str.1 = private unnamed_addr constant [24 x i8] c"./streamcluster_cuda.cu\00", align 1 -@_ZL4iter = internal global i32 0, align 4 -@.str.2 = private unnamed_addr constant [18 x i8] c"kernel error: %s\0A\00", align 1 -@isCoordChanged = dso_local global i8 0, align 1 -@serial_t = dso_local global double 0.000000e+00, align 8 -@cpu_to_gpu_t = dso_local global double 0.000000e+00, align 8 -@gpu_to_cpu_t = dso_local global double 0.000000e+00, align 8 -@alloc_t = dso_local global double 0.000000e+00, align 8 -@kernel_t = dso_local global double 0.000000e+00, align 8 -@free_t = dso_local global double 0.000000e+00, align 8 -@time_local_search = dso_local global double 0.000000e+00, align 8 -@time_speedy = dso_local global double 0.000000e+00, align 8 -@time_select_feasible = dso_local global double 0.000000e+00, align 8 -@time_gain = dso_local global double 0.000000e+00, align 8 -@time_shuffle = dso_local global double 0.000000e+00, align 8 -@time_gain_dist = dso_local global double 0.000000e+00, align 8 -@time_gain_init = dso_local global double 0.000000e+00, align 8 -@.str.3 = private unnamed_addr constant [2 x i8] c"w\00", align 1 -@.str.4 = private unnamed_addr constant [4 x i8] c"%d \00", align 1 -@_ZL5nproc = internal global i32 0, align 4 -@_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE9totalcost = internal global float 0.000000e+00, align 4 -@_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE4open = internal global i8 0, align 1 -@_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE5costs = internal global float* null, align 8 -@_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE1i = internal global i32 0, align 4 -@_ZL9is_center = internal global i8* null, align 8 -@_ZL12center_table = internal global i32* null, align 8 -@_ZL17switch_membership = internal global i8* null, align 8 -@_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE1k = internal global i64 0, align 8 -@_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE8feasible = internal global i32* null, align 8 -@_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE11numfeasible = internal global i32 0, align 4 -@_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE4hizs = internal global float* null, align 8 -@.str.5 = private unnamed_addr constant [18 x i8] c"error opening %s\0A\00", align 1 -@.str.6 = private unnamed_addr constant [4 x i8] c"%u\0A\00", align 1 -@.str.7 = private unnamed_addr constant [5 x i8] c"%lf\0A\00", align 1 -@.str.8 = private unnamed_addr constant [5 x i8] c"%lf \00", align 1 -@.str.9 = private unnamed_addr constant [3 x i8] c"\0A\0A\00", align 1 -@.str.10 = private unnamed_addr constant [32 x i8] c"not enough memory for a chunk!\0A\00", align 1 -@.str.11 = private unnamed_addr constant [16 x i8] c"read %d points\0A\00", align 1 -@.str.12 = private unnamed_addr constant [21 x i8] c"error reading data!\0A\00", align 1 -@.str.13 = private unnamed_addr constant [21 x i8] c"finish local search\0A\00", align 1 -@.str.14 = private unnamed_addr constant [33 x i8] c"oops! no more space for centers\0A\00", align 1 -@.str.15 = private unnamed_addr constant [24 x i8] c"PARSEC Benchmark Suite\0A\00", align 1 -@.str.16 = private unnamed_addr constant [64 x i8] c"usage: %s k1 k2 d n chunksize clustersize infile outfile nproc\0A\00", align 1 -@.str.17 = private unnamed_addr constant [47 x i8] c" k1: Min. number of centers allowed\0A\00", align 1 -@.str.18 = private unnamed_addr constant [47 x i8] c" k2: Max. number of centers allowed\0A\00", align 1 -@.str.19 = private unnamed_addr constant [45 x i8] c" d: Dimension of each data point\0A\00", align 1 -@.str.20 = private unnamed_addr constant [38 x i8] c" n: Number of data points\0A\00", align 1 -@.str.21 = private unnamed_addr constant [57 x i8] c" chunksize: Number of data points to handle per step\0A\00", align 1 -@.str.22 = private unnamed_addr constant [55 x i8] c" clustersize: Maximum number of intermediate centers\0A\00", align 1 -@.str.23 = private unnamed_addr constant [37 x i8] c" infile: Input file (if n<=0)\0A\00", align 1 -@.str.24 = private unnamed_addr constant [28 x i8] c" outfile: Output file\0A\00", align 1 -@.str.25 = private unnamed_addr constant [41 x i8] c" nproc: Number of threads to use\0A\00", align 1 -@.str.26 = private unnamed_addr constant [2 x i8] c"\0A\00", align 1 -@.str.27 = private unnamed_addr constant [77 x i8] c"if n > 0, points will be randomly generated instead of reading from infile.\0A\00", align 1 -@.str.28 = private unnamed_addr constant [13 x i8] c"time = %lfs\0A\00", align 1 -@.str.29 = private unnamed_addr constant [19 x i8] c"time pgain = %lfs\0A\00", align 1 -@.str.30 = private unnamed_addr constant [24 x i8] c"time pgain_dist = %lfs\0A\00", align 1 -@.str.31 = private unnamed_addr constant [24 x i8] c"time pgain_init = %lfs\0A\00", align 1 -@.str.32 = private unnamed_addr constant [21 x i8] c"time pselect = %lfs\0A\00", align 1 -@.str.33 = private unnamed_addr constant [21 x i8] c"time pspeedy = %lfs\0A\00", align 1 -@.str.34 = private unnamed_addr constant [22 x i8] c"time pshuffle = %lfs\0A\00", align 1 -@.str.35 = private unnamed_addr constant [25 x i8] c"time localSearch = %lfs\0A\00", align 1 -@.str.36 = private unnamed_addr constant [34 x i8] c"====CUDA Timing info (pgain)====\0A\00", align 1 -@.str.37 = private unnamed_addr constant [20 x i8] c"time serial = %lfs\0A\00", align 1 -@.str.38 = private unnamed_addr constant [36 x i8] c"time CPU to GPU memory copy = %lfs\0A\00", align 1 -@.str.39 = private unnamed_addr constant [41 x i8] c"time GPU to CPU memory copy back = %lfs\0A\00", align 1 -@.str.40 = private unnamed_addr constant [24 x i8] c"time GPU malloc = %lfs\0A\00", align 1 -@.str.41 = private unnamed_addr constant [22 x i8] c"time GPU free = %lfs\0A\00", align 1 -@.str.42 = private unnamed_addr constant [20 x i8] c"time kernel = %lfs\0A\00", align 1 -@_ZTV9SimStream = linkonce_odr dso_local unnamed_addr constant { [7 x i8*] } { [7 x i8*] [i8* null, i8* bitcast ({ i8*, i8*, i8* }* @_ZTI9SimStream to i8*), i8* bitcast (i64 (%class.SimStream*, float*, i32, i32)* @_ZN9SimStream4readEPfii to i8*), i8* bitcast (i32 (%class.SimStream*)* @_ZN9SimStream6ferrorEv to i8*), i8* bitcast (i32 (%class.SimStream*)* @_ZN9SimStream4feofEv to i8*), i8* bitcast (void (%class.SimStream*)* @_ZN9SimStreamD2Ev to i8*), i8* bitcast (void (%class.SimStream*)* @_ZN9SimStreamD0Ev to i8*)] }, comdat, align 8 -@_ZTVN10__cxxabiv120__si_class_type_infoE = external dso_local global i8* -@_ZTS9SimStream = linkonce_odr dso_local constant [11 x i8] c"9SimStream\00", comdat, align 1 -@_ZTVN10__cxxabiv117__class_type_infoE = external dso_local global i8* -@_ZTS7PStream = linkonce_odr dso_local constant [9 x i8] c"7PStream\00", comdat, align 1 -@_ZTI7PStream = linkonce_odr dso_local constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv117__class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @_ZTS7PStream, i32 0, i32 0) }, comdat, align 8 -@_ZTI9SimStream = linkonce_odr dso_local constant { i8*, i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([11 x i8], [11 x i8]* @_ZTS9SimStream, i32 0, i32 0), i8* bitcast ({ i8*, i8* }* @_ZTI7PStream to i8*) }, comdat, align 8 -@_ZTV7PStream = linkonce_odr dso_local unnamed_addr constant { [7 x i8*] } { [7 x i8*] [i8* null, i8* bitcast ({ i8*, i8* }* @_ZTI7PStream to i8*), i8* bitcast (void ()* @__cxa_pure_virtual to i8*), i8* bitcast (void ()* @__cxa_pure_virtual to i8*), i8* bitcast (void ()* @__cxa_pure_virtual to i8*), i8* bitcast (void (%class.PStream*)* @_ZN7PStreamD2Ev to i8*), i8* bitcast (void (%class.PStream*)* @_ZN7PStreamD0Ev to i8*)] }, comdat, align 8 -@_ZTV10FileStream = linkonce_odr dso_local unnamed_addr constant { [7 x i8*] } { [7 x i8*] [i8* null, i8* bitcast ({ i8*, i8*, i8* }* @_ZTI10FileStream to i8*), i8* bitcast (i64 (%class.FileStream*, float*, i32, i32)* @_ZN10FileStream4readEPfii to i8*), i8* bitcast (i32 (%class.FileStream*)* @_ZN10FileStream6ferrorEv to i8*), i8* bitcast (i32 (%class.FileStream*)* @_ZN10FileStream4feofEv to i8*), i8* bitcast (void (%class.FileStream*)* @_ZN10FileStreamD2Ev to i8*), i8* bitcast (void (%class.FileStream*)* @_ZN10FileStreamD0Ev to i8*)] }, comdat, align 8 -@.str.43 = private unnamed_addr constant [3 x i8] c"rb\00", align 1 -@.str.44 = private unnamed_addr constant [24 x i8] c"error opening file %s\0A.\00", align 1 -@_ZTS10FileStream = linkonce_odr dso_local constant [13 x i8] c"10FileStream\00", comdat, align 1 -@_ZTI10FileStream = linkonce_odr dso_local constant { i8*, i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([13 x i8], [13 x i8]* @_ZTS10FileStream, i32 0, i32 0), i8* bitcast ({ i8*, i8* }* @_ZTI7PStream to i8*) }, comdat, align 8 -@.str.45 = private unnamed_addr constant [21 x i8] c"closing file stream\0A\00", align 1 -@0 = private unnamed_addr constant [45 x i8] c"_Z19kernel_compute_costiilP5PointiiPfS1_PiPb\00", align 1 -@1 = private constant [15713 x i8] c"P\EDU\BA\01\00\10\00P=\00\00\00\00\00\00\02\00\01\01@\00\00\00(2\00\00\00\00\00\00\00\00\00\00\00\00\00\00\07\00\01\00=\00\00\00\00\00\00\00\00\00\00\00\11\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\7FELF\02\01\013\07\00\00\00\00\00\00\00\02\00\BE\00e\00\00\00\00\00\00\00\00\00\00\00\801\00\00\00\00\00\00@/\00\00\00\00\00\00=\05=\00@\008\00\03\00@\00\09\00\01\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00.text._Z19kernel_compute_costiilP5PointiiPfS1_PiPb\00.nv.info._Z19kernel_compute_costiilP5PointiiPfS1_PiPb\00.nv.shared._Z19kernel_compute_costiilP5PointiiPfS1_PiPb\00.nv.global\00.nv.constant0._Z19kernel_compute_costiilP5PointiiPfS1_PiPb\00\00.shstrtab\00.strtab\00.symtab\00.symtab_shndx\00.nv.info\00_Z19kernel_compute_costiilP5PointiiPfS1_PiPb\00.text._Z19kernel_compute_costiilP5PointiiPfS1_PiPb\00.nv.info._Z19kernel_compute_costiilP5PointiiPfS1_PiPb\00.nv.shared._Z19kernel_compute_costiilP5PointiiPfS1_PiPb\00.nv.global\00blockIdx\00gridDim\00blockDim\00threadIdx\00$_Z19kernel_compute_costiilP5PointiiPfS1_PiPb$_Z6d_distiiiiPf\00.nv.constant0._Z19kernel_compute_costiilP5PointiiPfS1_PiPb\00_param\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00_\00\00\00\03\00\07\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\01\00\00\03\00\08\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\0B\01\00\00\01\00\08\00\01\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\14\01\00\00\01\00\08\00\03\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00\1C\01\00\00\01\00\08\00\00\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00%\01\00\00\01\00\08\00\02\00\00\00\00\00\00\00\01\00\00\00\00\00\00\00m\01\00\00\03\00\06\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\002\00\00\00\12\10\07\00\00\00\00\00\00\00\00\00\80(\00\00\00\00\00\00/\01\00\00\12\02\07\008\1D\00\00\00\00\00\00H\0B\00\00\00\00\00\00\04/\08\00\08\00\00\00\18\00\00\00\04#\08\00\09\00\00\00\00\00\00\00\04\12\08\00\09\00\00\00\00\00\00\00\04\11\08\00\09\00\00\00\00\00\00\00\04#\08\00\08\00\00\00\00\00\00\00\04\12\08\00\08\00\00\00\80\00\00\00\04\11\08\00\08\00\00\00\80\00\00\00\010\00\00\01*\00\00\04\0A\08\00\07\00\00\00@\01@\00\03\19@\00\04\17\0C\00\00\00\00\00\09\008\00\00\F0!\00\04\17\0C\00\00\00\00\00\08\000\00\00\F0!\00\04\17\0C\00\00\00\00\00\07\00(\00\00\F0!\00\04\17\0C\00\00\00\00\00\06\00 \00\00\F0!\00\04\17\0C\00\00\00\00\00\05\00\1C\00\00\F0\11\00\04\17\0C\00\00\00\00\00\04\00\18\00\00\F0\11\00\04\17\0C\00\00\00\00\00\03\00\10\00\00\F0!\00\04\17\0C\00\00\00\00\00\02\00\08\00\00\F0!\00\04\17\0C\00\00\00\00\00\01\00\04\00\00\F0\11\00\04\17\0C\00\00\00\00\00\00\00\00\00\00\F0\11\00\03\1B\FF\00\04\1D\08\00\E8\07\00\00\08\08\00\00\04\1C\04\000\1D\00\00\04\1E\04\00P\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\EF\1F\E0\FD\03!\00\D3rd<3>;\0A\0Amov.u2\00\1B,e\00b;\0Acvta\8D\00\04%\00\13,\\\00\22ld\C8\00\02\18\00nrd2, [\CE\00\1E])\00\1F1)\00\01a0];\0Ast#\00\81[%SP+0],,\00\0A\16\00\128\16\00\222;\B6\00\01\D8\00a1, 999(\00\02g\00\00\EF\00\18[\9E\01\03M\00\AF1;\0Aret;\0A\0A}\D6\01\1A\FE\02FuncGetAttributes\E1\01\0D#\00\0E\EC\01\0F+\00\06\0F\F7\01\1B\1F1\F7\01Q\1F1\F7\01!\0E\D9\00\0F\02\02\0F\0E8\01\0F\0D\02\8DhDevice\B4\00\0E\0E\02\0E$\00\0F\0F\02\00/32,\00\0B\1F1,\00\18\1F2<\02\13\1F2<\02\1F\1D4<\02\1F2<\02\0C\1F2<\02\13\01_\00\04;\02\0F\D9\00\07\1D]4\00\1F14\00\06\0Fp\02\10\0E\9A\01\0Fq\02\12(32q\02\0B\15\00!12\16\00\09\86\02\1F3\86\02\15\1F3\86\02#2Get\CB\00\0E}\02\05\1B\00\04\DA\00\0F\1C\02\13?3[8W\04.\0F\1B\02\0D\1F3W\04\19\04\B3\01\0D\D0\00\0F\AA\01\06\0F\05\04W\F0\04OccupancyMaxActiveBV\08\FE\03sPerMultiprocessor\9F\01\0F;\00\16\0EB\06\0FC\00%\0EJ\04\0FC\00\1E\1F2\86\00/\1F3\88\02\13O4[32\89\02\1C\1D3\89\02\1F4\89\02\0C\1F4\89\02\19\133\89\02\0F\F1\00\1E\0F\BC\04\00\0FK\00$/2]w\07\00\0FL\00$\0F\1F\05\01\0F\98\00%\0F\A7\07\1D\097\05\186M\05\04,\00\2224-\00\183\CF\03\1F2\CF\03\15\1F2\CF\03L\9FWithFlags\D8\03(\05D\00\0E\E1\03\0FL\00'\0F\EA\030\0CL\00\1F2\98\008\1F3\98\008\1F4H\04\13O5[40\EC\08.\0FH\04\0D\1F5\EC\08\1C\0F\F9\00+\1F]\9C\040\0D\9A\01\0F\A5\040\0D:\02\0F\AE\041\0D\DB\02\0F\B7\041\0D|\03\0F\C0\04I\08-\00\1F3$\0A6\7Fvisible\D9\04\0F\F9\01_Z6d_distiiiiPf(\82\03\0B\1D\00\0E\9F\0C\0F%\00\07\1F1%\00\11\1F2%\00\11\1A3b\04\0E%\00\0F\EF\03\18\1F6\EF\03\18wpred %p\C1\0A\10f\91\01Kf<8>\12\04-17\13\04\1F9\13\04\0C\1F6\E4\0A\1D\0F\F7\00\04\0E\98\03\1E4-\00\0F\C5\03\07\1E3-\00\0FS\0D\07\1F2-\00\06\1E1\1F\04\0F\B4\00\07*0]\F5\02\1F0.\0D\01\144\09\03\0EX\0D\1F3X\0D\02\1A4a\03/16\F0\0B\00J5, 0=\00\03\88\03\1B5\16\00\148\16\00\F2\01bra.uni LBB6_1;\0A\08\00\10:\ED\00\02S\00%6,3\00\16;\16\00%7,\9C\00\B0;\0Asetp.ge.s\1B\002p1,6\00\D2%r7;\0A@%p1 bra`\00\1B4p\00\132p\00\122p\00\06\22\05\03\E1\00\07q\00\1F8\87\00\02$9,8\01\82;\0Amul.lo\85\00Br10,5\007%r90\00411,\93\01T;\0Aadd.\00%2,4\00\121\F4\11C.s64\1E\00!d3\18\00\00\AB\01$hl\00\03#4,\1A\00\132I\00\02\17\00#5,\C3\00\00#\00\01y\00\02S\03\01\D5\008rd5\BD\00413,\0B\02\09\8D\00\174\8D\00\1D3\8D\00\01Z\01)14\8D\00#7,\1A\00\0A\8D\00\178\8D\00\177\8D\00\123\8D\00\005\01csub.rn\18\00\224,\AA\00\22%f\82\02\1Af\B6\05\18f\D1\00%5,\1B\00\07\16\00\05-\02\00\D3\006fma\\\00\227,5\00\03\05\00\1A6a\00\02\A6\02+f7 \02\133 \02\173\90\02(15\0A\02\077\01#6,\1E\00\1F1\EC\02\02/16\ED\02\04&4:\C0\00\181\C0\00\05\90\06\1Ff\90\06\03/f1\90\06\02\F0\06entry _Z19kernel_compa\11 co(\04\80lP5Point0\04vS1_PiPb\93\06\00\B5\00\0F:\00\19\0E\B0\06\0FB\00$\07\CD\06\00\87\03\0FB\00 \1F2B\00.\1F3\C6\00.\1F4B\00.\1F5\C6\00.\1F6B\00.\1F7B\00.\1F8B\00.\1F9n\08\13O7[88n\08\1D\07\A6\10n16 %rs\80\08,17\81\08\1E2\94\0C\1F5\BD\17\0D\1F7\82\08\19\00\BA\04\0F(\01!\0F\8E\0B\00\1F5K\00#\1F8K\00\00\1F4K\00#\1F7\22\0D\01\0FK\00#\1E6\80\09\0F\95\00$\1E5\9D\09\0F\94\00$\1F4X\0D\01\0FK\00#/3]\8D\0A\00\0FK\00#\0FP\0A\00\0F\95\00$\0Fm\0A\00\0F\94\00$#0]\04\03#to\E2\1D).u3\08\03\1F\00\0A\1C\00\1482\08\0F;\00\03\119\1F\00\1F5;\00\02\02\B7\08/d9<\00\05$11\1B\09\0F=\00\01\02\AB\08/11>\00\06\143T\0E\0F>\00\01\02\93\09/13>\00\06\03\9D\09\0F>\00\03\226,$\00\0F\BB\0B\1D\0B\1D\0F\1F1\A6\0B\04\1F6\97\0B\02\1F3\AB\08\02\0B\EA\0B\034\0F+d1\18\00\144\A6\0F\0B\8A\00\144\8B\00\1A0\18\00\135\8C\00\1B82\0C\88%ctaid.x\17\00\00\0E\01\1Fn\18\00\00\157/\00\1Byr\0B\188\F7\0B\06\97\09#9,e\00*r8\EC\00\126\02\01\199\AF\18\00>\02\22ntu\00\0E\B5\0B-64\E7\0B\0B\B9\0B\07K\00\00+\02\03J\00\0D\\\0B\142\\\0B\09\91\00\03(\0A/4;m\0A\00\196w\0C'16G\0C\0F\FE\0C\00$15r\0A\08\00\0D*7_p\0D\137\83\0A87_1\00\0D517,\E0\01\08t\00\1F7\8B\00\03\09\19\0D\0A\04\0D#9,8\00\00'\00\0DI\0C!18\19\00\199J\0C\024\00\01\1C\00\0AL\0C420,\9D\00\01'\00\0A[\02\137\A3\02'20\9B\00/20\B2\00\02\142\CF\01\0A\16\00\172S\01\06\16\00\08#\0D\07#\0E\181~\0CF{ \0A\09-\08Ctemp\FE\04Ireg;\7F\11\01\0B\00\1C0j\12\02\16\00\04:\17\1F03\00\00\1F13\00\02\1413\00\1F13\00\00\1F23\00\02\1423\00\1F23\00\00\1F33\00\02\1433\00)3;&\12\01\0B\00\1843\00#643\00\144\C0\13\0C\9A\00\03\BF\0Ca;\0Acall\87\02\14('\13;, \0A\CD\10R, \0A(\0A\1C\01\22, \09\00\141\09\00\142\09\00\143\09\0074\0A)\B5\06\06N\0D\062\0Dg;\0A} \0A\09\C5\01\0C\E9\0F\03\98\02(237\02\07\9C\02424, \00\1A5\9C\02$5,Q\00\01'\00\0E\EB\0E#25\1D\03\0A\EC\0E\1F1\EC\0E\00/80\EC\0E\06*80\86\02\1F6\C1\00\04\1F7\C1\00\05\03\91\0F\1D7\C1\00$9,Q\00\01'\00\0CR\0F\00\22\00\03\94\0E\02m\04\12u\1E\00(p2Q\0FF@%p2l\04\1B3l\04\132l\04\09l\11\144\84\01\1A5\C3\00\194\84\01\07\AA\00444,8\00\01'\00\03s\05\02\C9\0B\151|\0F\118\B5\00\124\E3\05's1\D0\00/12M\01\03/45M\01\03/46M\01\04447, \00\0AM\01448,Q\00\01'\00\08}\00\03\0F\02#48N\01\08\FE\10\01\1C\06\01\A0\00\00)\00\09\9C\00%9,\E7\04\08\9C\00)50/\10\06\9C\00451, \00\0B\1F\12$2,Q\00\01'\00\08\9C\00\135#\12\132r\01\06\99\00#6, \00\00\A5\00\07\AE\02\01-\00\01\16\00\1B6\ED\01\134\ED\01\183\ED\01/30c\01\03/31c\01\04432, \00\0Ac\01433,Q\00\01'\00\07\C7\00\127\C6\00\2233\15\01\05\1A\12\188\F7\01\09x\01$9,\1F\12(8;\AE\00\1B4u\01\04\18\00%5,\E7\08\09\18\00\156z\00+16\C8\00\02+\02\1D3!\13438,S\00\01'\00\09\16\01\139P\00\0Cx\02\03\F5\00\1C9M\00441,\B8\00\01'\00\08\DC\01\130x\02\1D1\DC\01#1, \009%f9\DB\01\2241\DB\01\1F1\DB\01\04*4:\18\00\135\18\00\B05:\0Aret;\0A\0A}\0A\00\00", section ".nv_fatbin", align 8 -@__cuda_fatbin_wrapper = internal constant { i32, i32, i8*, i8* } { i32 1180844977, i32 1, i8* getelementptr inbounds ([15713 x i8], [15713 x i8]* @1, i64 0, i64 0), i8* null }, section ".nvFatBinSegment", align 8 -@__cuda_gpubin_handle = internal global i8** null, align 8 -@llvm.global_ctors = appending global [2 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__sub_I_streamcluster_cuda_cpu.cu, i8* null }, { i32, void ()*, i8* } { i32 65535, void ()* bitcast (void (i8*)* @__cuda_module_ctor to void ()*), i8* null }] - -; Function Attrs: noinline uwtable -define internal void @__cxx_global_var_init() #0 section ".text.startup" { -entry: - call void @_ZNSt8ios_base4InitC1Ev(%"class.std::ios_base::Init"* @_ZStL8__ioinit) - %0 = call i32 @__cxa_atexit(void (i8*)* bitcast (void (%"class.std::ios_base::Init"*)* @_ZNSt8ios_base4InitD1Ev to void (i8*)*), i8* getelementptr inbounds (%"class.std::ios_base::Init", %"class.std::ios_base::Init"* @_ZStL8__ioinit, i32 0, i32 0), i8* @__dso_handle) #2 - ret void -} - -declare dso_local void @_ZNSt8ios_base4InitC1Ev(%"class.std::ios_base::Init"*) unnamed_addr #1 - -declare dso_local void @_ZNSt8ios_base4InitD1Ev(%"class.std::ios_base::Init"*) unnamed_addr #1 - -; Function Attrs: nounwind -declare dso_local i32 @__cxa_atexit(void (i8*)*, i8*, i8*) #2 - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z19kernel_compute_costiilP5PointiiPfS1_PiPb(i32 %num, i32 %dim, i64 %x, %struct.Point* %p, i32 %K, i32 %stride, float* %coord_d, float* %work_mem_d, i32* %center_table_d, i8* %switch_membership_d) #3 { -entry: - %num.addr = alloca i32, align 4 - %dim.addr = alloca i32, align 4 - %x.addr = alloca i64, align 8 - %p.addr = alloca %struct.Point*, align 8 - %K.addr = alloca i32, align 4 - %stride.addr = alloca i32, align 4 - %coord_d.addr = alloca float*, align 8 - %work_mem_d.addr = alloca float*, align 8 - %center_table_d.addr = alloca i32*, align 8 - %switch_membership_d.addr = alloca i8*, align 8 - %grid_dim = alloca %struct.dim3, align 8 - %block_dim = alloca %struct.dim3, align 8 - %shmem_size = alloca i64, align 8 - %stream = alloca i8*, align 8 - %grid_dim.coerce = alloca { i64, i32 }, align 8 - %block_dim.coerce = alloca { i64, i32 }, align 8 - store i32 %num, i32* %num.addr, align 4 - store i32 %dim, i32* %dim.addr, align 4 - store i64 %x, i64* %x.addr, align 8 - store %struct.Point* %p, %struct.Point** %p.addr, align 8 - store i32 %K, i32* %K.addr, align 4 - store i32 %stride, i32* %stride.addr, align 4 - store float* %coord_d, float** %coord_d.addr, align 8 - store float* %work_mem_d, float** %work_mem_d.addr, align 8 - store i32* %center_table_d, i32** %center_table_d.addr, align 8 - store i8* %switch_membership_d, i8** %switch_membership_d.addr, align 8 - %kernel_args = alloca i8*, i64 10, align 16 - %0 = bitcast i32* %num.addr to i8* - %1 = getelementptr i8*, i8** %kernel_args, i32 0 - store i8* %0, i8** %1 - %2 = bitcast i32* %dim.addr to i8* - %3 = getelementptr i8*, i8** %kernel_args, i32 1 - store i8* %2, i8** %3 - %4 = bitcast i64* %x.addr to i8* - %5 = getelementptr i8*, i8** %kernel_args, i32 2 - store i8* %4, i8** %5 - %6 = bitcast %struct.Point** %p.addr to i8* - %7 = getelementptr i8*, i8** %kernel_args, i32 3 - store i8* %6, i8** %7 - %8 = bitcast i32* %K.addr to i8* - %9 = getelementptr i8*, i8** %kernel_args, i32 4 - store i8* %8, i8** %9 - %10 = bitcast i32* %stride.addr to i8* - %11 = getelementptr i8*, i8** %kernel_args, i32 5 - store i8* %10, i8** %11 - %12 = bitcast float** %coord_d.addr to i8* - %13 = getelementptr i8*, i8** %kernel_args, i32 6 - store i8* %12, i8** %13 - %14 = bitcast float** %work_mem_d.addr to i8* - %15 = getelementptr i8*, i8** %kernel_args, i32 7 - store i8* %14, i8** %15 - %16 = bitcast i32** %center_table_d.addr to i8* - %17 = getelementptr i8*, i8** %kernel_args, i32 8 - store i8* %16, i8** %17 - %18 = bitcast i8** %switch_membership_d.addr to i8* - %19 = getelementptr i8*, i8** %kernel_args, i32 9 - store i8* %18, i8** %19 - %20 = call i32 @__cudaPopCallConfiguration(%struct.dim3* %grid_dim, %struct.dim3* %block_dim, i64* %shmem_size, i8** %stream) - %21 = load i64, i64* %shmem_size, align 8 - %22 = load i8*, i8** %stream, align 8 - %23 = bitcast { i64, i32 }* %grid_dim.coerce to i8* - %24 = bitcast %struct.dim3* %grid_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %23, i8* align 8 %24, i64 12, i1 false) - %25 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 0 - %26 = load i64, i64* %25, align 8 - %27 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %grid_dim.coerce, i32 0, i32 1 - %28 = load i32, i32* %27, align 8 - %29 = bitcast { i64, i32 }* %block_dim.coerce to i8* - %30 = bitcast %struct.dim3* %block_dim to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %29, i8* align 8 %30, i64 12, i1 false) - %31 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 0 - %32 = load i64, i64* %31, align 8 - %33 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %block_dim.coerce, i32 0, i32 1 - %34 = load i32, i32* %33, align 8 - %35 = bitcast i8* %22 to %struct.CUstream_st* - %call = call i32 @cudaLaunchKernel(i8* bitcast (void (i32, i32, i64, %struct.Point*, i32, i32, float*, float*, i32*, i8*)* @_Z19kernel_compute_costiilP5PointiiPfS1_PiPb to i8*), i64 %26, i32 %28, i64 %32, i32 %34, i8** %kernel_args, i64 %21, %struct.CUstream_st* %35) - br label %setup.end - -setup.end: ; preds = %entry - ret void -} - -declare dso_local i32 @__cudaPopCallConfiguration(%struct.dim3*, %struct.dim3*, i64*, i8**) - -declare dso_local i32 @cudaLaunchKernel(i8*, i64, i32, i64, i32, i8**, i64, %struct.CUstream_st*) - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #4 - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z11allocDevMemii(i32 %num, i32 %dim) #3 { -entry: - %num.addr = alloca i32, align 4 - %dim.addr = alloca i32, align 4 - %err = alloca i32, align 4 - %err4 = alloca i32, align 4 - %err15 = alloca i32, align 4 - %err26 = alloca i32, align 4 - store i32 %num, i32* %num.addr, align 4 - store i32 %dim, i32* %dim.addr, align 4 - br label %do.body - -do.body: ; preds = %entry - %0 = load i32, i32* %num.addr, align 4 - %conv = sext i32 %0 to i64 - %mul = mul i64 %conv, 4 - %call = call i32 @cudaMalloc(i8** bitcast (i32** @center_table_d to i8**), i64 %mul) - store i32 %call, i32* %err, align 4 - %1 = load i32, i32* %err, align 4 - %cmp = icmp ne i32 0, %1 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %do.body - %2 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %3 = load i32, i32* %err, align 4 - %call1 = call i8* @cudaGetErrorString(i32 %3) - %call2 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %2, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.1, i64 0, i64 0), i32 91, i8* %call1) - call void @exit(i32 1) #15 - unreachable - -if.end: ; preds = %do.body - br label %do.end - -do.end: ; preds = %if.end - br label %do.body3 - -do.body3: ; preds = %do.end - %4 = load i32, i32* %num.addr, align 4 - %conv5 = sext i32 %4 to i64 - %mul6 = mul i64 %conv5, 1 - %call7 = call i32 @cudaMalloc(i8** @switch_membership_d, i64 %mul6) - store i32 %call7, i32* %err4, align 4 - %5 = load i32, i32* %err4, align 4 - %cmp8 = icmp ne i32 0, %5 - br i1 %cmp8, label %if.then9, label %if.end12 - -if.then9: ; preds = %do.body3 - %6 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %7 = load i32, i32* %err4, align 4 - %call10 = call i8* @cudaGetErrorString(i32 %7) - %call11 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %6, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.1, i64 0, i64 0), i32 92, i8* %call10) - call void @exit(i32 1) #15 - unreachable - -if.end12: ; preds = %do.body3 - br label %do.end13 - -do.end13: ; preds = %if.end12 - br label %do.body14 - -do.body14: ; preds = %do.end13 - %8 = load i32, i32* %num.addr, align 4 - %conv16 = sext i32 %8 to i64 - %mul17 = mul i64 %conv16, 32 - %call18 = call i32 @cudaMalloc(i8** bitcast (%struct.Point** @p to i8**), i64 %mul17) - store i32 %call18, i32* %err15, align 4 - %9 = load i32, i32* %err15, align 4 - %cmp19 = icmp ne i32 0, %9 - br i1 %cmp19, label %if.then20, label %if.end23 - -if.then20: ; preds = %do.body14 - %10 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %11 = load i32, i32* %err15, align 4 - %call21 = call i8* @cudaGetErrorString(i32 %11) - %call22 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %10, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.1, i64 0, i64 0), i32 93, i8* %call21) - call void @exit(i32 1) #15 - unreachable - -if.end23: ; preds = %do.body14 - br label %do.end24 - -do.end24: ; preds = %if.end23 - br label %do.body25 - -do.body25: ; preds = %do.end24 - %12 = load i32, i32* %num.addr, align 4 - %13 = load i32, i32* %dim.addr, align 4 - %mul27 = mul nsw i32 %12, %13 - %conv28 = sext i32 %mul27 to i64 - %mul29 = mul i64 %conv28, 4 - %call30 = call i32 @cudaMalloc(i8** bitcast (float** @coord_d to i8**), i64 %mul29) - store i32 %call30, i32* %err26, align 4 - %14 = load i32, i32* %err26, align 4 - %cmp31 = icmp ne i32 0, %14 - br i1 %cmp31, label %if.then32, label %if.end35 - -if.then32: ; preds = %do.body25 - %15 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %16 = load i32, i32* %err26, align 4 - %call33 = call i8* @cudaGetErrorString(i32 %16) - %call34 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %15, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.1, i64 0, i64 0), i32 94, i8* %call33) - call void @exit(i32 1) #15 - unreachable - -if.end35: ; preds = %do.body25 - br label %do.end36 - -do.end36: ; preds = %if.end35 - ret void -} - -declare dso_local i32 @cudaMalloc(i8**, i64) #1 - -declare dso_local i32 @fprintf(%struct._IO_FILE*, i8*, ...) #1 - -declare dso_local i8* @cudaGetErrorString(i32) #1 - -; Function Attrs: noreturn nounwind -declare dso_local void @exit(i32) #5 - -; Function Attrs: noinline nounwind optnone uwtable -define dso_local void @_Z12allocHostMemii(i32 %num, i32 %dim) #6 { -entry: - %num.addr = alloca i32, align 4 - %dim.addr = alloca i32, align 4 - store i32 %num, i32* %num.addr, align 4 - store i32 %dim, i32* %dim.addr, align 4 - %0 = load i32, i32* %num.addr, align 4 - %1 = load i32, i32* %dim.addr, align 4 - %mul = mul nsw i32 %0, %1 - %conv = sext i32 %mul to i64 - %mul1 = mul i64 %conv, 4 - %call = call noalias i8* @malloc(i64 %mul1) #2 - %2 = bitcast i8* %call to float* - store float* %2, float** @coord_h, align 8 - ret void -} - -; Function Attrs: nounwind -declare dso_local noalias i8* @malloc(i64) #7 - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z10freeDevMemv() #3 { -entry: - %err = alloca i32, align 4 - %err4 = alloca i32, align 4 - %err13 = alloca i32, align 4 - %err22 = alloca i32, align 4 - br label %do.body - -do.body: ; preds = %entry - %0 = load i32*, i32** @center_table_d, align 8 - %1 = bitcast i32* %0 to i8* - %call = call i32 @cudaFree(i8* %1) - store i32 %call, i32* %err, align 4 - %2 = load i32, i32* %err, align 4 - %cmp = icmp ne i32 0, %2 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %do.body - %3 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %4 = load i32, i32* %err, align 4 - %call1 = call i8* @cudaGetErrorString(i32 %4) - %call2 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %3, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.1, i64 0, i64 0), i32 108, i8* %call1) - call void @exit(i32 1) #15 - unreachable - -if.end: ; preds = %do.body - br label %do.end - -do.end: ; preds = %if.end - br label %do.body3 - -do.body3: ; preds = %do.end - %5 = load i8*, i8** @switch_membership_d, align 8 - %call5 = call i32 @cudaFree(i8* %5) - store i32 %call5, i32* %err4, align 4 - %6 = load i32, i32* %err4, align 4 - %cmp6 = icmp ne i32 0, %6 - br i1 %cmp6, label %if.then7, label %if.end10 - -if.then7: ; preds = %do.body3 - %7 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %8 = load i32, i32* %err4, align 4 - %call8 = call i8* @cudaGetErrorString(i32 %8) - %call9 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %7, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.1, i64 0, i64 0), i32 109, i8* %call8) - call void @exit(i32 1) #15 - unreachable - -if.end10: ; preds = %do.body3 - br label %do.end11 - -do.end11: ; preds = %if.end10 - br label %do.body12 - -do.body12: ; preds = %do.end11 - %9 = load %struct.Point*, %struct.Point** @p, align 8 - %10 = bitcast %struct.Point* %9 to i8* - %call14 = call i32 @cudaFree(i8* %10) - store i32 %call14, i32* %err13, align 4 - %11 = load i32, i32* %err13, align 4 - %cmp15 = icmp ne i32 0, %11 - br i1 %cmp15, label %if.then16, label %if.end19 - -if.then16: ; preds = %do.body12 - %12 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %13 = load i32, i32* %err13, align 4 - %call17 = call i8* @cudaGetErrorString(i32 %13) - %call18 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %12, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.1, i64 0, i64 0), i32 110, i8* %call17) - call void @exit(i32 1) #15 - unreachable - -if.end19: ; preds = %do.body12 - br label %do.end20 - -do.end20: ; preds = %if.end19 - br label %do.body21 - -do.body21: ; preds = %do.end20 - %14 = load float*, float** @coord_d, align 8 - %15 = bitcast float* %14 to i8* - %call23 = call i32 @cudaFree(i8* %15) - store i32 %call23, i32* %err22, align 4 - %16 = load i32, i32* %err22, align 4 - %cmp24 = icmp ne i32 0, %16 - br i1 %cmp24, label %if.then25, label %if.end28 - -if.then25: ; preds = %do.body21 - %17 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %18 = load i32, i32* %err22, align 4 - %call26 = call i8* @cudaGetErrorString(i32 %18) - %call27 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %17, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.1, i64 0, i64 0), i32 111, i8* %call26) - call void @exit(i32 1) #15 - unreachable - -if.end28: ; preds = %do.body21 - br label %do.end29 - -do.end29: ; preds = %if.end28 - ret void -} - -declare dso_local i32 @cudaFree(i8*) #1 - -; Function Attrs: noinline nounwind optnone uwtable -define dso_local void @_Z11freeHostMemv() #6 { -entry: - %0 = load float*, float** @coord_h, align 8 - %1 = bitcast float* %0 to i8* - call void @free(i8* %1) #2 - ret void -} - -; Function Attrs: nounwind -declare dso_local void @free(i8*) #7 - -; Function Attrs: noinline optnone uwtable -define dso_local float @_Z5pgainlP6PointsfPliPbPiS2_bPdS4_S4_S4_S4_S4_(i64 %x, %struct.Points* %points, float %z, i64* %numcenters, i32 %kmax, i8* %is_center, i32* %center_table, i8* %switch_membership, i1 zeroext %isCoordChanged, double* %serial_t, double* %cpu_to_gpu_t, double* %gpu_to_cpu_t, double* %alloc_t, double* %kernel_t, double* %free_t) #3 { -entry: - %x.addr = alloca i64, align 8 - %points.addr = alloca %struct.Points*, align 8 - %z.addr = alloca float, align 4 - %numcenters.addr = alloca i64*, align 8 - %kmax.addr = alloca i32, align 4 - %is_center.addr = alloca i8*, align 8 - %center_table.addr = alloca i32*, align 8 - %switch_membership.addr = alloca i8*, align 8 - %isCoordChanged.addr = alloca i8, align 1 - %serial_t.addr = alloca double*, align 8 - %cpu_to_gpu_t.addr = alloca double*, align 8 - %gpu_to_cpu_t.addr = alloca double*, align 8 - %alloc_t.addr = alloca double*, align 8 - %kernel_t.addr = alloca double*, align 8 - %free_t.addr = alloca double*, align 8 - %error = alloca i32, align 4 - %stride = alloca i32, align 4 - %K = alloca i32, align 4 - %num = alloca i32, align 4 - %dim = alloca i32, align 4 - %nThread = alloca i32, align 4 - %count = alloca i32, align 4 - %i = alloca i32, align 4 - %i17 = alloca i32, align 4 - %j = alloca i32, align 4 - %err = alloca i32, align 4 - %err57 = alloca i32, align 4 - %err70 = alloca i32, align 4 - %err81 = alloca i32, align 4 - %err93 = alloca i32, align 4 - %err104 = alloca i32, align 4 - %num_blocks = alloca i32, align 4 - %num_blocks_y = alloca i32, align 4 - %num_blocks_x = alloca i32, align 4 - %grid_size = alloca %struct.dim3, align 4 - %agg.tmp = alloca %struct.dim3, align 4 - %agg.tmp130 = alloca %struct.dim3, align 4 - %agg.tmp.coerce = alloca { i64, i32 }, align 4 - %agg.tmp130.coerce = alloca { i64, i32 }, align 4 - %err141 = alloca i32, align 4 - %err154 = alloca i32, align 4 - %number_of_centers_to_close = alloca i32, align 4 - %gl_cost_of_opening_x = alloca float, align 4 - %gl_lower = alloca float*, align 8 - %i167 = alloca i32, align 4 - %low = alloca float, align 4 - %j175 = alloca i32, align 4 - %i213 = alloca i32, align 4 - %close_center = alloca i8, align 1 - %agg.tmp231 = alloca %struct.Point, align 8 - %agg.tmp235 = alloca %struct.Point, align 8 - %i254 = alloca i32, align 4 - %err285 = alloca i32, align 4 - store i64 %x, i64* %x.addr, align 8 - store %struct.Points* %points, %struct.Points** %points.addr, align 8 - store float %z, float* %z.addr, align 4 - store i64* %numcenters, i64** %numcenters.addr, align 8 - store i32 %kmax, i32* %kmax.addr, align 4 - store i8* %is_center, i8** %is_center.addr, align 8 - store i32* %center_table, i32** %center_table.addr, align 8 - store i8* %switch_membership, i8** %switch_membership.addr, align 8 - %frombool = zext i1 %isCoordChanged to i8 - store i8 %frombool, i8* %isCoordChanged.addr, align 1 - store double* %serial_t, double** %serial_t.addr, align 8 - store double* %cpu_to_gpu_t, double** %cpu_to_gpu_t.addr, align 8 - store double* %gpu_to_cpu_t, double** %gpu_to_cpu_t.addr, align 8 - store double* %alloc_t, double** %alloc_t.addr, align 8 - store double* %kernel_t, double** %kernel_t.addr, align 8 - store double* %free_t, double** %free_t.addr, align 8 - %0 = load i64*, i64** %numcenters.addr, align 8 - %1 = load i64, i64* %0, align 8 - %add = add nsw i64 %1, 1 - %conv = trunc i64 %add to i32 - store i32 %conv, i32* %stride, align 4 - %2 = load i64*, i64** %numcenters.addr, align 8 - %3 = load i64, i64* %2, align 8 - %conv1 = trunc i64 %3 to i32 - store i32 %conv1, i32* %K, align 4 - %4 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %num2 = getelementptr inbounds %struct.Points, %struct.Points* %4, i32 0, i32 0 - %5 = load i64, i64* %num2, align 8 - %conv3 = trunc i64 %5 to i32 - store i32 %conv3, i32* %num, align 4 - %6 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %dim4 = getelementptr inbounds %struct.Points, %struct.Points* %6, i32 0, i32 1 - %7 = load i32, i32* %dim4, align 8 - store i32 %7, i32* %dim, align 4 - %8 = load i32, i32* %num, align 4 - store i32 %8, i32* %nThread, align 4 - %9 = load i32, i32* %stride, align 4 - %10 = load i32, i32* %nThread, align 4 - %add5 = add nsw i32 %10, 1 - %mul = mul nsw i32 %9, %add5 - %conv6 = sext i32 %mul to i64 - %mul7 = mul i64 %conv6, 4 - %call = call noalias i8* @malloc(i64 %mul7) #2 - %11 = bitcast i8* %call to float* - store float* %11, float** @work_mem_h, align 8 - %12 = load i32, i32* @_ZL4iter, align 4 - %cmp = icmp eq i32 %12, 0 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - %13 = load i32, i32* %num, align 4 - %14 = load i32, i32* %dim, align 4 - call void @_Z12allocHostMemii(i32 %13, i32 %14) - br label %if.end - -if.end: ; preds = %if.then, %entry - store i32 0, i32* %count, align 4 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %if.end - %15 = load i32, i32* %i, align 4 - %16 = load i32, i32* %num, align 4 - %cmp8 = icmp slt i32 %15, %16 - br i1 %cmp8, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %17 = load i8*, i8** %is_center.addr, align 8 - %18 = load i32, i32* %i, align 4 - %idxprom = sext i32 %18 to i64 - %arrayidx = getelementptr inbounds i8, i8* %17, i64 %idxprom - %19 = load i8, i8* %arrayidx, align 1 - %tobool = trunc i8 %19 to i1 - br i1 %tobool, label %if.then9, label %if.end12 - -if.then9: ; preds = %for.body - %20 = load i32, i32* %count, align 4 - %inc = add nsw i32 %20, 1 - store i32 %inc, i32* %count, align 4 - %21 = load i32*, i32** %center_table.addr, align 8 - %22 = load i32, i32* %i, align 4 - %idxprom10 = sext i32 %22 to i64 - %arrayidx11 = getelementptr inbounds i32, i32* %21, i64 %idxprom10 - store i32 %20, i32* %arrayidx11, align 4 - br label %if.end12 - -if.end12: ; preds = %if.then9, %for.body - br label %for.inc - -for.inc: ; preds = %if.end12 - %23 = load i32, i32* %i, align 4 - %inc13 = add nsw i32 %23, 1 - store i32 %inc13, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %24 = load i8, i8* %isCoordChanged.addr, align 1 - %tobool14 = trunc i8 %24 to i1 - br i1 %tobool14, label %if.then16, label %lor.lhs.false - -lor.lhs.false: ; preds = %for.end - %25 = load i32, i32* @_ZL4iter, align 4 - %cmp15 = icmp eq i32 %25, 0 - br i1 %cmp15, label %if.then16, label %if.end38 - -if.then16: ; preds = %lor.lhs.false, %for.end - store i32 0, i32* %i17, align 4 - br label %for.cond18 - -for.cond18: ; preds = %for.inc35, %if.then16 - %26 = load i32, i32* %i17, align 4 - %27 = load i32, i32* %dim, align 4 - %cmp19 = icmp slt i32 %26, %27 - br i1 %cmp19, label %for.body20, label %for.end37 - -for.body20: ; preds = %for.cond18 - store i32 0, i32* %j, align 4 - br label %for.cond21 - -for.cond21: ; preds = %for.inc32, %for.body20 - %28 = load i32, i32* %j, align 4 - %29 = load i32, i32* %num, align 4 - %cmp22 = icmp slt i32 %28, %29 - br i1 %cmp22, label %for.body23, label %for.end34 - -for.body23: ; preds = %for.cond21 - %30 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p = getelementptr inbounds %struct.Points, %struct.Points* %30, i32 0, i32 2 - %31 = load %struct.Point*, %struct.Point** %p, align 8 - %32 = load i32, i32* %j, align 4 - %idxprom24 = sext i32 %32 to i64 - %arrayidx25 = getelementptr inbounds %struct.Point, %struct.Point* %31, i64 %idxprom24 - %coord = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx25, i32 0, i32 1 - %33 = load float*, float** %coord, align 8 - %34 = load i32, i32* %i17, align 4 - %idxprom26 = sext i32 %34 to i64 - %arrayidx27 = getelementptr inbounds float, float* %33, i64 %idxprom26 - %35 = load float, float* %arrayidx27, align 4 - %36 = load float*, float** @coord_h, align 8 - %37 = load i32, i32* %num, align 4 - %38 = load i32, i32* %i17, align 4 - %mul28 = mul nsw i32 %37, %38 - %39 = load i32, i32* %j, align 4 - %add29 = add nsw i32 %mul28, %39 - %idxprom30 = sext i32 %add29 to i64 - %arrayidx31 = getelementptr inbounds float, float* %36, i64 %idxprom30 - store float %35, float* %arrayidx31, align 4 - br label %for.inc32 - -for.inc32: ; preds = %for.body23 - %40 = load i32, i32* %j, align 4 - %inc33 = add nsw i32 %40, 1 - store i32 %inc33, i32* %j, align 4 - br label %for.cond21 - -for.end34: ; preds = %for.cond21 - br label %for.inc35 - -for.inc35: ; preds = %for.end34 - %41 = load i32, i32* %i17, align 4 - %inc36 = add nsw i32 %41, 1 - store i32 %inc36, i32* %i17, align 4 - br label %for.cond18 - -for.end37: ; preds = %for.cond18 - br label %if.end38 - -if.end38: ; preds = %for.end37, %lor.lhs.false - br label %do.body - -do.body: ; preds = %if.end38 - %42 = load i32, i32* %stride, align 4 - %43 = load i32, i32* %nThread, align 4 - %add39 = add nsw i32 %43, 1 - %mul40 = mul nsw i32 %42, %add39 - %conv41 = sext i32 %mul40 to i64 - %mul42 = mul i64 %conv41, 4 - %call43 = call i32 @cudaMalloc(i8** bitcast (float** @work_mem_d to i8**), i64 %mul42) - store i32 %call43, i32* %err, align 4 - %44 = load i32, i32* %err, align 4 - %cmp44 = icmp ne i32 0, %44 - br i1 %cmp44, label %if.then45, label %if.end48 - -if.then45: ; preds = %do.body - %45 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %46 = load i32, i32* %err, align 4 - %call46 = call i8* @cudaGetErrorString(i32 %46) - %call47 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %45, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.1, i64 0, i64 0), i32 184, i8* %call46) - call void @exit(i32 1) #15 - unreachable - -if.end48: ; preds = %do.body - br label %do.end - -do.end: ; preds = %if.end48 - %47 = load i32, i32* @_ZL4iter, align 4 - %cmp49 = icmp eq i32 %47, 0 - br i1 %cmp49, label %if.then50, label %if.end51 - -if.then50: ; preds = %do.end - %48 = load i32, i32* %num, align 4 - %49 = load i32, i32* %dim, align 4 - call void @_Z11allocDevMemii(i32 %48, i32 %49) - br label %if.end51 - -if.end51: ; preds = %if.then50, %do.end - %50 = load i8, i8* %isCoordChanged.addr, align 1 - %tobool52 = trunc i8 %50 to i1 - br i1 %tobool52, label %if.then55, label %lor.lhs.false53 - -lor.lhs.false53: ; preds = %if.end51 - %51 = load i32, i32* @_ZL4iter, align 4 - %cmp54 = icmp eq i32 %51, 0 - br i1 %cmp54, label %if.then55, label %if.end68 - -if.then55: ; preds = %lor.lhs.false53, %if.end51 - br label %do.body56 - -do.body56: ; preds = %if.then55 - %52 = load float*, float** @coord_d, align 8 - %53 = bitcast float* %52 to i8* - %54 = load float*, float** @coord_h, align 8 - %55 = bitcast float* %54 to i8* - %56 = load i32, i32* %num, align 4 - %57 = load i32, i32* %dim, align 4 - %mul58 = mul nsw i32 %56, %57 - %conv59 = sext i32 %mul58 to i64 - %mul60 = mul i64 %conv59, 4 - %call61 = call i32 @cudaMemcpy(i8* %53, i8* %55, i64 %mul60, i32 1) - store i32 %call61, i32* %err57, align 4 - %58 = load i32, i32* %err57, align 4 - %cmp62 = icmp ne i32 0, %58 - br i1 %cmp62, label %if.then63, label %if.end66 - -if.then63: ; preds = %do.body56 - %59 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %60 = load i32, i32* %err57, align 4 - %call64 = call i8* @cudaGetErrorString(i32 %60) - %call65 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %59, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.1, i64 0, i64 0), i32 205, i8* %call64) - call void @exit(i32 1) #15 - unreachable - -if.end66: ; preds = %do.body56 - br label %do.end67 - -do.end67: ; preds = %if.end66 - br label %if.end68 - -if.end68: ; preds = %do.end67, %lor.lhs.false53 - br label %do.body69 - -do.body69: ; preds = %if.end68 - %61 = load i32*, i32** @center_table_d, align 8 - %62 = bitcast i32* %61 to i8* - %63 = load i32*, i32** %center_table.addr, align 8 - %64 = bitcast i32* %63 to i8* - %65 = load i32, i32* %num, align 4 - %conv71 = sext i32 %65 to i64 - %mul72 = mul i64 %conv71, 4 - %call73 = call i32 @cudaMemcpy(i8* %62, i8* %64, i64 %mul72, i32 1) - store i32 %call73, i32* %err70, align 4 - %66 = load i32, i32* %err70, align 4 - %cmp74 = icmp ne i32 0, %66 - br i1 %cmp74, label %if.then75, label %if.end78 - -if.then75: ; preds = %do.body69 - %67 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %68 = load i32, i32* %err70, align 4 - %call76 = call i8* @cudaGetErrorString(i32 %68) - %call77 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %67, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.1, i64 0, i64 0), i32 208, i8* %call76) - call void @exit(i32 1) #15 - unreachable - -if.end78: ; preds = %do.body69 - br label %do.end79 - -do.end79: ; preds = %if.end78 - br label %do.body80 - -do.body80: ; preds = %do.end79 - %69 = load %struct.Point*, %struct.Point** @p, align 8 - %70 = bitcast %struct.Point* %69 to i8* - %71 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p82 = getelementptr inbounds %struct.Points, %struct.Points* %71, i32 0, i32 2 - %72 = load %struct.Point*, %struct.Point** %p82, align 8 - %73 = bitcast %struct.Point* %72 to i8* - %74 = load i32, i32* %num, align 4 - %conv83 = sext i32 %74 to i64 - %mul84 = mul i64 %conv83, 32 - %call85 = call i32 @cudaMemcpy(i8* %70, i8* %73, i64 %mul84, i32 1) - store i32 %call85, i32* %err81, align 4 - %75 = load i32, i32* %err81, align 4 - %cmp86 = icmp ne i32 0, %75 - br i1 %cmp86, label %if.then87, label %if.end90 - -if.then87: ; preds = %do.body80 - %76 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %77 = load i32, i32* %err81, align 4 - %call88 = call i8* @cudaGetErrorString(i32 %77) - %call89 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %76, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.1, i64 0, i64 0), i32 210, i8* %call88) - call void @exit(i32 1) #15 - unreachable - -if.end90: ; preds = %do.body80 - br label %do.end91 - -do.end91: ; preds = %if.end90 - br label %do.body92 - -do.body92: ; preds = %do.end91 - %78 = load i8*, i8** @switch_membership_d, align 8 - %79 = load i32, i32* %num, align 4 - %conv94 = sext i32 %79 to i64 - %mul95 = mul i64 %conv94, 1 - %call96 = call i32 @cudaMemset(i8* %78, i32 0, i64 %mul95) - store i32 %call96, i32* %err93, align 4 - %80 = load i32, i32* %err93, align 4 - %cmp97 = icmp ne i32 0, %80 - br i1 %cmp97, label %if.then98, label %if.end101 - -if.then98: ; preds = %do.body92 - %81 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %82 = load i32, i32* %err93, align 4 - %call99 = call i8* @cudaGetErrorString(i32 %82) - %call100 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %81, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.1, i64 0, i64 0), i32 213, i8* %call99) - call void @exit(i32 1) #15 - unreachable - -if.end101: ; preds = %do.body92 - br label %do.end102 - -do.end102: ; preds = %if.end101 - br label %do.body103 - -do.body103: ; preds = %do.end102 - %83 = load float*, float** @work_mem_d, align 8 - %84 = bitcast float* %83 to i8* - %85 = load i32, i32* %stride, align 4 - %86 = load i32, i32* %nThread, align 4 - %add105 = add nsw i32 %86, 1 - %mul106 = mul nsw i32 %85, %add105 - %conv107 = sext i32 %mul106 to i64 - %mul108 = mul i64 %conv107, 4 - %call109 = call i32 @cudaMemset(i8* %84, i32 0, i64 %mul108) - store i32 %call109, i32* %err104, align 4 - %87 = load i32, i32* %err104, align 4 - %cmp110 = icmp ne i32 0, %87 - br i1 %cmp110, label %if.then111, label %if.end114 - -if.then111: ; preds = %do.body103 - %88 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %89 = load i32, i32* %err104, align 4 - %call112 = call i8* @cudaGetErrorString(i32 %89) - %call113 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %88, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.1, i64 0, i64 0), i32 215, i8* %call112) - call void @exit(i32 1) #15 - unreachable - -if.end114: ; preds = %do.body103 - br label %do.end115 - -do.end115: ; preds = %if.end114 - %90 = load i32, i32* %num, align 4 - %add116 = add nsw i32 %90, 512 - %sub = sub nsw i32 %add116, 1 - %conv117 = sitofp i32 %sub to float - %div = fdiv float %conv117, 5.120000e+02 - %conv118 = fptosi float %div to i32 - store i32 %conv118, i32* %num_blocks, align 4 - %91 = load i32, i32* %num_blocks, align 4 - %add119 = add nsw i32 %91, 65536 - %sub120 = sub nsw i32 %add119, 1 - %conv121 = sitofp i32 %sub120 to float - %div122 = fdiv float %conv121, 6.553600e+04 - %conv123 = fptosi float %div122 to i32 - store i32 %conv123, i32* %num_blocks_y, align 4 - %92 = load i32, i32* %num_blocks, align 4 - %93 = load i32, i32* %num_blocks_y, align 4 - %add124 = add nsw i32 %92, %93 - %sub125 = sub nsw i32 %add124, 1 - %conv126 = sitofp i32 %sub125 to float - %94 = load i32, i32* %num_blocks_y, align 4 - %conv127 = sitofp i32 %94 to float - %div128 = fdiv float %conv126, %conv127 - %conv129 = fptosi float %div128 to i32 - store i32 %conv129, i32* %num_blocks_x, align 4 - %95 = load i32, i32* %num_blocks_x, align 4 - %96 = load i32, i32* %num_blocks_y, align 4 - call void @_ZN4dim3C2Ejjj(%struct.dim3* %grid_size, i32 %95, i32 %96, i32 1) - %97 = bitcast %struct.dim3* %agg.tmp to i8* - %98 = bitcast %struct.dim3* %grid_size to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %97, i8* align 4 %98, i64 12, i1 false) - call void @_ZN4dim3C2Ejjj(%struct.dim3* %agg.tmp130, i32 512, i32 1, i32 1) - %99 = bitcast { i64, i32 }* %agg.tmp.coerce to i8* - %100 = bitcast %struct.dim3* %agg.tmp to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %99, i8* align 4 %100, i64 12, i1 false) - %101 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 0 - %102 = load i64, i64* %101, align 4 - %103 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp.coerce, i32 0, i32 1 - %104 = load i32, i32* %103, align 4 - %105 = bitcast { i64, i32 }* %agg.tmp130.coerce to i8* - %106 = bitcast %struct.dim3* %agg.tmp130 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %105, i8* align 4 %106, i64 12, i1 false) - %107 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp130.coerce, i32 0, i32 0 - %108 = load i64, i64* %107, align 4 - %109 = getelementptr inbounds { i64, i32 }, { i64, i32 }* %agg.tmp130.coerce, i32 0, i32 1 - %110 = load i32, i32* %109, align 4 - %call131 = call i32 @__cudaPushCallConfiguration(i64 %102, i32 %104, i64 %108, i32 %110, i64 0, i8* null) - %tobool132 = icmp ne i32 %call131, 0 - br i1 %tobool132, label %kcall.end, label %kcall.configok - -kcall.configok: ; preds = %do.end115 - %111 = load i32, i32* %num, align 4 - %112 = load i32, i32* %dim, align 4 - %113 = load i64, i64* %x.addr, align 8 - %114 = load %struct.Point*, %struct.Point** @p, align 8 - %115 = load i32, i32* %K, align 4 - %116 = load i32, i32* %stride, align 4 - %117 = load float*, float** @coord_d, align 8 - %118 = load float*, float** @work_mem_d, align 8 - %119 = load i32*, i32** @center_table_d, align 8 - %120 = load i8*, i8** @switch_membership_d, align 8 - call void @_Z19kernel_compute_costiilP5PointiiPfS1_PiPb(i32 %111, i32 %112, i64 %113, %struct.Point* %114, i32 %115, i32 %116, float* %117, float* %118, i32* %119, i8* %120) - br label %kcall.end - -kcall.end: ; preds = %kcall.configok, %do.end115 - %call133 = call i32 @cudaThreadSynchronize() - %call134 = call i32 @cudaGetLastError() - store i32 %call134, i32* %error, align 4 - %121 = load i32, i32* %error, align 4 - %cmp135 = icmp ne i32 %121, 0 - br i1 %cmp135, label %if.then136, label %if.end139 - -if.then136: ; preds = %kcall.end - %122 = load i32, i32* %error, align 4 - %call137 = call i8* @cudaGetErrorString(i32 %122) - %call138 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str.2, i64 0, i64 0), i8* %call137) - call void @exit(i32 1) #15 - unreachable - -if.end139: ; preds = %kcall.end - br label %do.body140 - -do.body140: ; preds = %if.end139 - %123 = load float*, float** @work_mem_h, align 8 - %124 = bitcast float* %123 to i8* - %125 = load float*, float** @work_mem_d, align 8 - %126 = bitcast float* %125 to i8* - %127 = load i32, i32* %stride, align 4 - %128 = load i32, i32* %nThread, align 4 - %add142 = add nsw i32 %128, 1 - %mul143 = mul nsw i32 %127, %add142 - %conv144 = sext i32 %mul143 to i64 - %mul145 = mul i64 %conv144, 4 - %call146 = call i32 @cudaMemcpy(i8* %124, i8* %126, i64 %mul145, i32 2) - store i32 %call146, i32* %err141, align 4 - %129 = load i32, i32* %err141, align 4 - %cmp147 = icmp ne i32 0, %129 - br i1 %cmp147, label %if.then148, label %if.end151 - -if.then148: ; preds = %do.body140 - %130 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %131 = load i32, i32* %err141, align 4 - %call149 = call i8* @cudaGetErrorString(i32 %131) - %call150 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %130, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.1, i64 0, i64 0), i32 273, i8* %call149) - call void @exit(i32 1) #15 - unreachable - -if.end151: ; preds = %do.body140 - br label %do.end152 - -do.end152: ; preds = %if.end151 - br label %do.body153 - -do.body153: ; preds = %do.end152 - %132 = load i8*, i8** %switch_membership.addr, align 8 - %133 = load i8*, i8** @switch_membership_d, align 8 - %134 = load i32, i32* %num, align 4 - %conv155 = sext i32 %134 to i64 - %mul156 = mul i64 %conv155, 1 - %call157 = call i32 @cudaMemcpy(i8* %132, i8* %133, i64 %mul156, i32 2) - store i32 %call157, i32* %err154, align 4 - %135 = load i32, i32* %err154, align 4 - %cmp158 = icmp ne i32 0, %135 - br i1 %cmp158, label %if.then159, label %if.end162 - -if.then159: ; preds = %do.body153 - %136 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %137 = load i32, i32* %err154, align 4 - %call160 = call i8* @cudaGetErrorString(i32 %137) - %call161 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %136, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.1, i64 0, i64 0), i32 275, i8* %call160) - call void @exit(i32 1) #15 - unreachable - -if.end162: ; preds = %do.body153 - br label %do.end163 - -do.end163: ; preds = %if.end162 - store i32 0, i32* %number_of_centers_to_close, align 4 - %138 = load float, float* %z.addr, align 4 - store float %138, float* %gl_cost_of_opening_x, align 4 - %139 = load float*, float** @work_mem_h, align 8 - %140 = load i32, i32* %stride, align 4 - %141 = load i32, i32* %nThread, align 4 - %mul164 = mul nsw i32 %140, %141 - %idxprom165 = sext i32 %mul164 to i64 - %arrayidx166 = getelementptr inbounds float, float* %139, i64 %idxprom165 - store float* %arrayidx166, float** %gl_lower, align 8 - store i32 0, i32* %i167, align 4 - br label %for.cond168 - -for.cond168: ; preds = %for.inc208, %do.end163 - %142 = load i32, i32* %i167, align 4 - %143 = load i32, i32* %num, align 4 - %cmp169 = icmp slt i32 %142, %143 - br i1 %cmp169, label %for.body170, label %for.end210 - -for.body170: ; preds = %for.cond168 - %144 = load i8*, i8** %is_center.addr, align 8 - %145 = load i32, i32* %i167, align 4 - %idxprom171 = sext i32 %145 to i64 - %arrayidx172 = getelementptr inbounds i8, i8* %144, i64 %idxprom171 - %146 = load i8, i8* %arrayidx172, align 1 - %tobool173 = trunc i8 %146 to i1 - br i1 %tobool173, label %if.then174, label %if.end202 - -if.then174: ; preds = %for.body170 - %147 = load float, float* %z.addr, align 4 - store float %147, float* %low, align 4 - store i32 0, i32* %j175, align 4 - br label %for.cond176 - -for.cond176: ; preds = %for.inc186, %if.then174 - %148 = load i32, i32* %j175, align 4 - %149 = load i32, i32* %num, align 4 - %cmp177 = icmp slt i32 %148, %149 - br i1 %cmp177, label %for.body178, label %for.end188 - -for.body178: ; preds = %for.cond176 - %150 = load float*, float** @work_mem_h, align 8 - %151 = load i32, i32* %j175, align 4 - %152 = load i32, i32* %stride, align 4 - %mul179 = mul nsw i32 %151, %152 - %153 = load i32*, i32** %center_table.addr, align 8 - %154 = load i32, i32* %i167, align 4 - %idxprom180 = sext i32 %154 to i64 - %arrayidx181 = getelementptr inbounds i32, i32* %153, i64 %idxprom180 - %155 = load i32, i32* %arrayidx181, align 4 - %add182 = add nsw i32 %mul179, %155 - %idxprom183 = sext i32 %add182 to i64 - %arrayidx184 = getelementptr inbounds float, float* %150, i64 %idxprom183 - %156 = load float, float* %arrayidx184, align 4 - %157 = load float, float* %low, align 4 - %add185 = fadd contract float %157, %156 - store float %add185, float* %low, align 4 - br label %for.inc186 - -for.inc186: ; preds = %for.body178 - %158 = load i32, i32* %j175, align 4 - %inc187 = add nsw i32 %158, 1 - store i32 %inc187, i32* %j175, align 4 - br label %for.cond176 - -for.end188: ; preds = %for.cond176 - %159 = load float, float* %low, align 4 - %160 = load float*, float** %gl_lower, align 8 - %161 = load i32*, i32** %center_table.addr, align 8 - %162 = load i32, i32* %i167, align 4 - %idxprom189 = sext i32 %162 to i64 - %arrayidx190 = getelementptr inbounds i32, i32* %161, i64 %idxprom189 - %163 = load i32, i32* %arrayidx190, align 4 - %idxprom191 = sext i32 %163 to i64 - %arrayidx192 = getelementptr inbounds float, float* %160, i64 %idxprom191 - store float %159, float* %arrayidx192, align 4 - %164 = load float, float* %low, align 4 - %cmp193 = fcmp ogt float %164, 0.000000e+00 - br i1 %cmp193, label %if.then194, label %if.end201 - -if.then194: ; preds = %for.end188 - %165 = load i32, i32* %number_of_centers_to_close, align 4 - %inc195 = add nsw i32 %165, 1 - store i32 %inc195, i32* %number_of_centers_to_close, align 4 - %166 = load float, float* %low, align 4 - %167 = load float*, float** @work_mem_h, align 8 - %168 = load i32, i32* %i167, align 4 - %169 = load i32, i32* %stride, align 4 - %mul196 = mul nsw i32 %168, %169 - %170 = load i32, i32* %K, align 4 - %add197 = add nsw i32 %mul196, %170 - %idxprom198 = sext i32 %add197 to i64 - %arrayidx199 = getelementptr inbounds float, float* %167, i64 %idxprom198 - %171 = load float, float* %arrayidx199, align 4 - %sub200 = fsub contract float %171, %166 - store float %sub200, float* %arrayidx199, align 4 - br label %if.end201 - -if.end201: ; preds = %if.then194, %for.end188 - br label %if.end202 - -if.end202: ; preds = %if.end201, %for.body170 - %172 = load float*, float** @work_mem_h, align 8 - %173 = load i32, i32* %i167, align 4 - %174 = load i32, i32* %stride, align 4 - %mul203 = mul nsw i32 %173, %174 - %175 = load i32, i32* %K, align 4 - %add204 = add nsw i32 %mul203, %175 - %idxprom205 = sext i32 %add204 to i64 - %arrayidx206 = getelementptr inbounds float, float* %172, i64 %idxprom205 - %176 = load float, float* %arrayidx206, align 4 - %177 = load float, float* %gl_cost_of_opening_x, align 4 - %add207 = fadd contract float %177, %176 - store float %add207, float* %gl_cost_of_opening_x, align 4 - br label %for.inc208 - -for.inc208: ; preds = %if.end202 - %178 = load i32, i32* %i167, align 4 - %inc209 = add nsw i32 %178, 1 - store i32 %inc209, i32* %i167, align 4 - br label %for.cond168 - -for.end210: ; preds = %for.cond168 - %179 = load float, float* %gl_cost_of_opening_x, align 4 - %cmp211 = fcmp olt float %179, 0.000000e+00 - br i1 %cmp211, label %if.then212, label %if.else - -if.then212: ; preds = %for.end210 - store i32 0, i32* %i213, align 4 - br label %for.cond214 - -for.cond214: ; preds = %for.inc251, %if.then212 - %180 = load i32, i32* %i213, align 4 - %181 = load i32, i32* %num, align 4 - %cmp215 = icmp slt i32 %180, %181 - br i1 %cmp215, label %for.body216, label %for.end253 - -for.body216: ; preds = %for.cond214 - %182 = load float*, float** %gl_lower, align 8 - %183 = load i32*, i32** %center_table.addr, align 8 - %184 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p217 = getelementptr inbounds %struct.Points, %struct.Points* %184, i32 0, i32 2 - %185 = load %struct.Point*, %struct.Point** %p217, align 8 - %186 = load i32, i32* %i213, align 4 - %idxprom218 = sext i32 %186 to i64 - %arrayidx219 = getelementptr inbounds %struct.Point, %struct.Point* %185, i64 %idxprom218 - %assign = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx219, i32 0, i32 2 - %187 = load i64, i64* %assign, align 8 - %arrayidx220 = getelementptr inbounds i32, i32* %183, i64 %187 - %188 = load i32, i32* %arrayidx220, align 4 - %idxprom221 = sext i32 %188 to i64 - %arrayidx222 = getelementptr inbounds float, float* %182, i64 %idxprom221 - %189 = load float, float* %arrayidx222, align 4 - %cmp223 = fcmp ogt float %189, 0.000000e+00 - %frombool224 = zext i1 %cmp223 to i8 - store i8 %frombool224, i8* %close_center, align 1 - %190 = load i8*, i8** %switch_membership.addr, align 8 - %191 = load i32, i32* %i213, align 4 - %idxprom225 = sext i32 %191 to i64 - %arrayidx226 = getelementptr inbounds i8, i8* %190, i64 %idxprom225 - %192 = load i8, i8* %arrayidx226, align 1 - %tobool227 = trunc i8 %192 to i1 - br i1 %tobool227, label %if.then230, label %lor.lhs.false228 - -lor.lhs.false228: ; preds = %for.body216 - %193 = load i8, i8* %close_center, align 1 - %tobool229 = trunc i8 %193 to i1 - br i1 %tobool229, label %if.then230, label %if.end250 - -if.then230: ; preds = %lor.lhs.false228, %for.body216 - %194 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p232 = getelementptr inbounds %struct.Points, %struct.Points* %194, i32 0, i32 2 - %195 = load %struct.Point*, %struct.Point** %p232, align 8 - %196 = load i32, i32* %i213, align 4 - %idxprom233 = sext i32 %196 to i64 - %arrayidx234 = getelementptr inbounds %struct.Point, %struct.Point* %195, i64 %idxprom233 - %197 = bitcast %struct.Point* %agg.tmp231 to i8* - %198 = bitcast %struct.Point* %arrayidx234 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %197, i8* align 8 %198, i64 32, i1 false) - %199 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p236 = getelementptr inbounds %struct.Points, %struct.Points* %199, i32 0, i32 2 - %200 = load %struct.Point*, %struct.Point** %p236, align 8 - %201 = load i64, i64* %x.addr, align 8 - %arrayidx237 = getelementptr inbounds %struct.Point, %struct.Point* %200, i64 %201 - %202 = bitcast %struct.Point* %agg.tmp235 to i8* - %203 = bitcast %struct.Point* %arrayidx237 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %202, i8* align 8 %203, i64 32, i1 false) - %204 = load i32, i32* %dim, align 4 - %call238 = call float @_Z4dist5PointS_i(%struct.Point* byval(%struct.Point) align 8 %agg.tmp231, %struct.Point* byval(%struct.Point) align 8 %agg.tmp235, i32 %204) - %205 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p239 = getelementptr inbounds %struct.Points, %struct.Points* %205, i32 0, i32 2 - %206 = load %struct.Point*, %struct.Point** %p239, align 8 - %207 = load i32, i32* %i213, align 4 - %idxprom240 = sext i32 %207 to i64 - %arrayidx241 = getelementptr inbounds %struct.Point, %struct.Point* %206, i64 %idxprom240 - %weight = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx241, i32 0, i32 0 - %208 = load float, float* %weight, align 8 - %mul242 = fmul contract float %call238, %208 - %209 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p243 = getelementptr inbounds %struct.Points, %struct.Points* %209, i32 0, i32 2 - %210 = load %struct.Point*, %struct.Point** %p243, align 8 - %211 = load i32, i32* %i213, align 4 - %idxprom244 = sext i32 %211 to i64 - %arrayidx245 = getelementptr inbounds %struct.Point, %struct.Point* %210, i64 %idxprom244 - %cost = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx245, i32 0, i32 3 - store float %mul242, float* %cost, align 8 - %212 = load i64, i64* %x.addr, align 8 - %213 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p246 = getelementptr inbounds %struct.Points, %struct.Points* %213, i32 0, i32 2 - %214 = load %struct.Point*, %struct.Point** %p246, align 8 - %215 = load i32, i32* %i213, align 4 - %idxprom247 = sext i32 %215 to i64 - %arrayidx248 = getelementptr inbounds %struct.Point, %struct.Point* %214, i64 %idxprom247 - %assign249 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx248, i32 0, i32 2 - store i64 %212, i64* %assign249, align 8 - br label %if.end250 - -if.end250: ; preds = %if.then230, %lor.lhs.false228 - br label %for.inc251 - -for.inc251: ; preds = %if.end250 - %216 = load i32, i32* %i213, align 4 - %inc252 = add nsw i32 %216, 1 - store i32 %inc252, i32* %i213, align 4 - br label %for.cond214 - -for.end253: ; preds = %for.cond214 - store i32 0, i32* %i254, align 4 - br label %for.cond255 - -for.cond255: ; preds = %for.inc270, %for.end253 - %217 = load i32, i32* %i254, align 4 - %218 = load i32, i32* %num, align 4 - %cmp256 = icmp slt i32 %217, %218 - br i1 %cmp256, label %for.body257, label %for.end272 - -for.body257: ; preds = %for.cond255 - %219 = load i8*, i8** %is_center.addr, align 8 - %220 = load i32, i32* %i254, align 4 - %idxprom258 = sext i32 %220 to i64 - %arrayidx259 = getelementptr inbounds i8, i8* %219, i64 %idxprom258 - %221 = load i8, i8* %arrayidx259, align 1 - %tobool260 = trunc i8 %221 to i1 - br i1 %tobool260, label %land.lhs.true, label %if.end269 - -land.lhs.true: ; preds = %for.body257 - %222 = load float*, float** %gl_lower, align 8 - %223 = load i32*, i32** %center_table.addr, align 8 - %224 = load i32, i32* %i254, align 4 - %idxprom261 = sext i32 %224 to i64 - %arrayidx262 = getelementptr inbounds i32, i32* %223, i64 %idxprom261 - %225 = load i32, i32* %arrayidx262, align 4 - %idxprom263 = sext i32 %225 to i64 - %arrayidx264 = getelementptr inbounds float, float* %222, i64 %idxprom263 - %226 = load float, float* %arrayidx264, align 4 - %cmp265 = fcmp ogt float %226, 0.000000e+00 - br i1 %cmp265, label %if.then266, label %if.end269 - -if.then266: ; preds = %land.lhs.true - %227 = load i8*, i8** %is_center.addr, align 8 - %228 = load i32, i32* %i254, align 4 - %idxprom267 = sext i32 %228 to i64 - %arrayidx268 = getelementptr inbounds i8, i8* %227, i64 %idxprom267 - store i8 0, i8* %arrayidx268, align 1 - br label %if.end269 - -if.end269: ; preds = %if.then266, %land.lhs.true, %for.body257 - br label %for.inc270 - -for.inc270: ; preds = %if.end269 - %229 = load i32, i32* %i254, align 4 - %inc271 = add nsw i32 %229, 1 - store i32 %inc271, i32* %i254, align 4 - br label %for.cond255 - -for.end272: ; preds = %for.cond255 - %230 = load i64, i64* %x.addr, align 8 - %cmp273 = icmp sge i64 %230, 0 - br i1 %cmp273, label %land.lhs.true274, label %if.end279 - -land.lhs.true274: ; preds = %for.end272 - %231 = load i64, i64* %x.addr, align 8 - %232 = load i32, i32* %num, align 4 - %conv275 = sext i32 %232 to i64 - %cmp276 = icmp slt i64 %231, %conv275 - br i1 %cmp276, label %if.then277, label %if.end279 - -if.then277: ; preds = %land.lhs.true274 - %233 = load i8*, i8** %is_center.addr, align 8 - %234 = load i64, i64* %x.addr, align 8 - %arrayidx278 = getelementptr inbounds i8, i8* %233, i64 %234 - store i8 1, i8* %arrayidx278, align 1 - br label %if.end279 - -if.end279: ; preds = %if.then277, %land.lhs.true274, %for.end272 - %235 = load i64*, i64** %numcenters.addr, align 8 - %236 = load i64, i64* %235, align 8 - %add280 = add nsw i64 %236, 1 - %237 = load i32, i32* %number_of_centers_to_close, align 4 - %conv281 = sext i32 %237 to i64 - %sub282 = sub nsw i64 %add280, %conv281 - %238 = load i64*, i64** %numcenters.addr, align 8 - store i64 %sub282, i64* %238, align 8 - br label %if.end283 - -if.else: ; preds = %for.end210 - store float 0.000000e+00, float* %gl_cost_of_opening_x, align 4 - br label %if.end283 - -if.end283: ; preds = %if.else, %if.end279 - %239 = load float*, float** @work_mem_h, align 8 - %240 = bitcast float* %239 to i8* - call void @free(i8* %240) #2 - br label %do.body284 - -do.body284: ; preds = %if.end283 - %241 = load float*, float** @work_mem_d, align 8 - %242 = bitcast float* %241 to i8* - %call286 = call i32 @cudaFree(i8* %242) - store i32 %call286, i32* %err285, align 4 - %243 = load i32, i32* %err285, align 4 - %cmp287 = icmp ne i32 0, %243 - br i1 %cmp287, label %if.then288, label %if.end291 - -if.then288: ; preds = %do.body284 - %244 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %245 = load i32, i32* %err285, align 4 - %call289 = call i8* @cudaGetErrorString(i32 %245) - %call290 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %244, i8* getelementptr inbounds ([42 x i8], [42 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.1, i64 0, i64 0), i32 353, i8* %call289) - call void @exit(i32 1) #15 - unreachable - -if.end291: ; preds = %do.body284 - br label %do.end292 - -do.end292: ; preds = %if.end291 - %246 = load i32, i32* @_ZL4iter, align 4 - %inc293 = add nsw i32 %246, 1 - store i32 %inc293, i32* @_ZL4iter, align 4 - %247 = load float, float* %gl_cost_of_opening_x, align 4 - %fneg = fneg float %247 - ret float %fneg -} - -declare dso_local i32 @cudaMemcpy(i8*, i8*, i64, i32) #1 - -declare dso_local i32 @cudaMemset(i8*, i32, i64) #1 - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN4dim3C2Ejjj(%struct.dim3* %this, i32 %vx, i32 %vy, i32 %vz) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %struct.dim3*, align 8 - %vx.addr = alloca i32, align 4 - %vy.addr = alloca i32, align 4 - %vz.addr = alloca i32, align 4 - store %struct.dim3* %this, %struct.dim3** %this.addr, align 8 - store i32 %vx, i32* %vx.addr, align 4 - store i32 %vy, i32* %vy.addr, align 4 - store i32 %vz, i32* %vz.addr, align 4 - %this1 = load %struct.dim3*, %struct.dim3** %this.addr, align 8 - %x = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 0 - %0 = load i32, i32* %vx.addr, align 4 - store i32 %0, i32* %x, align 4 - %y = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 1 - %1 = load i32, i32* %vy.addr, align 4 - store i32 %1, i32* %y, align 4 - %z = getelementptr inbounds %struct.dim3, %struct.dim3* %this1, i32 0, i32 2 - %2 = load i32, i32* %vz.addr, align 4 - store i32 %2, i32* %z, align 4 - ret void -} - -declare dso_local i32 @__cudaPushCallConfiguration(i64, i32, i64, i32, i64, i8*) #1 - -declare dso_local i32 @cudaThreadSynchronize() #1 - -declare dso_local i32 @cudaGetLastError() #1 - -declare dso_local i32 @printf(i8*, ...) #1 - -; Function Attrs: noinline nounwind optnone uwtable -define dso_local float @_Z4dist5PointS_i(%struct.Point* byval(%struct.Point) align 8 %p1, %struct.Point* byval(%struct.Point) align 8 %p2, i32 %dim) #6 { -entry: - %dim.addr = alloca i32, align 4 - %i = alloca i32, align 4 - %result = alloca float, align 4 - store i32 %dim, i32* %dim.addr, align 4 - store float 0.000000e+00, float* %result, align 4 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = load i32, i32* %i, align 4 - %1 = load i32, i32* %dim.addr, align 4 - %cmp = icmp slt i32 %0, %1 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %coord = getelementptr inbounds %struct.Point, %struct.Point* %p1, i32 0, i32 1 - %2 = load float*, float** %coord, align 8 - %3 = load i32, i32* %i, align 4 - %idxprom = sext i32 %3 to i64 - %arrayidx = getelementptr inbounds float, float* %2, i64 %idxprom - %4 = load float, float* %arrayidx, align 4 - %coord1 = getelementptr inbounds %struct.Point, %struct.Point* %p2, i32 0, i32 1 - %5 = load float*, float** %coord1, align 8 - %6 = load i32, i32* %i, align 4 - %idxprom2 = sext i32 %6 to i64 - %arrayidx3 = getelementptr inbounds float, float* %5, i64 %idxprom2 - %7 = load float, float* %arrayidx3, align 4 - %sub = fsub contract float %4, %7 - %coord4 = getelementptr inbounds %struct.Point, %struct.Point* %p1, i32 0, i32 1 - %8 = load float*, float** %coord4, align 8 - %9 = load i32, i32* %i, align 4 - %idxprom5 = sext i32 %9 to i64 - %arrayidx6 = getelementptr inbounds float, float* %8, i64 %idxprom5 - %10 = load float, float* %arrayidx6, align 4 - %coord7 = getelementptr inbounds %struct.Point, %struct.Point* %p2, i32 0, i32 1 - %11 = load float*, float** %coord7, align 8 - %12 = load i32, i32* %i, align 4 - %idxprom8 = sext i32 %12 to i64 - %arrayidx9 = getelementptr inbounds float, float* %11, i64 %idxprom8 - %13 = load float, float* %arrayidx9, align 4 - %sub10 = fsub contract float %10, %13 - %mul = fmul contract float %sub, %sub10 - %14 = load float, float* %result, align 4 - %add = fadd contract float %14, %mul - store float %add, float* %result, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %15 = load i32, i32* %i, align 4 - %inc = add nsw i32 %15, 1 - store i32 %inc, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %16 = load float, float* %result, align 4 - ret float %16 -} - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z9inttofileiPc(i32 %data, i8* %filename) #3 { -entry: - %data.addr = alloca i32, align 4 - %filename.addr = alloca i8*, align 8 - %fp = alloca %struct._IO_FILE*, align 8 - store i32 %data, i32* %data.addr, align 4 - store i8* %filename, i8** %filename.addr, align 8 - %0 = load i8*, i8** %filename.addr, align 8 - %call = call %struct._IO_FILE* @fopen(i8* %0, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.3, i64 0, i64 0)) - store %struct._IO_FILE* %call, %struct._IO_FILE** %fp, align 8 - %1 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8 - %2 = load i32, i32* %data.addr, align 4 - %call1 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.4, i64 0, i64 0), i32 %2) - %3 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8 - %call2 = call i32 @fclose(%struct._IO_FILE* %3) - ret void -} - -declare dso_local %struct._IO_FILE* @fopen(i8*, i8*) #1 - -declare dso_local i32 @fclose(%struct._IO_FILE*) #1 - -; Function Attrs: noinline nounwind optnone uwtable -define dso_local double @_Z7gettimev() #6 { -entry: - %t = alloca %struct.timeval, align 8 - %call = call i32 @gettimeofday(%struct.timeval* %t, %struct.timezone* null) #2 - %tv_sec = getelementptr inbounds %struct.timeval, %struct.timeval* %t, i32 0, i32 0 - %0 = load i64, i64* %tv_sec, align 8 - %conv = sitofp i64 %0 to double - %tv_usec = getelementptr inbounds %struct.timeval, %struct.timeval* %t, i32 0, i32 1 - %1 = load i64, i64* %tv_usec, align 8 - %conv1 = sitofp i64 %1 to double - %mul = fmul contract double %conv1, 0x3EB0C6F7A0B5ED8D - %add = fadd contract double %conv, %mul - ret double %add -} - -; Function Attrs: nounwind -declare dso_local i32 @gettimeofday(%struct.timeval*, %struct.timezone*) #7 - -; Function Attrs: noinline nounwind optnone uwtable -define dso_local i32 @_Z11isIdenticalPfS_i(float* %i, float* %j, i32 %D) #6 { -entry: - %retval = alloca i32, align 4 - %i.addr = alloca float*, align 8 - %j.addr = alloca float*, align 8 - %D.addr = alloca i32, align 4 - %a = alloca i32, align 4 - %equal = alloca i32, align 4 - store float* %i, float** %i.addr, align 8 - store float* %j, float** %j.addr, align 8 - store i32 %D, i32* %D.addr, align 4 - store i32 0, i32* %a, align 4 - store i32 1, i32* %equal, align 4 - br label %while.cond - -while.cond: ; preds = %if.end, %entry - %0 = load i32, i32* %equal, align 4 - %tobool = icmp ne i32 %0, 0 - br i1 %tobool, label %land.rhs, label %land.end - -land.rhs: ; preds = %while.cond - %1 = load i32, i32* %a, align 4 - %2 = load i32, i32* %D.addr, align 4 - %cmp = icmp slt i32 %1, %2 - br label %land.end - -land.end: ; preds = %land.rhs, %while.cond - %3 = phi i1 [ false, %while.cond ], [ %cmp, %land.rhs ] - br i1 %3, label %while.body, label %while.end - -while.body: ; preds = %land.end - %4 = load float*, float** %i.addr, align 8 - %5 = load i32, i32* %a, align 4 - %idxprom = sext i32 %5 to i64 - %arrayidx = getelementptr inbounds float, float* %4, i64 %idxprom - %6 = load float, float* %arrayidx, align 4 - %7 = load float*, float** %j.addr, align 8 - %8 = load i32, i32* %a, align 4 - %idxprom1 = sext i32 %8 to i64 - %arrayidx2 = getelementptr inbounds float, float* %7, i64 %idxprom1 - %9 = load float, float* %arrayidx2, align 4 - %cmp3 = fcmp une float %6, %9 - br i1 %cmp3, label %if.then, label %if.else - -if.then: ; preds = %while.body - store i32 0, i32* %equal, align 4 - br label %if.end - -if.else: ; preds = %while.body - %10 = load i32, i32* %a, align 4 - %inc = add nsw i32 %10, 1 - store i32 %inc, i32* %a, align 4 - br label %if.end - -if.end: ; preds = %if.else, %if.then - br label %while.cond - -while.end: ; preds = %land.end - %11 = load i32, i32* %equal, align 4 - %tobool4 = icmp ne i32 %11, 0 - br i1 %tobool4, label %if.then5, label %if.else6 - -if.then5: ; preds = %while.end - store i32 1, i32* %retval, align 4 - br label %return - -if.else6: ; preds = %while.end - store i32 0, i32* %retval, align 4 - br label %return - -return: ; preds = %if.else6, %if.then5 - %12 = load i32, i32* %retval, align 4 - ret i32 %12 -} - -; Function Attrs: noinline nounwind optnone uwtable -define dso_local void @_Z7shuffleP6Points(%struct.Points* %points) #6 { -entry: - %points.addr = alloca %struct.Points*, align 8 - %t1 = alloca double, align 8 - %i = alloca i64, align 8 - %j = alloca i64, align 8 - %temp = alloca %struct.Point, align 8 - %t2 = alloca double, align 8 - store %struct.Points* %points, %struct.Points** %points.addr, align 8 - %call = call double @_Z7gettimev() - store double %call, double* %t1, align 8 - store i64 0, i64* %i, align 8 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = load i64, i64* %i, align 8 - %1 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %num = getelementptr inbounds %struct.Points, %struct.Points* %1, i32 0, i32 0 - %2 = load i64, i64* %num, align 8 - %sub = sub nsw i64 %2, 1 - %cmp = icmp slt i64 %0, %sub - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %call1 = call i64 @lrand48() #2 - %3 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %num2 = getelementptr inbounds %struct.Points, %struct.Points* %3, i32 0, i32 0 - %4 = load i64, i64* %num2, align 8 - %5 = load i64, i64* %i, align 8 - %sub3 = sub nsw i64 %4, %5 - %rem = srem i64 %call1, %sub3 - %6 = load i64, i64* %i, align 8 - %add = add nsw i64 %rem, %6 - store i64 %add, i64* %j, align 8 - %7 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p = getelementptr inbounds %struct.Points, %struct.Points* %7, i32 0, i32 2 - %8 = load %struct.Point*, %struct.Point** %p, align 8 - %9 = load i64, i64* %i, align 8 - %arrayidx = getelementptr inbounds %struct.Point, %struct.Point* %8, i64 %9 - %10 = bitcast %struct.Point* %temp to i8* - %11 = bitcast %struct.Point* %arrayidx to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %10, i8* align 8 %11, i64 32, i1 false) - %12 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p4 = getelementptr inbounds %struct.Points, %struct.Points* %12, i32 0, i32 2 - %13 = load %struct.Point*, %struct.Point** %p4, align 8 - %14 = load i64, i64* %j, align 8 - %arrayidx5 = getelementptr inbounds %struct.Point, %struct.Point* %13, i64 %14 - %15 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p6 = getelementptr inbounds %struct.Points, %struct.Points* %15, i32 0, i32 2 - %16 = load %struct.Point*, %struct.Point** %p6, align 8 - %17 = load i64, i64* %i, align 8 - %arrayidx7 = getelementptr inbounds %struct.Point, %struct.Point* %16, i64 %17 - %18 = bitcast %struct.Point* %arrayidx7 to i8* - %19 = bitcast %struct.Point* %arrayidx5 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %18, i8* align 8 %19, i64 32, i1 false) - %20 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p8 = getelementptr inbounds %struct.Points, %struct.Points* %20, i32 0, i32 2 - %21 = load %struct.Point*, %struct.Point** %p8, align 8 - %22 = load i64, i64* %j, align 8 - %arrayidx9 = getelementptr inbounds %struct.Point, %struct.Point* %21, i64 %22 - %23 = bitcast %struct.Point* %arrayidx9 to i8* - %24 = bitcast %struct.Point* %temp to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %23, i8* align 8 %24, i64 32, i1 false) - br label %for.inc - -for.inc: ; preds = %for.body - %25 = load i64, i64* %i, align 8 - %inc = add nsw i64 %25, 1 - store i64 %inc, i64* %i, align 8 - br label %for.cond - -for.end: ; preds = %for.cond - %call10 = call double @_Z7gettimev() - store double %call10, double* %t2, align 8 - %26 = load double, double* %t2, align 8 - %27 = load double, double* %t1, align 8 - %sub11 = fsub contract double %26, %27 - %28 = load double, double* @time_shuffle, align 8 - %add12 = fadd contract double %28, %sub11 - store double %add12, double* @time_shuffle, align 8 - ret void -} - -; Function Attrs: nounwind -declare dso_local i64 @lrand48() #7 - -; Function Attrs: noinline nounwind optnone uwtable -define dso_local void @_Z10intshufflePii(i32* %intarray, i32 %length) #6 { -entry: - %intarray.addr = alloca i32*, align 8 - %length.addr = alloca i32, align 4 - %t1 = alloca double, align 8 - %i = alloca i64, align 8 - %j = alloca i64, align 8 - %temp = alloca i32, align 4 - %t2 = alloca double, align 8 - store i32* %intarray, i32** %intarray.addr, align 8 - store i32 %length, i32* %length.addr, align 4 - %call = call double @_Z7gettimev() - store double %call, double* %t1, align 8 - store i64 0, i64* %i, align 8 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = load i64, i64* %i, align 8 - %1 = load i32, i32* %length.addr, align 4 - %conv = sext i32 %1 to i64 - %cmp = icmp slt i64 %0, %conv - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %call1 = call i64 @lrand48() #2 - %2 = load i32, i32* %length.addr, align 4 - %conv2 = sext i32 %2 to i64 - %3 = load i64, i64* %i, align 8 - %sub = sub nsw i64 %conv2, %3 - %rem = srem i64 %call1, %sub - %4 = load i64, i64* %i, align 8 - %add = add nsw i64 %rem, %4 - store i64 %add, i64* %j, align 8 - %5 = load i32*, i32** %intarray.addr, align 8 - %6 = load i64, i64* %i, align 8 - %arrayidx = getelementptr inbounds i32, i32* %5, i64 %6 - %7 = load i32, i32* %arrayidx, align 4 - store i32 %7, i32* %temp, align 4 - %8 = load i32*, i32** %intarray.addr, align 8 - %9 = load i64, i64* %j, align 8 - %arrayidx3 = getelementptr inbounds i32, i32* %8, i64 %9 - %10 = load i32, i32* %arrayidx3, align 4 - %11 = load i32*, i32** %intarray.addr, align 8 - %12 = load i64, i64* %i, align 8 - %arrayidx4 = getelementptr inbounds i32, i32* %11, i64 %12 - store i32 %10, i32* %arrayidx4, align 4 - %13 = load i32, i32* %temp, align 4 - %14 = load i32*, i32** %intarray.addr, align 8 - %15 = load i64, i64* %j, align 8 - %arrayidx5 = getelementptr inbounds i32, i32* %14, i64 %15 - store i32 %13, i32* %arrayidx5, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %16 = load i64, i64* %i, align 8 - %inc = add nsw i64 %16, 1 - store i64 %inc, i64* %i, align 8 - br label %for.cond - -for.end: ; preds = %for.cond - %call6 = call double @_Z7gettimev() - store double %call6, double* %t2, align 8 - %17 = load double, double* %t2, align 8 - %18 = load double, double* %t1, align 8 - %sub7 = fsub contract double %17, %18 - %19 = load double, double* @time_shuffle, align 8 - %add8 = fadd contract double %19, %sub7 - store double %add8, double* @time_shuffle, align 8 - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define dso_local float @_Z7pspeedyP6PointsfPliP17pthread_barrier_t(%struct.Points* %points, float %z, i64* %kcenter, i32 %pid, %union.pthread_barrier_t* %barrier) #6 { -entry: - %points.addr = alloca %struct.Points*, align 8 - %z.addr = alloca float, align 4 - %kcenter.addr = alloca i64*, align 8 - %pid.addr = alloca i32, align 4 - %barrier.addr = alloca %union.pthread_barrier_t*, align 8 - %t1 = alloca double, align 8 - %bsize = alloca i64, align 8 - %k1 = alloca i64, align 8 - %k2 = alloca i64, align 8 - %k = alloca i32, align 4 - %distance = alloca float, align 4 - %agg.tmp = alloca %struct.Point, align 8 - %agg.tmp6 = alloca %struct.Point, align 8 - %k33 = alloca i32, align 4 - %distance39 = alloca float, align 4 - %agg.tmp40 = alloca %struct.Point, align 8 - %agg.tmp44 = alloca %struct.Point, align 8 - %to_open = alloca i8, align 1 - %k95 = alloca i32, align 4 - %distance101 = alloca float, align 4 - %agg.tmp102 = alloca %struct.Point, align 8 - %agg.tmp106 = alloca %struct.Point, align 8 - %mytotal = alloca float, align 4 - %k146 = alloca i32, align 4 - %i = alloca i32, align 4 - %t2 = alloca double, align 8 - store %struct.Points* %points, %struct.Points** %points.addr, align 8 - store float %z, float* %z.addr, align 4 - store i64* %kcenter, i64** %kcenter.addr, align 8 - store i32 %pid, i32* %pid.addr, align 4 - store %union.pthread_barrier_t* %barrier, %union.pthread_barrier_t** %barrier.addr, align 8 - %call = call double @_Z7gettimev() - store double %call, double* %t1, align 8 - %0 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %num = getelementptr inbounds %struct.Points, %struct.Points* %0, i32 0, i32 0 - %1 = load i64, i64* %num, align 8 - %2 = load i32, i32* @_ZL5nproc, align 4 - %conv = sext i32 %2 to i64 - %div = sdiv i64 %1, %conv - store i64 %div, i64* %bsize, align 8 - %3 = load i64, i64* %bsize, align 8 - %4 = load i32, i32* %pid.addr, align 4 - %conv1 = sext i32 %4 to i64 - %mul = mul nsw i64 %3, %conv1 - store i64 %mul, i64* %k1, align 8 - %5 = load i64, i64* %k1, align 8 - %6 = load i64, i64* %bsize, align 8 - %add = add nsw i64 %5, %6 - store i64 %add, i64* %k2, align 8 - %7 = load i32, i32* %pid.addr, align 4 - %8 = load i32, i32* @_ZL5nproc, align 4 - %sub = sub nsw i32 %8, 1 - %cmp = icmp eq i32 %7, %sub - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - %9 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %num2 = getelementptr inbounds %struct.Points, %struct.Points* %9, i32 0, i32 0 - %10 = load i64, i64* %num2, align 8 - store i64 %10, i64* %k2, align 8 - br label %if.end - -if.end: ; preds = %if.then, %entry - %11 = load i64, i64* %k1, align 8 - %conv3 = trunc i64 %11 to i32 - store i32 %conv3, i32* %k, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %if.end - %12 = load i32, i32* %k, align 4 - %conv4 = sext i32 %12 to i64 - %13 = load i64, i64* %k2, align 8 - %cmp5 = icmp slt i64 %conv4, %13 - br i1 %cmp5, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %14 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p = getelementptr inbounds %struct.Points, %struct.Points* %14, i32 0, i32 2 - %15 = load %struct.Point*, %struct.Point** %p, align 8 - %16 = load i32, i32* %k, align 4 - %idxprom = sext i32 %16 to i64 - %arrayidx = getelementptr inbounds %struct.Point, %struct.Point* %15, i64 %idxprom - %17 = bitcast %struct.Point* %agg.tmp to i8* - %18 = bitcast %struct.Point* %arrayidx to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %17, i8* align 8 %18, i64 32, i1 false) - %19 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p7 = getelementptr inbounds %struct.Points, %struct.Points* %19, i32 0, i32 2 - %20 = load %struct.Point*, %struct.Point** %p7, align 8 - %arrayidx8 = getelementptr inbounds %struct.Point, %struct.Point* %20, i64 0 - %21 = bitcast %struct.Point* %agg.tmp6 to i8* - %22 = bitcast %struct.Point* %arrayidx8 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %21, i8* align 8 %22, i64 32, i1 false) - %23 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %dim = getelementptr inbounds %struct.Points, %struct.Points* %23, i32 0, i32 1 - %24 = load i32, i32* %dim, align 8 - %call9 = call float @_Z4dist5PointS_i(%struct.Point* byval(%struct.Point) align 8 %agg.tmp, %struct.Point* byval(%struct.Point) align 8 %agg.tmp6, i32 %24) - store float %call9, float* %distance, align 4 - %25 = load float, float* %distance, align 4 - %26 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p10 = getelementptr inbounds %struct.Points, %struct.Points* %26, i32 0, i32 2 - %27 = load %struct.Point*, %struct.Point** %p10, align 8 - %28 = load i32, i32* %k, align 4 - %idxprom11 = sext i32 %28 to i64 - %arrayidx12 = getelementptr inbounds %struct.Point, %struct.Point* %27, i64 %idxprom11 - %weight = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx12, i32 0, i32 0 - %29 = load float, float* %weight, align 8 - %mul13 = fmul contract float %25, %29 - %30 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p14 = getelementptr inbounds %struct.Points, %struct.Points* %30, i32 0, i32 2 - %31 = load %struct.Point*, %struct.Point** %p14, align 8 - %32 = load i32, i32* %k, align 4 - %idxprom15 = sext i32 %32 to i64 - %arrayidx16 = getelementptr inbounds %struct.Point, %struct.Point* %31, i64 %idxprom15 - %cost = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx16, i32 0, i32 3 - store float %mul13, float* %cost, align 8 - %33 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p17 = getelementptr inbounds %struct.Points, %struct.Points* %33, i32 0, i32 2 - %34 = load %struct.Point*, %struct.Point** %p17, align 8 - %35 = load i32, i32* %k, align 4 - %idxprom18 = sext i32 %35 to i64 - %arrayidx19 = getelementptr inbounds %struct.Point, %struct.Point* %34, i64 %idxprom18 - %assign = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx19, i32 0, i32 2 - store i64 0, i64* %assign, align 8 - br label %for.inc - -for.inc: ; preds = %for.body - %36 = load i32, i32* %k, align 4 - %inc = add nsw i32 %36, 1 - store i32 %inc, i32* %k, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %37 = load i32, i32* %pid.addr, align 4 - %cmp20 = icmp eq i32 %37, 0 - br i1 %cmp20, label %if.then21, label %if.end25 - -if.then21: ; preds = %for.end - %38 = load i64*, i64** %kcenter.addr, align 8 - store i64 1, i64* %38, align 8 - %39 = load i32, i32* @_ZL5nproc, align 4 - %conv22 = sext i32 %39 to i64 - %mul23 = mul i64 4, %conv22 - %call24 = call noalias i8* @malloc(i64 %mul23) #2 - %40 = bitcast i8* %call24 to float* - store float* %40, float** @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE5costs, align 8 - br label %if.end25 - -if.end25: ; preds = %if.then21, %for.end - %41 = load i32, i32* %pid.addr, align 4 - %cmp26 = icmp ne i32 %41, 0 - br i1 %cmp26, label %if.then27, label %if.else - -if.then27: ; preds = %if.end25 - br label %while.body - -while.body: ; preds = %if.then27, %for.end78 - %42 = load i32, i32* @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE1i, align 4 - %conv28 = sext i32 %42 to i64 - %43 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %num29 = getelementptr inbounds %struct.Points, %struct.Points* %43, i32 0, i32 0 - %44 = load i64, i64* %num29, align 8 - %cmp30 = icmp sge i64 %conv28, %44 - br i1 %cmp30, label %if.then31, label %if.end32 - -if.then31: ; preds = %while.body - br label %while.end - -if.end32: ; preds = %while.body - %45 = load i64, i64* %k1, align 8 - %conv34 = trunc i64 %45 to i32 - store i32 %conv34, i32* %k33, align 4 - br label %for.cond35 - -for.cond35: ; preds = %for.inc76, %if.end32 - %46 = load i32, i32* %k33, align 4 - %conv36 = sext i32 %46 to i64 - %47 = load i64, i64* %k2, align 8 - %cmp37 = icmp slt i64 %conv36, %47 - br i1 %cmp37, label %for.body38, label %for.end78 - -for.body38: ; preds = %for.cond35 - %48 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p41 = getelementptr inbounds %struct.Points, %struct.Points* %48, i32 0, i32 2 - %49 = load %struct.Point*, %struct.Point** %p41, align 8 - %50 = load i32, i32* @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE1i, align 4 - %idxprom42 = sext i32 %50 to i64 - %arrayidx43 = getelementptr inbounds %struct.Point, %struct.Point* %49, i64 %idxprom42 - %51 = bitcast %struct.Point* %agg.tmp40 to i8* - %52 = bitcast %struct.Point* %arrayidx43 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %51, i8* align 8 %52, i64 32, i1 false) - %53 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p45 = getelementptr inbounds %struct.Points, %struct.Points* %53, i32 0, i32 2 - %54 = load %struct.Point*, %struct.Point** %p45, align 8 - %55 = load i32, i32* %k33, align 4 - %idxprom46 = sext i32 %55 to i64 - %arrayidx47 = getelementptr inbounds %struct.Point, %struct.Point* %54, i64 %idxprom46 - %56 = bitcast %struct.Point* %agg.tmp44 to i8* - %57 = bitcast %struct.Point* %arrayidx47 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %56, i8* align 8 %57, i64 32, i1 false) - %58 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %dim48 = getelementptr inbounds %struct.Points, %struct.Points* %58, i32 0, i32 1 - %59 = load i32, i32* %dim48, align 8 - %call49 = call float @_Z4dist5PointS_i(%struct.Point* byval(%struct.Point) align 8 %agg.tmp40, %struct.Point* byval(%struct.Point) align 8 %agg.tmp44, i32 %59) - store float %call49, float* %distance39, align 4 - %60 = load float, float* %distance39, align 4 - %61 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p50 = getelementptr inbounds %struct.Points, %struct.Points* %61, i32 0, i32 2 - %62 = load %struct.Point*, %struct.Point** %p50, align 8 - %63 = load i32, i32* %k33, align 4 - %idxprom51 = sext i32 %63 to i64 - %arrayidx52 = getelementptr inbounds %struct.Point, %struct.Point* %62, i64 %idxprom51 - %weight53 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx52, i32 0, i32 0 - %64 = load float, float* %weight53, align 8 - %mul54 = fmul contract float %60, %64 - %65 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p55 = getelementptr inbounds %struct.Points, %struct.Points* %65, i32 0, i32 2 - %66 = load %struct.Point*, %struct.Point** %p55, align 8 - %67 = load i32, i32* %k33, align 4 - %idxprom56 = sext i32 %67 to i64 - %arrayidx57 = getelementptr inbounds %struct.Point, %struct.Point* %66, i64 %idxprom56 - %cost58 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx57, i32 0, i32 3 - %68 = load float, float* %cost58, align 8 - %cmp59 = fcmp olt float %mul54, %68 - br i1 %cmp59, label %if.then60, label %if.end75 - -if.then60: ; preds = %for.body38 - %69 = load float, float* %distance39, align 4 - %70 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p61 = getelementptr inbounds %struct.Points, %struct.Points* %70, i32 0, i32 2 - %71 = load %struct.Point*, %struct.Point** %p61, align 8 - %72 = load i32, i32* %k33, align 4 - %idxprom62 = sext i32 %72 to i64 - %arrayidx63 = getelementptr inbounds %struct.Point, %struct.Point* %71, i64 %idxprom62 - %weight64 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx63, i32 0, i32 0 - %73 = load float, float* %weight64, align 8 - %mul65 = fmul contract float %69, %73 - %74 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p66 = getelementptr inbounds %struct.Points, %struct.Points* %74, i32 0, i32 2 - %75 = load %struct.Point*, %struct.Point** %p66, align 8 - %76 = load i32, i32* %k33, align 4 - %idxprom67 = sext i32 %76 to i64 - %arrayidx68 = getelementptr inbounds %struct.Point, %struct.Point* %75, i64 %idxprom67 - %cost69 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx68, i32 0, i32 3 - store float %mul65, float* %cost69, align 8 - %77 = load i32, i32* @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE1i, align 4 - %conv70 = sext i32 %77 to i64 - %78 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p71 = getelementptr inbounds %struct.Points, %struct.Points* %78, i32 0, i32 2 - %79 = load %struct.Point*, %struct.Point** %p71, align 8 - %80 = load i32, i32* %k33, align 4 - %idxprom72 = sext i32 %80 to i64 - %arrayidx73 = getelementptr inbounds %struct.Point, %struct.Point* %79, i64 %idxprom72 - %assign74 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx73, i32 0, i32 2 - store i64 %conv70, i64* %assign74, align 8 - br label %if.end75 - -if.end75: ; preds = %if.then60, %for.body38 - br label %for.inc76 - -for.inc76: ; preds = %if.end75 - %81 = load i32, i32* %k33, align 4 - %inc77 = add nsw i32 %81, 1 - store i32 %inc77, i32* %k33, align 4 - br label %for.cond35 - -for.end78: ; preds = %for.cond35 - br label %while.body - -while.end: ; preds = %if.then31 - br label %if.end145 - -if.else: ; preds = %if.end25 - store i32 1, i32* @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE1i, align 4 - br label %for.cond79 - -for.cond79: ; preds = %for.inc142, %if.else - %82 = load i32, i32* @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE1i, align 4 - %conv80 = sext i32 %82 to i64 - %83 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %num81 = getelementptr inbounds %struct.Points, %struct.Points* %83, i32 0, i32 0 - %84 = load i64, i64* %num81, align 8 - %cmp82 = icmp slt i64 %conv80, %84 - br i1 %cmp82, label %for.body83, label %for.end144 - -for.body83: ; preds = %for.cond79 - %call84 = call i64 @lrand48() #2 - %conv85 = sitofp i64 %call84 to float - %div86 = fdiv float %conv85, 0x41E0000000000000 - %85 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p87 = getelementptr inbounds %struct.Points, %struct.Points* %85, i32 0, i32 2 - %86 = load %struct.Point*, %struct.Point** %p87, align 8 - %87 = load i32, i32* @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE1i, align 4 - %idxprom88 = sext i32 %87 to i64 - %arrayidx89 = getelementptr inbounds %struct.Point, %struct.Point* %86, i64 %idxprom88 - %cost90 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx89, i32 0, i32 3 - %88 = load float, float* %cost90, align 8 - %89 = load float, float* %z.addr, align 4 - %div91 = fdiv float %88, %89 - %cmp92 = fcmp olt float %div86, %div91 - %frombool = zext i1 %cmp92 to i8 - store i8 %frombool, i8* %to_open, align 1 - %90 = load i8, i8* %to_open, align 1 - %tobool = trunc i8 %90 to i1 - br i1 %tobool, label %if.then93, label %if.end141 - -if.then93: ; preds = %for.body83 - %91 = load i64*, i64** %kcenter.addr, align 8 - %92 = load i64, i64* %91, align 8 - %inc94 = add nsw i64 %92, 1 - store i64 %inc94, i64* %91, align 8 - store i8 1, i8* @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE4open, align 1 - %93 = load i64, i64* %k1, align 8 - %conv96 = trunc i64 %93 to i32 - store i32 %conv96, i32* %k95, align 4 - br label %for.cond97 - -for.cond97: ; preds = %for.inc138, %if.then93 - %94 = load i32, i32* %k95, align 4 - %conv98 = sext i32 %94 to i64 - %95 = load i64, i64* %k2, align 8 - %cmp99 = icmp slt i64 %conv98, %95 - br i1 %cmp99, label %for.body100, label %for.end140 - -for.body100: ; preds = %for.cond97 - %96 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p103 = getelementptr inbounds %struct.Points, %struct.Points* %96, i32 0, i32 2 - %97 = load %struct.Point*, %struct.Point** %p103, align 8 - %98 = load i32, i32* @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE1i, align 4 - %idxprom104 = sext i32 %98 to i64 - %arrayidx105 = getelementptr inbounds %struct.Point, %struct.Point* %97, i64 %idxprom104 - %99 = bitcast %struct.Point* %agg.tmp102 to i8* - %100 = bitcast %struct.Point* %arrayidx105 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %99, i8* align 8 %100, i64 32, i1 false) - %101 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p107 = getelementptr inbounds %struct.Points, %struct.Points* %101, i32 0, i32 2 - %102 = load %struct.Point*, %struct.Point** %p107, align 8 - %103 = load i32, i32* %k95, align 4 - %idxprom108 = sext i32 %103 to i64 - %arrayidx109 = getelementptr inbounds %struct.Point, %struct.Point* %102, i64 %idxprom108 - %104 = bitcast %struct.Point* %agg.tmp106 to i8* - %105 = bitcast %struct.Point* %arrayidx109 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %104, i8* align 8 %105, i64 32, i1 false) - %106 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %dim110 = getelementptr inbounds %struct.Points, %struct.Points* %106, i32 0, i32 1 - %107 = load i32, i32* %dim110, align 8 - %call111 = call float @_Z4dist5PointS_i(%struct.Point* byval(%struct.Point) align 8 %agg.tmp102, %struct.Point* byval(%struct.Point) align 8 %agg.tmp106, i32 %107) - store float %call111, float* %distance101, align 4 - %108 = load float, float* %distance101, align 4 - %109 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p112 = getelementptr inbounds %struct.Points, %struct.Points* %109, i32 0, i32 2 - %110 = load %struct.Point*, %struct.Point** %p112, align 8 - %111 = load i32, i32* %k95, align 4 - %idxprom113 = sext i32 %111 to i64 - %arrayidx114 = getelementptr inbounds %struct.Point, %struct.Point* %110, i64 %idxprom113 - %weight115 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx114, i32 0, i32 0 - %112 = load float, float* %weight115, align 8 - %mul116 = fmul contract float %108, %112 - %113 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p117 = getelementptr inbounds %struct.Points, %struct.Points* %113, i32 0, i32 2 - %114 = load %struct.Point*, %struct.Point** %p117, align 8 - %115 = load i32, i32* %k95, align 4 - %idxprom118 = sext i32 %115 to i64 - %arrayidx119 = getelementptr inbounds %struct.Point, %struct.Point* %114, i64 %idxprom118 - %cost120 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx119, i32 0, i32 3 - %116 = load float, float* %cost120, align 8 - %cmp121 = fcmp olt float %mul116, %116 - br i1 %cmp121, label %if.then122, label %if.end137 - -if.then122: ; preds = %for.body100 - %117 = load float, float* %distance101, align 4 - %118 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p123 = getelementptr inbounds %struct.Points, %struct.Points* %118, i32 0, i32 2 - %119 = load %struct.Point*, %struct.Point** %p123, align 8 - %120 = load i32, i32* %k95, align 4 - %idxprom124 = sext i32 %120 to i64 - %arrayidx125 = getelementptr inbounds %struct.Point, %struct.Point* %119, i64 %idxprom124 - %weight126 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx125, i32 0, i32 0 - %121 = load float, float* %weight126, align 8 - %mul127 = fmul contract float %117, %121 - %122 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p128 = getelementptr inbounds %struct.Points, %struct.Points* %122, i32 0, i32 2 - %123 = load %struct.Point*, %struct.Point** %p128, align 8 - %124 = load i32, i32* %k95, align 4 - %idxprom129 = sext i32 %124 to i64 - %arrayidx130 = getelementptr inbounds %struct.Point, %struct.Point* %123, i64 %idxprom129 - %cost131 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx130, i32 0, i32 3 - store float %mul127, float* %cost131, align 8 - %125 = load i32, i32* @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE1i, align 4 - %conv132 = sext i32 %125 to i64 - %126 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p133 = getelementptr inbounds %struct.Points, %struct.Points* %126, i32 0, i32 2 - %127 = load %struct.Point*, %struct.Point** %p133, align 8 - %128 = load i32, i32* %k95, align 4 - %idxprom134 = sext i32 %128 to i64 - %arrayidx135 = getelementptr inbounds %struct.Point, %struct.Point* %127, i64 %idxprom134 - %assign136 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx135, i32 0, i32 2 - store i64 %conv132, i64* %assign136, align 8 - br label %if.end137 - -if.end137: ; preds = %if.then122, %for.body100 - br label %for.inc138 - -for.inc138: ; preds = %if.end137 - %129 = load i32, i32* %k95, align 4 - %inc139 = add nsw i32 %129, 1 - store i32 %inc139, i32* %k95, align 4 - br label %for.cond97 - -for.end140: ; preds = %for.cond97 - store i8 0, i8* @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE4open, align 1 - br label %if.end141 - -if.end141: ; preds = %for.end140, %for.body83 - br label %for.inc142 - -for.inc142: ; preds = %if.end141 - %130 = load i32, i32* @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE1i, align 4 - %inc143 = add nsw i32 %130, 1 - store i32 %inc143, i32* @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE1i, align 4 - br label %for.cond79 - -for.end144: ; preds = %for.cond79 - store i8 1, i8* @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE4open, align 1 - br label %if.end145 - -if.end145: ; preds = %for.end144, %while.end - store i8 0, i8* @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE4open, align 1 - store float 0.000000e+00, float* %mytotal, align 4 - %131 = load i64, i64* %k1, align 8 - %conv147 = trunc i64 %131 to i32 - store i32 %conv147, i32* %k146, align 4 - br label %for.cond148 - -for.cond148: ; preds = %for.inc157, %if.end145 - %132 = load i32, i32* %k146, align 4 - %conv149 = sext i32 %132 to i64 - %133 = load i64, i64* %k2, align 8 - %cmp150 = icmp slt i64 %conv149, %133 - br i1 %cmp150, label %for.body151, label %for.end159 - -for.body151: ; preds = %for.cond148 - %134 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p152 = getelementptr inbounds %struct.Points, %struct.Points* %134, i32 0, i32 2 - %135 = load %struct.Point*, %struct.Point** %p152, align 8 - %136 = load i32, i32* %k146, align 4 - %idxprom153 = sext i32 %136 to i64 - %arrayidx154 = getelementptr inbounds %struct.Point, %struct.Point* %135, i64 %idxprom153 - %cost155 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx154, i32 0, i32 3 - %137 = load float, float* %cost155, align 8 - %138 = load float, float* %mytotal, align 4 - %add156 = fadd contract float %138, %137 - store float %add156, float* %mytotal, align 4 - br label %for.inc157 - -for.inc157: ; preds = %for.body151 - %139 = load i32, i32* %k146, align 4 - %inc158 = add nsw i32 %139, 1 - store i32 %inc158, i32* %k146, align 4 - br label %for.cond148 - -for.end159: ; preds = %for.cond148 - %140 = load float, float* %mytotal, align 4 - %141 = load float*, float** @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE5costs, align 8 - %142 = load i32, i32* %pid.addr, align 4 - %idxprom160 = sext i32 %142 to i64 - %arrayidx161 = getelementptr inbounds float, float* %141, i64 %idxprom160 - store float %140, float* %arrayidx161, align 4 - %143 = load i32, i32* %pid.addr, align 4 - %cmp162 = icmp eq i32 %143, 0 - br i1 %cmp162, label %if.then163, label %if.end175 - -if.then163: ; preds = %for.end159 - %144 = load float, float* %z.addr, align 4 - %145 = load i64*, i64** %kcenter.addr, align 8 - %146 = load i64, i64* %145, align 8 - %conv164 = sitofp i64 %146 to float - %mul165 = fmul contract float %144, %conv164 - store float %mul165, float* @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE9totalcost, align 4 - store i32 0, i32* %i, align 4 - br label %for.cond166 - -for.cond166: ; preds = %for.inc172, %if.then163 - %147 = load i32, i32* %i, align 4 - %148 = load i32, i32* @_ZL5nproc, align 4 - %cmp167 = icmp slt i32 %147, %148 - br i1 %cmp167, label %for.body168, label %for.end174 - -for.body168: ; preds = %for.cond166 - %149 = load float*, float** @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE5costs, align 8 - %150 = load i32, i32* %i, align 4 - %idxprom169 = sext i32 %150 to i64 - %arrayidx170 = getelementptr inbounds float, float* %149, i64 %idxprom169 - %151 = load float, float* %arrayidx170, align 4 - %152 = load float, float* @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE9totalcost, align 4 - %add171 = fadd contract float %152, %151 - store float %add171, float* @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE9totalcost, align 4 - br label %for.inc172 - -for.inc172: ; preds = %for.body168 - %153 = load i32, i32* %i, align 4 - %inc173 = add nsw i32 %153, 1 - store i32 %inc173, i32* %i, align 4 - br label %for.cond166 - -for.end174: ; preds = %for.cond166 - %154 = load float*, float** @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE5costs, align 8 - %155 = bitcast float* %154 to i8* - call void @free(i8* %155) #2 - br label %if.end175 - -if.end175: ; preds = %for.end174, %for.end159 - %call176 = call double @_Z7gettimev() - store double %call176, double* %t2, align 8 - %156 = load i32, i32* %pid.addr, align 4 - %cmp177 = icmp eq i32 %156, 0 - br i1 %cmp177, label %if.then178, label %if.end181 - -if.then178: ; preds = %if.end175 - %157 = load double, double* %t2, align 8 - %158 = load double, double* %t1, align 8 - %sub179 = fsub contract double %157, %158 - %159 = load double, double* @time_speedy, align 8 - %add180 = fadd contract double %159, %sub179 - store double %add180, double* @time_speedy, align 8 - br label %if.end181 - -if.end181: ; preds = %if.then178, %if.end175 - %160 = load float, float* @_ZZ7pspeedyP6PointsfPliP17pthread_barrier_tE9totalcost, align 4 - ret float %160 -} - -; Function Attrs: noinline optnone uwtable -define dso_local float @_Z3pFLP6PointsPiifPliflfiP17pthread_barrier_t(%struct.Points* %points, i32* %feasible, i32 %numfeasible, float %z, i64* %k, i32 %kmax, float %cost, i64 %iter, float %e, i32 %pid, %union.pthread_barrier_t* %barrier) #3 { -entry: - %points.addr = alloca %struct.Points*, align 8 - %feasible.addr = alloca i32*, align 8 - %numfeasible.addr = alloca i32, align 4 - %z.addr = alloca float, align 4 - %k.addr = alloca i64*, align 8 - %kmax.addr = alloca i32, align 4 - %cost.addr = alloca float, align 4 - %iter.addr = alloca i64, align 8 - %e.addr = alloca float, align 4 - %pid.addr = alloca i32, align 4 - %barrier.addr = alloca %union.pthread_barrier_t*, align 8 - %i = alloca i64, align 8 - %x = alloca i64, align 8 - %change = alloca float, align 4 - %numberOfPoints = alloca i64, align 8 - store %struct.Points* %points, %struct.Points** %points.addr, align 8 - store i32* %feasible, i32** %feasible.addr, align 8 - store i32 %numfeasible, i32* %numfeasible.addr, align 4 - store float %z, float* %z.addr, align 4 - store i64* %k, i64** %k.addr, align 8 - store i32 %kmax, i32* %kmax.addr, align 4 - store float %cost, float* %cost.addr, align 4 - store i64 %iter, i64* %iter.addr, align 8 - store float %e, float* %e.addr, align 4 - store i32 %pid, i32* %pid.addr, align 4 - store %union.pthread_barrier_t* %barrier, %union.pthread_barrier_t** %barrier.addr, align 8 - %0 = load float, float* %cost.addr, align 4 - store float %0, float* %change, align 4 - br label %while.cond - -while.cond: ; preds = %for.end, %entry - %1 = load float, float* %change, align 4 - %2 = load float, float* %cost.addr, align 4 - %div = fdiv float %1, %2 - %conv = fpext float %div to double - %3 = load float, float* %e.addr, align 4 - %conv1 = fpext float %3 to double - %mul = fmul contract double 1.000000e+00, %conv1 - %cmp = fcmp ogt double %conv, %mul - br i1 %cmp, label %while.body, label %while.end - -while.body: ; preds = %while.cond - store float 0.000000e+00, float* %change, align 4 - %4 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %num = getelementptr inbounds %struct.Points, %struct.Points* %4, i32 0, i32 0 - %5 = load i64, i64* %num, align 8 - store i64 %5, i64* %numberOfPoints, align 8 - %6 = load i32, i32* %pid.addr, align 4 - %cmp2 = icmp eq i32 %6, 0 - br i1 %cmp2, label %if.then, label %if.end - -if.then: ; preds = %while.body - %7 = load i32*, i32** %feasible.addr, align 8 - %8 = load i32, i32* %numfeasible.addr, align 4 - call void @_Z10intshufflePii(i32* %7, i32 %8) - br label %if.end - -if.end: ; preds = %if.then, %while.body - store i64 0, i64* %i, align 8 - br label %for.cond - -for.cond: ; preds = %for.inc, %if.end - %9 = load i64, i64* %i, align 8 - %10 = load i64, i64* %iter.addr, align 8 - %cmp3 = icmp slt i64 %9, %10 - br i1 %cmp3, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %11 = load i64, i64* %i, align 8 - %12 = load i32, i32* %numfeasible.addr, align 4 - %conv4 = sext i32 %12 to i64 - %rem = srem i64 %11, %conv4 - store i64 %rem, i64* %x, align 8 - %13 = load i32*, i32** %feasible.addr, align 8 - %14 = load i64, i64* %x, align 8 - %arrayidx = getelementptr inbounds i32, i32* %13, i64 %14 - %15 = load i32, i32* %arrayidx, align 4 - %conv5 = sext i32 %15 to i64 - %16 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %17 = load float, float* %z.addr, align 4 - %18 = load i64*, i64** %k.addr, align 8 - %19 = load i32, i32* %kmax.addr, align 4 - %20 = load i8*, i8** @_ZL9is_center, align 8 - %21 = load i32*, i32** @_ZL12center_table, align 8 - %22 = load i8*, i8** @_ZL17switch_membership, align 8 - %23 = load i8, i8* @isCoordChanged, align 1 - %tobool = trunc i8 %23 to i1 - %call = call float @_Z5pgainlP6PointsfPliPbPiS2_bPdS4_S4_S4_S4_S4_(i64 %conv5, %struct.Points* %16, float %17, i64* %18, i32 %19, i8* %20, i32* %21, i8* %22, i1 zeroext %tobool, double* @serial_t, double* @cpu_to_gpu_t, double* @gpu_to_cpu_t, double* @alloc_t, double* @kernel_t, double* @free_t) - %24 = load float, float* %change, align 4 - %add = fadd contract float %24, %call - store float %add, float* %change, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %25 = load i64, i64* %i, align 8 - %inc = add nsw i64 %25, 1 - store i64 %inc, i64* %i, align 8 - br label %for.cond - -for.end: ; preds = %for.cond - %26 = load float, float* %change, align 4 - %27 = load float, float* %cost.addr, align 4 - %sub = fsub contract float %27, %26 - store float %sub, float* %cost.addr, align 4 - br label %while.cond - -while.end: ; preds = %while.cond - %28 = load float, float* %cost.addr, align 4 - ret float %28 -} - -; Function Attrs: noinline optnone uwtable -define dso_local i32 @_Z19selectfeasible_fastP6PointsPPiiiP17pthread_barrier_t(%struct.Points* %points, i32** %feasible, i32 %kmin, i32 %pid, %union.pthread_barrier_t* %barrier) #3 { -entry: - %retval = alloca i32, align 4 - %points.addr = alloca %struct.Points*, align 8 - %feasible.addr = alloca i32**, align 8 - %kmin.addr = alloca i32, align 4 - %pid.addr = alloca i32, align 4 - %barrier.addr = alloca %union.pthread_barrier_t*, align 8 - %t1 = alloca double, align 8 - %numfeasible = alloca i32, align 4 - %accumweight = alloca float*, align 8 - %totalweight = alloca float, align 4 - %k1 = alloca i64, align 8 - %k2 = alloca i64, align 8 - %w = alloca float, align 4 - %l = alloca i32, align 4 - %r = alloca i32, align 4 - %k = alloca i32, align 4 - %i = alloca i32, align 4 - %i29 = alloca i32, align 4 - %i49 = alloca i32, align 4 - %t2 = alloca double, align 8 - store %struct.Points* %points, %struct.Points** %points.addr, align 8 - store i32** %feasible, i32*** %feasible.addr, align 8 - store i32 %kmin, i32* %kmin.addr, align 4 - store i32 %pid, i32* %pid.addr, align 4 - store %union.pthread_barrier_t* %barrier, %union.pthread_barrier_t** %barrier.addr, align 8 - %call = call double @_Z7gettimev() - store double %call, double* %t1, align 8 - %0 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %num = getelementptr inbounds %struct.Points, %struct.Points* %0, i32 0, i32 0 - %1 = load i64, i64* %num, align 8 - %conv = trunc i64 %1 to i32 - store i32 %conv, i32* %numfeasible, align 4 - %2 = load i32, i32* %numfeasible, align 4 - %conv1 = sitofp i32 %2 to float - %3 = load i32, i32* %kmin.addr, align 4 - %mul = mul nsw i32 3, %3 - %conv2 = sitofp i32 %mul to float - %4 = load i32, i32* %kmin.addr, align 4 - %conv3 = sitofp i32 %4 to float - %call4 = call float @_ZSt3logf(float %conv3) - %mul5 = fmul contract float %conv2, %call4 - %cmp = fcmp ogt float %conv1, %mul5 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - %5 = load i32, i32* %kmin.addr, align 4 - %mul6 = mul nsw i32 3, %5 - %conv7 = sitofp i32 %mul6 to float - %6 = load i32, i32* %kmin.addr, align 4 - %conv8 = sitofp i32 %6 to float - %call9 = call float @_ZSt3logf(float %conv8) - %mul10 = fmul contract float %conv7, %call9 - %conv11 = fptosi float %mul10 to i32 - store i32 %conv11, i32* %numfeasible, align 4 - br label %if.end - -if.end: ; preds = %if.then, %entry - %7 = load i32, i32* %numfeasible, align 4 - %conv12 = sext i32 %7 to i64 - %mul13 = mul i64 %conv12, 4 - %call14 = call noalias i8* @malloc(i64 %mul13) #2 - %8 = bitcast i8* %call14 to i32* - %9 = load i32**, i32*** %feasible.addr, align 8 - store i32* %8, i32** %9, align 8 - store i64 0, i64* %k1, align 8 - %10 = load i32, i32* %numfeasible, align 4 - %conv15 = sext i32 %10 to i64 - store i64 %conv15, i64* %k2, align 8 - %11 = load i32, i32* %numfeasible, align 4 - %conv16 = sext i32 %11 to i64 - %12 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %num17 = getelementptr inbounds %struct.Points, %struct.Points* %12, i32 0, i32 0 - %13 = load i64, i64* %num17, align 8 - %cmp18 = icmp eq i64 %conv16, %13 - br i1 %cmp18, label %if.then19, label %if.end23 - -if.then19: ; preds = %if.end - %14 = load i64, i64* %k1, align 8 - %conv20 = trunc i64 %14 to i32 - store i32 %conv20, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %if.then19 - %15 = load i32, i32* %i, align 4 - %conv21 = sext i32 %15 to i64 - %16 = load i64, i64* %k2, align 8 - %cmp22 = icmp slt i64 %conv21, %16 - br i1 %cmp22, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %17 = load i32, i32* %i, align 4 - %18 = load i32**, i32*** %feasible.addr, align 8 - %19 = load i32*, i32** %18, align 8 - %20 = load i32, i32* %i, align 4 - %idxprom = sext i32 %20 to i64 - %arrayidx = getelementptr inbounds i32, i32* %19, i64 %idxprom - store i32 %17, i32* %arrayidx, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %21 = load i32, i32* %i, align 4 - %inc = add nsw i32 %21, 1 - store i32 %inc, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %22 = load i32, i32* %numfeasible, align 4 - store i32 %22, i32* %retval, align 4 - br label %return - -if.end23: ; preds = %if.end - %23 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %num24 = getelementptr inbounds %struct.Points, %struct.Points* %23, i32 0, i32 0 - %24 = load i64, i64* %num24, align 8 - %mul25 = mul i64 4, %24 - %call26 = call noalias i8* @malloc(i64 %mul25) #2 - %25 = bitcast i8* %call26 to float* - store float* %25, float** %accumweight, align 8 - %26 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p = getelementptr inbounds %struct.Points, %struct.Points* %26, i32 0, i32 2 - %27 = load %struct.Point*, %struct.Point** %p, align 8 - %arrayidx27 = getelementptr inbounds %struct.Point, %struct.Point* %27, i64 0 - %weight = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx27, i32 0, i32 0 - %28 = load float, float* %weight, align 8 - %29 = load float*, float** %accumweight, align 8 - %arrayidx28 = getelementptr inbounds float, float* %29, i64 0 - store float %28, float* %arrayidx28, align 4 - store float 0.000000e+00, float* %totalweight, align 4 - store i32 1, i32* %i29, align 4 - br label %for.cond30 - -for.cond30: ; preds = %for.inc43, %if.end23 - %30 = load i32, i32* %i29, align 4 - %conv31 = sext i32 %30 to i64 - %31 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %num32 = getelementptr inbounds %struct.Points, %struct.Points* %31, i32 0, i32 0 - %32 = load i64, i64* %num32, align 8 - %cmp33 = icmp slt i64 %conv31, %32 - br i1 %cmp33, label %for.body34, label %for.end45 - -for.body34: ; preds = %for.cond30 - %33 = load float*, float** %accumweight, align 8 - %34 = load i32, i32* %i29, align 4 - %sub = sub nsw i32 %34, 1 - %idxprom35 = sext i32 %sub to i64 - %arrayidx36 = getelementptr inbounds float, float* %33, i64 %idxprom35 - %35 = load float, float* %arrayidx36, align 4 - %36 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p37 = getelementptr inbounds %struct.Points, %struct.Points* %36, i32 0, i32 2 - %37 = load %struct.Point*, %struct.Point** %p37, align 8 - %38 = load i32, i32* %i29, align 4 - %idxprom38 = sext i32 %38 to i64 - %arrayidx39 = getelementptr inbounds %struct.Point, %struct.Point* %37, i64 %idxprom38 - %weight40 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx39, i32 0, i32 0 - %39 = load float, float* %weight40, align 8 - %add = fadd contract float %35, %39 - %40 = load float*, float** %accumweight, align 8 - %41 = load i32, i32* %i29, align 4 - %idxprom41 = sext i32 %41 to i64 - %arrayidx42 = getelementptr inbounds float, float* %40, i64 %idxprom41 - store float %add, float* %arrayidx42, align 4 - br label %for.inc43 - -for.inc43: ; preds = %for.body34 - %42 = load i32, i32* %i29, align 4 - %inc44 = add nsw i32 %42, 1 - store i32 %inc44, i32* %i29, align 4 - br label %for.cond30 - -for.end45: ; preds = %for.cond30 - %43 = load float*, float** %accumweight, align 8 - %44 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %num46 = getelementptr inbounds %struct.Points, %struct.Points* %44, i32 0, i32 0 - %45 = load i64, i64* %num46, align 8 - %sub47 = sub nsw i64 %45, 1 - %arrayidx48 = getelementptr inbounds float, float* %43, i64 %sub47 - %46 = load float, float* %arrayidx48, align 4 - store float %46, float* %totalweight, align 4 - %47 = load i64, i64* %k1, align 8 - %conv50 = trunc i64 %47 to i32 - store i32 %conv50, i32* %i49, align 4 - br label %for.cond51 - -for.cond51: ; preds = %for.inc78, %for.end45 - %48 = load i32, i32* %i49, align 4 - %conv52 = sext i32 %48 to i64 - %49 = load i64, i64* %k2, align 8 - %cmp53 = icmp slt i64 %conv52, %49 - br i1 %cmp53, label %for.body54, label %for.end80 - -for.body54: ; preds = %for.cond51 - %call55 = call i64 @lrand48() #2 - %conv56 = sitofp i64 %call55 to float - %div = fdiv float %conv56, 0x41E0000000000000 - %50 = load float, float* %totalweight, align 4 - %mul57 = fmul contract float %div, %50 - store float %mul57, float* %w, align 4 - store i32 0, i32* %l, align 4 - %51 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %num58 = getelementptr inbounds %struct.Points, %struct.Points* %51, i32 0, i32 0 - %52 = load i64, i64* %num58, align 8 - %sub59 = sub nsw i64 %52, 1 - %conv60 = trunc i64 %sub59 to i32 - store i32 %conv60, i32* %r, align 4 - %53 = load float*, float** %accumweight, align 8 - %arrayidx61 = getelementptr inbounds float, float* %53, i64 0 - %54 = load float, float* %arrayidx61, align 4 - %55 = load float, float* %w, align 4 - %cmp62 = fcmp ogt float %54, %55 - br i1 %cmp62, label %if.then63, label %if.end66 - -if.then63: ; preds = %for.body54 - %56 = load i32**, i32*** %feasible.addr, align 8 - %57 = load i32*, i32** %56, align 8 - %58 = load i32, i32* %i49, align 4 - %idxprom64 = sext i32 %58 to i64 - %arrayidx65 = getelementptr inbounds i32, i32* %57, i64 %idxprom64 - store i32 0, i32* %arrayidx65, align 4 - br label %for.inc78 - -if.end66: ; preds = %for.body54 - br label %while.cond - -while.cond: ; preds = %if.end75, %if.end66 - %59 = load i32, i32* %l, align 4 - %add67 = add nsw i32 %59, 1 - %60 = load i32, i32* %r, align 4 - %cmp68 = icmp slt i32 %add67, %60 - br i1 %cmp68, label %while.body, label %while.end - -while.body: ; preds = %while.cond - %61 = load i32, i32* %l, align 4 - %62 = load i32, i32* %r, align 4 - %add69 = add nsw i32 %61, %62 - %div70 = sdiv i32 %add69, 2 - store i32 %div70, i32* %k, align 4 - %63 = load float*, float** %accumweight, align 8 - %64 = load i32, i32* %k, align 4 - %idxprom71 = sext i32 %64 to i64 - %arrayidx72 = getelementptr inbounds float, float* %63, i64 %idxprom71 - %65 = load float, float* %arrayidx72, align 4 - %66 = load float, float* %w, align 4 - %cmp73 = fcmp ogt float %65, %66 - br i1 %cmp73, label %if.then74, label %if.else - -if.then74: ; preds = %while.body - %67 = load i32, i32* %k, align 4 - store i32 %67, i32* %r, align 4 - br label %if.end75 - -if.else: ; preds = %while.body - %68 = load i32, i32* %k, align 4 - store i32 %68, i32* %l, align 4 - br label %if.end75 - -if.end75: ; preds = %if.else, %if.then74 - br label %while.cond - -while.end: ; preds = %while.cond - %69 = load i32, i32* %r, align 4 - %70 = load i32**, i32*** %feasible.addr, align 8 - %71 = load i32*, i32** %70, align 8 - %72 = load i32, i32* %i49, align 4 - %idxprom76 = sext i32 %72 to i64 - %arrayidx77 = getelementptr inbounds i32, i32* %71, i64 %idxprom76 - store i32 %69, i32* %arrayidx77, align 4 - br label %for.inc78 - -for.inc78: ; preds = %while.end, %if.then63 - %73 = load i32, i32* %i49, align 4 - %inc79 = add nsw i32 %73, 1 - store i32 %inc79, i32* %i49, align 4 - br label %for.cond51 - -for.end80: ; preds = %for.cond51 - %74 = load float*, float** %accumweight, align 8 - %75 = bitcast float* %74 to i8* - call void @free(i8* %75) #2 - %call81 = call double @_Z7gettimev() - store double %call81, double* %t2, align 8 - %76 = load double, double* %t2, align 8 - %77 = load double, double* %t1, align 8 - %sub82 = fsub contract double %76, %77 - %78 = load double, double* @time_select_feasible, align 8 - %add83 = fadd contract double %78, %sub82 - store double %add83, double* @time_select_feasible, align 8 - %79 = load i32, i32* %numfeasible, align 4 - store i32 %79, i32* %retval, align 4 - br label %return - -return: ; preds = %for.end80, %for.end - %80 = load i32, i32* %retval, align 4 - ret i32 %80 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local float @_ZSt3logf(float %__x) #6 comdat { -entry: - %__x.addr = alloca float, align 4 - store float %__x, float* %__x.addr, align 4 - %0 = load float, float* %__x.addr, align 4 - %call = call float @logf(float %0) #2 - ret float %call -} - -; Function Attrs: noinline optnone uwtable -define dso_local float @_Z8pkmedianP6PointsllPliP17pthread_barrier_t(%struct.Points* %points, i64 %kmin, i64 %kmax, i64* %kfinal, i32 %pid, %union.pthread_barrier_t* %barrier) #3 { -entry: - %retval = alloca float, align 4 - %points.addr = alloca %struct.Points*, align 8 - %kmin.addr = alloca i64, align 8 - %kmax.addr = alloca i64, align 8 - %kfinal.addr = alloca i64*, align 8 - %pid.addr = alloca i32, align 4 - %barrier.addr = alloca %union.pthread_barrier_t*, align 8 - %i = alloca i32, align 4 - %cost = alloca float, align 4 - %lastcost = alloca float, align 4 - %hiz = alloca float, align 4 - %loz = alloca float, align 4 - %z = alloca float, align 4 - %numberOfPoints = alloca i64, align 8 - %ptDimension = alloca i64, align 8 - %bsize = alloca i64, align 8 - %k1 = alloca i64, align 8 - %k2 = alloca i64, align 8 - %myhiz = alloca float, align 4 - %kk = alloca i64, align 8 - %agg.tmp = alloca %struct.Point, align 8 - %agg.tmp10 = alloca %struct.Point, align 8 - %i20 = alloca i32, align 4 - %kk37 = alloca i64, align 8 - %i81 = alloca i32, align 4 - store %struct.Points* %points, %struct.Points** %points.addr, align 8 - store i64 %kmin, i64* %kmin.addr, align 8 - store i64 %kmax, i64* %kmax.addr, align 8 - store i64* %kfinal, i64** %kfinal.addr, align 8 - store i32 %pid, i32* %pid.addr, align 4 - store %union.pthread_barrier_t* %barrier, %union.pthread_barrier_t** %barrier.addr, align 8 - %0 = load i32, i32* %pid.addr, align 4 - %cmp = icmp eq i32 %0, 0 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - %1 = load i32, i32* @_ZL5nproc, align 4 - %conv = sext i32 %1 to i64 - %call = call noalias i8* @calloc(i64 %conv, i64 4) #2 - %2 = bitcast i8* %call to float* - store float* %2, float** @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE4hizs, align 8 - br label %if.end - -if.end: ; preds = %if.then, %entry - store float 0.000000e+00, float* %loz, align 4 - store float 0.000000e+00, float* %hiz, align 4 - %3 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %num = getelementptr inbounds %struct.Points, %struct.Points* %3, i32 0, i32 0 - %4 = load i64, i64* %num, align 8 - store i64 %4, i64* %numberOfPoints, align 8 - %5 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %dim = getelementptr inbounds %struct.Points, %struct.Points* %5, i32 0, i32 1 - %6 = load i32, i32* %dim, align 8 - %conv1 = sext i32 %6 to i64 - store i64 %conv1, i64* %ptDimension, align 8 - %7 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %num2 = getelementptr inbounds %struct.Points, %struct.Points* %7, i32 0, i32 0 - %8 = load i64, i64* %num2, align 8 - %9 = load i32, i32* @_ZL5nproc, align 4 - %conv3 = sext i32 %9 to i64 - %div = sdiv i64 %8, %conv3 - store i64 %div, i64* %bsize, align 8 - %10 = load i64, i64* %bsize, align 8 - %11 = load i32, i32* %pid.addr, align 4 - %conv4 = sext i32 %11 to i64 - %mul = mul nsw i64 %10, %conv4 - store i64 %mul, i64* %k1, align 8 - %12 = load i64, i64* %k1, align 8 - %13 = load i64, i64* %bsize, align 8 - %add = add nsw i64 %12, %13 - store i64 %add, i64* %k2, align 8 - %14 = load i32, i32* %pid.addr, align 4 - %15 = load i32, i32* @_ZL5nproc, align 4 - %sub = sub nsw i32 %15, 1 - %cmp5 = icmp eq i32 %14, %sub - br i1 %cmp5, label %if.then6, label %if.end8 - -if.then6: ; preds = %if.end - %16 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %num7 = getelementptr inbounds %struct.Points, %struct.Points* %16, i32 0, i32 0 - %17 = load i64, i64* %num7, align 8 - store i64 %17, i64* %k2, align 8 - br label %if.end8 - -if.end8: ; preds = %if.then6, %if.end - store float 0.000000e+00, float* %myhiz, align 4 - %18 = load i64, i64* %k1, align 8 - store i64 %18, i64* %kk, align 8 - br label %for.cond - -for.cond: ; preds = %for.inc, %if.end8 - %19 = load i64, i64* %kk, align 8 - %20 = load i64, i64* %k2, align 8 - %cmp9 = icmp slt i64 %19, %20 - br i1 %cmp9, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %21 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p = getelementptr inbounds %struct.Points, %struct.Points* %21, i32 0, i32 2 - %22 = load %struct.Point*, %struct.Point** %p, align 8 - %23 = load i64, i64* %kk, align 8 - %arrayidx = getelementptr inbounds %struct.Point, %struct.Point* %22, i64 %23 - %24 = bitcast %struct.Point* %agg.tmp to i8* - %25 = bitcast %struct.Point* %arrayidx to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %24, i8* align 8 %25, i64 32, i1 false) - %26 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p11 = getelementptr inbounds %struct.Points, %struct.Points* %26, i32 0, i32 2 - %27 = load %struct.Point*, %struct.Point** %p11, align 8 - %arrayidx12 = getelementptr inbounds %struct.Point, %struct.Point* %27, i64 0 - %28 = bitcast %struct.Point* %agg.tmp10 to i8* - %29 = bitcast %struct.Point* %arrayidx12 to i8* - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 %28, i8* align 8 %29, i64 32, i1 false) - %30 = load i64, i64* %ptDimension, align 8 - %conv13 = trunc i64 %30 to i32 - %call14 = call float @_Z4dist5PointS_i(%struct.Point* byval(%struct.Point) align 8 %agg.tmp, %struct.Point* byval(%struct.Point) align 8 %agg.tmp10, i32 %conv13) - %31 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p15 = getelementptr inbounds %struct.Points, %struct.Points* %31, i32 0, i32 2 - %32 = load %struct.Point*, %struct.Point** %p15, align 8 - %33 = load i64, i64* %kk, align 8 - %arrayidx16 = getelementptr inbounds %struct.Point, %struct.Point* %32, i64 %33 - %weight = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx16, i32 0, i32 0 - %34 = load float, float* %weight, align 8 - %mul17 = fmul contract float %call14, %34 - %35 = load float, float* %myhiz, align 4 - %add18 = fadd contract float %35, %mul17 - store float %add18, float* %myhiz, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %36 = load i64, i64* %kk, align 8 - %inc = add nsw i64 %36, 1 - store i64 %inc, i64* %kk, align 8 - br label %for.cond - -for.end: ; preds = %for.cond - %37 = load float, float* %myhiz, align 4 - %38 = load float*, float** @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE4hizs, align 8 - %39 = load i32, i32* %pid.addr, align 4 - %idxprom = sext i32 %39 to i64 - %arrayidx19 = getelementptr inbounds float, float* %38, i64 %idxprom - store float %37, float* %arrayidx19, align 4 - store i32 0, i32* %i20, align 4 - br label %for.cond21 - -for.cond21: ; preds = %for.inc27, %for.end - %40 = load i32, i32* %i20, align 4 - %41 = load i32, i32* @_ZL5nproc, align 4 - %cmp22 = icmp slt i32 %40, %41 - br i1 %cmp22, label %for.body23, label %for.end29 - -for.body23: ; preds = %for.cond21 - %42 = load float*, float** @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE4hizs, align 8 - %43 = load i32, i32* %i20, align 4 - %idxprom24 = sext i32 %43 to i64 - %arrayidx25 = getelementptr inbounds float, float* %42, i64 %idxprom24 - %44 = load float, float* %arrayidx25, align 4 - %45 = load float, float* %hiz, align 4 - %add26 = fadd contract float %45, %44 - store float %add26, float* %hiz, align 4 - br label %for.inc27 - -for.inc27: ; preds = %for.body23 - %46 = load i32, i32* %i20, align 4 - %inc28 = add nsw i32 %46, 1 - store i32 %inc28, i32* %i20, align 4 - br label %for.cond21 - -for.end29: ; preds = %for.cond21 - store float 0.000000e+00, float* %loz, align 4 - %47 = load float, float* %hiz, align 4 - %48 = load float, float* %loz, align 4 - %add30 = fadd contract float %47, %48 - %conv31 = fpext float %add30 to double - %div32 = fdiv double %conv31, 2.000000e+00 - %conv33 = fptrunc double %div32 to float - store float %conv33, float* %z, align 4 - %49 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %num34 = getelementptr inbounds %struct.Points, %struct.Points* %49, i32 0, i32 0 - %50 = load i64, i64* %num34, align 8 - %51 = load i64, i64* %kmax.addr, align 8 - %cmp35 = icmp sle i64 %50, %51 - br i1 %cmp35, label %if.then36, label %if.end52 - -if.then36: ; preds = %for.end29 - %52 = load i64, i64* %k1, align 8 - store i64 %52, i64* %kk37, align 8 - br label %for.cond38 - -for.cond38: ; preds = %for.inc46, %if.then36 - %53 = load i64, i64* %kk37, align 8 - %54 = load i64, i64* %k2, align 8 - %cmp39 = icmp slt i64 %53, %54 - br i1 %cmp39, label %for.body40, label %for.end48 - -for.body40: ; preds = %for.cond38 - %55 = load i64, i64* %kk37, align 8 - %56 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p41 = getelementptr inbounds %struct.Points, %struct.Points* %56, i32 0, i32 2 - %57 = load %struct.Point*, %struct.Point** %p41, align 8 - %58 = load i64, i64* %kk37, align 8 - %arrayidx42 = getelementptr inbounds %struct.Point, %struct.Point* %57, i64 %58 - %assign = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx42, i32 0, i32 2 - store i64 %55, i64* %assign, align 8 - %59 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p43 = getelementptr inbounds %struct.Points, %struct.Points* %59, i32 0, i32 2 - %60 = load %struct.Point*, %struct.Point** %p43, align 8 - %61 = load i64, i64* %kk37, align 8 - %arrayidx44 = getelementptr inbounds %struct.Point, %struct.Point* %60, i64 %61 - %cost45 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx44, i32 0, i32 3 - store float 0.000000e+00, float* %cost45, align 8 - br label %for.inc46 - -for.inc46: ; preds = %for.body40 - %62 = load i64, i64* %kk37, align 8 - %inc47 = add nsw i64 %62, 1 - store i64 %inc47, i64* %kk37, align 8 - br label %for.cond38 - -for.end48: ; preds = %for.cond38 - store float 0.000000e+00, float* %cost, align 4 - %63 = load i32, i32* %pid.addr, align 4 - %cmp49 = icmp eq i32 %63, 0 - br i1 %cmp49, label %if.then50, label %if.end51 - -if.then50: ; preds = %for.end48 - %64 = load float*, float** @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE4hizs, align 8 - %65 = bitcast float* %64 to i8* - call void @free(i8* %65) #2 - %66 = load i64, i64* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE1k, align 8 - %67 = load i64*, i64** %kfinal.addr, align 8 - store i64 %66, i64* %67, align 8 - br label %if.end51 - -if.end51: ; preds = %if.then50, %for.end48 - %68 = load float, float* %cost, align 4 - store float %68, float* %retval, align 4 - br label %return - -if.end52: ; preds = %for.end29 - %69 = load i32, i32* %pid.addr, align 4 - %cmp53 = icmp eq i32 %69, 0 - br i1 %cmp53, label %if.then54, label %if.end55 - -if.then54: ; preds = %if.end52 - %70 = load %struct.Points*, %struct.Points** %points.addr, align 8 - call void @_Z7shuffleP6Points(%struct.Points* %70) - br label %if.end55 - -if.end55: ; preds = %if.then54, %if.end52 - %71 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %72 = load float, float* %z, align 4 - %73 = load i32, i32* %pid.addr, align 4 - %74 = load %union.pthread_barrier_t*, %union.pthread_barrier_t** %barrier.addr, align 8 - %call56 = call float @_Z7pspeedyP6PointsfPliP17pthread_barrier_t(%struct.Points* %71, float %72, i64* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE1k, i32 %73, %union.pthread_barrier_t* %74) - store float %call56, float* %cost, align 4 - store i32 0, i32* %i, align 4 - br label %while.cond - -while.cond: ; preds = %while.body, %if.end55 - %75 = load i64, i64* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE1k, align 8 - %76 = load i64, i64* %kmin.addr, align 8 - %cmp57 = icmp slt i64 %75, %76 - br i1 %cmp57, label %land.rhs, label %land.end - -land.rhs: ; preds = %while.cond - %77 = load i32, i32* %i, align 4 - %cmp58 = icmp slt i32 %77, 1 - br label %land.end - -land.end: ; preds = %land.rhs, %while.cond - %78 = phi i1 [ false, %while.cond ], [ %cmp58, %land.rhs ] - br i1 %78, label %while.body, label %while.end - -while.body: ; preds = %land.end - %79 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %80 = load float, float* %z, align 4 - %81 = load i32, i32* %pid.addr, align 4 - %82 = load %union.pthread_barrier_t*, %union.pthread_barrier_t** %barrier.addr, align 8 - %call59 = call float @_Z7pspeedyP6PointsfPliP17pthread_barrier_t(%struct.Points* %79, float %80, i64* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE1k, i32 %81, %union.pthread_barrier_t* %82) - store float %call59, float* %cost, align 4 - %83 = load i32, i32* %i, align 4 - %inc60 = add nsw i32 %83, 1 - store i32 %inc60, i32* %i, align 4 - br label %while.cond - -while.end: ; preds = %land.end - br label %while.cond61 - -while.cond61: ; preds = %if.end73, %while.end - %84 = load i64, i64* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE1k, align 8 - %85 = load i64, i64* %kmin.addr, align 8 - %cmp62 = icmp slt i64 %84, %85 - br i1 %cmp62, label %while.body63, label %while.end76 - -while.body63: ; preds = %while.cond61 - %86 = load i32, i32* %i, align 4 - %cmp64 = icmp sge i32 %86, 1 - br i1 %cmp64, label %if.then65, label %if.end70 - -if.then65: ; preds = %while.body63 - %87 = load float, float* %z, align 4 - store float %87, float* %hiz, align 4 - %88 = load float, float* %hiz, align 4 - %89 = load float, float* %loz, align 4 - %add66 = fadd contract float %88, %89 - %conv67 = fpext float %add66 to double - %div68 = fdiv double %conv67, 2.000000e+00 - %conv69 = fptrunc double %div68 to float - store float %conv69, float* %z, align 4 - store i32 0, i32* %i, align 4 - br label %if.end70 - -if.end70: ; preds = %if.then65, %while.body63 - %90 = load i32, i32* %pid.addr, align 4 - %cmp71 = icmp eq i32 %90, 0 - br i1 %cmp71, label %if.then72, label %if.end73 - -if.then72: ; preds = %if.end70 - %91 = load %struct.Points*, %struct.Points** %points.addr, align 8 - call void @_Z7shuffleP6Points(%struct.Points* %91) - br label %if.end73 - -if.end73: ; preds = %if.then72, %if.end70 - %92 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %93 = load float, float* %z, align 4 - %94 = load i32, i32* %pid.addr, align 4 - %95 = load %union.pthread_barrier_t*, %union.pthread_barrier_t** %barrier.addr, align 8 - %call74 = call float @_Z7pspeedyP6PointsfPliP17pthread_barrier_t(%struct.Points* %92, float %93, i64* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE1k, i32 %94, %union.pthread_barrier_t* %95) - store float %call74, float* %cost, align 4 - %96 = load i32, i32* %i, align 4 - %inc75 = add nsw i32 %96, 1 - store i32 %inc75, i32* %i, align 4 - br label %while.cond61 - -while.end76: ; preds = %while.cond61 - %97 = load i32, i32* %pid.addr, align 4 - %cmp77 = icmp eq i32 %97, 0 - br i1 %cmp77, label %if.then78, label %if.end95 - -if.then78: ; preds = %while.end76 - %98 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %99 = load i64, i64* %kmin.addr, align 8 - %conv79 = trunc i64 %99 to i32 - %100 = load i32, i32* %pid.addr, align 4 - %101 = load %union.pthread_barrier_t*, %union.pthread_barrier_t** %barrier.addr, align 8 - %call80 = call i32 @_Z19selectfeasible_fastP6PointsPPiiiP17pthread_barrier_t(%struct.Points* %98, i32** @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE8feasible, i32 %conv79, i32 %100, %union.pthread_barrier_t* %101) - store i32 %call80, i32* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE11numfeasible, align 4 - store i32 0, i32* %i81, align 4 - br label %for.cond82 - -for.cond82: ; preds = %for.inc92, %if.then78 - %102 = load i32, i32* %i81, align 4 - %conv83 = sext i32 %102 to i64 - %103 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %num84 = getelementptr inbounds %struct.Points, %struct.Points* %103, i32 0, i32 0 - %104 = load i64, i64* %num84, align 8 - %cmp85 = icmp slt i64 %conv83, %104 - br i1 %cmp85, label %for.body86, label %for.end94 - -for.body86: ; preds = %for.cond82 - %105 = load i8*, i8** @_ZL9is_center, align 8 - %106 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p87 = getelementptr inbounds %struct.Points, %struct.Points* %106, i32 0, i32 2 - %107 = load %struct.Point*, %struct.Point** %p87, align 8 - %108 = load i32, i32* %i81, align 4 - %idxprom88 = sext i32 %108 to i64 - %arrayidx89 = getelementptr inbounds %struct.Point, %struct.Point* %107, i64 %idxprom88 - %assign90 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx89, i32 0, i32 2 - %109 = load i64, i64* %assign90, align 8 - %arrayidx91 = getelementptr inbounds i8, i8* %105, i64 %109 - store i8 1, i8* %arrayidx91, align 1 - br label %for.inc92 - -for.inc92: ; preds = %for.body86 - %110 = load i32, i32* %i81, align 4 - %inc93 = add nsw i32 %110, 1 - store i32 %inc93, i32* %i81, align 4 - br label %for.cond82 - -for.end94: ; preds = %for.cond82 - br label %if.end95 - -if.end95: ; preds = %for.end94, %while.end76 - br label %while.body97 - -while.body97: ; preds = %if.end95, %if.end160 - %111 = load float, float* %cost, align 4 - store float %111, float* %lastcost, align 4 - %112 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %113 = load i32*, i32** @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE8feasible, align 8 - %114 = load i32, i32* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE11numfeasible, align 4 - %115 = load float, float* %z, align 4 - %116 = load i64, i64* %kmax.addr, align 8 - %conv98 = trunc i64 %116 to i32 - %117 = load float, float* %cost, align 4 - %118 = load i64, i64* %kmax.addr, align 8 - %mul99 = mul nsw i64 3, %118 - %conv100 = sitofp i64 %mul99 to float - %119 = load i64, i64* %kmax.addr, align 8 - %conv101 = sitofp i64 %119 to float - %call102 = call float @_ZSt3logf(float %conv101) - %mul103 = fmul contract float %conv100, %call102 - %conv104 = fptosi float %mul103 to i64 - %120 = load i32, i32* %pid.addr, align 4 - %121 = load %union.pthread_barrier_t*, %union.pthread_barrier_t** %barrier.addr, align 8 - %call105 = call float @_Z3pFLP6PointsPiifPliflfiP17pthread_barrier_t(%struct.Points* %112, i32* %113, i32 %114, float %115, i64* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE1k, i32 %conv98, float %117, i64 %conv104, float 0x3FB99999A0000000, i32 %120, %union.pthread_barrier_t* %121) - store float %call105, float* %cost, align 4 - %122 = load i64, i64* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE1k, align 8 - %conv106 = sitofp i64 %122 to double - %123 = load i64, i64* %kmax.addr, align 8 - %conv107 = sitofp i64 %123 to double - %mul108 = fmul contract double 1.100000e+00, %conv107 - %cmp109 = fcmp ole double %conv106, %mul108 - br i1 %cmp109, label %land.lhs.true, label %lor.lhs.false - -land.lhs.true: ; preds = %while.body97 - %124 = load i64, i64* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE1k, align 8 - %conv110 = sitofp i64 %124 to double - %125 = load i64, i64* %kmin.addr, align 8 - %conv111 = sitofp i64 %125 to double - %mul112 = fmul contract double 9.000000e-01, %conv111 - %cmp113 = fcmp oge double %conv110, %mul112 - br i1 %cmp113, label %if.then119, label %lor.lhs.false - -lor.lhs.false: ; preds = %land.lhs.true, %while.body97 - %126 = load i64, i64* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE1k, align 8 - %127 = load i64, i64* %kmax.addr, align 8 - %add114 = add nsw i64 %127, 2 - %cmp115 = icmp sle i64 %126, %add114 - br i1 %cmp115, label %land.lhs.true116, label %if.end128 - -land.lhs.true116: ; preds = %lor.lhs.false - %128 = load i64, i64* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE1k, align 8 - %129 = load i64, i64* %kmin.addr, align 8 - %sub117 = sub nsw i64 %129, 2 - %cmp118 = icmp sge i64 %128, %sub117 - br i1 %cmp118, label %if.then119, label %if.end128 - -if.then119: ; preds = %land.lhs.true116, %land.lhs.true - %130 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %131 = load i32*, i32** @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE8feasible, align 8 - %132 = load i32, i32* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE11numfeasible, align 4 - %133 = load float, float* %z, align 4 - %134 = load i64, i64* %kmax.addr, align 8 - %conv120 = trunc i64 %134 to i32 - %135 = load float, float* %cost, align 4 - %136 = load i64, i64* %kmax.addr, align 8 - %mul121 = mul nsw i64 3, %136 - %conv122 = sitofp i64 %mul121 to float - %137 = load i64, i64* %kmax.addr, align 8 - %conv123 = sitofp i64 %137 to float - %call124 = call float @_ZSt3logf(float %conv123) - %mul125 = fmul contract float %conv122, %call124 - %conv126 = fptosi float %mul125 to i64 - %138 = load i32, i32* %pid.addr, align 4 - %139 = load %union.pthread_barrier_t*, %union.pthread_barrier_t** %barrier.addr, align 8 - %call127 = call float @_Z3pFLP6PointsPiifPliflfiP17pthread_barrier_t(%struct.Points* %130, i32* %131, i32 %132, float %133, i64* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE1k, i32 %conv120, float %135, i64 %conv126, float 0x3F50624DE0000000, i32 %138, %union.pthread_barrier_t* %139) - store float %call127, float* %cost, align 4 - br label %if.end128 - -if.end128: ; preds = %if.then119, %land.lhs.true116, %lor.lhs.false - %140 = load i64, i64* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE1k, align 8 - %141 = load i64, i64* %kmax.addr, align 8 - %cmp129 = icmp sgt i64 %140, %141 - br i1 %cmp129, label %if.then130, label %if.end139 - -if.then130: ; preds = %if.end128 - %142 = load float, float* %z, align 4 - store float %142, float* %loz, align 4 - %143 = load float, float* %hiz, align 4 - %144 = load float, float* %loz, align 4 - %add131 = fadd contract float %143, %144 - %conv132 = fpext float %add131 to double - %div133 = fdiv double %conv132, 2.000000e+00 - %conv134 = fptrunc double %div133 to float - store float %conv134, float* %z, align 4 - %145 = load float, float* %z, align 4 - %146 = load float, float* %loz, align 4 - %sub135 = fsub contract float %145, %146 - %147 = load i64, i64* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE1k, align 8 - %conv136 = sitofp i64 %147 to float - %mul137 = fmul contract float %sub135, %conv136 - %148 = load float, float* %cost, align 4 - %add138 = fadd contract float %148, %mul137 - store float %add138, float* %cost, align 4 - br label %if.end139 - -if.end139: ; preds = %if.then130, %if.end128 - %149 = load i64, i64* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE1k, align 8 - %150 = load i64, i64* %kmin.addr, align 8 - %cmp140 = icmp slt i64 %149, %150 - br i1 %cmp140, label %if.then141, label %if.end150 - -if.then141: ; preds = %if.end139 - %151 = load float, float* %z, align 4 - store float %151, float* %hiz, align 4 - %152 = load float, float* %hiz, align 4 - %153 = load float, float* %loz, align 4 - %add142 = fadd contract float %152, %153 - %conv143 = fpext float %add142 to double - %div144 = fdiv double %conv143, 2.000000e+00 - %conv145 = fptrunc double %div144 to float - store float %conv145, float* %z, align 4 - %154 = load float, float* %z, align 4 - %155 = load float, float* %hiz, align 4 - %sub146 = fsub contract float %154, %155 - %156 = load i64, i64* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE1k, align 8 - %conv147 = sitofp i64 %156 to float - %mul148 = fmul contract float %sub146, %conv147 - %157 = load float, float* %cost, align 4 - %add149 = fadd contract float %157, %mul148 - store float %add149, float* %cost, align 4 - br label %if.end150 - -if.end150: ; preds = %if.then141, %if.end139 - %158 = load i64, i64* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE1k, align 8 - %159 = load i64, i64* %kmax.addr, align 8 - %cmp151 = icmp sle i64 %158, %159 - br i1 %cmp151, label %land.lhs.true152, label %lor.lhs.false154 - -land.lhs.true152: ; preds = %if.end150 - %160 = load i64, i64* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE1k, align 8 - %161 = load i64, i64* %kmin.addr, align 8 - %cmp153 = icmp sge i64 %160, %161 - br i1 %cmp153, label %if.then159, label %lor.lhs.false154 - -lor.lhs.false154: ; preds = %land.lhs.true152, %if.end150 - %162 = load float, float* %loz, align 4 - %conv155 = fpext float %162 to double - %163 = load float, float* %hiz, align 4 - %conv156 = fpext float %163 to double - %mul157 = fmul contract double 0x3FEFF7CED916872B, %conv156 - %cmp158 = fcmp oge double %conv155, %mul157 - br i1 %cmp158, label %if.then159, label %if.end160 - -if.then159: ; preds = %lor.lhs.false154, %land.lhs.true152 - br label %while.end161 - -if.end160: ; preds = %lor.lhs.false154 - br label %while.body97 - -while.end161: ; preds = %if.then159 - %164 = load i32, i32* %pid.addr, align 4 - %cmp162 = icmp eq i32 %164, 0 - br i1 %cmp162, label %if.then163, label %if.end164 - -if.then163: ; preds = %while.end161 - %165 = load i32*, i32** @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE8feasible, align 8 - %166 = bitcast i32* %165 to i8* - call void @free(i8* %166) #2 - %167 = load float*, float** @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE4hizs, align 8 - %168 = bitcast float* %167 to i8* - call void @free(i8* %168) #2 - %169 = load i64, i64* @_ZZ8pkmedianP6PointsllPliP17pthread_barrier_tE1k, align 8 - %170 = load i64*, i64** %kfinal.addr, align 8 - store i64 %169, i64* %170, align 8 - br label %if.end164 - -if.end164: ; preds = %if.then163, %while.end161 - %171 = load float, float* %cost, align 4 - store float %171, float* %retval, align 4 - br label %return - -return: ; preds = %if.end164, %if.end51 - %172 = load float, float* %retval, align 4 - ret float %172 -} - -; Function Attrs: nounwind -declare dso_local noalias i8* @calloc(i64, i64) #7 - -; Function Attrs: noinline nounwind optnone uwtable -define dso_local i32 @_Z11contcentersP6Points(%struct.Points* %points) #6 { -entry: - %points.addr = alloca %struct.Points*, align 8 - %i = alloca i64, align 8 - %ii = alloca i64, align 8 - %relweight = alloca float, align 4 - store %struct.Points* %points, %struct.Points** %points.addr, align 8 - store i64 0, i64* %i, align 8 - br label %for.cond - -for.cond: ; preds = %for.inc48, %entry - %0 = load i64, i64* %i, align 8 - %1 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %num = getelementptr inbounds %struct.Points, %struct.Points* %1, i32 0, i32 0 - %2 = load i64, i64* %num, align 8 - %cmp = icmp slt i64 %0, %2 - br i1 %cmp, label %for.body, label %for.end50 - -for.body: ; preds = %for.cond - %3 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p = getelementptr inbounds %struct.Points, %struct.Points* %3, i32 0, i32 2 - %4 = load %struct.Point*, %struct.Point** %p, align 8 - %5 = load i64, i64* %i, align 8 - %arrayidx = getelementptr inbounds %struct.Point, %struct.Point* %4, i64 %5 - %assign = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx, i32 0, i32 2 - %6 = load i64, i64* %assign, align 8 - %7 = load i64, i64* %i, align 8 - %cmp1 = icmp ne i64 %6, %7 - br i1 %cmp1, label %if.then, label %if.end - -if.then: ; preds = %for.body - %8 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p2 = getelementptr inbounds %struct.Points, %struct.Points* %8, i32 0, i32 2 - %9 = load %struct.Point*, %struct.Point** %p2, align 8 - %10 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p3 = getelementptr inbounds %struct.Points, %struct.Points* %10, i32 0, i32 2 - %11 = load %struct.Point*, %struct.Point** %p3, align 8 - %12 = load i64, i64* %i, align 8 - %arrayidx4 = getelementptr inbounds %struct.Point, %struct.Point* %11, i64 %12 - %assign5 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx4, i32 0, i32 2 - %13 = load i64, i64* %assign5, align 8 - %arrayidx6 = getelementptr inbounds %struct.Point, %struct.Point* %9, i64 %13 - %weight = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx6, i32 0, i32 0 - %14 = load float, float* %weight, align 8 - %15 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p7 = getelementptr inbounds %struct.Points, %struct.Points* %15, i32 0, i32 2 - %16 = load %struct.Point*, %struct.Point** %p7, align 8 - %17 = load i64, i64* %i, align 8 - %arrayidx8 = getelementptr inbounds %struct.Point, %struct.Point* %16, i64 %17 - %weight9 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx8, i32 0, i32 0 - %18 = load float, float* %weight9, align 8 - %add = fadd contract float %14, %18 - store float %add, float* %relweight, align 4 - %19 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p10 = getelementptr inbounds %struct.Points, %struct.Points* %19, i32 0, i32 2 - %20 = load %struct.Point*, %struct.Point** %p10, align 8 - %21 = load i64, i64* %i, align 8 - %arrayidx11 = getelementptr inbounds %struct.Point, %struct.Point* %20, i64 %21 - %weight12 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx11, i32 0, i32 0 - %22 = load float, float* %weight12, align 8 - %23 = load float, float* %relweight, align 4 - %div = fdiv float %22, %23 - store float %div, float* %relweight, align 4 - store i64 0, i64* %ii, align 8 - br label %for.cond13 - -for.cond13: ; preds = %for.inc, %if.then - %24 = load i64, i64* %ii, align 8 - %25 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %dim = getelementptr inbounds %struct.Points, %struct.Points* %25, i32 0, i32 1 - %26 = load i32, i32* %dim, align 8 - %conv = sext i32 %26 to i64 - %cmp14 = icmp slt i64 %24, %conv - br i1 %cmp14, label %for.body15, label %for.end - -for.body15: ; preds = %for.cond13 - %27 = load float, float* %relweight, align 4 - %conv16 = fpext float %27 to double - %sub = fsub contract double 1.000000e+00, %conv16 - %28 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p17 = getelementptr inbounds %struct.Points, %struct.Points* %28, i32 0, i32 2 - %29 = load %struct.Point*, %struct.Point** %p17, align 8 - %30 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p18 = getelementptr inbounds %struct.Points, %struct.Points* %30, i32 0, i32 2 - %31 = load %struct.Point*, %struct.Point** %p18, align 8 - %32 = load i64, i64* %i, align 8 - %arrayidx19 = getelementptr inbounds %struct.Point, %struct.Point* %31, i64 %32 - %assign20 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx19, i32 0, i32 2 - %33 = load i64, i64* %assign20, align 8 - %arrayidx21 = getelementptr inbounds %struct.Point, %struct.Point* %29, i64 %33 - %coord = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx21, i32 0, i32 1 - %34 = load float*, float** %coord, align 8 - %35 = load i64, i64* %ii, align 8 - %arrayidx22 = getelementptr inbounds float, float* %34, i64 %35 - %36 = load float, float* %arrayidx22, align 4 - %conv23 = fpext float %36 to double - %mul = fmul contract double %conv23, %sub - %conv24 = fptrunc double %mul to float - store float %conv24, float* %arrayidx22, align 4 - %37 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p25 = getelementptr inbounds %struct.Points, %struct.Points* %37, i32 0, i32 2 - %38 = load %struct.Point*, %struct.Point** %p25, align 8 - %39 = load i64, i64* %i, align 8 - %arrayidx26 = getelementptr inbounds %struct.Point, %struct.Point* %38, i64 %39 - %coord27 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx26, i32 0, i32 1 - %40 = load float*, float** %coord27, align 8 - %41 = load i64, i64* %ii, align 8 - %arrayidx28 = getelementptr inbounds float, float* %40, i64 %41 - %42 = load float, float* %arrayidx28, align 4 - %43 = load float, float* %relweight, align 4 - %mul29 = fmul contract float %42, %43 - %44 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p30 = getelementptr inbounds %struct.Points, %struct.Points* %44, i32 0, i32 2 - %45 = load %struct.Point*, %struct.Point** %p30, align 8 - %46 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p31 = getelementptr inbounds %struct.Points, %struct.Points* %46, i32 0, i32 2 - %47 = load %struct.Point*, %struct.Point** %p31, align 8 - %48 = load i64, i64* %i, align 8 - %arrayidx32 = getelementptr inbounds %struct.Point, %struct.Point* %47, i64 %48 - %assign33 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx32, i32 0, i32 2 - %49 = load i64, i64* %assign33, align 8 - %arrayidx34 = getelementptr inbounds %struct.Point, %struct.Point* %45, i64 %49 - %coord35 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx34, i32 0, i32 1 - %50 = load float*, float** %coord35, align 8 - %51 = load i64, i64* %ii, align 8 - %arrayidx36 = getelementptr inbounds float, float* %50, i64 %51 - %52 = load float, float* %arrayidx36, align 4 - %add37 = fadd contract float %52, %mul29 - store float %add37, float* %arrayidx36, align 4 - br label %for.inc - -for.inc: ; preds = %for.body15 - %53 = load i64, i64* %ii, align 8 - %inc = add nsw i64 %53, 1 - store i64 %inc, i64* %ii, align 8 - br label %for.cond13 - -for.end: ; preds = %for.cond13 - %54 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p38 = getelementptr inbounds %struct.Points, %struct.Points* %54, i32 0, i32 2 - %55 = load %struct.Point*, %struct.Point** %p38, align 8 - %56 = load i64, i64* %i, align 8 - %arrayidx39 = getelementptr inbounds %struct.Point, %struct.Point* %55, i64 %56 - %weight40 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx39, i32 0, i32 0 - %57 = load float, float* %weight40, align 8 - %58 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p41 = getelementptr inbounds %struct.Points, %struct.Points* %58, i32 0, i32 2 - %59 = load %struct.Point*, %struct.Point** %p41, align 8 - %60 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p42 = getelementptr inbounds %struct.Points, %struct.Points* %60, i32 0, i32 2 - %61 = load %struct.Point*, %struct.Point** %p42, align 8 - %62 = load i64, i64* %i, align 8 - %arrayidx43 = getelementptr inbounds %struct.Point, %struct.Point* %61, i64 %62 - %assign44 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx43, i32 0, i32 2 - %63 = load i64, i64* %assign44, align 8 - %arrayidx45 = getelementptr inbounds %struct.Point, %struct.Point* %59, i64 %63 - %weight46 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx45, i32 0, i32 0 - %64 = load float, float* %weight46, align 8 - %add47 = fadd contract float %64, %57 - store float %add47, float* %weight46, align 8 - br label %if.end - -if.end: ; preds = %for.end, %for.body - br label %for.inc48 - -for.inc48: ; preds = %if.end - %65 = load i64, i64* %i, align 8 - %inc49 = add nsw i64 %65, 1 - store i64 %inc49, i64* %i, align 8 - br label %for.cond - -for.end50: ; preds = %for.cond - ret i32 0 -} - -; Function Attrs: noinline nounwind optnone uwtable -define dso_local void @_Z11copycentersP6PointsS0_Pll(%struct.Points* %points, %struct.Points* %centers, i64* %centerIDs, i64 %offset) #6 { -entry: - %points.addr = alloca %struct.Points*, align 8 - %centers.addr = alloca %struct.Points*, align 8 - %centerIDs.addr = alloca i64*, align 8 - %offset.addr = alloca i64, align 8 - %i = alloca i64, align 8 - %k = alloca i64, align 8 - %is_a_median = alloca i8*, align 8 - store %struct.Points* %points, %struct.Points** %points.addr, align 8 - store %struct.Points* %centers, %struct.Points** %centers.addr, align 8 - store i64* %centerIDs, i64** %centerIDs.addr, align 8 - store i64 %offset, i64* %offset.addr, align 8 - %0 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %num = getelementptr inbounds %struct.Points, %struct.Points* %0, i32 0, i32 0 - %1 = load i64, i64* %num, align 8 - %call = call noalias i8* @calloc(i64 %1, i64 1) #2 - store i8* %call, i8** %is_a_median, align 8 - store i64 0, i64* %i, align 8 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %2 = load i64, i64* %i, align 8 - %3 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %num1 = getelementptr inbounds %struct.Points, %struct.Points* %3, i32 0, i32 0 - %4 = load i64, i64* %num1, align 8 - %cmp = icmp slt i64 %2, %4 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %5 = load i8*, i8** %is_a_median, align 8 - %6 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p = getelementptr inbounds %struct.Points, %struct.Points* %6, i32 0, i32 2 - %7 = load %struct.Point*, %struct.Point** %p, align 8 - %8 = load i64, i64* %i, align 8 - %arrayidx = getelementptr inbounds %struct.Point, %struct.Point* %7, i64 %8 - %assign = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx, i32 0, i32 2 - %9 = load i64, i64* %assign, align 8 - %arrayidx2 = getelementptr inbounds i8, i8* %5, i64 %9 - store i8 1, i8* %arrayidx2, align 1 - br label %for.inc - -for.inc: ; preds = %for.body - %10 = load i64, i64* %i, align 8 - %inc = add nsw i64 %10, 1 - store i64 %inc, i64* %i, align 8 - br label %for.cond - -for.end: ; preds = %for.cond - %11 = load %struct.Points*, %struct.Points** %centers.addr, align 8 - %num3 = getelementptr inbounds %struct.Points, %struct.Points* %11, i32 0, i32 0 - %12 = load i64, i64* %num3, align 8 - store i64 %12, i64* %k, align 8 - store i64 0, i64* %i, align 8 - br label %for.cond4 - -for.cond4: ; preds = %for.inc21, %for.end - %13 = load i64, i64* %i, align 8 - %14 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %num5 = getelementptr inbounds %struct.Points, %struct.Points* %14, i32 0, i32 0 - %15 = load i64, i64* %num5, align 8 - %cmp6 = icmp slt i64 %13, %15 - br i1 %cmp6, label %for.body7, label %for.end23 - -for.body7: ; preds = %for.cond4 - %16 = load i8*, i8** %is_a_median, align 8 - %17 = load i64, i64* %i, align 8 - %arrayidx8 = getelementptr inbounds i8, i8* %16, i64 %17 - %18 = load i8, i8* %arrayidx8, align 1 - %tobool = trunc i8 %18 to i1 - br i1 %tobool, label %if.then, label %if.end - -if.then: ; preds = %for.body7 - %19 = load %struct.Points*, %struct.Points** %centers.addr, align 8 - %p9 = getelementptr inbounds %struct.Points, %struct.Points* %19, i32 0, i32 2 - %20 = load %struct.Point*, %struct.Point** %p9, align 8 - %21 = load i64, i64* %k, align 8 - %arrayidx10 = getelementptr inbounds %struct.Point, %struct.Point* %20, i64 %21 - %coord = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx10, i32 0, i32 1 - %22 = load float*, float** %coord, align 8 - %23 = bitcast float* %22 to i8* - %24 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p11 = getelementptr inbounds %struct.Points, %struct.Points* %24, i32 0, i32 2 - %25 = load %struct.Point*, %struct.Point** %p11, align 8 - %26 = load i64, i64* %i, align 8 - %arrayidx12 = getelementptr inbounds %struct.Point, %struct.Point* %25, i64 %26 - %coord13 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx12, i32 0, i32 1 - %27 = load float*, float** %coord13, align 8 - %28 = bitcast float* %27 to i8* - %29 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %dim = getelementptr inbounds %struct.Points, %struct.Points* %29, i32 0, i32 1 - %30 = load i32, i32* %dim, align 8 - %conv = sext i32 %30 to i64 - %mul = mul i64 %conv, 4 - call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %23, i8* align 4 %28, i64 %mul, i1 false) - %31 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %p14 = getelementptr inbounds %struct.Points, %struct.Points* %31, i32 0, i32 2 - %32 = load %struct.Point*, %struct.Point** %p14, align 8 - %33 = load i64, i64* %i, align 8 - %arrayidx15 = getelementptr inbounds %struct.Point, %struct.Point* %32, i64 %33 - %weight = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx15, i32 0, i32 0 - %34 = load float, float* %weight, align 8 - %35 = load %struct.Points*, %struct.Points** %centers.addr, align 8 - %p16 = getelementptr inbounds %struct.Points, %struct.Points* %35, i32 0, i32 2 - %36 = load %struct.Point*, %struct.Point** %p16, align 8 - %37 = load i64, i64* %k, align 8 - %arrayidx17 = getelementptr inbounds %struct.Point, %struct.Point* %36, i64 %37 - %weight18 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx17, i32 0, i32 0 - store float %34, float* %weight18, align 8 - %38 = load i64, i64* %i, align 8 - %39 = load i64, i64* %offset.addr, align 8 - %add = add nsw i64 %38, %39 - %40 = load i64*, i64** %centerIDs.addr, align 8 - %41 = load i64, i64* %k, align 8 - %arrayidx19 = getelementptr inbounds i64, i64* %40, i64 %41 - store i64 %add, i64* %arrayidx19, align 8 - %42 = load i64, i64* %k, align 8 - %inc20 = add nsw i64 %42, 1 - store i64 %inc20, i64* %k, align 8 - br label %if.end - -if.end: ; preds = %if.then, %for.body7 - br label %for.inc21 - -for.inc21: ; preds = %if.end - %43 = load i64, i64* %i, align 8 - %inc22 = add nsw i64 %43, 1 - store i64 %inc22, i64* %i, align 8 - br label %for.cond4 - -for.end23: ; preds = %for.cond4 - %44 = load i64, i64* %k, align 8 - %45 = load %struct.Points*, %struct.Points** %centers.addr, align 8 - %num24 = getelementptr inbounds %struct.Points, %struct.Points* %45, i32 0, i32 0 - store i64 %44, i64* %num24, align 8 - %46 = load i8*, i8** %is_a_median, align 8 - call void @free(i8* %46) #2 - ret void -} - -; Function Attrs: noinline optnone uwtable -define dso_local i8* @_Z14localSearchSubPv(i8* %arg_) #3 { -entry: - %arg_.addr = alloca i8*, align 8 - %arg = alloca %struct.pkmedian_arg_t*, align 8 - store i8* %arg_, i8** %arg_.addr, align 8 - %0 = load i8*, i8** %arg_.addr, align 8 - %1 = bitcast i8* %0 to %struct.pkmedian_arg_t* - store %struct.pkmedian_arg_t* %1, %struct.pkmedian_arg_t** %arg, align 8 - %2 = load %struct.pkmedian_arg_t*, %struct.pkmedian_arg_t** %arg, align 8 - %points = getelementptr inbounds %struct.pkmedian_arg_t, %struct.pkmedian_arg_t* %2, i32 0, i32 0 - %3 = load %struct.Points*, %struct.Points** %points, align 8 - %4 = load %struct.pkmedian_arg_t*, %struct.pkmedian_arg_t** %arg, align 8 - %kmin = getelementptr inbounds %struct.pkmedian_arg_t, %struct.pkmedian_arg_t* %4, i32 0, i32 1 - %5 = load i64, i64* %kmin, align 8 - %6 = load %struct.pkmedian_arg_t*, %struct.pkmedian_arg_t** %arg, align 8 - %kmax = getelementptr inbounds %struct.pkmedian_arg_t, %struct.pkmedian_arg_t* %6, i32 0, i32 2 - %7 = load i64, i64* %kmax, align 8 - %8 = load %struct.pkmedian_arg_t*, %struct.pkmedian_arg_t** %arg, align 8 - %kfinal = getelementptr inbounds %struct.pkmedian_arg_t, %struct.pkmedian_arg_t* %8, i32 0, i32 3 - %9 = load i64*, i64** %kfinal, align 8 - %10 = load %struct.pkmedian_arg_t*, %struct.pkmedian_arg_t** %arg, align 8 - %pid = getelementptr inbounds %struct.pkmedian_arg_t, %struct.pkmedian_arg_t* %10, i32 0, i32 4 - %11 = load i32, i32* %pid, align 8 - %12 = load %struct.pkmedian_arg_t*, %struct.pkmedian_arg_t** %arg, align 8 - %barrier = getelementptr inbounds %struct.pkmedian_arg_t, %struct.pkmedian_arg_t* %12, i32 0, i32 5 - %13 = load %union.pthread_barrier_t*, %union.pthread_barrier_t** %barrier, align 8 - %call = call float @_Z8pkmedianP6PointsllPliP17pthread_barrier_t(%struct.Points* %3, i64 %5, i64 %7, i64* %9, i32 %11, %union.pthread_barrier_t* %13) - ret i8* null -} - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z11localSearchP6PointsllPl(%struct.Points* %points, i64 %kmin, i64 %kmax, i64* %kfinal) #3 { -entry: - %points.addr = alloca %struct.Points*, align 8 - %kmin.addr = alloca i64, align 8 - %kmax.addr = alloca i64, align 8 - %kfinal.addr = alloca i64*, align 8 - %t1 = alloca double, align 8 - %barrier = alloca %union.pthread_barrier_t, align 8 - %threads = alloca i64*, align 8 - %arg = alloca %struct.pkmedian_arg_t*, align 8 - %i = alloca i32, align 4 - %i20 = alloca i32, align 4 - %t2 = alloca double, align 8 - store %struct.Points* %points, %struct.Points** %points.addr, align 8 - store i64 %kmin, i64* %kmin.addr, align 8 - store i64 %kmax, i64* %kmax.addr, align 8 - store i64* %kfinal, i64** %kfinal.addr, align 8 - %call = call double @_Z7gettimev() - store double %call, double* %t1, align 8 - %0 = load i32, i32* @_ZL5nproc, align 4 - %1 = sext i32 %0 to i64 - %2 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %1, i64 8) - %3 = extractvalue { i64, i1 } %2, 1 - %4 = extractvalue { i64, i1 } %2, 0 - %5 = select i1 %3, i64 -1, i64 %4 - %call1 = call i8* @_Znam(i64 %5) #16 - %6 = bitcast i8* %call1 to i64* - store i64* %6, i64** %threads, align 8 - %7 = load i32, i32* @_ZL5nproc, align 4 - %8 = sext i32 %7 to i64 - %9 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %8, i64 48) - %10 = extractvalue { i64, i1 } %9, 1 - %11 = extractvalue { i64, i1 } %9, 0 - %12 = select i1 %10, i64 -1, i64 %11 - %call2 = call i8* @_Znam(i64 %12) #16 - %13 = bitcast i8* %call2 to %struct.pkmedian_arg_t* - store %struct.pkmedian_arg_t* %13, %struct.pkmedian_arg_t** %arg, align 8 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %14 = load i32, i32* %i, align 4 - %15 = load i32, i32* @_ZL5nproc, align 4 - %cmp = icmp slt i32 %14, %15 - br i1 %cmp, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %16 = load %struct.Points*, %struct.Points** %points.addr, align 8 - %17 = load %struct.pkmedian_arg_t*, %struct.pkmedian_arg_t** %arg, align 8 - %18 = load i32, i32* %i, align 4 - %idxprom = sext i32 %18 to i64 - %arrayidx = getelementptr inbounds %struct.pkmedian_arg_t, %struct.pkmedian_arg_t* %17, i64 %idxprom - %points3 = getelementptr inbounds %struct.pkmedian_arg_t, %struct.pkmedian_arg_t* %arrayidx, i32 0, i32 0 - store %struct.Points* %16, %struct.Points** %points3, align 8 - %19 = load i64, i64* %kmin.addr, align 8 - %20 = load %struct.pkmedian_arg_t*, %struct.pkmedian_arg_t** %arg, align 8 - %21 = load i32, i32* %i, align 4 - %idxprom4 = sext i32 %21 to i64 - %arrayidx5 = getelementptr inbounds %struct.pkmedian_arg_t, %struct.pkmedian_arg_t* %20, i64 %idxprom4 - %kmin6 = getelementptr inbounds %struct.pkmedian_arg_t, %struct.pkmedian_arg_t* %arrayidx5, i32 0, i32 1 - store i64 %19, i64* %kmin6, align 8 - %22 = load i64, i64* %kmax.addr, align 8 - %23 = load %struct.pkmedian_arg_t*, %struct.pkmedian_arg_t** %arg, align 8 - %24 = load i32, i32* %i, align 4 - %idxprom7 = sext i32 %24 to i64 - %arrayidx8 = getelementptr inbounds %struct.pkmedian_arg_t, %struct.pkmedian_arg_t* %23, i64 %idxprom7 - %kmax9 = getelementptr inbounds %struct.pkmedian_arg_t, %struct.pkmedian_arg_t* %arrayidx8, i32 0, i32 2 - store i64 %22, i64* %kmax9, align 8 - %25 = load i32, i32* %i, align 4 - %26 = load %struct.pkmedian_arg_t*, %struct.pkmedian_arg_t** %arg, align 8 - %27 = load i32, i32* %i, align 4 - %idxprom10 = sext i32 %27 to i64 - %arrayidx11 = getelementptr inbounds %struct.pkmedian_arg_t, %struct.pkmedian_arg_t* %26, i64 %idxprom10 - %pid = getelementptr inbounds %struct.pkmedian_arg_t, %struct.pkmedian_arg_t* %arrayidx11, i32 0, i32 4 - store i32 %25, i32* %pid, align 8 - %28 = load i64*, i64** %kfinal.addr, align 8 - %29 = load %struct.pkmedian_arg_t*, %struct.pkmedian_arg_t** %arg, align 8 - %30 = load i32, i32* %i, align 4 - %idxprom12 = sext i32 %30 to i64 - %arrayidx13 = getelementptr inbounds %struct.pkmedian_arg_t, %struct.pkmedian_arg_t* %29, i64 %idxprom12 - %kfinal14 = getelementptr inbounds %struct.pkmedian_arg_t, %struct.pkmedian_arg_t* %arrayidx13, i32 0, i32 3 - store i64* %28, i64** %kfinal14, align 8 - %31 = load %struct.pkmedian_arg_t*, %struct.pkmedian_arg_t** %arg, align 8 - %32 = load i32, i32* %i, align 4 - %idxprom15 = sext i32 %32 to i64 - %arrayidx16 = getelementptr inbounds %struct.pkmedian_arg_t, %struct.pkmedian_arg_t* %31, i64 %idxprom15 - %barrier17 = getelementptr inbounds %struct.pkmedian_arg_t, %struct.pkmedian_arg_t* %arrayidx16, i32 0, i32 5 - store %union.pthread_barrier_t* %barrier, %union.pthread_barrier_t** %barrier17, align 8 - %33 = load %struct.pkmedian_arg_t*, %struct.pkmedian_arg_t** %arg, align 8 - %arrayidx18 = getelementptr inbounds %struct.pkmedian_arg_t, %struct.pkmedian_arg_t* %33, i64 0 - %34 = bitcast %struct.pkmedian_arg_t* %arrayidx18 to i8* - %call19 = call i8* @_Z14localSearchSubPv(i8* %34) - br label %for.inc - -for.inc: ; preds = %for.body - %35 = load i32, i32* %i, align 4 - %inc = add nsw i32 %35, 1 - store i32 %inc, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - store i32 0, i32* %i20, align 4 - br label %for.cond21 - -for.cond21: ; preds = %for.inc24, %for.end - %36 = load i32, i32* %i20, align 4 - %37 = load i32, i32* @_ZL5nproc, align 4 - %cmp22 = icmp slt i32 %36, %37 - br i1 %cmp22, label %for.body23, label %for.end26 - -for.body23: ; preds = %for.cond21 - br label %for.inc24 - -for.inc24: ; preds = %for.body23 - %38 = load i32, i32* %i20, align 4 - %inc25 = add nsw i32 %38, 1 - store i32 %inc25, i32* %i20, align 4 - br label %for.cond21 - -for.end26: ; preds = %for.cond21 - %39 = load i64*, i64** %threads, align 8 - %isnull = icmp eq i64* %39, null - br i1 %isnull, label %delete.end, label %delete.notnull - -delete.notnull: ; preds = %for.end26 - %40 = bitcast i64* %39 to i8* - call void @_ZdaPv(i8* %40) #17 - br label %delete.end - -delete.end: ; preds = %delete.notnull, %for.end26 - %41 = load %struct.pkmedian_arg_t*, %struct.pkmedian_arg_t** %arg, align 8 - %isnull27 = icmp eq %struct.pkmedian_arg_t* %41, null - br i1 %isnull27, label %delete.end29, label %delete.notnull28 - -delete.notnull28: ; preds = %delete.end - %42 = bitcast %struct.pkmedian_arg_t* %41 to i8* - call void @_ZdaPv(i8* %42) #17 - br label %delete.end29 - -delete.end29: ; preds = %delete.notnull28, %delete.end - %call30 = call double @_Z7gettimev() - store double %call30, double* %t2, align 8 - %43 = load double, double* %t2, align 8 - %44 = load double, double* %t1, align 8 - %sub = fsub contract double %43, %44 - %45 = load double, double* @time_local_search, align 8 - %add = fadd contract double %45, %sub - store double %add, double* @time_local_search, align 8 - ret void -} - -; Function Attrs: nounwind readnone speculatable willreturn -declare { i64, i1 } @llvm.umul.with.overflow.i64(i64, i64) #8 - -; Function Attrs: nobuiltin -declare dso_local noalias i8* @_Znam(i64) #9 - -; Function Attrs: nobuiltin nounwind -declare dso_local void @_ZdaPv(i8*) #10 - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z12outcenterIDsP6PointsPlPc(%struct.Points* %centers, i64* %centerIDs, i8* %outfile) #3 { -entry: - %centers.addr = alloca %struct.Points*, align 8 - %centerIDs.addr = alloca i64*, align 8 - %outfile.addr = alloca i8*, align 8 - %fp = alloca %struct._IO_FILE*, align 8 - %is_a_median = alloca i32*, align 8 - %i = alloca i32, align 4 - %i6 = alloca i32, align 4 - %k = alloca i32, align 4 - store %struct.Points* %centers, %struct.Points** %centers.addr, align 8 - store i64* %centerIDs, i64** %centerIDs.addr, align 8 - store i8* %outfile, i8** %outfile.addr, align 8 - %0 = load i8*, i8** %outfile.addr, align 8 - %call = call %struct._IO_FILE* @fopen(i8* %0, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.3, i64 0, i64 0)) - store %struct._IO_FILE* %call, %struct._IO_FILE** %fp, align 8 - %1 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8 - %cmp = icmp eq %struct._IO_FILE* %1, null - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - %2 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %3 = load i8*, i8** %outfile.addr, align 8 - %call1 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %2, i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str.5, i64 0, i64 0), i8* %3) - call void @exit(i32 1) #15 - unreachable - -if.end: ; preds = %entry - %4 = load %struct.Points*, %struct.Points** %centers.addr, align 8 - %num = getelementptr inbounds %struct.Points, %struct.Points* %4, i32 0, i32 0 - %5 = load i64, i64* %num, align 8 - %call2 = call noalias i8* @calloc(i64 4, i64 %5) #2 - %6 = bitcast i8* %call2 to i32* - store i32* %6, i32** %is_a_median, align 8 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %if.end - %7 = load i32, i32* %i, align 4 - %conv = sext i32 %7 to i64 - %8 = load %struct.Points*, %struct.Points** %centers.addr, align 8 - %num3 = getelementptr inbounds %struct.Points, %struct.Points* %8, i32 0, i32 0 - %9 = load i64, i64* %num3, align 8 - %cmp4 = icmp slt i64 %conv, %9 - br i1 %cmp4, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %10 = load i32*, i32** %is_a_median, align 8 - %11 = load %struct.Points*, %struct.Points** %centers.addr, align 8 - %p = getelementptr inbounds %struct.Points, %struct.Points* %11, i32 0, i32 2 - %12 = load %struct.Point*, %struct.Point** %p, align 8 - %13 = load i32, i32* %i, align 4 - %idxprom = sext i32 %13 to i64 - %arrayidx = getelementptr inbounds %struct.Point, %struct.Point* %12, i64 %idxprom - %assign = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx, i32 0, i32 2 - %14 = load i64, i64* %assign, align 8 - %arrayidx5 = getelementptr inbounds i32, i32* %10, i64 %14 - store i32 1, i32* %arrayidx5, align 4 - br label %for.inc - -for.inc: ; preds = %for.body - %15 = load i32, i32* %i, align 4 - %inc = add nsw i32 %15, 1 - store i32 %inc, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - store i32 0, i32* %i6, align 4 - br label %for.cond7 - -for.cond7: ; preds = %for.inc38, %for.end - %16 = load i32, i32* %i6, align 4 - %conv8 = sext i32 %16 to i64 - %17 = load %struct.Points*, %struct.Points** %centers.addr, align 8 - %num9 = getelementptr inbounds %struct.Points, %struct.Points* %17, i32 0, i32 0 - %18 = load i64, i64* %num9, align 8 - %cmp10 = icmp slt i64 %conv8, %18 - br i1 %cmp10, label %for.body11, label %for.end40 - -for.body11: ; preds = %for.cond7 - %19 = load i32*, i32** %is_a_median, align 8 - %20 = load i32, i32* %i6, align 4 - %idxprom12 = sext i32 %20 to i64 - %arrayidx13 = getelementptr inbounds i32, i32* %19, i64 %idxprom12 - %21 = load i32, i32* %arrayidx13, align 4 - %tobool = icmp ne i32 %21, 0 - br i1 %tobool, label %if.then14, label %if.end37 - -if.then14: ; preds = %for.body11 - %22 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8 - %23 = load i64*, i64** %centerIDs.addr, align 8 - %24 = load i32, i32* %i6, align 4 - %idxprom15 = sext i32 %24 to i64 - %arrayidx16 = getelementptr inbounds i64, i64* %23, i64 %idxprom15 - %25 = load i64, i64* %arrayidx16, align 8 - %call17 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %22, i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.6, i64 0, i64 0), i64 %25) - %26 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8 - %27 = load %struct.Points*, %struct.Points** %centers.addr, align 8 - %p18 = getelementptr inbounds %struct.Points, %struct.Points* %27, i32 0, i32 2 - %28 = load %struct.Point*, %struct.Point** %p18, align 8 - %29 = load i32, i32* %i6, align 4 - %idxprom19 = sext i32 %29 to i64 - %arrayidx20 = getelementptr inbounds %struct.Point, %struct.Point* %28, i64 %idxprom19 - %weight = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx20, i32 0, i32 0 - %30 = load float, float* %weight, align 8 - %conv21 = fpext float %30 to double - %call22 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %26, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.7, i64 0, i64 0), double %conv21) - store i32 0, i32* %k, align 4 - br label %for.cond23 - -for.cond23: ; preds = %for.inc33, %if.then14 - %31 = load i32, i32* %k, align 4 - %32 = load %struct.Points*, %struct.Points** %centers.addr, align 8 - %dim = getelementptr inbounds %struct.Points, %struct.Points* %32, i32 0, i32 1 - %33 = load i32, i32* %dim, align 8 - %cmp24 = icmp slt i32 %31, %33 - br i1 %cmp24, label %for.body25, label %for.end35 - -for.body25: ; preds = %for.cond23 - %34 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8 - %35 = load %struct.Points*, %struct.Points** %centers.addr, align 8 - %p26 = getelementptr inbounds %struct.Points, %struct.Points* %35, i32 0, i32 2 - %36 = load %struct.Point*, %struct.Point** %p26, align 8 - %37 = load i32, i32* %i6, align 4 - %idxprom27 = sext i32 %37 to i64 - %arrayidx28 = getelementptr inbounds %struct.Point, %struct.Point* %36, i64 %idxprom27 - %coord = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx28, i32 0, i32 1 - %38 = load float*, float** %coord, align 8 - %39 = load i32, i32* %k, align 4 - %idxprom29 = sext i32 %39 to i64 - %arrayidx30 = getelementptr inbounds float, float* %38, i64 %idxprom29 - %40 = load float, float* %arrayidx30, align 4 - %conv31 = fpext float %40 to double - %call32 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %34, i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.8, i64 0, i64 0), double %conv31) - br label %for.inc33 - -for.inc33: ; preds = %for.body25 - %41 = load i32, i32* %k, align 4 - %inc34 = add nsw i32 %41, 1 - store i32 %inc34, i32* %k, align 4 - br label %for.cond23 - -for.end35: ; preds = %for.cond23 - %42 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8 - %call36 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %42, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.9, i64 0, i64 0)) - br label %if.end37 - -if.end37: ; preds = %for.end35, %for.body11 - br label %for.inc38 - -for.inc38: ; preds = %if.end37 - %43 = load i32, i32* %i6, align 4 - %inc39 = add nsw i32 %43, 1 - store i32 %inc39, i32* %i6, align 4 - br label %for.cond7 - -for.end40: ; preds = %for.cond7 - %44 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8 - %call41 = call i32 @fclose(%struct._IO_FILE* %44) - ret void -} - -; Function Attrs: noinline optnone uwtable -define dso_local void @_Z13streamClusterP7PStreamllillPc(%class.PStream* %stream, i64 %kmin, i64 %kmax, i32 %dim, i64 %chunksize, i64 %centersize, i8* %outfile) #3 { -entry: - %stream.addr = alloca %class.PStream*, align 8 - %kmin.addr = alloca i64, align 8 - %kmax.addr = alloca i64, align 8 - %dim.addr = alloca i32, align 4 - %chunksize.addr = alloca i64, align 8 - %centersize.addr = alloca i64, align 8 - %outfile.addr = alloca i8*, align 8 - %block = alloca float*, align 8 - %centerBlock = alloca float*, align 8 - %centerIDs = alloca i64*, align 8 - %points = alloca %struct.Points, align 8 - %i = alloca i32, align 4 - %centers = alloca %struct.Points, align 8 - %i25 = alloca i32, align 4 - %IDoffset = alloca i64, align 8 - %kfinal = alloca i64, align 8 - %numRead = alloca i64, align 8 - %i60 = alloca i32, align 4 - store %class.PStream* %stream, %class.PStream** %stream.addr, align 8 - store i64 %kmin, i64* %kmin.addr, align 8 - store i64 %kmax, i64* %kmax.addr, align 8 - store i32 %dim, i32* %dim.addr, align 4 - store i64 %chunksize, i64* %chunksize.addr, align 8 - store i64 %centersize, i64* %centersize.addr, align 8 - store i8* %outfile, i8** %outfile.addr, align 8 - %0 = load i64, i64* %chunksize.addr, align 8 - %1 = load i32, i32* %dim.addr, align 4 - %conv = sext i32 %1 to i64 - %mul = mul nsw i64 %0, %conv - %mul1 = mul i64 %mul, 4 - %call = call noalias i8* @malloc(i64 %mul1) #2 - %2 = bitcast i8* %call to float* - store float* %2, float** %block, align 8 - %3 = load i64, i64* %centersize.addr, align 8 - %4 = load i32, i32* %dim.addr, align 4 - %conv2 = sext i32 %4 to i64 - %mul3 = mul nsw i64 %3, %conv2 - %mul4 = mul i64 %mul3, 4 - %call5 = call noalias i8* @malloc(i64 %mul4) #2 - %5 = bitcast i8* %call5 to float* - store float* %5, float** %centerBlock, align 8 - %6 = load i64, i64* %centersize.addr, align 8 - %7 = load i32, i32* %dim.addr, align 4 - %conv6 = sext i32 %7 to i64 - %mul7 = mul nsw i64 %6, %conv6 - %mul8 = mul i64 %mul7, 8 - %call9 = call noalias i8* @malloc(i64 %mul8) #2 - %8 = bitcast i8* %call9 to i64* - store i64* %8, i64** %centerIDs, align 8 - %9 = load float*, float** %block, align 8 - %cmp = icmp eq float* %9, null - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - %10 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %call10 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @.str.10, i64 0, i64 0)) - call void @exit(i32 1) #15 - unreachable - -if.end: ; preds = %entry - %11 = load i32, i32* %dim.addr, align 4 - %dim11 = getelementptr inbounds %struct.Points, %struct.Points* %points, i32 0, i32 1 - store i32 %11, i32* %dim11, align 8 - %12 = load i64, i64* %chunksize.addr, align 8 - %num = getelementptr inbounds %struct.Points, %struct.Points* %points, i32 0, i32 0 - store i64 %12, i64* %num, align 8 - %13 = load i64, i64* %chunksize.addr, align 8 - %mul12 = mul i64 %13, 32 - %call13 = call noalias i8* @malloc(i64 %mul12) #2 - %14 = bitcast i8* %call13 to %struct.Point* - %p = getelementptr inbounds %struct.Points, %struct.Points* %points, i32 0, i32 2 - store %struct.Point* %14, %struct.Point** %p, align 8 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %if.end - %15 = load i32, i32* %i, align 4 - %conv14 = sext i32 %15 to i64 - %16 = load i64, i64* %chunksize.addr, align 8 - %cmp15 = icmp slt i64 %conv14, %16 - br i1 %cmp15, label %for.body, label %for.end - -for.body: ; preds = %for.cond - %17 = load float*, float** %block, align 8 - %18 = load i32, i32* %i, align 4 - %19 = load i32, i32* %dim.addr, align 4 - %mul16 = mul nsw i32 %18, %19 - %idxprom = sext i32 %mul16 to i64 - %arrayidx = getelementptr inbounds float, float* %17, i64 %idxprom - %p17 = getelementptr inbounds %struct.Points, %struct.Points* %points, i32 0, i32 2 - %20 = load %struct.Point*, %struct.Point** %p17, align 8 - %21 = load i32, i32* %i, align 4 - %idxprom18 = sext i32 %21 to i64 - %arrayidx19 = getelementptr inbounds %struct.Point, %struct.Point* %20, i64 %idxprom18 - %coord = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx19, i32 0, i32 1 - store float* %arrayidx, float** %coord, align 8 - br label %for.inc - -for.inc: ; preds = %for.body - %22 = load i32, i32* %i, align 4 - %inc = add nsw i32 %22, 1 - store i32 %inc, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - %23 = load i32, i32* %dim.addr, align 4 - %dim20 = getelementptr inbounds %struct.Points, %struct.Points* %centers, i32 0, i32 1 - store i32 %23, i32* %dim20, align 8 - %24 = load i64, i64* %centersize.addr, align 8 - %mul21 = mul i64 %24, 32 - %call22 = call noalias i8* @malloc(i64 %mul21) #2 - %25 = bitcast i8* %call22 to %struct.Point* - %p23 = getelementptr inbounds %struct.Points, %struct.Points* %centers, i32 0, i32 2 - store %struct.Point* %25, %struct.Point** %p23, align 8 - %num24 = getelementptr inbounds %struct.Points, %struct.Points* %centers, i32 0, i32 0 - store i64 0, i64* %num24, align 8 - store i32 0, i32* %i25, align 4 - br label %for.cond26 - -for.cond26: ; preds = %for.inc40, %for.end - %26 = load i32, i32* %i25, align 4 - %conv27 = sext i32 %26 to i64 - %27 = load i64, i64* %centersize.addr, align 8 - %cmp28 = icmp slt i64 %conv27, %27 - br i1 %cmp28, label %for.body29, label %for.end42 - -for.body29: ; preds = %for.cond26 - %28 = load float*, float** %centerBlock, align 8 - %29 = load i32, i32* %i25, align 4 - %30 = load i32, i32* %dim.addr, align 4 - %mul30 = mul nsw i32 %29, %30 - %idxprom31 = sext i32 %mul30 to i64 - %arrayidx32 = getelementptr inbounds float, float* %28, i64 %idxprom31 - %p33 = getelementptr inbounds %struct.Points, %struct.Points* %centers, i32 0, i32 2 - %31 = load %struct.Point*, %struct.Point** %p33, align 8 - %32 = load i32, i32* %i25, align 4 - %idxprom34 = sext i32 %32 to i64 - %arrayidx35 = getelementptr inbounds %struct.Point, %struct.Point* %31, i64 %idxprom34 - %coord36 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx35, i32 0, i32 1 - store float* %arrayidx32, float** %coord36, align 8 - %p37 = getelementptr inbounds %struct.Points, %struct.Points* %centers, i32 0, i32 2 - %33 = load %struct.Point*, %struct.Point** %p37, align 8 - %34 = load i32, i32* %i25, align 4 - %idxprom38 = sext i32 %34 to i64 - %arrayidx39 = getelementptr inbounds %struct.Point, %struct.Point* %33, i64 %idxprom38 - %weight = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx39, i32 0, i32 0 - store float 1.000000e+00, float* %weight, align 8 - br label %for.inc40 - -for.inc40: ; preds = %for.body29 - %35 = load i32, i32* %i25, align 4 - %inc41 = add nsw i32 %35, 1 - store i32 %inc41, i32* %i25, align 4 - br label %for.cond26 - -for.end42: ; preds = %for.cond26 - store i64 0, i64* %IDoffset, align 8 - br label %while.body - -while.body: ; preds = %for.end42, %if.end94 - %36 = load %class.PStream*, %class.PStream** %stream.addr, align 8 - %37 = load float*, float** %block, align 8 - %38 = load i32, i32* %dim.addr, align 4 - %39 = load i64, i64* %chunksize.addr, align 8 - %conv43 = trunc i64 %39 to i32 - %40 = bitcast %class.PStream* %36 to i64 (%class.PStream*, float*, i32, i32)*** - %vtable = load i64 (%class.PStream*, float*, i32, i32)**, i64 (%class.PStream*, float*, i32, i32)*** %40, align 8 - %vfn = getelementptr inbounds i64 (%class.PStream*, float*, i32, i32)*, i64 (%class.PStream*, float*, i32, i32)** %vtable, i64 0 - %41 = load i64 (%class.PStream*, float*, i32, i32)*, i64 (%class.PStream*, float*, i32, i32)** %vfn, align 8 - %call44 = call i64 %41(%class.PStream* %36, float* %37, i32 %38, i32 %conv43) - store i64 %call44, i64* %numRead, align 8 - %42 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %43 = load i64, i64* %numRead, align 8 - %call45 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %42, i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.11, i64 0, i64 0), i64 %43) - %44 = load %class.PStream*, %class.PStream** %stream.addr, align 8 - %45 = bitcast %class.PStream* %44 to i32 (%class.PStream*)*** - %vtable46 = load i32 (%class.PStream*)**, i32 (%class.PStream*)*** %45, align 8 - %vfn47 = getelementptr inbounds i32 (%class.PStream*)*, i32 (%class.PStream*)** %vtable46, i64 1 - %46 = load i32 (%class.PStream*)*, i32 (%class.PStream*)** %vfn47, align 8 - %call48 = call i32 %46(%class.PStream* %44) - %tobool = icmp ne i32 %call48, 0 - br i1 %tobool, label %if.then56, label %lor.lhs.false - -lor.lhs.false: ; preds = %while.body - %47 = load i64, i64* %numRead, align 8 - %48 = load i64, i64* %chunksize.addr, align 8 - %conv49 = trunc i64 %48 to i32 - %conv50 = zext i32 %conv49 to i64 - %cmp51 = icmp ult i64 %47, %conv50 - br i1 %cmp51, label %land.lhs.true, label %if.end58 - -land.lhs.true: ; preds = %lor.lhs.false - %49 = load %class.PStream*, %class.PStream** %stream.addr, align 8 - %50 = bitcast %class.PStream* %49 to i32 (%class.PStream*)*** - %vtable52 = load i32 (%class.PStream*)**, i32 (%class.PStream*)*** %50, align 8 - %vfn53 = getelementptr inbounds i32 (%class.PStream*)*, i32 (%class.PStream*)** %vtable52, i64 2 - %51 = load i32 (%class.PStream*)*, i32 (%class.PStream*)** %vfn53, align 8 - %call54 = call i32 %51(%class.PStream* %49) - %tobool55 = icmp ne i32 %call54, 0 - br i1 %tobool55, label %if.end58, label %if.then56 - -if.then56: ; preds = %land.lhs.true, %while.body - %52 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %call57 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %52, i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.12, i64 0, i64 0)) - call void @exit(i32 1) #15 - unreachable - -if.end58: ; preds = %land.lhs.true, %lor.lhs.false - %53 = load i64, i64* %numRead, align 8 - %num59 = getelementptr inbounds %struct.Points, %struct.Points* %points, i32 0, i32 0 - store i64 %53, i64* %num59, align 8 - store i32 0, i32* %i60, align 4 - br label %for.cond61 - -for.cond61: ; preds = %for.inc70, %if.end58 - %54 = load i32, i32* %i60, align 4 - %conv62 = sext i32 %54 to i64 - %num63 = getelementptr inbounds %struct.Points, %struct.Points* %points, i32 0, i32 0 - %55 = load i64, i64* %num63, align 8 - %cmp64 = icmp slt i64 %conv62, %55 - br i1 %cmp64, label %for.body65, label %for.end72 - -for.body65: ; preds = %for.cond61 - %p66 = getelementptr inbounds %struct.Points, %struct.Points* %points, i32 0, i32 2 - %56 = load %struct.Point*, %struct.Point** %p66, align 8 - %57 = load i32, i32* %i60, align 4 - %idxprom67 = sext i32 %57 to i64 - %arrayidx68 = getelementptr inbounds %struct.Point, %struct.Point* %56, i64 %idxprom67 - %weight69 = getelementptr inbounds %struct.Point, %struct.Point* %arrayidx68, i32 0, i32 0 - store float 1.000000e+00, float* %weight69, align 8 - br label %for.inc70 - -for.inc70: ; preds = %for.body65 - %58 = load i32, i32* %i60, align 4 - %inc71 = add nsw i32 %58, 1 - store i32 %inc71, i32* %i60, align 4 - br label %for.cond61 - -for.end72: ; preds = %for.cond61 - %num73 = getelementptr inbounds %struct.Points, %struct.Points* %points, i32 0, i32 0 - %59 = load i64, i64* %num73, align 8 - %mul74 = mul i64 %59, 1 - %call75 = call noalias i8* @malloc(i64 %mul74) #2 - store i8* %call75, i8** @_ZL17switch_membership, align 8 - %num76 = getelementptr inbounds %struct.Points, %struct.Points* %points, i32 0, i32 0 - %60 = load i64, i64* %num76, align 8 - %call77 = call noalias i8* @calloc(i64 %60, i64 1) #2 - store i8* %call77, i8** @_ZL9is_center, align 8 - %num78 = getelementptr inbounds %struct.Points, %struct.Points* %points, i32 0, i32 0 - %61 = load i64, i64* %num78, align 8 - %mul79 = mul i64 %61, 4 - %call80 = call noalias i8* @malloc(i64 %mul79) #2 - %62 = bitcast i8* %call80 to i32* - store i32* %62, i32** @_ZL12center_table, align 8 - %63 = load i64, i64* %kmin.addr, align 8 - %64 = load i64, i64* %kmax.addr, align 8 - call void @_Z11localSearchP6PointsllPl(%struct.Points* %points, i64 %63, i64 %64, i64* %kfinal) - %65 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %call81 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %65, i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.13, i64 0, i64 0)) - %call82 = call i32 @_Z11contcentersP6Points(%struct.Points* %points) - store i8 1, i8* @isCoordChanged, align 1 - %66 = load i64, i64* %kfinal, align 8 - %num83 = getelementptr inbounds %struct.Points, %struct.Points* %centers, i32 0, i32 0 - %67 = load i64, i64* %num83, align 8 - %add = add nsw i64 %66, %67 - %68 = load i64, i64* %centersize.addr, align 8 - %cmp84 = icmp sgt i64 %add, %68 - br i1 %cmp84, label %if.then85, label %if.end87 - -if.then85: ; preds = %for.end72 - %69 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %call86 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %69, i8* getelementptr inbounds ([33 x i8], [33 x i8]* @.str.14, i64 0, i64 0)) - call void @exit(i32 1) #15 - unreachable - -if.end87: ; preds = %for.end72 - %70 = load i64*, i64** %centerIDs, align 8 - %71 = load i64, i64* %IDoffset, align 8 - call void @_Z11copycentersP6PointsS0_Pll(%struct.Points* %points, %struct.Points* %centers, i64* %70, i64 %71) - %72 = load i64, i64* %numRead, align 8 - %73 = load i64, i64* %IDoffset, align 8 - %add88 = add i64 %73, %72 - store i64 %add88, i64* %IDoffset, align 8 - %74 = load i8*, i8** @_ZL9is_center, align 8 - call void @free(i8* %74) #2 - %75 = load i8*, i8** @_ZL17switch_membership, align 8 - call void @free(i8* %75) #2 - %76 = load i32*, i32** @_ZL12center_table, align 8 - %77 = bitcast i32* %76 to i8* - call void @free(i8* %77) #2 - %78 = load %class.PStream*, %class.PStream** %stream.addr, align 8 - %79 = bitcast %class.PStream* %78 to i32 (%class.PStream*)*** - %vtable89 = load i32 (%class.PStream*)**, i32 (%class.PStream*)*** %79, align 8 - %vfn90 = getelementptr inbounds i32 (%class.PStream*)*, i32 (%class.PStream*)** %vtable89, i64 2 - %80 = load i32 (%class.PStream*)*, i32 (%class.PStream*)** %vfn90, align 8 - %call91 = call i32 %80(%class.PStream* %78) - %tobool92 = icmp ne i32 %call91, 0 - br i1 %tobool92, label %if.then93, label %if.end94 - -if.then93: ; preds = %if.end87 - br label %while.end - -if.end94: ; preds = %if.end87 - br label %while.body - -while.end: ; preds = %if.then93 - %num95 = getelementptr inbounds %struct.Points, %struct.Points* %centers, i32 0, i32 0 - %81 = load i64, i64* %num95, align 8 - %mul96 = mul i64 %81, 1 - %call97 = call noalias i8* @malloc(i64 %mul96) #2 - store i8* %call97, i8** @_ZL17switch_membership, align 8 - %num98 = getelementptr inbounds %struct.Points, %struct.Points* %centers, i32 0, i32 0 - %82 = load i64, i64* %num98, align 8 - %call99 = call noalias i8* @calloc(i64 %82, i64 1) #2 - store i8* %call99, i8** @_ZL9is_center, align 8 - %num100 = getelementptr inbounds %struct.Points, %struct.Points* %centers, i32 0, i32 0 - %83 = load i64, i64* %num100, align 8 - %mul101 = mul i64 %83, 4 - %call102 = call noalias i8* @malloc(i64 %mul101) #2 - %84 = bitcast i8* %call102 to i32* - store i32* %84, i32** @_ZL12center_table, align 8 - %85 = load i64, i64* %kmin.addr, align 8 - %86 = load i64, i64* %kmax.addr, align 8 - call void @_Z11localSearchP6PointsllPl(%struct.Points* %centers, i64 %85, i64 %86, i64* %kfinal) - %call103 = call i32 @_Z11contcentersP6Points(%struct.Points* %centers) - %87 = load i64*, i64** %centerIDs, align 8 - %88 = load i8*, i8** %outfile.addr, align 8 - call void @_Z12outcenterIDsP6PointsPlPc(%struct.Points* %centers, i64* %87, i8* %88) - ret void -} - -; Function Attrs: noinline norecurse optnone uwtable -define dso_local i32 @main(i32 %argc, i8** %argv) #11 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -entry: - %retval = alloca i32, align 4 - %argc.addr = alloca i32, align 4 - %argv.addr = alloca i8**, align 8 - %outfilename = alloca i8*, align 8 - %infilename = alloca i8*, align 8 - %kmin = alloca i64, align 8 - %kmax = alloca i64, align 8 - %n = alloca i64, align 8 - %chunksize = alloca i64, align 8 - %clustersize = alloca i64, align 8 - %dim = alloca i32, align 4 - %stream = alloca %class.PStream*, align 8 - %exn.slot = alloca i8* - %ehselector.slot = alloca i32 - %t1 = alloca double, align 8 - %t2 = alloca double, align 8 - store i32 0, i32* %retval, align 4 - store i32 %argc, i32* %argc.addr, align 4 - store i8** %argv, i8*** %argv.addr, align 8 - %call = call i32 @cudaSetDevice(i32 0) - %call1 = call i8* @_Znam(i64 1024) #16 - store i8* %call1, i8** %outfilename, align 8 - %call2 = call i8* @_Znam(i64 1024) #16 - store i8* %call2, i8** %infilename, align 8 - %call3 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.15, i64 0, i64 0)) - %call4 = call i32 @fflush(%struct._IO_FILE* null) - %0 = load i32, i32* %argc.addr, align 4 - %cmp = icmp slt i32 %0, 10 - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %entry - %1 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %2 = load i8**, i8*** %argv.addr, align 8 - %arrayidx = getelementptr inbounds i8*, i8** %2, i64 0 - %3 = load i8*, i8** %arrayidx, align 8 - %call5 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %1, i8* getelementptr inbounds ([64 x i8], [64 x i8]* @.str.16, i64 0, i64 0), i8* %3) - %4 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %call6 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %4, i8* getelementptr inbounds ([47 x i8], [47 x i8]* @.str.17, i64 0, i64 0)) - %5 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %call7 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %5, i8* getelementptr inbounds ([47 x i8], [47 x i8]* @.str.18, i64 0, i64 0)) - %6 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %call8 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %6, i8* getelementptr inbounds ([45 x i8], [45 x i8]* @.str.19, i64 0, i64 0)) - %7 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %call9 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %7, i8* getelementptr inbounds ([38 x i8], [38 x i8]* @.str.20, i64 0, i64 0)) - %8 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %call10 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %8, i8* getelementptr inbounds ([57 x i8], [57 x i8]* @.str.21, i64 0, i64 0)) - %9 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %call11 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %9, i8* getelementptr inbounds ([55 x i8], [55 x i8]* @.str.22, i64 0, i64 0)) - %10 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %call12 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %10, i8* getelementptr inbounds ([37 x i8], [37 x i8]* @.str.23, i64 0, i64 0)) - %11 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %call13 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %11, i8* getelementptr inbounds ([28 x i8], [28 x i8]* @.str.24, i64 0, i64 0)) - %12 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %call14 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %12, i8* getelementptr inbounds ([41 x i8], [41 x i8]* @.str.25, i64 0, i64 0)) - %13 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %call15 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %13, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.26, i64 0, i64 0)) - %14 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %call16 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %14, i8* getelementptr inbounds ([77 x i8], [77 x i8]* @.str.27, i64 0, i64 0)) - call void @exit(i32 1) #15 - unreachable - -if.end: ; preds = %entry - %15 = load i8**, i8*** %argv.addr, align 8 - %arrayidx17 = getelementptr inbounds i8*, i8** %15, i64 1 - %16 = load i8*, i8** %arrayidx17, align 8 - %call18 = call i32 @atoi(i8* %16) #18 - %conv = sext i32 %call18 to i64 - store i64 %conv, i64* %kmin, align 8 - %17 = load i8**, i8*** %argv.addr, align 8 - %arrayidx19 = getelementptr inbounds i8*, i8** %17, i64 2 - %18 = load i8*, i8** %arrayidx19, align 8 - %call20 = call i32 @atoi(i8* %18) #18 - %conv21 = sext i32 %call20 to i64 - store i64 %conv21, i64* %kmax, align 8 - %19 = load i8**, i8*** %argv.addr, align 8 - %arrayidx22 = getelementptr inbounds i8*, i8** %19, i64 3 - %20 = load i8*, i8** %arrayidx22, align 8 - %call23 = call i32 @atoi(i8* %20) #18 - store i32 %call23, i32* %dim, align 4 - %21 = load i8**, i8*** %argv.addr, align 8 - %arrayidx24 = getelementptr inbounds i8*, i8** %21, i64 4 - %22 = load i8*, i8** %arrayidx24, align 8 - %call25 = call i32 @atoi(i8* %22) #18 - %conv26 = sext i32 %call25 to i64 - store i64 %conv26, i64* %n, align 8 - %23 = load i8**, i8*** %argv.addr, align 8 - %arrayidx27 = getelementptr inbounds i8*, i8** %23, i64 5 - %24 = load i8*, i8** %arrayidx27, align 8 - %call28 = call i32 @atoi(i8* %24) #18 - %conv29 = sext i32 %call28 to i64 - store i64 %conv29, i64* %chunksize, align 8 - %25 = load i8**, i8*** %argv.addr, align 8 - %arrayidx30 = getelementptr inbounds i8*, i8** %25, i64 6 - %26 = load i8*, i8** %arrayidx30, align 8 - %call31 = call i32 @atoi(i8* %26) #18 - %conv32 = sext i32 %call31 to i64 - store i64 %conv32, i64* %clustersize, align 8 - %27 = load i8*, i8** %infilename, align 8 - %28 = load i8**, i8*** %argv.addr, align 8 - %arrayidx33 = getelementptr inbounds i8*, i8** %28, i64 7 - %29 = load i8*, i8** %arrayidx33, align 8 - %call34 = call i8* @strcpy(i8* %27, i8* %29) - %30 = load i8*, i8** %outfilename, align 8 - %31 = load i8**, i8*** %argv.addr, align 8 - %arrayidx35 = getelementptr inbounds i8*, i8** %31, i64 8 - %32 = load i8*, i8** %arrayidx35, align 8 - %call36 = call i8* @strcpy(i8* %30, i8* %32) - %33 = load i8**, i8*** %argv.addr, align 8 - %arrayidx37 = getelementptr inbounds i8*, i8** %33, i64 9 - %34 = load i8*, i8** %arrayidx37, align 8 - %call38 = call i32 @atoi(i8* %34) #18 - store i32 %call38, i32* @_ZL5nproc, align 4 - call void @srand48(i64 1) #2 - %35 = load i64, i64* %n, align 8 - %cmp39 = icmp sgt i64 %35, 0 - br i1 %cmp39, label %if.then40, label %if.else - -if.then40: ; preds = %if.end - %call41 = call i8* @_Znwm(i64 16) #16 - %36 = bitcast i8* %call41 to %class.SimStream* - %37 = load i64, i64* %n, align 8 - invoke void @_ZN9SimStreamC2El(%class.SimStream* %36, i64 %37) - to label %invoke.cont unwind label %lpad - -invoke.cont: ; preds = %if.then40 - %38 = bitcast %class.SimStream* %36 to %class.PStream* - store %class.PStream* %38, %class.PStream** %stream, align 8 - br label %if.end45 - -lpad: ; preds = %if.then40 - %39 = landingpad { i8*, i32 } - cleanup - %40 = extractvalue { i8*, i32 } %39, 0 - store i8* %40, i8** %exn.slot, align 8 - %41 = extractvalue { i8*, i32 } %39, 1 - store i32 %41, i32* %ehselector.slot, align 4 - call void @_ZdlPv(i8* %call41) #17 - br label %eh.resume - -if.else: ; preds = %if.end - %call42 = call i8* @_Znwm(i64 16) #16 - %42 = bitcast i8* %call42 to %class.FileStream* - %43 = load i8*, i8** %infilename, align 8 - invoke void @_ZN10FileStreamC2EPc(%class.FileStream* %42, i8* %43) - to label %invoke.cont44 unwind label %lpad43 - -invoke.cont44: ; preds = %if.else - %44 = bitcast %class.FileStream* %42 to %class.PStream* - store %class.PStream* %44, %class.PStream** %stream, align 8 - br label %if.end45 - -lpad43: ; preds = %if.else - %45 = landingpad { i8*, i32 } - cleanup - %46 = extractvalue { i8*, i32 } %45, 0 - store i8* %46, i8** %exn.slot, align 8 - %47 = extractvalue { i8*, i32 } %45, 1 - store i32 %47, i32* %ehselector.slot, align 4 - call void @_ZdlPv(i8* %call42) #17 - br label %eh.resume - -if.end45: ; preds = %invoke.cont44, %invoke.cont - %call46 = call double @_Z7gettimev() - store double %call46, double* %t1, align 8 - store double 0.000000e+00, double* @serial_t, align 8 - store double 0.000000e+00, double* @cpu_to_gpu_t, align 8 - store double 0.000000e+00, double* @gpu_to_cpu_t, align 8 - store double 0.000000e+00, double* @alloc_t, align 8 - store double 0.000000e+00, double* @free_t, align 8 - store double 0.000000e+00, double* @kernel_t, align 8 - store i8 0, i8* @isCoordChanged, align 1 - %48 = load %class.PStream*, %class.PStream** %stream, align 8 - %49 = load i64, i64* %kmin, align 8 - %50 = load i64, i64* %kmax, align 8 - %51 = load i32, i32* %dim, align 4 - %52 = load i64, i64* %chunksize, align 8 - %53 = load i64, i64* %clustersize, align 8 - %54 = load i8*, i8** %outfilename, align 8 - call void @_Z13streamClusterP7PStreamllillPc(%class.PStream* %48, i64 %49, i64 %50, i32 %51, i64 %52, i64 %53, i8* %54) - call void @_Z10freeDevMemv() - call void @_Z11freeHostMemv() - %call47 = call double @_Z7gettimev() - store double %call47, double* %t2, align 8 - %55 = load double, double* %t2, align 8 - %56 = load double, double* %t1, align 8 - %sub = fsub contract double %55, %56 - %call48 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str.28, i64 0, i64 0), double %sub) - %57 = load %class.PStream*, %class.PStream** %stream, align 8 - %isnull = icmp eq %class.PStream* %57, null - br i1 %isnull, label %delete.end, label %delete.notnull - -delete.notnull: ; preds = %if.end45 - %58 = bitcast %class.PStream* %57 to void (%class.PStream*)*** - %vtable = load void (%class.PStream*)**, void (%class.PStream*)*** %58, align 8 - %vfn = getelementptr inbounds void (%class.PStream*)*, void (%class.PStream*)** %vtable, i64 4 - %59 = load void (%class.PStream*)*, void (%class.PStream*)** %vfn, align 8 - call void %59(%class.PStream* %57) - br label %delete.end - -delete.end: ; preds = %delete.notnull, %if.end45 - %60 = load double, double* @time_gain, align 8 - %call49 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([19 x i8], [19 x i8]* @.str.29, i64 0, i64 0), double %60) - %61 = load double, double* @time_gain_dist, align 8 - %call50 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.30, i64 0, i64 0), double %61) - %62 = load double, double* @time_gain_init, align 8 - %call51 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.31, i64 0, i64 0), double %62) - %63 = load double, double* @time_select_feasible, align 8 - %call52 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.32, i64 0, i64 0), double %63) - %64 = load double, double* @time_speedy, align 8 - %call53 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.33, i64 0, i64 0), double %64) - %65 = load double, double* @time_shuffle, align 8 - %call54 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str.34, i64 0, i64 0), double %65) - %66 = load double, double* @time_local_search, align 8 - %call55 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([25 x i8], [25 x i8]* @.str.35, i64 0, i64 0), double %66) - %call56 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.9, i64 0, i64 0)) - %call57 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([34 x i8], [34 x i8]* @.str.36, i64 0, i64 0)) - %67 = load double, double* @serial_t, align 8 - %div = fdiv double %67, 1.000000e+03 - %call58 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str.37, i64 0, i64 0), double %div) - %68 = load double, double* @cpu_to_gpu_t, align 8 - %div59 = fdiv double %68, 1.000000e+03 - %call60 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str.38, i64 0, i64 0), double %div59) - %69 = load double, double* @gpu_to_cpu_t, align 8 - %div61 = fdiv double %69, 1.000000e+03 - %call62 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([41 x i8], [41 x i8]* @.str.39, i64 0, i64 0), double %div61) - %70 = load double, double* @alloc_t, align 8 - %div63 = fdiv double %70, 1.000000e+03 - %call64 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.40, i64 0, i64 0), double %div63) - %71 = load double, double* @free_t, align 8 - %div65 = fdiv double %71, 1.000000e+03 - %call66 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([22 x i8], [22 x i8]* @.str.41, i64 0, i64 0), double %div65) - %72 = load double, double* @kernel_t, align 8 - %div67 = fdiv double %72, 1.000000e+03 - %call68 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([20 x i8], [20 x i8]* @.str.42, i64 0, i64 0), double %div67) - ret i32 0 - -eh.resume: ; preds = %lpad43, %lpad - %exn = load i8*, i8** %exn.slot, align 8 - %sel = load i32, i32* %ehselector.slot, align 4 - %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0 - %lpad.val69 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1 - resume { i8*, i32 } %lpad.val69 -} - -declare dso_local i32 @cudaSetDevice(i32) #1 - -declare dso_local i32 @fflush(%struct._IO_FILE*) #1 - -; Function Attrs: nounwind readonly -declare dso_local i32 @atoi(i8*) #12 - -declare dso_local i8* @strcpy(i8*, i8*) #1 - -; Function Attrs: nounwind -declare dso_local void @srand48(i64) #7 - -; Function Attrs: nobuiltin -declare dso_local noalias i8* @_Znwm(i64) #9 - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN9SimStreamC2El(%class.SimStream* %this, i64 %n_) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %class.SimStream*, align 8 - %n_.addr = alloca i64, align 8 - store %class.SimStream* %this, %class.SimStream** %this.addr, align 8 - store i64 %n_, i64* %n_.addr, align 8 - %this1 = load %class.SimStream*, %class.SimStream** %this.addr, align 8 - %0 = bitcast %class.SimStream* %this1 to %class.PStream* - call void @_ZN7PStreamC2Ev(%class.PStream* %0) #2 - %1 = bitcast %class.SimStream* %this1 to i32 (...)*** - store i32 (...)** bitcast (i8** getelementptr inbounds ({ [7 x i8*] }, { [7 x i8*] }* @_ZTV9SimStream, i32 0, inrange i32 0, i32 2) to i32 (...)**), i32 (...)*** %1, align 8 - %2 = load i64, i64* %n_.addr, align 8 - %n = getelementptr inbounds %class.SimStream, %class.SimStream* %this1, i32 0, i32 1 - store i64 %2, i64* %n, align 8 - ret void -} - -declare dso_local i32 @__gxx_personality_v0(...) - -; Function Attrs: nobuiltin nounwind -declare dso_local void @_ZdlPv(i8*) #10 - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZN10FileStreamC2EPc(%class.FileStream* %this, i8* %filename) unnamed_addr #3 comdat align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -entry: - %this.addr = alloca %class.FileStream*, align 8 - %filename.addr = alloca i8*, align 8 - %exn.slot = alloca i8* - %ehselector.slot = alloca i32 - store %class.FileStream* %this, %class.FileStream** %this.addr, align 8 - store i8* %filename, i8** %filename.addr, align 8 - %this1 = load %class.FileStream*, %class.FileStream** %this.addr, align 8 - %0 = bitcast %class.FileStream* %this1 to %class.PStream* - call void @_ZN7PStreamC2Ev(%class.PStream* %0) #2 - %1 = bitcast %class.FileStream* %this1 to i32 (...)*** - store i32 (...)** bitcast (i8** getelementptr inbounds ({ [7 x i8*] }, { [7 x i8*] }* @_ZTV10FileStream, i32 0, inrange i32 0, i32 2) to i32 (...)**), i32 (...)*** %1, align 8 - %2 = load i8*, i8** %filename.addr, align 8 - %call = invoke %struct._IO_FILE* @fopen(i8* %2, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.43, i64 0, i64 0)) - to label %invoke.cont unwind label %lpad - -invoke.cont: ; preds = %entry - %fp = getelementptr inbounds %class.FileStream, %class.FileStream* %this1, i32 0, i32 1 - store %struct._IO_FILE* %call, %struct._IO_FILE** %fp, align 8 - %fp2 = getelementptr inbounds %class.FileStream, %class.FileStream* %this1, i32 0, i32 1 - %3 = load %struct._IO_FILE*, %struct._IO_FILE** %fp2, align 8 - %cmp = icmp eq %struct._IO_FILE* %3, null - br i1 %cmp, label %if.then, label %if.end - -if.then: ; preds = %invoke.cont - %4 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8 - %5 = load i8*, i8** %filename.addr, align 8 - %call4 = invoke i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %4, i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.44, i64 0, i64 0), i8* %5) - to label %invoke.cont3 unwind label %lpad - -invoke.cont3: ; preds = %if.then - call void @exit(i32 1) #15 - unreachable - -lpad: ; preds = %if.then, %entry - %6 = landingpad { i8*, i32 } - cleanup - %7 = extractvalue { i8*, i32 } %6, 0 - store i8* %7, i8** %exn.slot, align 8 - %8 = extractvalue { i8*, i32 } %6, 1 - store i32 %8, i32* %ehselector.slot, align 4 - %9 = bitcast %class.FileStream* %this1 to %class.PStream* - invoke void @_ZN7PStreamD2Ev(%class.PStream* %9) - to label %invoke.cont5 unwind label %terminate.lpad - -if.end: ; preds = %invoke.cont - ret void - -invoke.cont5: ; preds = %lpad - br label %eh.resume - -eh.resume: ; preds = %invoke.cont5 - %exn = load i8*, i8** %exn.slot, align 8 - %sel = load i32, i32* %ehselector.slot, align 4 - %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0 - %lpad.val6 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1 - resume { i8*, i32 } %lpad.val6 - -terminate.lpad: ; preds = %lpad - %10 = landingpad { i8*, i32 } - catch i8* null - %11 = extractvalue { i8*, i32 } %10, 0 - call void @__clang_call_terminate(i8* %11) #15 - unreachable -} - -; Function Attrs: nounwind -declare dso_local float @logf(float) #7 - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN7PStreamC2Ev(%class.PStream* %this) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %class.PStream*, align 8 - store %class.PStream* %this, %class.PStream** %this.addr, align 8 - %this1 = load %class.PStream*, %class.PStream** %this.addr, align 8 - %0 = bitcast %class.PStream* %this1 to i32 (...)*** - store i32 (...)** bitcast (i8** getelementptr inbounds ({ [7 x i8*] }, { [7 x i8*] }* @_ZTV7PStream, i32 0, inrange i32 0, i32 2) to i32 (...)**), i32 (...)*** %0, align 8 - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local i64 @_ZN9SimStream4readEPfii(%class.SimStream* %this, float* %dest, i32 %dim, i32 %num) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %class.SimStream*, align 8 - %dest.addr = alloca float*, align 8 - %dim.addr = alloca i32, align 4 - %num.addr = alloca i32, align 4 - %count = alloca i64, align 8 - %i = alloca i32, align 4 - %k = alloca i32, align 4 - store %class.SimStream* %this, %class.SimStream** %this.addr, align 8 - store float* %dest, float** %dest.addr, align 8 - store i32 %dim, i32* %dim.addr, align 4 - store i32 %num, i32* %num.addr, align 4 - %this1 = load %class.SimStream*, %class.SimStream** %this.addr, align 8 - store i64 0, i64* %count, align 8 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc8, %entry - %0 = load i32, i32* %i, align 4 - %1 = load i32, i32* %num.addr, align 4 - %cmp = icmp slt i32 %0, %1 - br i1 %cmp, label %land.rhs, label %land.end - -land.rhs: ; preds = %for.cond - %n = getelementptr inbounds %class.SimStream, %class.SimStream* %this1, i32 0, i32 1 - %2 = load i64, i64* %n, align 8 - %cmp2 = icmp sgt i64 %2, 0 - br label %land.end - -land.end: ; preds = %land.rhs, %for.cond - %3 = phi i1 [ false, %for.cond ], [ %cmp2, %land.rhs ] - br i1 %3, label %for.body, label %for.end10 - -for.body: ; preds = %land.end - store i32 0, i32* %k, align 4 - br label %for.cond3 - -for.cond3: ; preds = %for.inc, %for.body - %4 = load i32, i32* %k, align 4 - %5 = load i32, i32* %dim.addr, align 4 - %cmp4 = icmp slt i32 %4, %5 - br i1 %cmp4, label %for.body5, label %for.end - -for.body5: ; preds = %for.cond3 - %call = call i64 @lrand48() #2 - %conv = sitofp i64 %call to float - %div = fdiv float %conv, 0x41E0000000000000 - %6 = load float*, float** %dest.addr, align 8 - %7 = load i32, i32* %i, align 4 - %8 = load i32, i32* %dim.addr, align 4 - %mul = mul nsw i32 %7, %8 - %9 = load i32, i32* %k, align 4 - %add = add nsw i32 %mul, %9 - %idxprom = sext i32 %add to i64 - %arrayidx = getelementptr inbounds float, float* %6, i64 %idxprom - store float %div, float* %arrayidx, align 4 - br label %for.inc - -for.inc: ; preds = %for.body5 - %10 = load i32, i32* %k, align 4 - %inc = add nsw i32 %10, 1 - store i32 %inc, i32* %k, align 4 - br label %for.cond3 - -for.end: ; preds = %for.cond3 - %n6 = getelementptr inbounds %class.SimStream, %class.SimStream* %this1, i32 0, i32 1 - %11 = load i64, i64* %n6, align 8 - %dec = add nsw i64 %11, -1 - store i64 %dec, i64* %n6, align 8 - %12 = load i64, i64* %count, align 8 - %inc7 = add i64 %12, 1 - store i64 %inc7, i64* %count, align 8 - br label %for.inc8 - -for.inc8: ; preds = %for.end - %13 = load i32, i32* %i, align 4 - %inc9 = add nsw i32 %13, 1 - store i32 %inc9, i32* %i, align 4 - br label %for.cond - -for.end10: ; preds = %land.end - %14 = load i64, i64* %count, align 8 - ret i64 %14 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local i32 @_ZN9SimStream6ferrorEv(%class.SimStream* %this) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %class.SimStream*, align 8 - store %class.SimStream* %this, %class.SimStream** %this.addr, align 8 - %this1 = load %class.SimStream*, %class.SimStream** %this.addr, align 8 - ret i32 0 -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local i32 @_ZN9SimStream4feofEv(%class.SimStream* %this) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %class.SimStream*, align 8 - store %class.SimStream* %this, %class.SimStream** %this.addr, align 8 - %this1 = load %class.SimStream*, %class.SimStream** %this.addr, align 8 - %n = getelementptr inbounds %class.SimStream, %class.SimStream* %this1, i32 0, i32 1 - %0 = load i64, i64* %n, align 8 - %cmp = icmp sle i64 %0, 0 - %conv = zext i1 %cmp to i32 - ret i32 %conv -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN9SimStreamD2Ev(%class.SimStream* %this) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %class.SimStream*, align 8 - store %class.SimStream* %this, %class.SimStream** %this.addr, align 8 - %this1 = load %class.SimStream*, %class.SimStream** %this.addr, align 8 - %0 = bitcast %class.SimStream* %this1 to %class.PStream* - call void @_ZN7PStreamD2Ev(%class.PStream* %0) - ret void -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZN9SimStreamD0Ev(%class.SimStream* %this) unnamed_addr #3 comdat align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -entry: - %this.addr = alloca %class.SimStream*, align 8 - %exn.slot = alloca i8* - %ehselector.slot = alloca i32 - store %class.SimStream* %this, %class.SimStream** %this.addr, align 8 - %this1 = load %class.SimStream*, %class.SimStream** %this.addr, align 8 - invoke void @_ZN9SimStreamD2Ev(%class.SimStream* %this1) - to label %invoke.cont unwind label %lpad - -invoke.cont: ; preds = %entry - %0 = bitcast %class.SimStream* %this1 to i8* - call void @_ZdlPv(i8* %0) #17 - ret void - -lpad: ; preds = %entry - %1 = landingpad { i8*, i32 } - cleanup - %2 = extractvalue { i8*, i32 } %1, 0 - store i8* %2, i8** %exn.slot, align 8 - %3 = extractvalue { i8*, i32 } %1, 1 - store i32 %3, i32* %ehselector.slot, align 4 - %4 = bitcast %class.SimStream* %this1 to i8* - call void @_ZdlPv(i8* %4) #17 - br label %eh.resume - -eh.resume: ; preds = %lpad - %exn = load i8*, i8** %exn.slot, align 8 - %sel = load i32, i32* %ehselector.slot, align 4 - %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0 - %lpad.val2 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1 - resume { i8*, i32 } %lpad.val2 -} - -declare dso_local void @__cxa_pure_virtual() unnamed_addr - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN7PStreamD2Ev(%class.PStream* %this) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %class.PStream*, align 8 - store %class.PStream* %this, %class.PStream** %this.addr, align 8 - %this1 = load %class.PStream*, %class.PStream** %this.addr, align 8 - ret void -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local void @_ZN7PStreamD0Ev(%class.PStream* %this) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %class.PStream*, align 8 - store %class.PStream* %this, %class.PStream** %this.addr, align 8 - %this1 = load %class.PStream*, %class.PStream** %this.addr, align 8 - call void @llvm.trap() #15 - unreachable -} - -; Function Attrs: cold noreturn nounwind -declare void @llvm.trap() #13 - -; Function Attrs: noinline noreturn nounwind -define linkonce_odr hidden void @__clang_call_terminate(i8* %0) #14 comdat { - %2 = call i8* @__cxa_begin_catch(i8* %0) #2 - call void @_ZSt9terminatev() #15 - unreachable -} - -declare dso_local i8* @__cxa_begin_catch(i8*) - -declare dso_local void @_ZSt9terminatev() - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local i64 @_ZN10FileStream4readEPfii(%class.FileStream* %this, float* %dest, i32 %dim, i32 %num) unnamed_addr #3 comdat align 2 { -entry: - %this.addr = alloca %class.FileStream*, align 8 - %dest.addr = alloca float*, align 8 - %dim.addr = alloca i32, align 4 - %num.addr = alloca i32, align 4 - store %class.FileStream* %this, %class.FileStream** %this.addr, align 8 - store float* %dest, float** %dest.addr, align 8 - store i32 %dim, i32* %dim.addr, align 4 - store i32 %num, i32* %num.addr, align 4 - %this1 = load %class.FileStream*, %class.FileStream** %this.addr, align 8 - %0 = load float*, float** %dest.addr, align 8 - %1 = bitcast float* %0 to i8* - %2 = load i32, i32* %dim.addr, align 4 - %conv = sext i32 %2 to i64 - %mul = mul i64 4, %conv - %3 = load i32, i32* %num.addr, align 4 - %conv2 = sext i32 %3 to i64 - %fp = getelementptr inbounds %class.FileStream, %class.FileStream* %this1, i32 0, i32 1 - %4 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8 - %call = call i64 @fread(i8* %1, i64 %mul, i64 %conv2, %struct._IO_FILE* %4) - ret i64 %call -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local i32 @_ZN10FileStream6ferrorEv(%class.FileStream* %this) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %class.FileStream*, align 8 - store %class.FileStream* %this, %class.FileStream** %this.addr, align 8 - %this1 = load %class.FileStream*, %class.FileStream** %this.addr, align 8 - %fp = getelementptr inbounds %class.FileStream, %class.FileStream* %this1, i32 0, i32 1 - %0 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8 - %call = call i32 @ferror(%struct._IO_FILE* %0) #2 - ret i32 %call -} - -; Function Attrs: noinline nounwind optnone uwtable -define linkonce_odr dso_local i32 @_ZN10FileStream4feofEv(%class.FileStream* %this) unnamed_addr #6 comdat align 2 { -entry: - %this.addr = alloca %class.FileStream*, align 8 - store %class.FileStream* %this, %class.FileStream** %this.addr, align 8 - %this1 = load %class.FileStream*, %class.FileStream** %this.addr, align 8 - %fp = getelementptr inbounds %class.FileStream, %class.FileStream* %this1, i32 0, i32 1 - %0 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8 - %call = call i32 @feof(%struct._IO_FILE* %0) #2 - ret i32 %call -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZN10FileStreamD2Ev(%class.FileStream* %this) unnamed_addr #3 comdat align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -entry: - %this.addr = alloca %class.FileStream*, align 8 - %exn.slot = alloca i8* - %ehselector.slot = alloca i32 - store %class.FileStream* %this, %class.FileStream** %this.addr, align 8 - %this1 = load %class.FileStream*, %class.FileStream** %this.addr, align 8 - %0 = bitcast %class.FileStream* %this1 to i32 (...)*** - store i32 (...)** bitcast (i8** getelementptr inbounds ({ [7 x i8*] }, { [7 x i8*] }* @_ZTV10FileStream, i32 0, inrange i32 0, i32 2) to i32 (...)**), i32 (...)*** %0, align 8 - %call = invoke i32 (i8*, ...) @printf(i8* getelementptr inbounds ([21 x i8], [21 x i8]* @.str.45, i64 0, i64 0)) - to label %invoke.cont unwind label %lpad - -invoke.cont: ; preds = %entry - %fp = getelementptr inbounds %class.FileStream, %class.FileStream* %this1, i32 0, i32 1 - %1 = load %struct._IO_FILE*, %struct._IO_FILE** %fp, align 8 - %call3 = invoke i32 @fclose(%struct._IO_FILE* %1) - to label %invoke.cont2 unwind label %lpad - -invoke.cont2: ; preds = %invoke.cont - %2 = bitcast %class.FileStream* %this1 to %class.PStream* - call void @_ZN7PStreamD2Ev(%class.PStream* %2) - ret void - -lpad: ; preds = %invoke.cont, %entry - %3 = landingpad { i8*, i32 } - cleanup - %4 = extractvalue { i8*, i32 } %3, 0 - store i8* %4, i8** %exn.slot, align 8 - %5 = extractvalue { i8*, i32 } %3, 1 - store i32 %5, i32* %ehselector.slot, align 4 - %6 = bitcast %class.FileStream* %this1 to %class.PStream* - invoke void @_ZN7PStreamD2Ev(%class.PStream* %6) - to label %invoke.cont4 unwind label %terminate.lpad - -invoke.cont4: ; preds = %lpad - br label %eh.resume - -eh.resume: ; preds = %invoke.cont4 - %exn = load i8*, i8** %exn.slot, align 8 - %sel = load i32, i32* %ehselector.slot, align 4 - %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0 - %lpad.val5 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1 - resume { i8*, i32 } %lpad.val5 - -terminate.lpad: ; preds = %lpad - %7 = landingpad { i8*, i32 } - catch i8* null - %8 = extractvalue { i8*, i32 } %7, 0 - call void @__clang_call_terminate(i8* %8) #15 - unreachable -} - -; Function Attrs: noinline optnone uwtable -define linkonce_odr dso_local void @_ZN10FileStreamD0Ev(%class.FileStream* %this) unnamed_addr #3 comdat align 2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { -entry: - %this.addr = alloca %class.FileStream*, align 8 - %exn.slot = alloca i8* - %ehselector.slot = alloca i32 - store %class.FileStream* %this, %class.FileStream** %this.addr, align 8 - %this1 = load %class.FileStream*, %class.FileStream** %this.addr, align 8 - invoke void @_ZN10FileStreamD2Ev(%class.FileStream* %this1) - to label %invoke.cont unwind label %lpad - -invoke.cont: ; preds = %entry - %0 = bitcast %class.FileStream* %this1 to i8* - call void @_ZdlPv(i8* %0) #17 - ret void - -lpad: ; preds = %entry - %1 = landingpad { i8*, i32 } - cleanup - %2 = extractvalue { i8*, i32 } %1, 0 - store i8* %2, i8** %exn.slot, align 8 - %3 = extractvalue { i8*, i32 } %1, 1 - store i32 %3, i32* %ehselector.slot, align 4 - %4 = bitcast %class.FileStream* %this1 to i8* - call void @_ZdlPv(i8* %4) #17 - br label %eh.resume - -eh.resume: ; preds = %lpad - %exn = load i8*, i8** %exn.slot, align 8 - %sel = load i32, i32* %ehselector.slot, align 4 - %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0 - %lpad.val2 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1 - resume { i8*, i32 } %lpad.val2 -} - -declare dso_local i64 @fread(i8*, i64, i64, %struct._IO_FILE*) #1 - -; Function Attrs: nounwind -declare dso_local i32 @ferror(%struct._IO_FILE*) #7 - -; Function Attrs: nounwind -declare dso_local i32 @feof(%struct._IO_FILE*) #7 - -; Function Attrs: noinline uwtable -define internal void @_GLOBAL__sub_I_streamcluster_cuda_cpu.cu() #0 section ".text.startup" { -entry: - call void @__cxx_global_var_init() - ret void -} - -define internal void @__cuda_register_globals(i8** %0) { -entry: - %1 = call i32 @__cudaRegisterFunction(i8** %0, i8* bitcast (void (i32, i32, i64, %struct.Point*, i32, i32, float*, float*, i32*, i8*)* @_Z19kernel_compute_costiilP5PointiiPfS1_PiPb to i8*), i8* getelementptr inbounds ([45 x i8], [45 x i8]* @0, i64 0, i64 0), i8* getelementptr inbounds ([45 x i8], [45 x i8]* @0, i64 0, i64 0), i32 -1, i8* null, i8* null, i8* null, i8* null, i32* null) - ret void -} - -declare dso_local i32 @__cudaRegisterFunction(i8**, i8*, i8*, i8*, i32, i8*, i8*, i8*, i8*, i32*) - -declare dso_local i32 @__cudaRegisterVar(i8**, i8*, i8*, i8*, i32, i32, i32, i32) - -declare dso_local i8** @__cudaRegisterFatBinary(i8*) - -define internal void @__cuda_module_ctor(i8* %0) { -entry: - %1 = call i8** @__cudaRegisterFatBinary(i8* bitcast ({ i32, i32, i8*, i8* }* @__cuda_fatbin_wrapper to i8*)) - store i8** %1, i8*** @__cuda_gpubin_handle, align 8 - call void @__cuda_register_globals(i8** %1) - call void @__cudaRegisterFatBinaryEnd(i8** %1) - %2 = call i32 @atexit(void (i8*)* @__cuda_module_dtor) - ret void -} - -declare dso_local void @__cudaRegisterFatBinaryEnd(i8**) - -declare dso_local void @__cudaUnregisterFatBinary(i8**) - -define internal void @__cuda_module_dtor(i8* %0) { -entry: - %1 = load i8**, i8*** @__cuda_gpubin_handle, align 8 - call void @__cudaUnregisterFatBinary(i8** %1) - ret void -} - -declare dso_local i32 @atexit(void (i8*)*) - -attributes #0 = { noinline uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #2 = { nounwind } -attributes #3 = { noinline optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #4 = { argmemonly nounwind willreturn } -attributes #5 = { noreturn nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #6 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #7 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #8 = { nounwind readnone speculatable willreturn } -attributes #9 = { nobuiltin "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #10 = { nobuiltin nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #11 = { noinline norecurse optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #12 = { nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #13 = { cold noreturn nounwind } -attributes #14 = { noinline noreturn nounwind } -attributes #15 = { noreturn nounwind } -attributes #16 = { builtin } -attributes #17 = { builtin nounwind } -attributes #18 = { nounwind readonly } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]} -!1 = !{i32 1, !"wchar_size", i32 4} -!2 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"} diff --git a/examples/streamcluster/streamcluster_cuda_cpu.cu b/examples/streamcluster/streamcluster_cuda_cpu.cu deleted file mode 100644 index 55486f0..0000000 --- a/examples/streamcluster/streamcluster_cuda_cpu.cu +++ /dev/null @@ -1,963 +0,0 @@ -/*********************************************** - streamcluster.cpp - : original source code of streamcluster with minor - modification regarding function calls - - - original code from PARSEC Benchmark Suite - - parallelization with CUDA API has been applied by - - Sang-Ha (a.k.a Shawn) Lee - sl4ge@virginia.edu - University of Virginia - Department of Electrical and Computer Engineering - Department of Computer Science - -***********************************************/ - -#include "streamcluster_cuda.cu" -#include "streamcluster_header.h" - -using namespace std; - -#define MAXNAMESIZE 1024 // max filename length -#define SEED 1 -#define SP 1 // number of repetitions of speedy must be >=1 -#define ITER 3 // iterate ITER* k log k times; ITER >= 1 -//#define PRINTINFO // Enables printing output -#define PROFILE // Enables timing info -//#define ENABLE_THREADS // Enables parallel execution -//#define INSERT_WASTE // Enables waste computation in -// dist function -#define CACHE_LINE 512 // cache line in byte - -// GLOBAL -static bool *switch_membership; // whether to switch membership in pgain -static bool *is_center; // whether a point is a center -static int *center_table; // index table of centers -static int nproc; //# of threads -bool isCoordChanged; - -// GPU Timing Info -double serial_t; -double cpu_to_gpu_t; -double gpu_to_cpu_t; -double alloc_t; -double kernel_t; -double free_t; - -// instrumentation code -#ifdef PROFILE -double time_local_search; -double time_speedy; -double time_select_feasible; -double time_gain; -double time_shuffle; -double time_gain_dist; -double time_gain_init; -#endif - -void inttofile(int data, char *filename) { - FILE *fp = fopen(filename, "w"); - fprintf(fp, "%d ", data); - fclose(fp); -} - -double gettime() { - struct timeval t; - gettimeofday(&t, NULL); - return t.tv_sec + t.tv_usec * 1e-6; -} - -int isIdentical(float *i, float *j, int D) { - // tells whether two points of D dimensions are identical - - int a = 0; - int equal = 1; - - while (equal && a < D) { - if (i[a] != j[a]) - equal = 0; - else - a++; - } - if (equal) - return 1; - else - return 0; -} - -/* comparator for floating point numbers */ -static int floatcomp(const void *i, const void *j) { - float a, b; - a = *(float *)(i); - b = *(float *)(j); - if (a > b) - return (1); - if (a < b) - return (-1); - return (0); -} - -/* shuffle points into random order */ -void shuffle(Points *points) { -#ifdef PROFILE - double t1 = gettime(); -#endif - long i, j; - Point temp; - for (i = 0; i < points->num - 1; i++) { - j = (lrand48() % (points->num - i)) + i; - temp = points->p[i]; - points->p[i] = points->p[j]; - points->p[j] = temp; - } -#ifdef PROFILE - double t2 = gettime(); - time_shuffle += t2 - t1; -#endif -} - -/* shuffle an array of integers */ -void intshuffle(int *intarray, int length) { -#ifdef PROFILE - double t1 = gettime(); -#endif - long i, j; - int temp; - for (i = 0; i < length; i++) { - j = (lrand48() % (length - i)) + i; - temp = intarray[i]; - intarray[i] = intarray[j]; - intarray[j] = temp; - } -#ifdef PROFILE - double t2 = gettime(); - time_shuffle += t2 - t1; -#endif -} - -#ifdef INSERT_WASTE -float waste(float s) { - for (int i = 0; i < 4; i++) { - s += pow(s, 0.78); - } - return s; -} -#endif - -/* compute Euclidean distance squared between two points */ -float dist(Point p1, Point p2, int dim) { - int i; - float result = 0.0; - for (i = 0; i < dim; i++) - result += (p1.coord[i] - p2.coord[i]) * (p1.coord[i] - p2.coord[i]); -#ifdef INSERT_WASTE - float s = waste(result); - result += s; - result -= s; -#endif - return (result); -} - -/* run speedy on the points, return total cost of solution */ -float pspeedy(Points *points, float z, long *kcenter, int pid, - pthread_barrier_t *barrier) { -#ifdef PROFILE - double t1 = gettime(); -#endif - -#ifdef ENABLE_THREADS - pthread_barrier_wait(barrier); -#endif - // my block - long bsize = points->num / nproc; - long k1 = bsize * pid; - long k2 = k1 + bsize; - if (pid == nproc - 1) - k2 = points->num; - - static float totalcost; - - static bool open = false; - static float *costs; // cost for each thread. - static int i; - -#ifdef ENABLE_THREADS - static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; - static pthread_cond_t cond = PTHREAD_COND_INITIALIZER; -#endif - -#ifdef PRINTINFO - if (pid == 0) { - fprintf(stderr, "Speedy: facility cost %lf\n", z); - } -#endif - - /* create center at first point, send it to itself */ - for (int k = k1; k < k2; k++) { - float distance = dist(points->p[k], points->p[0], points->dim); - points->p[k].cost = distance * points->p[k].weight; - points->p[k].assign = 0; - } - - if (pid == 0) { - *kcenter = 1; - costs = (float *)malloc(sizeof(float) * nproc); - } - - if (pid != - 0) { // we are not the master threads. we wait until a center is opened. - while (1) { -#ifdef ENABLE_THREADS - pthread_mutex_lock(&mutex); - while (!open) - pthread_cond_wait(&cond, &mutex); - pthread_mutex_unlock(&mutex); -#endif - if (i >= points->num) - break; - for (int k = k1; k < k2; k++) { - float distance = dist(points->p[i], points->p[k], points->dim); - if (distance * points->p[k].weight < points->p[k].cost) { - points->p[k].cost = distance * points->p[k].weight; - points->p[k].assign = i; - } - } -#ifdef ENABLE_THREADS - pthread_barrier_wait(barrier); - pthread_barrier_wait(barrier); -#endif - } - } else { // I am the master thread. I decide whether to open a center and - // notify others if so. - for (i = 1; i < points->num; i++) { - bool to_open = - ((float)lrand48() / (float)INT_MAX) < (points->p[i].cost / z); - if (to_open) { - (*kcenter)++; -#ifdef ENABLE_THREADS - pthread_mutex_lock(&mutex); -#endif - open = true; -#ifdef ENABLE_THREADS - pthread_mutex_unlock(&mutex); - pthread_cond_broadcast(&cond); -#endif - for (int k = k1; k < k2; k++) { - float distance = dist(points->p[i], points->p[k], points->dim); - if (distance * points->p[k].weight < points->p[k].cost) { - points->p[k].cost = distance * points->p[k].weight; - points->p[k].assign = i; - } - } -#ifdef ENABLE_THREADS - pthread_barrier_wait(barrier); -#endif - open = false; -#ifdef ENABLE_THREADS - pthread_barrier_wait(barrier); -#endif - } - } -#ifdef ENABLE_THREADS - pthread_mutex_lock(&mutex); -#endif - open = true; -#ifdef ENABLE_THREADS - pthread_mutex_unlock(&mutex); - pthread_cond_broadcast(&cond); -#endif - } -#ifdef ENABLE_THREADS - pthread_barrier_wait(barrier); -#endif - open = false; - float mytotal = 0; - for (int k = k1; k < k2; k++) { - mytotal += points->p[k].cost; - } - costs[pid] = mytotal; -#ifdef ENABLE_THREADS - pthread_barrier_wait(barrier); -#endif - // aggregate costs from each thread - if (pid == 0) { - totalcost = z * (*kcenter); - for (int i = 0; i < nproc; i++) { - totalcost += costs[i]; - } - free(costs); - } -#ifdef ENABLE_THREADS - pthread_barrier_wait(barrier); -#endif - -#ifdef PRINTINFO - if (pid == 0) { - fprintf(stderr, "Speedy opened %d facilities for total cost %lf\n", - *kcenter, totalcost); - fprintf(stderr, "Distance Cost %lf\n", totalcost - z * (*kcenter)); - } -#endif - -#ifdef PROFILE - double t2 = gettime(); - if (pid == 0) { - time_speedy += t2 - t1; - } -#endif - return (totalcost); -} - -/* facility location on the points using local search */ -/* z is the facility cost, returns the total cost and # of centers */ -/* assumes we are seeded with a reasonable solution */ -/* cost should represent this solution's cost */ -/* halt if there is < e improvement after iter calls to gain */ -/* feasible is an array of numfeasible points which may be centers */ - -float pFL(Points *points, int *feasible, int numfeasible, float z, long *k, - int kmax, float cost, long iter, float e, int pid, - pthread_barrier_t *barrier) { -#ifdef ENABLE_THREADS - pthread_barrier_wait(barrier); -#endif - long i; - long x; - float change; - long numberOfPoints; - - change = cost; - /* continue until we run iter iterations without improvement */ - /* stop instead if improvement is less than e */ - while (change / cost > 1.0 * e) { - change = 0.0; - numberOfPoints = points->num; - /* randomize order in which centers are considered */ - - if (pid == 0) { - intshuffle(feasible, numfeasible); - } -#ifdef ENABLE_THREADS - pthread_barrier_wait(barrier); -#endif - - for (i = 0; i < iter; i++) { - x = i % numfeasible; - change += - pgain(feasible[x], points, z, k, kmax, is_center, center_table, - switch_membership, isCoordChanged, &serial_t, &cpu_to_gpu_t, - &gpu_to_cpu_t, &alloc_t, &kernel_t, &free_t); - } - - cost -= change; -#ifdef PRINTINFO - if (pid == 0) { - fprintf(stderr, "%d centers, cost %lf, total distance %lf\n", *k, cost, - cost - z * (*k)); - } -#endif -#ifdef ENABLE_THREADS - pthread_barrier_wait(barrier); -#endif - } - return (cost); -} - -int selectfeasible_fast(Points *points, int **feasible, int kmin, int pid, - pthread_barrier_t *barrier) { -#ifdef PROFILE - double t1 = gettime(); -#endif - - int numfeasible = points->num; - if (numfeasible > (ITER * kmin * log((float)kmin))) - numfeasible = (int)(ITER * kmin * log((float)kmin)); - *feasible = (int *)malloc(numfeasible * sizeof(int)); - - float *accumweight; - float totalweight; - - /* - Calcuate my block. - For now this routine does not seem to be the bottleneck, so it is not - parallelized. When necessary, this can be parallelized by setting k1 and k2 - to proper values and calling this routine from all threads ( it is called - only by thread 0 for now ). Note that when parallelized, the randomization - might not be the same and it might not be difficult to measure the parallel - speed-up for the whole program. - */ - // long bsize = numfeasible; - long k1 = 0; - long k2 = numfeasible; - - float w; - int l, r, k; - - /* not many points, all will be feasible */ - if (numfeasible == points->num) { - for (int i = k1; i < k2; i++) - (*feasible)[i] = i; - return numfeasible; - } - - accumweight = (float *)malloc(sizeof(float) * points->num); - accumweight[0] = points->p[0].weight; - totalweight = 0; - for (int i = 1; i < points->num; i++) { - accumweight[i] = accumweight[i - 1] + points->p[i].weight; - } - totalweight = accumweight[points->num - 1]; - - for (int i = k1; i < k2; i++) { - w = (lrand48() / (float)INT_MAX) * totalweight; - // binary search - l = 0; - r = points->num - 1; - if (accumweight[0] > w) { - (*feasible)[i] = 0; - continue; - } - while (l + 1 < r) { - k = (l + r) / 2; - if (accumweight[k] > w) { - r = k; - } else { - l = k; - } - } - (*feasible)[i] = r; - } - - free(accumweight); - -#ifdef PROFILE - double t2 = gettime(); - time_select_feasible += t2 - t1; -#endif - return numfeasible; -} - -/* compute approximate kmedian on the points */ -float pkmedian(Points *points, long kmin, long kmax, long *kfinal, int pid, - pthread_barrier_t *barrier) { - int i; - float cost; - float lastcost; - float hiz, loz, z; - - static long k; - static int *feasible; - static int numfeasible; - static float *hizs; - - if (pid == 0) - hizs = (float *)calloc(nproc, sizeof(float)); - hiz = loz = 0.0; - long numberOfPoints = points->num; - long ptDimension = points->dim; - - // my block - long bsize = points->num / nproc; - long k1 = bsize * pid; - long k2 = k1 + bsize; - if (pid == nproc - 1) - k2 = points->num; - -#ifdef PRINTINFO - if (pid == 0) { - printf("Starting Kmedian procedure\n"); - printf("%i points in %i dimensions\n", numberOfPoints, ptDimension); - } -#endif - -#ifdef ENABLE_THREADS - pthread_barrier_wait(barrier); -#endif - - float myhiz = 0; - for (long kk = k1; kk < k2; kk++) { - myhiz += - dist(points->p[kk], points->p[0], ptDimension) * points->p[kk].weight; - } - hizs[pid] = myhiz; - -#ifdef ENABLE_THREADS - pthread_barrier_wait(barrier); -#endif - - for (int i = 0; i < nproc; i++) { - hiz += hizs[i]; - } - - loz = 0.0; - z = (hiz + loz) / 2.0; - /* NEW: Check whether more centers than points! */ - if (points->num <= kmax) { - /* just return all points as facilities */ - for (long kk = k1; kk < k2; kk++) { - points->p[kk].assign = kk; - points->p[kk].cost = 0; - } - cost = 0; - if (pid == 0) { - free(hizs); - *kfinal = k; - } - return cost; - } - - if (pid == 0) - shuffle(points); - cost = pspeedy(points, z, &k, pid, barrier); - -#ifdef PRINTINFO - if (pid == 0) - printf("thread %d: Finished first call to speedy, cost=%lf, k=%i\n", pid, - cost, k); -#endif - i = 0; - /* give speedy SP chances to get at least kmin/2 facilities */ - while ((k < kmin) && (i < SP)) { - cost = pspeedy(points, z, &k, pid, barrier); - i++; - } - -#ifdef PRINTINFO - if (pid == 0) - printf("thread %d: second call to speedy, cost=%lf, k=%d\n", pid, cost, k); -#endif - /* if still not enough facilities, assume z is too high */ - while (k < kmin) { -#ifdef PRINTINFO - if (pid == 0) { - printf("%lf %lf\n", loz, hiz); - printf("Speedy indicates we should try lower z\n"); - } -#endif - if (i >= SP) { - hiz = z; - z = (hiz + loz) / 2.0; - i = 0; - } - if (pid == 0) - shuffle(points); - cost = pspeedy(points, z, &k, pid, barrier); - i++; - } - - /* now we begin the binary search for real */ - /* must designate some points as feasible centers */ - /* this creates more consistancy between FL runs */ - /* helps to guarantee correct # of centers at the end */ - - if (pid == 0) { - numfeasible = selectfeasible_fast(points, &feasible, kmin, pid, barrier); - for (int i = 0; i < points->num; i++) { - is_center[points->p[i].assign] = true; - } - } - -#ifdef ENABLE_THREADS - pthread_barrier_wait(barrier); -#endif - - while (1) { - -#ifdef PRINTINFO - if (pid == 0) { - printf("loz = %lf, hiz = %lf\n", loz, hiz); - printf("Running Local Search...\n"); - } -#endif - /* first get a rough estimate on the FL solution */ - // pthread_barrier_wait(barrier); - lastcost = cost; - cost = pFL(points, feasible, numfeasible, z, &k, kmax, cost, - (long)(ITER * kmax * log((float)kmax)), 0.1, pid, barrier); - - /* if number of centers seems good, try a more accurate FL */ - if (((k <= (1.1) * kmax) && (k >= (0.9) * kmin)) || - ((k <= kmax + 2) && (k >= kmin - 2))) { - -#ifdef PRINTINFO - if (pid == 0) { - printf("Trying a more accurate local search...\n"); - } -#endif - /* may need to run a little longer here before halting without - improvement */ - - cost = pFL(points, feasible, numfeasible, z, &k, kmax, cost, - (long)(ITER * kmax * log((float)kmax)), 0.001, pid, barrier); - } - - if (k > kmax) { - /* facilities too cheap */ - /* increase facility cost and up the cost accordingly */ - loz = z; - z = (hiz + loz) / 2.0; - cost += (z - loz) * k; - } - if (k < kmin) { - /* facilities too expensive */ - /* decrease facility cost and reduce the cost accordingly */ - hiz = z; - z = (hiz + loz) / 2.0; - cost += (z - hiz) * k; - } - - /* if k is good, return the result */ - /* if we're stuck, just give up and return what we have */ - if (((k <= kmax) && (k >= kmin)) || ((loz >= (0.999) * hiz))) { - break; - } -#ifdef ENABLE_THREADS - pthread_barrier_wait(barrier); -#endif - } - - // clean up... - if (pid == 0) { - free(feasible); - free(hizs); - *kfinal = k; - } - - return cost; -} - -/* compute the means for the k clusters */ -int contcenters(Points *points) { - long i, ii; - float relweight; - - for (i = 0; i < points->num; i++) { - /* compute relative weight of this point to the cluster */ - if (points->p[i].assign != i) { - relweight = points->p[points->p[i].assign].weight + points->p[i].weight; - relweight = points->p[i].weight / relweight; - for (ii = 0; ii < points->dim; ii++) { - points->p[points->p[i].assign].coord[ii] *= 1.0 - relweight; - points->p[points->p[i].assign].coord[ii] += - points->p[i].coord[ii] * relweight; - } - points->p[points->p[i].assign].weight += points->p[i].weight; - } - } - - return 0; -} - -/* copy centers from points to centers */ -void copycenters(Points *points, Points *centers, long *centerIDs, - long offset) { - long i; - long k; - - bool *is_a_median = (bool *)calloc(points->num, sizeof(bool)); - - /* mark the centers */ - for (i = 0; i < points->num; i++) { - is_a_median[points->p[i].assign] = 1; - } - - k = centers->num; - - /* count how many */ - for (i = 0; i < points->num; i++) { - if (is_a_median[i]) { - memcpy(centers->p[k].coord, points->p[i].coord, - points->dim * sizeof(float)); - centers->p[k].weight = points->p[i].weight; - centerIDs[k] = i + offset; - k++; - } - } - - centers->num = k; - - free(is_a_median); -} - -void *localSearchSub(void *arg_) { - pkmedian_arg_t *arg = (pkmedian_arg_t *)arg_; - pkmedian(arg->points, arg->kmin, arg->kmax, arg->kfinal, arg->pid, - arg->barrier); - - return NULL; -} - -void localSearch(Points *points, long kmin, long kmax, long *kfinal) { -#ifdef PROFILE - double t1 = gettime(); -#endif - - pthread_barrier_t barrier; -#ifdef ENABLE_THREADS - pthread_barrier_init(&barrier, NULL, nproc); -#endif - pthread_t *threads = new pthread_t[nproc]; - pkmedian_arg_t *arg = new pkmedian_arg_t[nproc]; - - for (int i = 0; i < nproc; i++) { - arg[i].points = points; - arg[i].kmin = kmin; - arg[i].kmax = kmax; - arg[i].pid = i; - arg[i].kfinal = kfinal; - - arg[i].barrier = &barrier; -#ifdef ENABLE_THREADS - pthread_create(threads + i, NULL, localSearchSub, (void *)&arg[i]); -#else - localSearchSub(&arg[0]); -#endif - } - - for (int i = 0; i < nproc; i++) { -#ifdef ENABLE_THREADS - pthread_join(threads[i], NULL); -#endif - } - - delete[] threads; - delete[] arg; -#ifdef ENABLE_THREADS - pthread_barrier_destroy(&barrier); -#endif - -#ifdef PROFILE - double t2 = gettime(); - time_local_search += t2 - t1; -#endif -} - -void outcenterIDs(Points *centers, long *centerIDs, char *outfile) { - FILE *fp = fopen(outfile, "w"); - if (fp == NULL) { - fprintf(stderr, "error opening %s\n", outfile); - exit(1); - } - int *is_a_median = (int *)calloc(sizeof(int), centers->num); - for (int i = 0; i < centers->num; i++) { - is_a_median[centers->p[i].assign] = 1; - } - - for (int i = 0; i < centers->num; i++) { - if (is_a_median[i]) { - fprintf(fp, "%u\n", centerIDs[i]); - fprintf(fp, "%lf\n", centers->p[i].weight); - for (int k = 0; k < centers->dim; k++) { - fprintf(fp, "%lf ", centers->p[i].coord[k]); - } - fprintf(fp, "\n\n"); - } - } - fclose(fp); -} - -void streamCluster(PStream *stream, long kmin, long kmax, int dim, - long chunksize, long centersize, char *outfile) { - float *block = (float *)malloc(chunksize * dim * sizeof(float)); - float *centerBlock = (float *)malloc(centersize * dim * sizeof(float)); - long *centerIDs = (long *)malloc(centersize * dim * sizeof(long)); - - if (block == NULL) { - fprintf(stderr, "not enough memory for a chunk!\n"); - exit(1); - } - - Points points; - points.dim = dim; - points.num = chunksize; - points.p = (Point *)malloc(chunksize * sizeof(Point)); - for (int i = 0; i < chunksize; i++) { - points.p[i].coord = &block[i * dim]; - } - - Points centers; - centers.dim = dim; - centers.p = (Point *)malloc(centersize * sizeof(Point)); - centers.num = 0; - - for (int i = 0; i < centersize; i++) { - centers.p[i].coord = ¢erBlock[i * dim]; - centers.p[i].weight = 1.0; - } - - long IDoffset = 0; - long kfinal; - while (1) { - - size_t numRead = stream->read(block, dim, chunksize); - fprintf(stderr, "read %d points\n", numRead); - - if (stream->ferror() || - numRead < (unsigned int)chunksize && !stream->feof()) { - fprintf(stderr, "error reading data!\n"); - exit(1); - } - - points.num = numRead; - for (int i = 0; i < points.num; i++) { - points.p[i].weight = 1.0; - } - - switch_membership = (bool *)malloc(points.num * sizeof(bool)); - is_center = (bool *)calloc(points.num, sizeof(bool)); - center_table = (int *)malloc(points.num * sizeof(int)); - - localSearch(&points, kmin, kmax, &kfinal); - - fprintf(stderr, "finish local search\n"); - - contcenters(&points); - isCoordChanged = true; - - if (kfinal + centers.num > centersize) { - // here we don't handle the situation where # of centers gets too large. - fprintf(stderr, "oops! no more space for centers\n"); - exit(1); - } - -#ifdef PRINTINFO - printf("finish cont center\n"); -#endif - - copycenters(&points, ¢ers, centerIDs, IDoffset); - IDoffset += numRead; - -#ifdef PRINTINFO - printf("finish copy centers\n"); -#endif - - free(is_center); - free(switch_membership); - free(center_table); - - if (stream->feof()) { - break; - } - } - - // finally cluster all temp centers - switch_membership = (bool *)malloc(centers.num * sizeof(bool)); - is_center = (bool *)calloc(centers.num, sizeof(bool)); - center_table = (int *)malloc(centers.num * sizeof(int)); - - localSearch(¢ers, kmin, kmax, &kfinal); - contcenters(¢ers); - outcenterIDs(¢ers, centerIDs, outfile); -} - -int main(int argc, char **argv) { - cudaSetDevice(0); - char *outfilename = new char[MAXNAMESIZE]; - char *infilename = new char[MAXNAMESIZE]; - long kmin, kmax, n, chunksize, clustersize; - int dim; -#ifdef PARSEC_VERSION -#define __PARSEC_STRING(x) #x -#define __PARSEC_XSTRING(x) __PARSEC_STRING(x) - printf( - "PARSEC Benchmark Suite Version "__PARSEC_XSTRING(PARSEC_VERSION) "\n"); - fflush(NULL); -#else - printf("PARSEC Benchmark Suite\n"); - fflush(NULL); -#endif // PARSEC_VERSION -#ifdef ENABLE_PARSEC_HOOKS - __parsec_bench_begin(__parsec_streamcluster); -#endif - - if (argc < 10) { - fprintf(stderr, - "usage: %s k1 k2 d n chunksize clustersize infile outfile nproc\n", - argv[0]); - fprintf(stderr, " k1: Min. number of centers allowed\n"); - fprintf(stderr, " k2: Max. number of centers allowed\n"); - fprintf(stderr, " d: Dimension of each data point\n"); - fprintf(stderr, " n: Number of data points\n"); - fprintf(stderr, - " chunksize: Number of data points to handle per step\n"); - fprintf(stderr, " clustersize: Maximum number of intermediate centers\n"); - fprintf(stderr, " infile: Input file (if n<=0)\n"); - fprintf(stderr, " outfile: Output file\n"); - fprintf(stderr, " nproc: Number of threads to use\n"); - fprintf(stderr, "\n"); - fprintf(stderr, "if n > 0, points will be randomly generated instead of " - "reading from infile.\n"); - exit(1); - } - kmin = atoi(argv[1]); - kmax = atoi(argv[2]); - dim = atoi(argv[3]); - n = atoi(argv[4]); - chunksize = atoi(argv[5]); - clustersize = atoi(argv[6]); - strcpy(infilename, argv[7]); - strcpy(outfilename, argv[8]); - nproc = atoi(argv[9]); - - srand48(SEED); - PStream *stream; - if (n > 0) { - stream = new SimStream(n); - } else { - stream = new FileStream(infilename); - } - - double t1 = gettime(); - -#ifdef ENABLE_PARSEC_HOOKS - __parsec_roi_begin(); -#endif - - serial_t = 0.0; - cpu_to_gpu_t = 0.0; - gpu_to_cpu_t = 0.0; - alloc_t = 0.0; - free_t = 0.0; - kernel_t = 0.0; - - isCoordChanged = false; - - streamCluster(stream, kmin, kmax, dim, chunksize, clustersize, outfilename); - - freeDevMem(); - freeHostMem(); - -#ifdef ENABLE_PARSEC_HOOKS - __parsec_roi_end(); -#endif - - double t2 = gettime(); - - printf("time = %lfs\n", t2 - t1); - - delete stream; - -#ifdef PROFILE - printf("time pgain = %lfs\n", time_gain); - printf("time pgain_dist = %lfs\n", time_gain_dist); - printf("time pgain_init = %lfs\n", time_gain_init); - printf("time pselect = %lfs\n", time_select_feasible); - printf("time pspeedy = %lfs\n", time_speedy); - printf("time pshuffle = %lfs\n", time_shuffle); - printf("time localSearch = %lfs\n", time_local_search); - printf("\n\n"); - printf("====CUDA Timing info (pgain)====\n"); - printf("time serial = %lfs\n", serial_t / 1000); - printf("time CPU to GPU memory copy = %lfs\n", cpu_to_gpu_t / 1000); - printf("time GPU to CPU memory copy back = %lfs\n", gpu_to_cpu_t / 1000); - printf("time GPU malloc = %lfs\n", alloc_t / 1000); - printf("time GPU free = %lfs\n", free_t / 1000); - printf("time kernel = %lfs\n", kernel_t / 1000); -#endif - -#ifdef ENABLE_PARSEC_HOOKS - __parsec_bench_end(); -#endif - - return 0; -} diff --git a/examples/streamcluster/streamcluster_header.h b/examples/streamcluster/streamcluster_header.h deleted file mode 100644 index cc9a240..0000000 --- a/examples/streamcluster/streamcluster_header.h +++ /dev/null @@ -1,143 +0,0 @@ -/************************************************ - streamcluster_cuda_header.cu - : header file to streamcluster - - - original code from PARSEC Benchmark Suite - - parallelization with CUDA API has been applied by - - Sang-Ha (a.k.a Shawn) Lee - sl4ge@virginia.edu - University of Virginia - Department of Electrical and Computer Engineering - Department of Computer Science - -***********************************************/ - -#ifndef STREAMCLUSTER_CUDA_HEADER_CU -#define STREAMCLUSTER_CUDA_HEADER_CU - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include - -#ifdef ENABLE_PARSEC_HOOKS -#include -#endif - -using namespace std; - -/* this structure represents a point */ -/* these will be passed around to avoid copying coordinates */ -typedef struct { - float weight; - float *coord; - long assign; /* number of point where this one is assigned */ - float cost; /* cost of that assignment, weight*distance */ -} Point; - -/* this is the array of points */ -typedef struct { - long num; /* number of points; may not be N if this is a sample */ - int dim; /* dimensionality */ - Point *p; /* the array itself */ -} Points; - -struct pkmedian_arg_t { - Points *points; - long kmin; - long kmax; - long *kfinal; - int pid; - pthread_barrier_t *barrier; -}; - -class PStream { -public: - virtual size_t read(float *dest, int dim, int num) = 0; - virtual int ferror() = 0; - virtual int feof() = 0; - virtual ~PStream() {} -}; - -// synthetic stream -class SimStream : public PStream { -public: - SimStream(long n_) { n = n_; } - size_t read(float *dest, int dim, int num) { - size_t count = 0; - for (int i = 0; i < num && n > 0; i++) { - for (int k = 0; k < dim; k++) { - dest[i * dim + k] = lrand48() / (float)INT_MAX; - } - n--; - count++; - } - return count; - } - int ferror() { return 0; } - int feof() { return n <= 0; } - ~SimStream() {} - -private: - long n; -}; - -class FileStream : public PStream { -public: - FileStream(char *filename) { - fp = fopen(filename, "rb"); - if (fp == NULL) { - fprintf(stderr, "error opening file %s\n.", filename); - exit(1); - } - } - size_t read(float *dest, int dim, int num) { - return std::fread(dest, sizeof(float) * dim, num, fp); - } - int ferror() { return std::ferror(fp); } - int feof() { return std::feof(fp); } - ~FileStream() { - printf("closing file stream\n"); - fclose(fp); - } - -private: - FILE *fp; -}; - -/* function prototypes */ -double gettime(); -int isIdentical(float *, float *, int); -// static int floatcomp(const void*, const void*); -void shuffle(Points *); -void intshuffle(int *, int); -float waste(float); -float dist(Point, Point, int); -float pspeedy(Points *, float, long, int, pthread_barrier_t *); -float pgain_old(long, Points *, float, long int *, int, pthread_barrier_t *); -float pFL(Points *, int *, int, float, long *, float, long, float, int, - pthread_barrier_t *); -int selectfeasible_fast(Points *, int **, int, int, pthread_barrier_t *); -float pkmedian(Points *, long, long, long *, int, pthread_barrier_t *); -int contcenters(Points *); -void copycenters(Points *, Points *, long *, long); -void *localSearchSub(void *); -void localSearch(Points *, long, long, long *); -void outcenterIDs(Points *, long *, char *); -void streamCluster(PStream *, long, long, int, long, long, char *); -float pgain(long, Points *, float, long int *, int, bool *, int *, bool *, bool, - double *, double *, double *, double *, double *, double *); -void allocDevMem(int, int, int); -void allocHostMem(int, int, int); -void freeDevMem(); -void freeHostMem(); - -#endif