; CuPBoP/examples/lud/lud-host-x86_64-unknown-lin...

; 327 lines
; 17 KiB
; LLVM

; ModuleID = 'lud-host-x86_64-unknown-linux-gnu.bc'
; Host-side LLVM IR module for cuda/lud.cu, targeting x86_64 Linux (see the
; target triple below). This section holds the named struct types and the
; global data that @main references.
source_filename = "cuda/lud.cu"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Element type of the @_ZL12long_options table that @main passes to
; getopt_long: { name string, i32, i32* (null in every entry here), i32 }.
; NOTE(review): presumably mirrors libc's `struct option`
; (name, has_arg, flag, val) - confirm against <getopt.h>.
%struct.option = type { i8*, i32, i32*, i32 }
; Stream type used for @stderr / fprintf; only ever handled by pointer here.
%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
; Stopwatch object: a pair of timevals, each { i64, i64 }. @main allocates one
; on the stack and hands it to stopwatch_start / stopwatch_stop /
; get_interval_by_sec.
%struct.__stopwatch_t = type { %struct.timeval, %struct.timeval }
%struct.timeval = type { i64, i64 }
; Banner format; @main prints it with the constants 16 and 16.
@.str = private unnamed_addr constant [29 x i8] c"WG size of kernel = %d X %d\0A\00", align 1
; Short-option string handed to getopt_long.
; NOTE(review): the string begins with two ':' characters - confirm that is
; intended in the original lud.cu.
@.str.1 = private unnamed_addr constant [8 x i8] c"::vs:i:\00", align 1
; Long-option table (internal linkage), four entries:
;   "input"  -> second field 1, last field 105 (ASCII 'i')
;   "size"   -> second field 1, last field 115 (ASCII 's')
;   "verify" -> second field 0, last field 118 (ASCII 'v')
;   zeroinitializer entry at the end
; The last-field values match the switch cases in @main.
@_ZL12long_options = internal global [4 x %struct.option] [%struct.option { i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.15, i32 0, i32 0), i32 1, i32* null, i32 105 }, %struct.option { i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.16, i32 0, i32 0), i32 1, i32* null, i32 115 }, %struct.option { i8* getelementptr inbounds ([7 x i8], [7 x i8]* @.str.17, i32 0, i32 0), i32 0, i32* null, i32 118 }, %struct.option zeroinitializer], align 16
; External global read by @main after getopt_long returns an option that
; takes an argument (cases 105 and 115).
@optarg = external dso_local global i8*, align 8
; Verification flag (internal linkage, initially 0); @main stores 1 into it in
; the case-118 ('v') branch and tests it before and after the LUD run.
@_ZL9do_verify = internal global i32 0, align 4
@.str.2 = private unnamed_addr constant [44 x i8] c"Generate input matrix internally, size =%d\0A\00", align 1
; External stream pointer used as the fprintf target for all error output.
@stderr = external dso_local global %struct._IO_FILE*, align 8
@.str.3 = private unnamed_addr constant [16 x i8] c"invalid option\0A\00", align 1
@.str.4 = private unnamed_addr constant [18 x i8] c"missing argument\0A\00", align 1
; Usage message; %s is filled with argv[0].
@.str.5 = private unnamed_addr constant [47 x i8] c"Usage: %s [-v] [-s matrix_size|-i input_file]\0A\00", align 1
; External global compared against argc (and against 1) once option parsing
; has finished.
@optind = external dso_local global i32, align 4
@.str.6 = private unnamed_addr constant [29 x i8] c"Reading matrix from file %s\0A\00", align 1
@.str.7 = private unnamed_addr constant [34 x i8] c"error create matrix from file %s\0A\00", align 1
@.str.8 = private unnamed_addr constant [36 x i8] c"Creating matrix internally size=%d\0A\00", align 1
@.str.9 = private unnamed_addr constant [40 x i8] c"error create matrix internally size=%d\0A\00", align 1
@.str.10 = private unnamed_addr constant [26 x i8] c"No input file specified!\0A\00", align 1
@.str.11 = private unnamed_addr constant [12 x i8] c"Before LUD\0A\00", align 1
; Timing report; @main passes get_interval_by_sec(...) * 1000.0 for %lf.
@.str.12 = private unnamed_addr constant [24 x i8] c"Time consumed(ms): %lf\0A\00", align 1
@.str.13 = private unnamed_addr constant [11 x i8] c"After LUD\0A\00", align 1
@.str.14 = private unnamed_addr constant [15 x i8] c">>>Verify<<<<\0A\00", align 1
; Name strings referenced by the @_ZL12long_options entries above.
@.str.15 = private unnamed_addr constant [6 x i8] c"input\00", align 1
@.str.16 = private unnamed_addr constant [5 x i8] c"size\00", align 1
@.str.17 = private unnamed_addr constant [7 x i8] c"verify\00", align 1
; Function Attrs: noinline norecurse optnone uwtable
;
; @main - host driver of the LUD example.
;
; Flow, as visible in this function:
;   1. cudaSetDevice(0) and print the "WG size of kernel" banner (16 X 16).
;   2. Parse options with getopt_long until it returns -1
;      (105 'i' = input file, 115 's' = matrix size, 118 'v' = verify).
;   3. Print usage and exit(1) if optind < argc or optind == 1.
;   4. Build the host matrix %m from a file (create_matrix_from_file) or
;      internally (create_matrix); exit(1) if either reports non-zero.
;   5. If verification was requested, duplicate %m into %mm.
;   6. cudaMalloc the device buffer %d_m, then time: copy %m -> %d_m,
;      call lud_cuda (@_Z8lud_cudaPfi), copy %d_m -> %m.
;   7. Print the elapsed time in ms, cudaFree %d_m, optionally lud_verify and
;      free %mm, then free %m.
;
; Parameters: %argc / %argv - the usual C argument count and vector.
; Returns:    i32 0 on the normal path; every error path calls exit(1).
;
; NOTE(review): the i32 results of cudaSetDevice, cudaMalloc, both cudaMemcpy
; calls, cudaFree and lud_verify are never read in this function.
define dso_local i32 @main(i32 %argc, i8** %argv) #0 {
entry:
; Stack slots for the source-level locals (optnone: everything goes through
; memory). %retval is written once below and never read again.
%retval = alloca i32, align 4
%argc.addr = alloca i32, align 4
%argv.addr = alloca i8**, align 8
%matrix_dim = alloca i32, align 4
%opt = alloca i32, align 4
%option_index = alloca i32, align 4
%ret = alloca i32, align 4
%input_file = alloca i8*, align 8
; %m: host matrix, %d_m: device buffer, %mm: host copy used for verification.
%m = alloca float*, align 8
%d_m = alloca float*, align 8
%mm = alloca float*, align 8
%sw = alloca %struct.__stopwatch_t, align 8
store i32 0, i32* %retval, align 4
store i32 %argc, i32* %argc.addr, align 4
store i8** %argv, i8*** %argv.addr, align 8
%call = call i32 @cudaSetDevice(i32 0)
; Banner: both %d arguments are the constant 16.
%call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([29 x i8], [29 x i8]* @.str, i64 0, i64 0), i32 16, i32 16)
; Defaults: matrix_dim = 32, option_index = 0, input_file = null.
store i32 32, i32* %matrix_dim, align 4
store i32 0, i32* %option_index, align 4
store i8* null, i8** %input_file, align 8
br label %while.cond
; ---- Option-parsing loop: runs until getopt_long returns -1. ----
while.cond: ; preds = %sw.epilog, %entry
%0 = load i32, i32* %argc.addr, align 4
%1 = load i8**, i8*** %argv.addr, align 8
%call2 = call i32 @getopt_long(i32 %0, i8** %1, i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str.1, i64 0, i64 0), %struct.option* getelementptr inbounds ([4 x %struct.option], [4 x %struct.option]* @_ZL12long_options, i64 0, i64 0), i32* %option_index) #5
store i32 %call2, i32* %opt, align 4
%cmp = icmp ne i32 %call2, -1
br i1 %cmp, label %while.body, label %while.end
while.body: ; preds = %while.cond
%2 = load i32, i32* %opt, align 4
; Dispatch on the returned option code. ASCII: 105 'i', 118 'v', 115 's',
; 63 '?', 58 ':'. Anything else goes to %sw.default (usage + exit).
switch i32 %2, label %sw.default [
i32 105, label %sw.bb
i32 118, label %sw.bb3
i32 115, label %sw.bb4
i32 63, label %sw.bb7
i32 58, label %sw.bb9
]
; 'i': remember optarg as the input file path.
sw.bb: ; preds = %while.body
%3 = load i8*, i8** @optarg, align 8
store i8* %3, i8** %input_file, align 8
br label %sw.epilog
; 'v': enable verification.
sw.bb3: ; preds = %while.body
store i32 1, i32* @_ZL9do_verify, align 4
br label %sw.epilog
; 's': matrix_dim = atoi(optarg), then echo the size.
sw.bb4: ; preds = %while.body
%4 = load i8*, i8** @optarg, align 8
%call5 = call i32 @atoi(i8* %4) #6
store i32 %call5, i32* %matrix_dim, align 4
%5 = load i32, i32* %matrix_dim, align 4
%call6 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([44 x i8], [44 x i8]* @.str.2, i64 0, i64 0), i32 %5)
br label %sw.epilog
; '?': report "invalid option" on stderr.
; NOTE(review): this branch (and the ':' branch below) returns to the loop
; instead of exiting, so parsing continues after the error message.
sw.bb7: ; preds = %while.body
%6 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
%call8 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %6, i8* getelementptr inbounds ([16 x i8], [16 x i8]* @.str.3, i64 0, i64 0))
br label %sw.epilog
; ':': report "missing argument" on stderr.
sw.bb9: ; preds = %while.body
%7 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
%call10 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %7, i8* getelementptr inbounds ([18 x i8], [18 x i8]* @.str.4, i64 0, i64 0))
br label %sw.epilog
; Any other code: print the usage line with argv[0] and exit(1).
sw.default: ; preds = %while.body
%8 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
%9 = load i8**, i8*** %argv.addr, align 8
%arrayidx = getelementptr inbounds i8*, i8** %9, i64 0
%10 = load i8*, i8** %arrayidx, align 8
%call11 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %8, i8* getelementptr inbounds ([47 x i8], [47 x i8]* @.str.5, i64 0, i64 0), i8* %10)
call void @exit(i32 1) #7
unreachable
sw.epilog: ; preds = %sw.bb9, %sw.bb7, %sw.bb4, %sw.bb3, %sw.bb
br label %while.cond
; ---- After parsing: usage + exit(1) if (optind < argc) || (optind == 1). ----
while.end: ; preds = %while.cond
%11 = load i32, i32* @optind, align 4
%12 = load i32, i32* %argc.addr, align 4
%cmp12 = icmp slt i32 %11, %12
br i1 %cmp12, label %if.then, label %lor.lhs.false
lor.lhs.false: ; preds = %while.end
%13 = load i32, i32* @optind, align 4
%cmp13 = icmp eq i32 %13, 1
br i1 %cmp13, label %if.then, label %if.end
if.then: ; preds = %lor.lhs.false, %while.end
%14 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
%15 = load i8**, i8*** %argv.addr, align 8
%arrayidx14 = getelementptr inbounds i8*, i8** %15, i64 0
%16 = load i8*, i8** %arrayidx14, align 8
%call15 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %14, i8* getelementptr inbounds ([47 x i8], [47 x i8]* @.str.5, i64 0, i64 0), i8* %16)
call void @exit(i32 1) #7
unreachable
; ---- Matrix creation: from file if input_file != null, else internally. ----
if.end: ; preds = %lor.lhs.false
%17 = load i8*, i8** %input_file, align 8
%tobool = icmp ne i8* %17, null
br i1 %tobool, label %if.then16, label %if.else
; File path: create_matrix_from_file receives &m, the path and &matrix_dim
; (passed by pointer, so the callee is able to write the dimension).
if.then16: ; preds = %if.end
%18 = load i8*, i8** %input_file, align 8
%call17 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([29 x i8], [29 x i8]* @.str.6, i64 0, i64 0), i8* %18)
%19 = load i8*, i8** %input_file, align 8
%call18 = call i32 @create_matrix_from_file(float** %m, i8* %19, i32* %matrix_dim)
store i32 %call18, i32* %ret, align 4
%20 = load i32, i32* %ret, align 4
%cmp19 = icmp ne i32 %20, 0
br i1 %cmp19, label %if.then20, label %if.end22
; Non-zero result: null out %m, report the file name on stderr, exit(1).
if.then20: ; preds = %if.then16
store float* null, float** %m, align 8
%21 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
%22 = load i8*, i8** %input_file, align 8
%call21 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %21, i8* getelementptr inbounds ([34 x i8], [34 x i8]* @.str.7, i64 0, i64 0), i8* %22)
call void @exit(i32 1) #7
unreachable
if.end22: ; preds = %if.then16
br label %if.end34
; No input file: generate internally when matrix_dim != 0.
if.else: ; preds = %if.end
%23 = load i32, i32* %matrix_dim, align 4
%tobool23 = icmp ne i32 %23, 0
br i1 %tobool23, label %if.then24, label %if.else31
if.then24: ; preds = %if.else
%24 = load i32, i32* %matrix_dim, align 4
%call25 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([36 x i8], [36 x i8]* @.str.8, i64 0, i64 0), i32 %24)
%25 = load i32, i32* %matrix_dim, align 4
%call26 = call i32 @create_matrix(float** %m, i32 %25)
store i32 %call26, i32* %ret, align 4
%26 = load i32, i32* %ret, align 4
%cmp27 = icmp ne i32 %26, 0
br i1 %cmp27, label %if.then28, label %if.end30
; Non-zero result: null out %m, report the size on stderr, exit(1).
if.then28: ; preds = %if.then24
store float* null, float** %m, align 8
%27 = load %struct._IO_FILE*, %struct._IO_FILE** @stderr, align 8
%28 = load i32, i32* %matrix_dim, align 4
%call29 = call i32 (%struct._IO_FILE*, i8*, ...) @fprintf(%struct._IO_FILE* %27, i8* getelementptr inbounds ([40 x i8], [40 x i8]* @.str.9, i64 0, i64 0), i32 %28)
call void @exit(i32 1) #7
unreachable
if.end30: ; preds = %if.then24
br label %if.end33
; matrix_dim == 0 and no file: print "No input file specified!" (to stdout
; via printf, unlike the other errors) and exit(1).
if.else31: ; preds = %if.else
%call32 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([26 x i8], [26 x i8]* @.str.10, i64 0, i64 0))
call void @exit(i32 1) #7
unreachable
if.end33: ; preds = %if.end30
br label %if.end34
; ---- Optional pre-run copy of the input for later verification. ----
if.end34: ; preds = %if.end33, %if.end22
%29 = load i32, i32* @_ZL9do_verify, align 4
%tobool35 = icmp ne i32 %29, 0
br i1 %tobool35, label %if.then36, label %if.end38
if.then36: ; preds = %if.end34
%call37 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.11, i64 0, i64 0))
%30 = load float*, float** %m, align 8
%31 = load i32, i32* %matrix_dim, align 4
call void @matrix_duplicate(float* %30, float** %mm, i32 %31)
br label %if.end38
; ---- Device allocation, timed copy-in / lud_cuda / copy-out. ----
if.end38: ; preds = %if.then36, %if.end34
%32 = bitcast float** %d_m to i8**
%33 = load i32, i32* %matrix_dim, align 4
%34 = load i32, i32* %matrix_dim, align 4
; Byte count = sext(matrix_dim * matrix_dim) * 4.
; NOTE(review): the dim*dim product is an i32 `mul nsw`, widened to i64 only
; afterwards, so it is computed in 32 bits (same pattern at both cudaMemcpy
; call sites below).
%mul = mul nsw i32 %33, %34
%conv = sext i32 %mul to i64
%mul39 = mul i64 %conv, 4
; cudaMalloc writes the device pointer into %d_m; it runs before the stopwatch
; starts, so allocation is outside the timed region.
%call40 = call i32 @cudaMalloc(i8** %32, i64 %mul39)
call void @stopwatch_start(%struct.__stopwatch_t* %sw)
%35 = load float*, float** %d_m, align 8
%36 = bitcast float* %35 to i8*
%37 = load float*, float** %m, align 8
%38 = bitcast float* %37 to i8*
%39 = load i32, i32* %matrix_dim, align 4
%40 = load i32, i32* %matrix_dim, align 4
%mul41 = mul nsw i32 %39, %40
%conv42 = sext i32 %mul41 to i64
%mul43 = mul i64 %conv42, 4
; Copy %m -> %d_m; kind argument 1 (cudaMemcpyHostToDevice in the CUDA
; runtime's cudaMemcpyKind enum).
%call44 = call i32 @cudaMemcpy(i8* %36, i8* %38, i64 %mul43, i32 1)
%41 = load float*, float** %d_m, align 8
%42 = load i32, i32* %matrix_dim, align 4
; @_Z8lud_cudaPfi is the Itanium-mangled name of lud_cuda(float*, int).
call void @_Z8lud_cudaPfi(float* %41, i32 %42)
%43 = load float*, float** %m, align 8
%44 = bitcast float* %43 to i8*
%45 = load float*, float** %d_m, align 8
%46 = bitcast float* %45 to i8*
%47 = load i32, i32* %matrix_dim, align 4
%48 = load i32, i32* %matrix_dim, align 4
%mul45 = mul nsw i32 %47, %48
%conv46 = sext i32 %mul45 to i64
%mul47 = mul i64 %conv46, 4
; Copy %d_m -> %m; kind argument 2 (cudaMemcpyDeviceToHost).
%call48 = call i32 @cudaMemcpy(i8* %44, i8* %46, i64 %mul47, i32 2)
call void @stopwatch_stop(%struct.__stopwatch_t* %sw)
; Elapsed time: get_interval_by_sec(&sw) * 1000.0, printed as milliseconds.
%call49 = call double @get_interval_by_sec(%struct.__stopwatch_t* %sw)
%mul50 = fmul contract double 1.000000e+03, %call49
%call51 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.12, i64 0, i64 0), double %mul50)
%49 = load float*, float** %d_m, align 8
%50 = bitcast float* %49 to i8*
%call52 = call i32 @cudaFree(i8* %50)
; ---- Optional verification against the pre-run copy. ----
%51 = load i32, i32* @_ZL9do_verify, align 4
%tobool53 = icmp ne i32 %51, 0
br i1 %tobool53, label %if.then54, label %if.end58
if.then54: ; preds = %if.end38
%call55 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.13, i64 0, i64 0))
%call56 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str.14, i64 0, i64 0))
%52 = load float*, float** %mm, align 8
%53 = load float*, float** %m, align 8
%54 = load i32, i32* %matrix_dim, align 4
; lud_verify(mm, m, matrix_dim): saved input first, computed result second.
%call57 = call i32 @lud_verify(float* %52, float* %53, i32 %54)
%55 = load float*, float** %mm, align 8
%56 = bitcast float* %55 to i8*
call void @free(i8* %56) #5
br label %if.end58
; ---- Cleanup: release the host matrix and return 0. ----
if.end58: ; preds = %if.then54, %if.end38
%57 = load float*, float** %m, align 8
%58 = bitcast float* %57 to i8*
call void @free(i8* %58) #5
ret i32 0
}
; External functions called from @main; none of them is defined in this
; module. The trailing #N selects one of the attribute groups defined after
; these declarations.
; NOTE(review): the cuda* symbols are presumably supplied by the CUDA runtime
; or a replacement runtime at link time - not visible from this file.
declare dso_local i32 @cudaSetDevice(i32) #1
declare dso_local i32 @printf(i8*, ...) #1
; Function Attrs: nounwind
declare dso_local i32 @getopt_long(i32, i8**, i8*, %struct.option*, i32*) #2
; Function Attrs: nounwind readonly
declare dso_local i32 @atoi(i8*) #3
declare dso_local i32 @fprintf(%struct._IO_FILE*, i8*, ...) #1
; Function Attrs: noreturn nounwind
declare dso_local void @exit(i32) #4
; Matrix helpers: @main passes the address of its %m slot (float**) to both
; creators and treats a non-zero i32 result as failure.
declare dso_local i32 @create_matrix_from_file(float**, i8*, i32*) #1
declare dso_local i32 @create_matrix(float**, i32) #1
; Called as matrix_duplicate(m, &mm, matrix_dim).
declare dso_local void @matrix_duplicate(float*, float**, i32) #1
; Called with (address of the device-pointer slot, size in bytes).
declare dso_local i32 @cudaMalloc(i8**, i64) #1
declare dso_local void @stopwatch_start(%struct.__stopwatch_t*) #1
; Called with (dst, src, size in bytes, kind); @main uses kind 1 and kind 2.
declare dso_local i32 @cudaMemcpy(i8*, i8*, i64, i32) #1
; Itanium-mangled lud_cuda(float*, int); called with the device buffer and
; matrix_dim.
declare dso_local void @_Z8lud_cudaPfi(float*, i32) #1
declare dso_local void @stopwatch_stop(%struct.__stopwatch_t*) #1
; Result is multiplied by 1000.0 in @main and printed as milliseconds.
declare dso_local double @get_interval_by_sec(%struct.__stopwatch_t*) #1
declare dso_local i32 @cudaFree(i8*) #1
; Called as lud_verify(mm, m, matrix_dim); @main does not read the i32 result.
declare dso_local i32 @lud_verify(float*, float*, i32) #1
; Function Attrs: nounwind
declare dso_local void @free(i8*) #2
; Attribute groups referenced by the #N suffixes in this module.
; #0: the @main definition (noinline + optnone, i.e. an unoptimized build).
attributes #0 = { noinline norecurse optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
; #1: external declarations with no nounwind/readonly/noreturn markers
; (printf, fprintf, the cuda* functions, the matrix and stopwatch helpers).
attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
; #2: nounwind declarations (getopt_long, free).
attributes #2 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
; #3: nounwind readonly declaration (atoi).
attributes #3 = { nounwind readonly "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
; #4: noreturn nounwind declaration (exit).
attributes #4 = { noreturn nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
; #5-#7: call-site groups used inside @main
; (#5 on the getopt_long and free calls, #6 on the atoi call, #7 on the exit calls).
attributes #5 = { nounwind }
attributes #6 = { nounwind readonly }
attributes #7 = { noreturn nounwind }
; Module-level metadata: two module flags and the producer identification.
!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}
; Module flag "SDK Version" = [10, 1]; the leading i32 is the flag's merge
; behavior (2 = Warning per the LLVM LangRef).
!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 10, i32 1]}
; Module flag "wchar_size" = 4; merge behavior 1 (Error on mismatch).
!1 = !{i32 1, !"wchar_size", i32 4}
; Producer string recorded by the compiler that emitted this module.
!2 = !{!"clang version 10.0.1 (https://github.com/llvm/llvm-project.git ef32c611aa214dea855364efd7ba451ec5ec3f74)"}