CuPBoP/examples/vecadd/kernel.s

103 lines
2.6 KiB
ArmAsm

# x86-64 assembly in AT&T operand order (GNU assembler dialect).
# Switch to the code section; the function definitions follow.
.text
# Name of the source file this object was generated from.
.file "vecadd.cu"
#-----------------------------------------------------------------------
# _Z6vecAddPdS_S_i  --  vecAdd(double*, double*, double*, int)
#
# Syntax: AT&T (GAS), x86-64. Arguments arrive in rdi, rsi, rdx, ecx.
#
# Runs `block_size` loop iterations. For iteration counter t:
#     idx = block_index_x * block_size_x + (t % block_size_x)
#     when idx < n (signed compare):  dst[idx] = src0[idx] + src1[idx]
# block_size, block_index_x and block_size_x are 32-bit thread-local
# variables defined outside this file; their addresses are obtained
# through __tls_get_addr.
# NOTE(review): the names suggest an emulation of CUDA blockIdx.x /
# blockDim.x on the CPU -- confirm against the runtime that defines them.
#
# In:    rdi = src0, rsi = src1, rdx = dst (indexed as 8-byte doubles)
#        ecx = n
# Out:   no return value is set up
# Saved: rbp, rbx, r12-r15 are pushed on entry and restored on exit
# Clobb: rax, rcx, rdx, rsi, rdi, xmm0, flags, plus anything
#        __tls_get_addr itself clobbers
#
# Register roles after the prologue:
#     r12  = src0      r15 = src1      r14 = dst      ebx = n
#     r13d = block_size (loop bound)
#     esi  = block_size_x
#     edi  = block_index_x * block_size_x
#     ecx  = iteration counter t
#-----------------------------------------------------------------------
        .globl  _Z6vecAddPdS_S_i
        .p2align 4, 0x90
        .type   _Z6vecAddPdS_S_i,@function
_Z6vecAddPdS_S_i:
.L_Z6vecAddPdS_S_i$local:                       # file-local alias of the entry point (target of the wrapper's tail jump)
        pushq   %rbp
        movq    %rsp, %rbp
        pushq   %r15
        pushq   %r14
        pushq   %r13
        pushq   %r12
        pushq   %rbx
        pushq   %rax                            # seventh push: rsp is 16-byte aligned for the calls; slot is reused as -48(%rbp)

        movq    %rdi, %r12                      # r12 = src0
        movq    %rsi, %r15                      # r15 = src1
        movq    %rdx, %r14                      # r14 = dst
        movl    %ecx, %ebx                      # ebx = n

        # TLS lookup of block_size. The prefix padding around leaq/callq is
        # part of the general-dynamic code sequence and is kept verbatim.
        data16
        leaq    block_size@TLSGD(%rip), %rdi
        data16
        data16
        rex64
        callq   __tls_get_addr@PLT
        movl    (%rax), %r13d                   # r13d = block_size
        testl   %r13d, %r13d
        je      .Lvecadd_done                   # zero iterations requested

        # TLS lookup of block_index_x; the address must survive the next call.
        data16
        leaq    block_index_x@TLSGD(%rip), %rdi
        data16
        data16
        rex64
        callq   __tls_get_addr@PLT
        movq    %rax, -48(%rbp)                 # spill &block_index_x

        # TLS lookup of block_size_x.
        data16
        leaq    block_size_x@TLSGD(%rip), %rdi
        data16
        data16
        rex64
        callq   __tls_get_addr@PLT
        movl    (%rax), %esi                    # esi = block_size_x
        movq    -48(%rbp), %rax                 # reload &block_index_x
        movl    (%rax), %edi
        imull   %esi, %edi                      # edi = block_index_x * block_size_x
        xorl    %ecx, %ecx                      # t = 0

        # Invariant at the loop head: t < block_size (unsigned); the zero
        # case was filtered out above, so the first pass is always valid.
        .p2align 4, 0x90
.Lvecadd_iter:
        movl    %ecx, %eax
        cltd                                    # sign-extend eax into edx:eax for idivl
        idivl   %esi                            # edx = t % block_size_x
        addl    %edi, %edx                      # edx = idx
        cmpl    %ebx, %edx
        jge     .Lvecadd_next                   # idx >= n (signed): skip the element
        movslq  %edx, %rax
        movsd   (%r12,%rax,8), %xmm0            # xmm0 = src0[idx]
        addsd   (%r15,%rax,8), %xmm0            # xmm0 += src1[idx]
        movsd   %xmm0, (%r14,%rax,8)            # dst[idx] = xmm0
.Lvecadd_next:
        incl    %ecx
        cmpl    %r13d, %ecx
        jb      .Lvecadd_iter                   # unsigned compare: loop while t < block_size

.Lvecadd_done:
        addq    $8, %rsp                        # drop the alignment/spill slot
        popq    %rbx
        popq    %r12
        popq    %r13
        popq    %r14
        popq    %r15
        popq    %rbp
        retq
.Lfunc_end0:
        .size   _Z6vecAddPdS_S_i, .Lfunc_end0-_Z6vecAddPdS_S_i
#-----------------------------------------------------------------------
# vecAddPdS_S_i_wrapper
#
# Syntax: AT&T (GAS), x86-64.
#
# Unpacks an argument table and tail-jumps into _Z6vecAddPdS_S_i.
#
# In:    rdi = address of a table of four 8-byte pointers; entry k
#              points at the storage holding argument k:
#                  entries 0..2 -> 8-byte values (moved to rdi, rsi, rdx)
#                  entry 3      -> 4-byte value  (moved to ecx)
# Out:   control transfers to the kernel, which returns straight to
#        this wrapper's caller (no stack frame is created here).
# Clobb: rcx, rdx, rsi, rdi before the jump
#-----------------------------------------------------------------------
        .globl  vecAddPdS_S_i_wrapper
        .p2align 4, 0x90
        .type   vecAddPdS_S_i_wrapper,@function
vecAddPdS_S_i_wrapper:
        movq    8(%rdi), %rsi
        movq    (%rsi), %rsi                    # rsi = *table[1]
        movq    16(%rdi), %rdx
        movq    (%rdx), %rdx                    # rdx = *table[2]
        movq    24(%rdi), %rcx
        movl    (%rcx), %ecx                    # ecx = *(32-bit *)table[3]
        movq    (%rdi), %rdi                    # handled last: this overwrites the table pointer
        movq    (%rdi), %rdi                    # rdi = *table[0]
        jmp     .L_Z6vecAddPdS_S_i$local        # tail call into the kernel
.Lfunc_end1:
        .size   vecAddPdS_S_i_wrapper, .Lfunc_end1-vecAddPdS_S_i_wrapper
# Toolchain identification strings recorded in the object file.
# NOTE(review): two different clang versions are listed; presumably they come
# from separately produced inputs that were merged -- confirm with the build.
.ident "Ubuntu clang version 14.0.0-1ubuntu1.1"
.ident "clang version 3.8.0 (tags/RELEASE_380/final)"
# Empty .note.GNU-stack section: the conventional marker that this object
# does not require an executable stack.
.section ".note.GNU-stack","",@progbits