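# Compiler output for vecadd.cu: x86-64 assembly in AT&T (GAS) syntax.
# Listing metadata: 103 lines, 2.6 KiB. The listing viewer labelled this
# file "ArmAsm", but the registers and instructions below are x86-64.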
# Everything that follows is assembled into the executable .text section.
.text
|
|
# Records the name of the source file this assembly was generated from.
.file "vecadd.cu"
|
|
#-----------------------------------------------------------------------
# _Z6vecAddPdS_S_i -- mangled name of vecAdd(double*, double*, double*, int)
#
# Syntax: AT&T (GAS), x86-64. Arguments are taken from the System V AMD64
# integer argument registers.
#
#   In:    %rdi = a  (first  double*), read
#          %rsi = b  (second double*), read
#          %rdx = c  (third  double*), written
#          %ecx = n  (int), signed upper bound on the element index
#   Out:   none
#   TLS:   reads the 32-bit thread-local variables block_size,
#          block_index_x and block_size_x through __tls_get_addr. They are
#          not defined in this file.
#          NOTE(review): these look like CUDA-style launch geometry set by
#          a host runtime before the call -- confirm against the caller.
#   Clobb: rax, rcx, rdx, rsi, rdi, xmm0, flags, plus whatever
#          __tls_get_addr clobbers. rbx, rbp and r12-r15 are saved and
#          restored.
#
# Behaviour as pseudo-C:
#   if (block_size == 0) return;
#   unsigned t = 0;
#   do {
#       int id = block_index_x * block_size_x + ((int)t % block_size_x);
#       if (id < n) c[id] = a[id] + b[id];
#   } while (++t < block_size);            // unsigned comparison
#
# Register roles after the prologue:
#   r12 = a, r15 = b, r14 = c, ebx = n, r13d = block_size,
#   esi = block_size_x, edi = block_index_x * block_size_x, ecx = t
#
# Stack: rbp frame plus six further 8-byte pushes. The final push only
# reserves the spill slot at -48(%rbp); with the standard entry alignment
# it also leaves %rsp 16-byte aligned at every call below.
#-----------------------------------------------------------------------
        .globl  _Z6vecAddPdS_S_i
        .p2align 4, 0x90
        .type   _Z6vecAddPdS_S_i,@function
_Z6vecAddPdS_S_i:
.L_Z6vecAddPdS_S_i$local:                       # local alias; the wrapper tail-calls this label
        # ---- prologue: frame pointer, callee-saved registers, spill slot ----
        pushq   %rbp
        movq    %rsp, %rbp
        pushq   %r15
        pushq   %r14
        pushq   %r13
        pushq   %r12
        pushq   %rbx
        pushq   %rax                            # value is irrelevant: reserves -48(%rbp)

        # ---- move the arguments out of the call-clobbered registers ----
        movq    %rdi, %r12                      # r12 = a
        movq    %rsi, %r15                      # r15 = b
        movq    %rdx, %r14                      # r14 = c
        movl    %ecx, %ebx                      # ebx = n

        # ---- r13d = block_size ----
        # General-dynamic TLS access. The prefix/leaq/prefix/call sequence is
        # a fixed-size pattern the linker may rewrite, so it is kept intact.
        data16
        leaq    block_size@TLSGD(%rip), %rdi
        data16
        data16
        rex64
        callq   __tls_get_addr@PLT
        movl    (%rax), %r13d
        testl   %r13d, %r13d
        je      .Lvecadd_done                   # zero iterations requested: nothing to do

        # ---- locate block_index_x; keep its address across the next call ----
        data16
        leaq    block_index_x@TLSGD(%rip), %rdi
        data16
        data16
        rex64
        callq   __tls_get_addr@PLT
        movq    %rax, -48(%rbp)                 # spill &block_index_x

        # ---- esi = block_size_x ----
        data16
        leaq    block_size_x@TLSGD(%rip), %rdi
        data16
        data16
        rex64
        callq   __tls_get_addr@PLT
        movl    (%rax), %esi                    # esi = block_size_x
        movq    -48(%rbp), %rax                 # reload &block_index_x
        movl    (%rax), %edi
        imull   %esi, %edi                      # edi = block_index_x * block_size_x
        xorl    %ecx, %ecx                      # t = 0

        # ---- one pass per t in [0, block_size); block_size != 0 here ----
        .p2align 4, 0x90
.Lvecadd_loop:
        movl    %ecx, %eax
        cltd                                    # sign-extend eax into edx:eax for idivl
        idivl   %esi                            # edx = t % block_size_x (faults if block_size_x == 0)
        addl    %edi, %edx                      # edx = id
        cmpl    %ebx, %edx
        jge     .Lvecadd_next                   # signed: skip the store when id >= n
        movslq  %edx, %rax                      # rax = (int64) id, used as element index
        movsd   (%r12,%rax,8), %xmm0            # xmm0 = a[id]
        addsd   (%r15,%rax,8), %xmm0            # xmm0 += b[id]
        movsd   %xmm0, (%r14,%rax,8)            # c[id] = xmm0
.Lvecadd_next:
        incl    %ecx                            # ++t
        cmpl    %r13d, %ecx
        jb      .Lvecadd_loop                   # unsigned: continue while t < block_size

        # ---- epilogue: drop the spill slot, restore callee-saved registers ----
.Lvecadd_done:
        addq    $8, %rsp
        popq    %rbx
        popq    %r12
        popq    %r13
        popq    %r14
        popq    %r15
        popq    %rbp
        retq
.Lvecadd_end:
        .size   _Z6vecAddPdS_S_i, .Lvecadd_end-_Z6vecAddPdS_S_i
                                                # -- End function
|
|
#-----------------------------------------------------------------------
# vecAddPdS_S_i_wrapper -- unpack an argument table and tail-call the kernel
#
# Syntax: AT&T (GAS), x86-64.
#
#   In:    %rdi = args, the base of a table of four 8-byte pointers. Each
#          entry points at the storage that holds one kernel argument.
#   Out:   whatever the kernel leaves behind (control never returns here)
#   Clobb: rax, rcx, rdx, rsi, rdi before the jump
#
# Behaviour as pseudo-C:
#   vecAdd(*(double **)args[0], *(double **)args[1],
#          *(double **)args[2], *(int *)args[3]);
#
# No stack frame is created. The kernel is entered with jmp, so its retq
# returns directly to this wrapper's caller.
#-----------------------------------------------------------------------
        .globl  vecAddPdS_S_i_wrapper
        .p2align 4, 0x90
        .type   vecAddPdS_S_i_wrapper,@function
vecAddPdS_S_i_wrapper:
        # The loads are independent of each other. %rdi is overwritten last
        # because it is the table base for every other access.
        movq    8(%rdi), %rcx                   # rcx = args[1]
        movq    (%rcx), %rsi                    # rsi = second kernel argument
        movq    16(%rdi), %rcx                  # rcx = args[2]
        movq    (%rcx), %rdx                    # rdx = third kernel argument
        movq    24(%rdi), %rcx                  # rcx = args[3]
        movl    (%rcx), %ecx                    # ecx = fourth kernel argument (32-bit load)
        movq    (%rdi), %rax                    # rax = args[0]
        movq    (%rax), %rax                    # rax = first kernel argument
        movq    %rax, %rdi                      # rdi = first kernel argument
        jmp     .L_Z6vecAddPdS_S_i$local        # TAILCALL
.Lwrapper_end:
        .size   vecAddPdS_S_i_wrapper, .Lwrapper_end-vecAddPdS_S_i_wrapper
                                                # -- End function
|
|
# Producer identification string; it tags the object file and does not
# affect the generated code.
.ident "Ubuntu clang version 14.0.0-1ubuntu1.1"
|
|
# NOTE(review): a second, much older producer string. Presumably it was
# carried in from a separately built module that was linked into this
# translation unit -- confirm where it comes from.
.ident "clang version 3.8.0 (tags/RELEASE_380/final)"
|
|
# Empty .note.GNU-stack section: tells the linker that this object does
# not need an executable stack.
.section ".note.GNU-stack","",@progbits
|