// File-viewer metadata: 74 lines, 1.7 KiB. The viewer labelled this file "ArmAsm"; the content is NVIDIA PTX.
//
// Generated by LLVM NVPTX Back-End
//
// PTX module that defines a single kernel entry point, _Z6vecAddPdS_S_i.
// Comment syntax in PTX is C-style ("//" and "/* */").
//

.version 7.5                        // PTX ISA version used by this module
.target sm_50                       // target architecture for code generation
.address_size 64                    // addresses are 64 bits wide

// .globl _Z6vecAddPdS_S_i

// Module-scope, one-byte arrays named after the CUDA built-in variables.
// The kernel in this file does not reference them: it reads the %ctaid,
// %ntid and %tid special registers directly.
// NOTE(review): presumably these come from the source declaring
// blockIdx/blockDim/threadIdx as ordinary variables - confirm before
// removing, since they are visible module symbols.
.global .align 1 .b8 blockIdx[1];
.global .align 1 .b8 blockDim[1];
.global .align 1 .b8 threadIdx[1];

//-----------------------------------------------------------------------
// _Z6vecAddPdS_S_i  ==  vecAdd(double*, double*, double*, int)
//
// Element-wise add of two double arrays, one element per thread:
//
//     i = ctaid.x * ntid.x + tid.x;
//     if (i < n) out[i] = lhs[i] + rhs[i];
//
// Params:
//   param_0 (.u64)  lhs  - pointer read as f64 at index i
//   param_1 (.u64)  rhs  - pointer read as f64 at index i
//   param_2 (.u64)  out  - pointer written as f64 at index i
//   param_3 (.u32)  n    - element count, compared as a signed 32-bit int
//
// Notes:
//   * The index is computed in 32-bit signed arithmetic (wraps on
//     overflow) and then sign-extended to 64 bits for addressing.
//   * The three pointers are converted with cvta.to.global, i.e. they are
//     taken to refer to the global state space.
//   * All values live in registers; no .local frame is used.
//   * Threads with i >= n do nothing and fall straight through to ret.
//-----------------------------------------------------------------------
.visible .entry _Z6vecAddPdS_S_i(
    .param .u64 _Z6vecAddPdS_S_i_param_0,
    .param .u64 _Z6vecAddPdS_S_i_param_1,
    .param .u64 _Z6vecAddPdS_S_i_param_2,
    .param .u32 _Z6vecAddPdS_S_i_param_3
)
{
    .reg .pred  %p<2>;
    .reg .b32   %r<6>;
    .reg .b64   %rd<11>;
    .reg .f64   %fd<4>;

    // Fetch kernel arguments.
    ld.param.u64        %rd1, [_Z6vecAddPdS_S_i_param_0];   // lhs
    ld.param.u64        %rd2, [_Z6vecAddPdS_S_i_param_1];   // rhs
    ld.param.u64        %rd3, [_Z6vecAddPdS_S_i_param_2];   // out
    ld.param.u32        %r1,  [_Z6vecAddPdS_S_i_param_3];   // n

    // i = ctaid.x * ntid.x + tid.x   (low 32 bits, same as mul.lo + add)
    mov.u32             %r2, %ctaid.x;
    mov.u32             %r3, %ntid.x;
    mov.u32             %r4, %tid.x;
    mad.lo.s32          %r5, %r2, %r3, %r4;

    // Bounds guard: skip the body when i >= n (signed compare).
    setp.ge.s32         %p1, %r5, %r1;
    @%p1 bra            $L__exit;

    // Generic -> global addresses, so the accesses below can use .global.
    cvta.to.global.u64  %rd4, %rd1;
    cvta.to.global.u64  %rd5, %rd2;
    cvta.to.global.u64  %rd6, %rd3;

    // Byte offset = sign_extend(i) * sizeof(double).
    mul.wide.s32        %rd7, %r5, 8;

    add.s64             %rd8, %rd4, %rd7;
    ld.global.f64       %fd1, [%rd8];                       // lhs[i]
    add.s64             %rd9, %rd5, %rd7;
    ld.global.f64       %fd2, [%rd9];                       // rhs[i]

    add.rn.f64          %fd3, %fd1, %fd2;                   // round-to-nearest-even

    add.s64             %rd10, %rd6, %rd7;
    st.global.f64       [%rd10], %fd3;                      // out[i]

$L__exit:
    ret;
}