//
// Generated by LLVM NVPTX Back-End
//

.version 7.5
.target sm_50
.address_size 64

	// .globl	_Z6vecAddPdS_S_i

// One-byte module-scope symbols. Nothing in the kernel below references them;
// the thread/block indices are read from the %tid / %ntid / %ctaid special
// registers instead.
// NOTE(review): presumably placeholders for the CUDA builtin variables of the
// same names -- confirm before removing.
.global .align 1 .b8 blockIdx[1];
.global .align 1 .b8 blockDim[1];
.global .align 1 .b8 threadIdx[1];

//-----------------------------------------------------------------------
// _Z6vecAddPdS_S_i  ==  vecAdd(double*, double*, double*, int)
//
// Element-wise add of two f64 arrays, one element per thread:
//     i = %ctaid.x * %ntid.x + %tid.x
//     if (i < param_3)  param_2[i] = param_0[i] + param_1[i]
//
// Params:
//   param_0 (.u64)  first input array  (read)
//   param_1 (.u64)  second input array (read)
//   param_2 (.u64)  output array       (written)
//   param_3 (.u32)  element count; compared as a signed 32-bit value
//
// Local frame (__local_depot0, 32 bytes, addressed through %SP):
//   [%SP+0]   param_0 pointer (generic address)
//   [%SP+8]   param_1 pointer (generic address)
//   [%SP+16]  param_2 pointer (generic address)
//   [%SP+24]  param_3 (element count)
//   [%SP+28]  i (this thread's element index)
//
// Every value is stored to the frame and reloaded before use.
// NOTE(review): this looks like unoptimized compiler output; the instruction
// stream is intentionally left exactly as generated.
//-----------------------------------------------------------------------
.visible .entry _Z6vecAddPdS_S_i(
	.param .u64 _Z6vecAddPdS_S_i_param_0,
	.param .u64 _Z6vecAddPdS_S_i_param_1,
	.param .u64 _Z6vecAddPdS_S_i_param_2,
	.param .u32 _Z6vecAddPdS_S_i_param_3
)
{
	.local .align 8 .b8 	__local_depot0[32];
	.reg .b64 	%SP;
	.reg .b64 	%SPL;
	.reg .pred 	%p<2>;
	.reg .b32 	%r<9>;
	.reg .b64 	%rd<18>;
	.reg .f64 	%fd<4>;

	// Frame setup: %SPL = local-space address of the depot,
	// %SP = the same location as a generic address.
	mov.u64 	%SPL, __local_depot0;
	cvta.local.u64 	%SP, %SPL;

	// Fetch the kernel arguments.
	ld.param.u32 	%r1, [_Z6vecAddPdS_S_i_param_3];
	ld.param.u64 	%rd3, [_Z6vecAddPdS_S_i_param_2];
	ld.param.u64 	%rd2, [_Z6vecAddPdS_S_i_param_1];
	ld.param.u64 	%rd1, [_Z6vecAddPdS_S_i_param_0];

	// Round-trip each pointer generic -> global -> generic; the generic
	// form (%rd5 / %rd7 / %rd9) is what gets stored and later dereferenced.
	cvta.to.global.u64 	%rd4, %rd3;
	cvta.global.u64 	%rd5, %rd4;
	cvta.to.global.u64 	%rd6, %rd2;
	cvta.global.u64 	%rd7, %rd6;
	cvta.to.global.u64 	%rd8, %rd1;
	cvta.global.u64 	%rd9, %rd8;

	// Spill the arguments into the local frame (layout in the header).
	st.u64 	[%SP+0], %rd9;
	st.u64 	[%SP+8], %rd7;
	st.u64 	[%SP+16], %rd5;
	st.u32 	[%SP+24], %r1;

	// i = %ctaid.x * %ntid.x + %tid.x, kept at [%SP+28].
	mov.u32 	%r2, %ctaid.x;
	mov.u32 	%r3, %ntid.x;
	mul.lo.s32 	%r4, %r2, %r3;
	mov.u32 	%r5, %tid.x;
	add.s32 	%r6, %r4, %r5;
	st.u32 	[%SP+28], %r6;

	// Bounds guard: threads with i >= count (signed compare) skip the body.
	ld.u32 	%r7, [%SP+28];
	ld.u32 	%r8, [%SP+24];
	setp.ge.s32 	%p1, %r7, %r8;
	@%p1 bra 	LBB0_2;
	bra.uni 	LBB0_1;

LBB0_1:
	// %rd12 = (s64)i * 8, the byte offset of element i in an f64 array.
	// ld.s32 into a 64-bit register sign-extends the index.
	ld.u64 	%rd10, [%SP+0];
	ld.s32 	%rd11, [%SP+28];
	shl.b64 	%rd12, %rd11, 3;

	// %fd1 = param_0[i]
	add.s64 	%rd13, %rd10, %rd12;
	ld.f64 	%fd1, [%rd13];

	// %fd2 = param_1[i]
	ld.u64 	%rd14, [%SP+8];
	add.s64 	%rd15, %rd14, %rd12;
	ld.f64 	%fd2, [%rd15];

	// param_2[i] = %fd1 + %fd2 (round-to-nearest-even)
	add.rn.f64 	%fd3, %fd1, %fd2;
	ld.u64 	%rd16, [%SP+16];
	add.s64 	%rd17, %rd16, %rd12;
	st.f64 	[%rd17], %fd3;
	bra.uni 	LBB0_2;

LBB0_2:
	ret;
}