[llvm] [NVPTX] Improve device function byval parameter lowering (PR #129188)

Artem Belevich via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 28 11:57:47 PST 2025


================
@@ -23,38 +22,29 @@ define dso_local noundef i32 @non_kernel_function(ptr nocapture noundef readonly
 ;
 ; PTX-LABEL: non_kernel_function(
 ; PTX:       {
-; PTX-NEXT:    .local .align 16 .b8 __local_depot0[16];
-; PTX-NEXT:    .reg .b64 %SP;
-; PTX-NEXT:    .reg .b64 %SPL;
 ; PTX-NEXT:    .reg .pred %p<2>;
 ; PTX-NEXT:    .reg .b16 %rs<3>;
 ; PTX-NEXT:    .reg .b32 %r<11>;
-; PTX-NEXT:    .reg .b64 %rd<10>;
+; PTX-NEXT:    .reg .b64 %rd<8>;
 ; PTX-EMPTY:
 ; PTX-NEXT:  // %bb.0: // %entry
-; PTX-NEXT:    mov.u64 %SPL, __local_depot0;
-; PTX-NEXT:    cvta.local.u64 %SP, %SPL;
+; PTX-NEXT:    mov.b64 %rd1, non_kernel_function_param_0;
+; PTX-NEXT:    cvta.local.u64 %rd2, %rd1;
 ; PTX-NEXT:    ld.param.u8 %rs1, [non_kernel_function_param_1];
 ; PTX-NEXT:    and.b16 %rs2, %rs1, 1;
 ; PTX-NEXT:    setp.eq.b16 %p1, %rs2, 1;
-; PTX-NEXT:    add.u64 %rd1, %SP, 0;
-; PTX-NEXT:    add.u64 %rd2, %SPL, 0;
-; PTX-NEXT:    ld.param.s32 %rd3, [non_kernel_function_param_2];
-; PTX-NEXT:    ld.param.u64 %rd4, [non_kernel_function_param_0+8];
-; PTX-NEXT:    st.local.u64 [%rd2+8], %rd4;
----------------
Artem-B wrote:

I think the elimination of this unnecessary local store will be another important effect of this change. Nice.

https://github.com/llvm/llvm-project/pull/129188


More information about the llvm-commits mailing list