[llvm] [NVPTX] Improve copy avoidance during lowering. (PR #106423)

Artem Belevich via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 28 13:58:36 PDT 2024


================
@@ -49,41 +71,85 @@ define void @grid_const_struct(ptr byval(%struct.s) align 4 %input, ptr %out){
 define void @grid_const_escape(ptr byval(%struct.s) align 4 %input) {
 ; PTX-LABEL: grid_const_escape(
 ; PTX:       {
-; PTX-NOT:     .local
-; PTX:         cvta.param.{{.*}}
+; PTX-NEXT:    .reg .b32 %r<3>;
+; PTX-NEXT:    .reg .b64 %rd<4>;
+; PTX-EMPTY:
+; PTX-NEXT:  // %bb.0:
+; PTX-NEXT:    mov.b64 %rd1, grid_const_escape_param_0;
+; PTX-NEXT:    mov.u64 %rd2, %rd1;
+; PTX-NEXT:    cvta.param.u64 %rd3, %rd2;
+; PTX-NEXT:    { // callseq 0, 0
+; PTX-NEXT:    .param .b64 param0;
+; PTX-NEXT:    st.param.b64 [param0+0], %rd3;
+; PTX-NEXT:    .param .b32 retval0;
+; PTX-NEXT:    call.uni (retval0),
+; PTX-NEXT:    escape,
+; PTX-NEXT:    (
+; PTX-NEXT:    param0
+; PTX-NEXT:    );
+; PTX-NEXT:    ld.param.b32 %r1, [retval0+0];
+; PTX-NEXT:    } // callseq 0
+; PTX-NEXT:    ret;
 ; OPT-LABEL: define void @grid_const_escape(
-; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]]) {
-; OPT-NOT:     alloca [[STRUCT_S]]
-; OPT:         [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
-; OPT:         [[INPUT_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
-; OPT:         [[CALL:%.*]] = call i32 @escape(ptr [[INPUT_PARAM_GEN]])
-;
+; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]]) #[[ATTR0]] {
+; OPT-NEXT:    [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
+; OPT-NEXT:    [[INPUT_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
+; OPT-NEXT:    [[CALL:%.*]] = call i32 @escape(ptr [[INPUT_PARAM_GEN]])
+; OPT-NEXT:    ret void
   %call = call i32 @escape(ptr %input)
   ret void
 }
 
 define void @multiple_grid_const_escape(ptr byval(%struct.s) align 4 %input, i32 %a, ptr byval(i32) align 4 %b) {
 ; PTX-LABEL: multiple_grid_const_escape(
-; PTX:         mov.{{.*}} [[RD1:%.*]], multiple_grid_const_escape_param_0;
-; PTX:         mov.{{.*}} [[RD2:%.*]], multiple_grid_const_escape_param_2;
-; PTX:         mov.{{.*}} [[RD3:%.*]], [[RD2]];
-; PTX:         mov.{{.*}} [[RD4:%.*]], [[RD1]];
-; PTX:         cvta.param.{{.*}} [[RD5:%.*]], [[RD4]];
-; PTX:         cvta.param.{{.*}} [[RD6:%.*]], [[RD3]];
-; PTX:         {
-; PTX:         st.param.b64 [param0+0], [[RD5]];
-; PTX:         st.param.b64 [param2+0], [[RD6]];
-;
+; PTX:       {
+; PTX-NEXT:    .local .align 4 .b8 __local_depot3[4];
+; PTX-NEXT:    .reg .b64 %SP;
+; PTX-NEXT:    .reg .b64 %SPL;
+; PTX-NEXT:    .reg .b32 %r<4>;
+; PTX-NEXT:    .reg .b64 %rd<9>;
+; PTX-EMPTY:
+; PTX-NEXT:  // %bb.0:
+; PTX-NEXT:    mov.u64 %SPL, __local_depot3;
+; PTX-NEXT:    cvta.local.u64 %SP, %SPL;
+; PTX-NEXT:    mov.b64 %rd1, multiple_grid_const_escape_param_0;
+; PTX-NEXT:    mov.b64 %rd2, multiple_grid_const_escape_param_2;
+; PTX-NEXT:    mov.u64 %rd3, %rd2;
+; PTX-NEXT:    ld.param.u32 %r1, [multiple_grid_const_escape_param_1];
+; PTX-NEXT:    cvta.param.u64 %rd4, %rd3;
+; PTX-NEXT:    mov.u64 %rd5, %rd1;
+; PTX-NEXT:    cvta.param.u64 %rd6, %rd5;
+; PTX-NEXT:    add.u64 %rd7, %SP, 0;
+; PTX-NEXT:    add.u64 %rd8, %SPL, 0;
+; PTX-NEXT:    st.local.u32 [%rd8], %r1;
+; PTX-NEXT:    { // callseq 1, 0
+; PTX-NEXT:    .param .b64 param0;
+; PTX-NEXT:    st.param.b64 [param0+0], %rd6;
+; PTX-NEXT:    .param .b64 param1;
+; PTX-NEXT:    st.param.b64 [param1+0], %rd7;
+; PTX-NEXT:    .param .b64 param2;
+; PTX-NEXT:    st.param.b64 [param2+0], %rd4;
+; PTX-NEXT:    .param .b32 retval0;
+; PTX-NEXT:    call.uni (retval0),
+; PTX-NEXT:    escape3,
+; PTX-NEXT:    (
+; PTX-NEXT:    param0,
+; PTX-NEXT:    param1,
+; PTX-NEXT:    param2
+; PTX-NEXT:    );
+; PTX-NEXT:    ld.param.b32 %r2, [retval0+0];
+; PTX-NEXT:    } // callseq 1
+; PTX-NEXT:    ret;
 ; OPT-LABEL: define void @multiple_grid_const_escape(
-; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], i32 [[A:%.*]], ptr byval(i32) align 4 [[B:%.*]]) {
-; OPT:         [[B_PARAM:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(101)
-; OPT:         [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
-; OPT-NOT:     alloca %struct.s
-; OPT:         [[A_ADDR:%.*]] = alloca i32, align 4
-; OPT:         [[INPUT_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
-; OPT:         [[B_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[B_PARAM]])
+; OPT-SAME: ptr byval([[STRUCT_S:%.*]]) align 4 [[INPUT:%.*]], i32 [[A:%.*]], ptr byval(i32) align 4 [[B:%.*]]) #[[ATTR0]] {
+; OPT-NEXT:    [[B_PARAM:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(101)
+; OPT-NEXT:    [[B_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[B_PARAM]])
+; OPT-NEXT:    [[INPUT_PARAM:%.*]] = addrspacecast ptr [[INPUT]] to ptr addrspace(101)
+; OPT-NEXT:    [[INPUT_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
+; OPT-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
----------------
Artem-B wrote:

Never mind. This is the same alloca that's in the original IR. Lowering still works fine.

https://github.com/llvm/llvm-project/pull/106423


More information about the llvm-commits mailing list