[llvm] AMDGPU: Fix illegal commute with frame index (PR #114497)

via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 31 19:29:40 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

<details>
<summary>Changes</summary>

In ca409892c5396fa3fbb8ea4dbf53d0e952f36d09, frame indexes started
being treated more like registers, rather than immediates. Update
the commute logic to avoid failing the verifier by moving illegal
SGPR operands in place of a frame index.

---
Full diff: https://github.com/llvm/llvm-project/pull/114497.diff


3 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+2-3) 
- (added) llvm/test/CodeGen/AMDGPU/commute-frame-index-operand.mir (+106) 
- (modified) llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir (+8-8) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 89a2eb4f18946b..3e2e26251bef5e 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2773,9 +2773,8 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
     }
 
   } else if (Src0.isReg() && !Src1.isReg()) {
-    // src0 should always be able to support any operand type, so no need to
-    // check operand legality.
-    CommutedMI = swapRegAndNonRegOperand(MI, Src0, Src1);
+    if (isOperandLegal(MI, Src1Idx, &Src0))
+      CommutedMI = swapRegAndNonRegOperand(MI, Src0, Src1);
   } else if (!Src0.isReg() && Src1.isReg()) {
     if (isOperandLegal(MI, Src1Idx, &Src0))
       CommutedMI = swapRegAndNonRegOperand(MI, Src1, Src0);
diff --git a/llvm/test/CodeGen/AMDGPU/commute-frame-index-operand.mir b/llvm/test/CodeGen/AMDGPU/commute-frame-index-operand.mir
new file mode 100644
index 00000000000000..a891d8f18e861f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/commute-frame-index-operand.mir
@@ -0,0 +1,106 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=machine-cse -verify-machineinstrs %s -o - | FileCheck --check-prefix=GCN %s
+
+# Check that invalid MIR is not produced with a frame index in a
+# commutable operand.
+
+---
+name: commute_frame_index__v_add_co_u32_e32__sgpr_fi
+tracksRegLiveness: true
+stack:
+  - { id: 0, size: 8, alignment: 4, local-offset: 0 }
+body:             |
+  bb.0:
+    liveins: $sgpr4
+
+    ; GCN-LABEL: name: commute_frame_index__v_add_co_u32_e32__sgpr_fi
+    ; GCN: liveins: $sgpr4
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr4
+    ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 killed [[COPY]], %stack.0, implicit-def dead $vcc, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]]
+    %0:sreg_32 = COPY $sgpr4
+    %1:vgpr_32 = V_ADD_CO_U32_e32 killed %0, %stack.0, implicit-def dead $vcc, implicit $exec
+    S_ENDPGM 0, implicit %1
+
+...
+
+---
+name: commute_frame_index__v_add_co_u32_e64__fi_sgpr
+tracksRegLiveness: true
+stack:
+  - { id: 0, size: 8, alignment: 4, local-offset: 0 }
+body:             |
+  bb.0:
+    liveins: $sgpr4
+
+    ; GCN-LABEL: name: commute_frame_index__v_add_co_u32_e64__fi_sgpr
+    ; GCN: liveins: $sgpr4
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr4
+    ; GCN-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 %stack.0, killed [[COPY]], 0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]]
+    %0:sreg_32 = COPY $sgpr4
+    %1:vgpr_32, %2:sreg_64 = V_ADD_CO_U32_e64 %stack.0, killed %0, 0, implicit $exec
+    S_ENDPGM 0, implicit %1
+
+...
+
+---
+name: commute_frame_index__v_add_co_u32__vgpr_fi
+tracksRegLiveness: true
+stack:
+  - { id: 0, size: 8, alignment: 4, local-offset: 0 }
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: commute_frame_index__v_add_co_u32__vgpr_fi
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %stack.0, killed [[COPY]], implicit-def dead $vcc, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]]
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = V_ADD_CO_U32_e32 killed %0, %stack.0, implicit-def dead $vcc, implicit $exec
+    S_ENDPGM 0, implicit %1
+
+...
+
+---
+name: commute_frame_index__v_add_co_u32__fi_vgpr
+tracksRegLiveness: true
+stack:
+  - { id: 0, size: 8, alignment: 4, local-offset: 0 }
+body:             |
+  bb.0.entry:
+    liveins: $vgpr0
+
+    ; GCN-LABEL: name: commute_frame_index__v_add_co_u32__fi_vgpr
+    ; GCN: liveins: $vgpr0
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %stack.0, killed [[COPY]], implicit-def dead $vcc, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]]
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = V_ADD_CO_U32_e32 %stack.0, killed %0, implicit-def dead $vcc, implicit $exec
+    S_ENDPGM 0, implicit %1
+
+...
+
+---
+name: commute_frame_index__v_add_co_u32_e32__fi_fi
+tracksRegLiveness: true
+stack:
+  - { id: 0, size: 8, alignment: 4, local-offset: 0 }
+  - { id: 1, size: 8, alignment: 4, local-offset: 0 }
+body:             |
+  bb.0:
+
+    ; GCN-LABEL: name: commute_frame_index__v_add_co_u32_e32__fi_fi
+    ; GCN: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %stack.0, %stack.1, implicit-def dead $vcc, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]]
+    %0:vgpr_32 = V_ADD_CO_U32_e32 %stack.0, %stack.1, implicit-def dead $vcc, implicit $exec
+    S_ENDPGM 0, implicit %0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir
index 388bbc26bbea76..7c25e5fd9c6645 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir
@@ -1170,8 +1170,8 @@ body:             |
     ; MUBUF-NEXT: {{  $}}
     ; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
     ; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
-    ; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec
-    ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 72, killed $vgpr1, implicit $exec
+    ; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+    ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr8, killed $vgpr1, implicit $exec
     ; MUBUF-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8
     ;
     ; MUBUFW32-LABEL: name: v_add_u32_e32__kernel_fi_offset72__sgpr_live_after
@@ -1179,22 +1179,22 @@ body:             |
     ; MUBUFW32-NEXT: {{  $}}
     ; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
     ; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
-    ; MUBUFW32-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec
-    ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 72, killed $vgpr1, implicit $exec
+    ; MUBUFW32-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+    ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr8, killed $vgpr1, implicit $exec
     ; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8
     ;
     ; FLATSCRW64-LABEL: name: v_add_u32_e32__kernel_fi_offset72__sgpr_live_after
     ; FLATSCRW64: liveins: $sgpr8
     ; FLATSCRW64-NEXT: {{  $}}
-    ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec
-    ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 72, killed $vgpr1, implicit $exec
+    ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+    ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr8, killed $vgpr1, implicit $exec
     ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8
     ;
     ; FLATSCRW32-LABEL: name: v_add_u32_e32__kernel_fi_offset72__sgpr_live_after
     ; FLATSCRW32: liveins: $sgpr8
     ; FLATSCRW32-NEXT: {{  $}}
-    ; FLATSCRW32-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec
-    ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 72, killed $vgpr1, implicit $exec
+    ; FLATSCRW32-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+    ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr8, killed $vgpr1, implicit $exec
     ; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8
     renamable $vgpr0 = V_ADD_U32_e32 renamable $sgpr8, %stack.1, implicit $exec
     SI_RETURN implicit $vgpr0, implicit $sgpr8

``````````

</details>


https://github.com/llvm/llvm-project/pull/114497


More information about the llvm-commits mailing list