[llvm] AMDGPU: Fix illegal commute with frame index (PR #114497)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 31 19:29:40 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
In ca409892c5396fa3fbb8ea4dbf53d0e952f36d09, frame indexes started
being treated more like registers, rather than immediates. Update
the commute logic to avoid failing the verifier by moving illegal
SGPR operands in place of a frame index.
---
Full diff: https://github.com/llvm/llvm-project/pull/114497.diff
3 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+2-3)
- (added) llvm/test/CodeGen/AMDGPU/commute-frame-index-operand.mir (+106)
- (modified) llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir (+8-8)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 89a2eb4f18946b..3e2e26251bef5e 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2773,9 +2773,8 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
}
} else if (Src0.isReg() && !Src1.isReg()) {
- // src0 should always be able to support any operand type, so no need to
- // check operand legality.
- CommutedMI = swapRegAndNonRegOperand(MI, Src0, Src1);
+ if (isOperandLegal(MI, Src1Idx, &Src0))
+ CommutedMI = swapRegAndNonRegOperand(MI, Src0, Src1);
} else if (!Src0.isReg() && Src1.isReg()) {
if (isOperandLegal(MI, Src1Idx, &Src0))
CommutedMI = swapRegAndNonRegOperand(MI, Src1, Src0);
diff --git a/llvm/test/CodeGen/AMDGPU/commute-frame-index-operand.mir b/llvm/test/CodeGen/AMDGPU/commute-frame-index-operand.mir
new file mode 100644
index 00000000000000..a891d8f18e861f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/commute-frame-index-operand.mir
@@ -0,0 +1,106 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=machine-cse -verify-machineinstrs %s -o - | FileCheck --check-prefix=GCN %s
+
+# Check that invalid MIR is not produced with a frame index in a
+# commutable operand.
+
+---
+name: commute_frame_index__v_add_co_u32_e32__sgpr_fi
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 8, alignment: 4, local-offset: 0 }
+body: |
+ bb.0:
+ liveins: $sgpr4
+
+ ; GCN-LABEL: name: commute_frame_index__v_add_co_u32_e32__sgpr_fi
+ ; GCN: liveins: $sgpr4
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 killed [[COPY]], %stack.0, implicit-def dead $vcc, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]]
+ %0:sreg_32 = COPY $sgpr4
+ %1:vgpr_32 = V_ADD_CO_U32_e32 killed %0, %stack.0, implicit-def dead $vcc, implicit $exec
+ S_ENDPGM 0, implicit %1
+
+...
+
+---
+name: commute_frame_index__v_add_co_u32_e64__fi_sgpr
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 8, alignment: 4, local-offset: 0 }
+body: |
+ bb.0:
+ liveins: $sgpr4
+
+ ; GCN-LABEL: name: commute_frame_index__v_add_co_u32_e64__fi_sgpr
+ ; GCN: liveins: $sgpr4
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GCN-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 %stack.0, killed [[COPY]], 0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]]
+ %0:sreg_32 = COPY $sgpr4
+ %1:vgpr_32, %2:sreg_64 = V_ADD_CO_U32_e64 %stack.0, killed %0, 0, implicit $exec
+ S_ENDPGM 0, implicit %1
+
+...
+
+---
+name: commute_frame_index__v_add_co_u32__vgpr_fi
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 8, alignment: 4, local-offset: 0 }
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: commute_frame_index__v_add_co_u32__vgpr_fi
+ ; GCN: liveins: $vgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %stack.0, killed [[COPY]], implicit-def dead $vcc, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]]
+ %0:vgpr_32 = COPY $vgpr0
+ %1:vgpr_32 = V_ADD_CO_U32_e32 killed %0, %stack.0, implicit-def dead $vcc, implicit $exec
+ S_ENDPGM 0, implicit %1
+
+...
+
+---
+name: commute_frame_index__v_add_co_u32__fi_vgpr
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 8, alignment: 4, local-offset: 0 }
+body: |
+ bb.0.entry:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: commute_frame_index__v_add_co_u32__fi_vgpr
+ ; GCN: liveins: $vgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %stack.0, killed [[COPY]], implicit-def dead $vcc, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]]
+ %0:vgpr_32 = COPY $vgpr0
+ %1:vgpr_32 = V_ADD_CO_U32_e32 %stack.0, killed %0, implicit-def dead $vcc, implicit $exec
+ S_ENDPGM 0, implicit %1
+
+...
+
+---
+name: commute_frame_index__v_add_co_u32_e32__fi_fi
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 8, alignment: 4, local-offset: 0 }
+ - { id: 1, size: 8, alignment: 4, local-offset: 0 }
+body: |
+ bb.0:
+
+ ; GCN-LABEL: name: commute_frame_index__v_add_co_u32_e32__fi_fi
+ ; GCN: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %stack.0, %stack.1, implicit-def dead $vcc, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]]
+ %0:vgpr_32 = V_ADD_CO_U32_e32 %stack.0, %stack.1, implicit-def dead $vcc, implicit $exec
+ S_ENDPGM 0, implicit %0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir
index 388bbc26bbea76..7c25e5fd9c6645 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir
@@ -1170,8 +1170,8 @@ body: |
; MUBUF-NEXT: {{ $}}
; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
- ; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec
- ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 72, killed $vgpr1, implicit $exec
+ ; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+ ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr8, killed $vgpr1, implicit $exec
; MUBUF-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8
;
; MUBUFW32-LABEL: name: v_add_u32_e32__kernel_fi_offset72__sgpr_live_after
@@ -1179,22 +1179,22 @@ body: |
; MUBUFW32-NEXT: {{ $}}
; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
- ; MUBUFW32-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec
- ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 72, killed $vgpr1, implicit $exec
+ ; MUBUFW32-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+ ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr8, killed $vgpr1, implicit $exec
; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8
;
; FLATSCRW64-LABEL: name: v_add_u32_e32__kernel_fi_offset72__sgpr_live_after
; FLATSCRW64: liveins: $sgpr8
; FLATSCRW64-NEXT: {{ $}}
- ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec
- ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 72, killed $vgpr1, implicit $exec
+ ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr8, killed $vgpr1, implicit $exec
; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8
;
; FLATSCRW32-LABEL: name: v_add_u32_e32__kernel_fi_offset72__sgpr_live_after
; FLATSCRW32: liveins: $sgpr8
; FLATSCRW32-NEXT: {{ $}}
- ; FLATSCRW32-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec
- ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 72, killed $vgpr1, implicit $exec
+ ; FLATSCRW32-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+ ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr8, killed $vgpr1, implicit $exec
; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8
renamable $vgpr0 = V_ADD_U32_e32 renamable $sgpr8, %stack.1, implicit $exec
SI_RETURN implicit $vgpr0, implicit $sgpr8
``````````
</details>
https://github.com/llvm/llvm-project/pull/114497
More information about the llvm-commits
mailing list