[llvm] AMDGPU: Fix illegal commute with frame index (PR #114497)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 31 19:29:10 PDT 2024
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/114497
In ca409892c5396fa3fbb8ea4dbf53d0e952f36d09, frame indexes started
being treated more like registers, rather than immediates. Update
the commute logic to avoid failing the verifier by moving illegal
SGPR operands in place of a frame index.
>From 00aa59ff350963bf30c2d3716a6d7855576b10ae Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 31 Oct 2024 18:33:19 -0700
Subject: [PATCH] AMDGPU: Fix illegal commute with frame index
In ca409892c5396fa3fbb8ea4dbf53d0e952f36d09, frame indexes started
being treated more like registers, rather than immediates. Update
the commute logic to avoid failing the verifier by moving illegal
SGPR operands in place of a frame index.
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 5 +-
.../AMDGPU/commute-frame-index-operand.mir | 106 ++++++++++++++++++
.../eliminate-frame-index-v-add-u32.mir | 16 +--
3 files changed, 116 insertions(+), 11 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/commute-frame-index-operand.mir
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 89a2eb4f18946b..3e2e26251bef5e 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2773,9 +2773,8 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
}
} else if (Src0.isReg() && !Src1.isReg()) {
- // src0 should always be able to support any operand type, so no need to
- // check operand legality.
- CommutedMI = swapRegAndNonRegOperand(MI, Src0, Src1);
+ if (isOperandLegal(MI, Src1Idx, &Src0))
+ CommutedMI = swapRegAndNonRegOperand(MI, Src0, Src1);
} else if (!Src0.isReg() && Src1.isReg()) {
if (isOperandLegal(MI, Src1Idx, &Src0))
CommutedMI = swapRegAndNonRegOperand(MI, Src1, Src0);
diff --git a/llvm/test/CodeGen/AMDGPU/commute-frame-index-operand.mir b/llvm/test/CodeGen/AMDGPU/commute-frame-index-operand.mir
new file mode 100644
index 00000000000000..a891d8f18e861f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/commute-frame-index-operand.mir
@@ -0,0 +1,106 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=machine-cse -verify-machineinstrs %s -o - | FileCheck --check-prefix=GCN %s
+
+# Check that invalid MIR is not produced with a frame index in a
+# commutable operand.
+
+---
+name: commute_frame_index__v_add_co_u32_e32__sgpr_fi
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 8, alignment: 4, local-offset: 0 }
+body: |
+ bb.0:
+ liveins: $sgpr4
+
+ ; GCN-LABEL: name: commute_frame_index__v_add_co_u32_e32__sgpr_fi
+ ; GCN: liveins: $sgpr4
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 killed [[COPY]], %stack.0, implicit-def dead $vcc, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]]
+ %0:sreg_32 = COPY $sgpr4
+ %1:vgpr_32 = V_ADD_CO_U32_e32 killed %0, %stack.0, implicit-def dead $vcc, implicit $exec
+ S_ENDPGM 0, implicit %1
+
+...
+
+---
+name: commute_frame_index__v_add_co_u32_e64__fi_sgpr
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 8, alignment: 4, local-offset: 0 }
+body: |
+ bb.0:
+ liveins: $sgpr4
+
+ ; GCN-LABEL: name: commute_frame_index__v_add_co_u32_e64__fi_sgpr
+ ; GCN: liveins: $sgpr4
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr4
+ ; GCN-NEXT: [[V_ADD_CO_U32_e64_:%[0-9]+]]:vgpr_32, [[V_ADD_CO_U32_e64_1:%[0-9]+]]:sreg_64 = V_ADD_CO_U32_e64 %stack.0, killed [[COPY]], 0, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e64_]]
+ %0:sreg_32 = COPY $sgpr4
+ %1:vgpr_32, %2:sreg_64 = V_ADD_CO_U32_e64 %stack.0, killed %0, 0, implicit $exec
+ S_ENDPGM 0, implicit %1
+
+...
+
+---
+name: commute_frame_index__v_add_co_u32__vgpr_fi
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 8, alignment: 4, local-offset: 0 }
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: commute_frame_index__v_add_co_u32__vgpr_fi
+ ; GCN: liveins: $vgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %stack.0, killed [[COPY]], implicit-def dead $vcc, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]]
+ %0:vgpr_32 = COPY $vgpr0
+ %1:vgpr_32 = V_ADD_CO_U32_e32 killed %0, %stack.0, implicit-def dead $vcc, implicit $exec
+ S_ENDPGM 0, implicit %1
+
+...
+
+---
+name: commute_frame_index__v_add_co_u32__fi_vgpr
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 8, alignment: 4, local-offset: 0 }
+body: |
+ bb.0.entry:
+ liveins: $vgpr0
+
+ ; GCN-LABEL: name: commute_frame_index__v_add_co_u32__fi_vgpr
+ ; GCN: liveins: $vgpr0
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; GCN-NEXT: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %stack.0, killed [[COPY]], implicit-def dead $vcc, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]]
+ %0:vgpr_32 = COPY $vgpr0
+ %1:vgpr_32 = V_ADD_CO_U32_e32 %stack.0, killed %0, implicit-def dead $vcc, implicit $exec
+ S_ENDPGM 0, implicit %1
+
+...
+
+---
+name: commute_frame_index__v_add_co_u32_e32__fi_fi
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 8, alignment: 4, local-offset: 0 }
+ - { id: 1, size: 8, alignment: 4, local-offset: 0 }
+body: |
+ bb.0:
+
+ ; GCN-LABEL: name: commute_frame_index__v_add_co_u32_e32__fi_fi
+ ; GCN: [[V_ADD_CO_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_CO_U32_e32 %stack.0, %stack.1, implicit-def dead $vcc, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ADD_CO_U32_e32_]]
+ %0:vgpr_32 = V_ADD_CO_U32_e32 %stack.0, %stack.1, implicit-def dead $vcc, implicit $exec
+ S_ENDPGM 0, implicit %0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir
index 388bbc26bbea76..7c25e5fd9c6645 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-u32.mir
@@ -1170,8 +1170,8 @@ body: |
; MUBUF-NEXT: {{ $}}
; MUBUF-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
; MUBUF-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
- ; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec
- ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 72, killed $vgpr1, implicit $exec
+ ; MUBUF-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+ ; MUBUF-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr8, killed $vgpr1, implicit $exec
; MUBUF-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8
;
; MUBUFW32-LABEL: name: v_add_u32_e32__kernel_fi_offset72__sgpr_live_after
@@ -1179,22 +1179,22 @@ body: |
; MUBUFW32-NEXT: {{ $}}
; MUBUFW32-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $noreg, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
; MUBUFW32-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
- ; MUBUFW32-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec
- ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 72, killed $vgpr1, implicit $exec
+ ; MUBUFW32-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+ ; MUBUFW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr8, killed $vgpr1, implicit $exec
; MUBUFW32-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8
;
; FLATSCRW64-LABEL: name: v_add_u32_e32__kernel_fi_offset72__sgpr_live_after
; FLATSCRW64: liveins: $sgpr8
; FLATSCRW64-NEXT: {{ $}}
- ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec
- ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 72, killed $vgpr1, implicit $exec
+ ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+ ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr8, killed $vgpr1, implicit $exec
; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8
;
; FLATSCRW32-LABEL: name: v_add_u32_e32__kernel_fi_offset72__sgpr_live_after
; FLATSCRW32: liveins: $sgpr8
; FLATSCRW32-NEXT: {{ $}}
- ; FLATSCRW32-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr8, implicit $exec
- ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 72, killed $vgpr1, implicit $exec
+ ; FLATSCRW32-NEXT: $vgpr1 = V_MOV_B32_e32 72, implicit $exec
+ ; FLATSCRW32-NEXT: renamable $vgpr0 = V_ADD_U32_e32 $sgpr8, killed $vgpr1, implicit $exec
; FLATSCRW32-NEXT: SI_RETURN implicit $vgpr0, implicit $sgpr8
renamable $vgpr0 = V_ADD_U32_e32 renamable $sgpr8, %stack.1, implicit $exec
SI_RETURN implicit $vgpr0, implicit $sgpr8
More information about the llvm-commits
mailing list