[llvm] new test (PR #127594)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 18 00:32:46 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
Add a new test.
AMDGPU: Correct legal literal operand logic for multiple uses
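The same literal can be used multiple times in an instruction,
not just once. We were not tracking which literal value had already
been used, so each repeated use of the same literal was counted against
the literal limit again. Track the used literal and skip any later
operand that is identical to it.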
This helps avoid regressions in a future patch.
---
Full diff: https://github.com/llvm/llvm-project/pull/127594.diff
4 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+13-2)
- (modified) llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir (+8-16)
- (added) llvm/test/CodeGen/AMDGPU/fold-literal-multiple-gfx10.mir (+66)
- (modified) llvm/test/CodeGen/AMDGPU/fold-vgpr-copy.mir (+1-2)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 4ee5ebd7681b8..33bec4a561622 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -5918,11 +5918,16 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
if (!MO)
MO = &MI.getOperand(OpIdx);
+ const MachineOperand *UsedLiteral = nullptr;
+
int ConstantBusLimit = ST.getConstantBusLimit(MI.getOpcode());
int LiteralLimit = !isVOP3(MI) || ST.hasVOP3Literal() ? 1 : 0;
if (isVALU(MI) && usesConstantBus(MRI, *MO, OpInfo)) {
- if (!MO->isReg() && !isInlineConstant(*MO, OpInfo) && !LiteralLimit--)
- return false;
+ if (!MO->isReg() && !isInlineConstant(*MO, OpInfo)) {
+ UsedLiteral = MO;
+ if (!LiteralLimit--)
+ return false;
+ }
SmallDenseSet<RegSubRegPair> SGPRsUsed;
if (MO->isReg())
@@ -5943,6 +5948,12 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
}
} else if (AMDGPU::isSISrcOperand(InstDesc, i) &&
!isInlineConstant(Op, InstDesc.operands()[i])) {
+ // The same literal may be used multiple times.
+ if (!UsedLiteral)
+ UsedLiteral = &Op;
+ else if (UsedLiteral->isIdenticalTo(Op))
+ continue;
+
if (!LiteralLimit--)
return false;
if (--ConstantBusLimit <= 0)
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir
index 12e8d24cb3675..ade7b4266e9e6 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir
@@ -2162,8 +2162,7 @@ body: |
; GFX11-NEXT: $sgpr5 = frame-setup COPY $sgpr34
; GFX11-NEXT: $sgpr34 = frame-setup COPY $sgpr32
; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
- ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
- ; GFX11-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
+ ; GFX11-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 $sgpr33, 12352, 0, implicit $exec
; GFX11-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
; GFX11-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
; GFX11-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
@@ -2178,8 +2177,7 @@ body: |
; GFX12-NEXT: $sgpr5 = frame-setup COPY $sgpr34
; GFX12-NEXT: $sgpr34 = frame-setup COPY $sgpr32
; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc
- ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
- ; GFX12-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 4160, killed $vgpr1, 0, implicit $exec
+ ; GFX12-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 $sgpr33, 4160, 0, implicit $exec
; GFX12-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
; GFX12-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
; GFX12-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
@@ -2315,8 +2313,7 @@ body: |
; GFX11-NEXT: $sgpr5 = frame-setup COPY $sgpr34
; GFX11-NEXT: $sgpr34 = frame-setup COPY $sgpr32
; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
- ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
- ; GFX11-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
+ ; GFX11-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 $sgpr33, 12352, 0, implicit $exec
; GFX11-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec
; GFX11-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
; GFX11-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
@@ -2332,8 +2329,7 @@ body: |
; GFX12-NEXT: $sgpr5 = frame-setup COPY $sgpr34
; GFX12-NEXT: $sgpr34 = frame-setup COPY $sgpr32
; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc
- ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
- ; GFX12-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 4160, killed $vgpr1, 0, implicit $exec
+ ; GFX12-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 $sgpr33, 4160, 0, implicit $exec
; GFX12-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec
; GFX12-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
; GFX12-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
@@ -2469,8 +2465,7 @@ body: |
; GFX11-NEXT: $sgpr5 = frame-setup COPY $sgpr34
; GFX11-NEXT: $sgpr34 = frame-setup COPY $sgpr32
; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
- ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
- ; GFX11-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
+ ; GFX11-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $sgpr33, 12352, 0, implicit $exec
; GFX11-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
; GFX11-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
; GFX11-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
@@ -2485,8 +2480,7 @@ body: |
; GFX12-NEXT: $sgpr5 = frame-setup COPY $sgpr34
; GFX12-NEXT: $sgpr34 = frame-setup COPY $sgpr32
; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc
- ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
- ; GFX12-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 4160, killed $vgpr1, 0, implicit $exec
+ ; GFX12-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $sgpr33, 4160, 0, implicit $exec
; GFX12-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
; GFX12-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
; GFX12-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
@@ -2622,8 +2616,7 @@ body: |
; GFX11-NEXT: $sgpr5 = frame-setup COPY $sgpr34
; GFX11-NEXT: $sgpr34 = frame-setup COPY $sgpr32
; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
- ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
- ; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
+ ; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr33, 12352, 0, implicit $exec
; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec
; GFX11-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
; GFX11-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
@@ -2639,8 +2632,7 @@ body: |
; GFX12-NEXT: $sgpr5 = frame-setup COPY $sgpr34
; GFX12-NEXT: $sgpr34 = frame-setup COPY $sgpr32
; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc
- ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
- ; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 4160, killed $vgpr1, 0, implicit $exec
+ ; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr33, 4160, 0, implicit $exec
; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec
; GFX12-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
; GFX12-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
diff --git a/llvm/test/CodeGen/AMDGPU/fold-literal-multiple-gfx10.mir b/llvm/test/CodeGen/AMDGPU/fold-literal-multiple-gfx10.mir
new file mode 100644
index 0000000000000..e71516e74f17e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fold-literal-multiple-gfx10.mir
@@ -0,0 +1,66 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -run-pass=si-fold-operands -o - %s | FileCheck %s
+
+# The same literal may be used multiple times in different operands,
+# as long as it is the same value.
+
+---
+name: fold_multiple_same_literal_use_0
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: fold_multiple_same_literal_use_0
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: [[V_DIV_SCALE_F32_e64_:%[0-9]+]]:vgpr_32, [[V_DIV_SCALE_F32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_DIV_SCALE_F32_e64 0, 1178657792, 0, 1178657792, 0, 1178657792, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_DIV_SCALE_F32_e64_]]
+ %0:vgpr_32 = COPY $vgpr0
+ %1:sreg_32 = S_MOV_B32 1178657792
+ %2:vgpr_32 = COPY %1
+ %3:vgpr_32, %4:sreg_32_xm0_xexec = V_DIV_SCALE_F32_e64 0, %2, 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec
+ S_ENDPGM 0, implicit %3
+...
+
+---
+name: fold_multiple_same_literal_use_1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: fold_multiple_same_literal_use_1
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: [[V_DIV_SCALE_F32_e64_:%[0-9]+]]:vgpr_32, [[V_DIV_SCALE_F32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_DIV_SCALE_F32_e64 0, 1178657792, 0, 1178657792, 0, 1178657792, 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_DIV_SCALE_F32_e64_]]
+ %0:vgpr_32 = COPY $vgpr0
+ %1:sreg_32 = S_MOV_B32 1178657792
+ %2:vgpr_32 = COPY %1
+ %3:vgpr_32, %4:sreg_32_xm0_xexec = V_DIV_SCALE_F32_e64 0, 1178657792, 0, 1178657792, 0, %2, 0, 0, implicit $mode, implicit $exec
+ S_ENDPGM 0, implicit %3
+...
+
+---
+name: no_fold_multiple_same_literal_different_value
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+
+ ; CHECK-LABEL: name: no_fold_multiple_same_literal_different_value
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1178657793, implicit $exec
+ ; CHECK-NEXT: [[V_DIV_SCALE_F32_e64_:%[0-9]+]]:vgpr_32, [[V_DIV_SCALE_F32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_DIV_SCALE_F32_e64 0, 1178657792, 0, 1178657792, 0, [[V_MOV_B32_e32_]], 0, 0, implicit $mode, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_DIV_SCALE_F32_e64_]]
+ %0:vgpr_32 = COPY $vgpr0
+ %1:sreg_32 = S_MOV_B32 1178657793
+ %2:vgpr_32 = COPY %1
+ %3:vgpr_32, %4:sreg_32_xm0_xexec = V_DIV_SCALE_F32_e64 0, 1178657792, 0, 1178657792, 0, %2, 0, 0, implicit $mode, implicit $exec
+ S_ENDPGM 0, implicit %3
+...
diff --git a/llvm/test/CodeGen/AMDGPU/fold-vgpr-copy.mir b/llvm/test/CodeGen/AMDGPU/fold-vgpr-copy.mir
index 268a8a4783d24..edd5d0a119e5f 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-vgpr-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-vgpr-copy.mir
@@ -55,8 +55,7 @@ body: |
# GCN-LABEL: name: fma_sgpr_sgpr_use
# GCN: %0:sgpr_32 = IMPLICIT_DEF
-# GCN-NEXT: %2:vgpr_32 = V_MOV_B32_e32 1234567, implicit $exec
-# GCN-NEXT: %3:vgpr_32 = V_FMAC_F32_e64 0, %0, 0, 1234567, 0, %2, 0, 0, implicit $mode, implicit $exec
+# GCN: %3:vgpr_32 = V_FMA_F32_e64 0, %0, 0, 1234567, 0, 1234567, 0, 0, implicit $mode, implicit $exec
---
name: fma_sgpr_sgpr_use
body: |
``````````
</details>
https://github.com/llvm/llvm-project/pull/127594
More information about the llvm-commits mailing list