[llvm] new test (PR #127594)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 18 00:32:17 PST 2025


https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/127594

new test

AMDGPU: Correct legal literal operand logic for multiple uses

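An instruction may use the same literal value in more than one operand,
not just once; repeated uses of an identical literal should count as a
single literal. isOperandLegal was not tracking which literal was already
in use, so it could not verify this. Track the used literal and skip
operands that are identical to it when checking the limits.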

This helps avoid regressions in a future patch.

>From c325d4bb56d859e09d3574b8198907d995273a54 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Tue, 18 Feb 2025 15:27:20 +0700
Subject: [PATCH 1/2] new test

---
 .../AMDGPU/fold-literal-multiple-gfx10.mir    | 67 +++++++++++++++++++
 1 file changed, 67 insertions(+)
 create mode 100644 llvm/test/CodeGen/AMDGPU/fold-literal-multiple-gfx10.mir

diff --git a/llvm/test/CodeGen/AMDGPU/fold-literal-multiple-gfx10.mir b/llvm/test/CodeGen/AMDGPU/fold-literal-multiple-gfx10.mir
new file mode 100644
index 0000000000000..95929b5d97f23
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fold-literal-multiple-gfx10.mir
@@ -0,0 +1,67 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -run-pass=si-fold-operands -o - %s | FileCheck %s
+
+# The same literal may be used multiple times in different operands,
+# as long as it is the same value.
+
+---
+name: fold_multiple_same_literal_use_0
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: fold_multiple_same_literal_use_0
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; CHECK-NEXT: [[V_DIV_SCALE_F32_e64_:%[0-9]+]]:vgpr_32, [[V_DIV_SCALE_F32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_DIV_SCALE_F32_e64 0, 1178657792, 0, 1178657792, 0, 1178657792, 0, 0, implicit $mode, implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_DIV_SCALE_F32_e64_]]
+    %0:vgpr_32 = COPY $vgpr0
+    %1:sreg_32 = S_MOV_B32 1178657792
+    %2:vgpr_32 = COPY %1
+    %3:vgpr_32, %4:sreg_32_xm0_xexec = V_DIV_SCALE_F32_e64 0, %2, 0, %2, 0, %2, 0, 0, implicit $mode, implicit $exec
+    S_ENDPGM 0, implicit %3
+...
+
+---
+name: fold_multiple_same_literal_use_1
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: fold_multiple_same_literal_use_1
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1178657792, implicit $exec
+    ; CHECK-NEXT: [[V_DIV_SCALE_F32_e64_:%[0-9]+]]:vgpr_32, [[V_DIV_SCALE_F32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_DIV_SCALE_F32_e64 0, 1178657792, 0, 1178657792, 0, [[V_MOV_B32_e32_]], 0, 0, implicit $mode, implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_DIV_SCALE_F32_e64_]]
+    %0:vgpr_32 = COPY $vgpr0
+    %1:sreg_32 = S_MOV_B32 1178657792
+    %2:vgpr_32 = COPY %1
+    %3:vgpr_32, %4:sreg_32_xm0_xexec = V_DIV_SCALE_F32_e64 0, 1178657792, 0, 1178657792, 0, %2, 0, 0, implicit $mode, implicit $exec
+    S_ENDPGM 0, implicit %3
+...
+
+---
+name: no_fold_multiple_same_literal_different_value
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: no_fold_multiple_same_literal_different_value
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1178657793, implicit $exec
+    ; CHECK-NEXT: [[V_DIV_SCALE_F32_e64_:%[0-9]+]]:vgpr_32, [[V_DIV_SCALE_F32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_DIV_SCALE_F32_e64 0, 1178657792, 0, 1178657792, 0, [[V_MOV_B32_e32_]], 0, 0, implicit $mode, implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_DIV_SCALE_F32_e64_]]
+    %0:vgpr_32 = COPY $vgpr0
+    %1:sreg_32 = S_MOV_B32 1178657793
+    %2:vgpr_32 = COPY %1
+    %3:vgpr_32, %4:sreg_32_xm0_xexec = V_DIV_SCALE_F32_e64 0, 1178657792, 0, 1178657792, 0, %2, 0, 0, implicit $mode, implicit $exec
+    S_ENDPGM 0, implicit %3
+...

>From ff7f2d18a46b019ee49a06c47bee317a49e1acd9 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Tue, 18 Feb 2025 15:12:12 +0700
Subject: [PATCH 2/2] AMDGPU: Correct legal literal operand logic for multiple
 uses

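An instruction may use the same literal value in more than one operand,
not just once; repeated uses of an identical literal should count as a
single literal. isOperandLegal was not tracking which literal was already
in use, so it could not verify this. Track the used literal and skip
operands that are identical to it when checking the limits.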

This helps avoid regressions in a future patch.
---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp        | 15 ++++++++++--
 .../eliminate-frame-index-v-add-co-u32.mir    | 24 +++++++------------
 .../AMDGPU/fold-literal-multiple-gfx10.mir    |  3 +--
 llvm/test/CodeGen/AMDGPU/fold-vgpr-copy.mir   |  3 +--
 4 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 4ee5ebd7681b8..33bec4a561622 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -5918,11 +5918,16 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
   if (!MO)
     MO = &MI.getOperand(OpIdx);
 
+  const MachineOperand *UsedLiteral = nullptr;
+
   int ConstantBusLimit = ST.getConstantBusLimit(MI.getOpcode());
   int LiteralLimit = !isVOP3(MI) || ST.hasVOP3Literal() ? 1 : 0;
   if (isVALU(MI) && usesConstantBus(MRI, *MO, OpInfo)) {
-    if (!MO->isReg() && !isInlineConstant(*MO, OpInfo) && !LiteralLimit--)
-      return false;
+    if (!MO->isReg() && !isInlineConstant(*MO, OpInfo)) {
+      UsedLiteral = MO;
+      if (!LiteralLimit--)
+        return false;
+    }
 
     SmallDenseSet<RegSubRegPair> SGPRsUsed;
     if (MO->isReg())
@@ -5943,6 +5948,12 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
         }
       } else if (AMDGPU::isSISrcOperand(InstDesc, i) &&
                  !isInlineConstant(Op, InstDesc.operands()[i])) {
+        // The same literal may be used multiple times.
+        if (!UsedLiteral)
+          UsedLiteral = &Op;
+        else if (UsedLiteral->isIdenticalTo(Op))
+          continue;
+
         if (!LiteralLimit--)
           return false;
         if (--ConstantBusLimit <= 0)
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir
index 12e8d24cb3675..ade7b4266e9e6 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir
@@ -2162,8 +2162,7 @@ body:             |
     ; GFX11-NEXT: $sgpr5 = frame-setup COPY $sgpr34
     ; GFX11-NEXT: $sgpr34 = frame-setup COPY $sgpr32
     ; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
-    ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
-    ; GFX11-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
+    ; GFX11-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 $sgpr33, 12352, 0, implicit $exec
     ; GFX11-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
     ; GFX11-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
     ; GFX11-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
@@ -2178,8 +2177,7 @@ body:             |
     ; GFX12-NEXT: $sgpr5 = frame-setup COPY $sgpr34
     ; GFX12-NEXT: $sgpr34 = frame-setup COPY $sgpr32
     ; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc
-    ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
-    ; GFX12-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 4160, killed $vgpr1, 0, implicit $exec
+    ; GFX12-NEXT: renamable $vgpr0, dead renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 $sgpr33, 4160, 0, implicit $exec
     ; GFX12-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
     ; GFX12-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
     ; GFX12-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
@@ -2315,8 +2313,7 @@ body:             |
     ; GFX11-NEXT: $sgpr5 = frame-setup COPY $sgpr34
     ; GFX11-NEXT: $sgpr34 = frame-setup COPY $sgpr32
     ; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
-    ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
-    ; GFX11-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
+    ; GFX11-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 $sgpr33, 12352, 0, implicit $exec
     ; GFX11-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec
     ; GFX11-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
     ; GFX11-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
@@ -2332,8 +2329,7 @@ body:             |
     ; GFX12-NEXT: $sgpr5 = frame-setup COPY $sgpr34
     ; GFX12-NEXT: $sgpr34 = frame-setup COPY $sgpr32
     ; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc
-    ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
-    ; GFX12-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 4160, killed $vgpr1, 0, implicit $exec
+    ; GFX12-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 $sgpr33, 4160, 0, implicit $exec
     ; GFX12-NEXT: renamable $vgpr0, renamable $sgpr8_sgpr9 = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec
     ; GFX12-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
     ; GFX12-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
@@ -2469,8 +2465,7 @@ body:             |
     ; GFX11-NEXT: $sgpr5 = frame-setup COPY $sgpr34
     ; GFX11-NEXT: $sgpr34 = frame-setup COPY $sgpr32
     ; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
-    ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
-    ; GFX11-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
+    ; GFX11-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $sgpr33, 12352, 0, implicit $exec
     ; GFX11-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
     ; GFX11-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
     ; GFX11-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
@@ -2485,8 +2480,7 @@ body:             |
     ; GFX12-NEXT: $sgpr5 = frame-setup COPY $sgpr34
     ; GFX12-NEXT: $sgpr34 = frame-setup COPY $sgpr32
     ; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc
-    ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
-    ; GFX12-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 4160, killed $vgpr1, 0, implicit $exec
+    ; GFX12-NEXT: renamable $vgpr0, dead renamable $vcc = V_ADD_CO_U32_e64 $sgpr33, 4160, 0, implicit $exec
     ; GFX12-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
     ; GFX12-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
     ; GFX12-NEXT: $sgpr33 = frame-destroy COPY $sgpr4
@@ -2622,8 +2616,7 @@ body:             |
     ; GFX11-NEXT: $sgpr5 = frame-setup COPY $sgpr34
     ; GFX11-NEXT: $sgpr34 = frame-setup COPY $sgpr32
     ; GFX11-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 32768, implicit-def dead $scc
-    ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
-    ; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 12352, killed $vgpr1, 0, implicit $exec
+    ; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr33, 12352, 0, implicit $exec
     ; GFX11-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec
     ; GFX11-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
     ; GFX11-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
@@ -2639,8 +2632,7 @@ body:             |
     ; GFX12-NEXT: $sgpr5 = frame-setup COPY $sgpr34
     ; GFX12-NEXT: $sgpr34 = frame-setup COPY $sgpr32
     ; GFX12-NEXT: $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24576, implicit-def dead $scc
-    ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr33, implicit $exec
-    ; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 4160, killed $vgpr1, 0, implicit $exec
+    ; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr33, 4160, 0, implicit $exec
     ; GFX12-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr0, 0, 0, implicit $exec
     ; GFX12-NEXT: $sgpr32 = frame-destroy COPY $sgpr34
     ; GFX12-NEXT: $sgpr34 = frame-destroy COPY $sgpr5
diff --git a/llvm/test/CodeGen/AMDGPU/fold-literal-multiple-gfx10.mir b/llvm/test/CodeGen/AMDGPU/fold-literal-multiple-gfx10.mir
index 95929b5d97f23..e71516e74f17e 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-literal-multiple-gfx10.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-literal-multiple-gfx10.mir
@@ -35,8 +35,7 @@ body:             |
     ; CHECK: liveins: $vgpr0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
-    ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1178657792, implicit $exec
-    ; CHECK-NEXT: [[V_DIV_SCALE_F32_e64_:%[0-9]+]]:vgpr_32, [[V_DIV_SCALE_F32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_DIV_SCALE_F32_e64 0, 1178657792, 0, 1178657792, 0, [[V_MOV_B32_e32_]], 0, 0, implicit $mode, implicit $exec
+    ; CHECK-NEXT: [[V_DIV_SCALE_F32_e64_:%[0-9]+]]:vgpr_32, [[V_DIV_SCALE_F32_e64_1:%[0-9]+]]:sreg_32_xm0_xexec = V_DIV_SCALE_F32_e64 0, 1178657792, 0, 1178657792, 0, 1178657792, 0, 0, implicit $mode, implicit $exec
     ; CHECK-NEXT: S_ENDPGM 0, implicit [[V_DIV_SCALE_F32_e64_]]
     %0:vgpr_32 = COPY $vgpr0
     %1:sreg_32 = S_MOV_B32 1178657792
diff --git a/llvm/test/CodeGen/AMDGPU/fold-vgpr-copy.mir b/llvm/test/CodeGen/AMDGPU/fold-vgpr-copy.mir
index 268a8a4783d24..edd5d0a119e5f 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-vgpr-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-vgpr-copy.mir
@@ -55,8 +55,7 @@ body:             |
 
 # GCN-LABEL: name: fma_sgpr_sgpr_use
 # GCN:      %0:sgpr_32 = IMPLICIT_DEF
-# GCN-NEXT: %2:vgpr_32 = V_MOV_B32_e32 1234567, implicit $exec
-# GCN-NEXT: %3:vgpr_32 = V_FMAC_F32_e64 0, %0, 0, 1234567, 0, %2, 0, 0, implicit $mode, implicit $exec
+# GCN: %3:vgpr_32 = V_FMA_F32_e64 0, %0, 0, 1234567, 0, 1234567, 0, 0, implicit $mode, implicit $exec
 ---
 name: fma_sgpr_sgpr_use
 body: |



More information about the llvm-commits mailing list