[llvm-branch-commits] [llvm] AMDGPU: Remove redundant operand folding checks (PR #140587)

Wed May 21 03:01:13 PDT 2025

https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/140587

>From 82fd0c72c28b7bb3694d58fe4b9b885ec320d914 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Mon, 19 May 2025 20:02:54 +0200
Subject: [PATCH 1/2] AMDGPU: Remove redundant operand folding checks

This was pre-filtering out a specific situation from being
added to the fold candidate list. The operand legality will
ultimately be checked with isOperandLegal before the fold is
performed, so I don't see the plus in pre-filtering this one
case.
---
 llvm/lib/Target/AMDGPU/SIFoldOperands.cpp     |  18 ----
 .../AMDGPU/fold-operands-frame-index.mir      | 101 ++++++++++++++++++
 2 files changed, 101 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index b62230d4dc28c..9bbc8e75fd31b 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -778,24 +778,6 @@ bool SIFoldOperandsImpl::tryAddToFoldList(
       return true;
   }
 
-  // Check the case where we might introduce a second constant operand to a
-  // scalar instruction
-  if (TII->isSALU(MI->getOpcode())) {
-    const MCInstrDesc &InstDesc = MI->getDesc();
-    const MCOperandInfo &OpInfo = InstDesc.operands()[OpNo];
-
-    // Fine if the operand can be encoded as an inline constant
-    if (!OpToFold->isReg() && !TII->isInlineConstant(*OpToFold, OpInfo)) {
-      // Otherwise check for another constant
-      for (unsigned i = 0, e = InstDesc.getNumOperands(); i != e; ++i) {
-        auto &Op = MI->getOperand(i);
-        if (OpNo != i && !Op.isReg() &&
-            !TII->isInlineConstant(Op, InstDesc.operands()[i]))
-          return false;
-      }
-    }
-  }
-
   appendFoldCandidate(FoldList, MI, OpNo, OpToFold);
   return true;
 }
diff --git a/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir b/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir
index 4417f205646ee..7fad2f466bc9f 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir
@@ -573,3 +573,104 @@ body:             |
     S_ENDPGM 0, implicit %2
 
 ...
+
+---
+name:            no_fold_multiple_fi_s_cselect_b32
+tracksRegLiveness: true
+stack:
+  - { id: 0, size: 64, alignment: 4 }
+  - { id: 1, size: 32, alignment: 4 }
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: no_fold_multiple_fi_s_cselect_b32
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.1
+    ; CHECK-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 killed [[S_MOV_B32_]], %stack.0, implicit undef $scc
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B32_]]
+    %0:sreg_32 = S_MOV_B32 %stack.0
+    %1:sreg_32 = S_MOV_B32 %stack.1
+    %2:sreg_32 = S_CSELECT_B32 killed %1, killed %0, implicit undef $scc
+    S_ENDPGM 0, implicit %2
+
+...
+
+---
+name:            no_fold_multiple_fi_v_cndmask_b32_e64
+tracksRegLiveness: true
+stack:
+  - { id: 0, size: 64, alignment: 4 }
+  - { id: 1, size: 32, alignment: 4 }
+body:             |
+  bb.0:
+    liveins: $sgpr8_sgpr9
+    ; GFX9-LABEL: name: no_fold_multiple_fi_v_cndmask_b32_e64
+    ; GFX9: liveins: $sgpr8_sgpr9
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr8_sgpr9
+    ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+    ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.1, implicit $exec
+    ; GFX9-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[V_MOV_B32_e32_]], 0, killed [[V_MOV_B32_e32_1]], [[COPY]], implicit $exec
+    ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
+    ;
+    ; GFX10-LABEL: name: no_fold_multiple_fi_v_cndmask_b32_e64
+    ; GFX10: liveins: $sgpr8_sgpr9
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr8_sgpr9
+    ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.1, implicit $exec
+    ; GFX10-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, %stack.0, 0, killed [[V_MOV_B32_e32_]], [[COPY]], implicit $exec
+    ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
+    ;
+    ; GFX12-LABEL: name: no_fold_multiple_fi_v_cndmask_b32_e64
+    ; GFX12: liveins: $sgpr8_sgpr9
+    ; GFX12-NEXT: {{  $}}
+    ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr8_sgpr9
+    ; GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.1, implicit $exec
+    ; GFX12-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, %stack.0, 0, killed [[V_MOV_B32_e32_]], [[COPY]], implicit $exec
+    ; GFX12-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
+    %0:sreg_64_xexec = COPY $sgpr8_sgpr9
+    %1:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_e32 %stack.1, implicit $exec
+    %3:vgpr_32 = V_CNDMASK_B32_e64 0, killed %1, 0, killed %2, %0, implicit $exec
+    S_ENDPGM 0, implicit %3
+
+...
+
+---
+name:            no_fold_multiple_fi_v_cndmask_b32_e32
+tracksRegLiveness: true
+stack:
+  - { id: 0, size: 64, alignment: 4 }
+  - { id: 1, size: 32, alignment: 4 }
+body:             |
+  bb.0:
+    liveins: $sgpr8_sgpr9
+    ; GFX9-LABEL: name: no_fold_multiple_fi_v_cndmask_b32_e32
+    ; GFX9: liveins: $sgpr8_sgpr9
+    ; GFX9-NEXT: {{  $}}
+    ; GFX9-NEXT: $vcc = COPY $sgpr8_sgpr9
+    ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+    ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.1, implicit $exec
+    ; GFX9-NEXT: [[V_CNDMASK_B32_e32_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e32 killed [[V_MOV_B32_e32_]], killed [[V_MOV_B32_e32_1]], implicit $vcc, implicit $exec
+    ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e32_]]
+    ;
+    ; GFX10-LABEL: name: no_fold_multiple_fi_v_cndmask_b32_e32
+    ; GFX10: liveins: $sgpr8_sgpr9
+    ; GFX10-NEXT: {{  $}}
+    ; GFX10-NEXT: $vcc = COPY $sgpr8_sgpr9
+    ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.1, implicit $exec
+    ; GFX10-NEXT: [[V_CNDMASK_B32_e32_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e32 %stack.0, killed [[V_MOV_B32_e32_]], implicit $vcc, implicit $exec
+    ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e32_]]
+    ;
+    ; GFX12-LABEL: name: no_fold_multiple_fi_v_cndmask_b32_e32
+    ; GFX12: liveins: $sgpr8_sgpr9
+    ; GFX12-NEXT: {{  $}}
+    ; GFX12-NEXT: $vcc = COPY $sgpr8_sgpr9
+    ; GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.1, implicit $exec
+    ; GFX12-NEXT: [[V_CNDMASK_B32_e32_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e32 %stack.0, killed [[V_MOV_B32_e32_]], implicit $vcc, implicit $exec
+    ; GFX12-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e32_]]
+    $vcc = COPY $sgpr8_sgpr9
+    %1:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_e32 %stack.1, implicit $exec
+    %3:vgpr_32 = V_CNDMASK_B32_e32 killed %1, killed %2, implicit $vcc, implicit $exec
+    S_ENDPGM 0, implicit %3
+
+...

>From a372f44f105ffde627409890c1a6e8d6f3696ca6 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Tue, 20 May 2025 21:28:58 +0200
Subject: [PATCH 2/2] no multiple fi folding

---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 85276bd24bcf4..6ce1a0ce34532 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6063,6 +6063,12 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
           !isInlineConstant(Op, InstDesc.operands()[i]) &&
           !Op.isIdenticalTo(*MO))
         return false;
+
+      // Do not fold a frame index into an instruction that already has a frame
+      // index. The frame index handling code doesn't handle fixing up operand
+      // constraints if there are multiple indexes.
+      if (Op.isFI() && MO->isFI())
+        return false;
     }
   } else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() &&
              isF16PseudoScalarTrans(MI.getOpcode())) {