[llvm-branch-commits] [llvm] AMDGPU: Remove redundant operand folding checks (PR #140587)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed May 21 03:01:13 PDT 2025
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/140587
>From 82fd0c72c28b7bb3694d58fe4b9b885ec320d914 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Mon, 19 May 2025 20:02:54 +0200
Subject: [PATCH 1/2] AMDGPU: Remove redundant operand folding checks
This was pre-filtering out a specific situation from being
added to the fold candidate list. The operand legality will
ultimately be checked with isOperandLegal before the fold is
performed, so I don't see the plus in pre-filtering this one
case.
---
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 18 ----
.../AMDGPU/fold-operands-frame-index.mir | 101 ++++++++++++++++++
2 files changed, 101 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index b62230d4dc28c..9bbc8e75fd31b 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -778,24 +778,6 @@ bool SIFoldOperandsImpl::tryAddToFoldList(
return true;
}
- // Check the case where we might introduce a second constant operand to a
- // scalar instruction
- if (TII->isSALU(MI->getOpcode())) {
- const MCInstrDesc &InstDesc = MI->getDesc();
- const MCOperandInfo &OpInfo = InstDesc.operands()[OpNo];
-
- // Fine if the operand can be encoded as an inline constant
- if (!OpToFold->isReg() && !TII->isInlineConstant(*OpToFold, OpInfo)) {
- // Otherwise check for another constant
- for (unsigned i = 0, e = InstDesc.getNumOperands(); i != e; ++i) {
- auto &Op = MI->getOperand(i);
- if (OpNo != i && !Op.isReg() &&
- !TII->isInlineConstant(Op, InstDesc.operands()[i]))
- return false;
- }
- }
- }
-
appendFoldCandidate(FoldList, MI, OpNo, OpToFold);
return true;
}
diff --git a/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir b/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir
index 4417f205646ee..7fad2f466bc9f 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-operands-frame-index.mir
@@ -573,3 +573,104 @@ body: |
S_ENDPGM 0, implicit %2
...
+
+---
+name: no_fold_multiple_fi_s_cselect_b32
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 64, alignment: 4 }
+ - { id: 1, size: 32, alignment: 4 }
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: no_fold_multiple_fi_s_cselect_b32
+ ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 %stack.1
+ ; CHECK-NEXT: [[S_CSELECT_B32_:%[0-9]+]]:sreg_32 = S_CSELECT_B32 killed [[S_MOV_B32_]], %stack.0, implicit undef $scc
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_CSELECT_B32_]]
+ %0:sreg_32 = S_MOV_B32 %stack.0
+ %1:sreg_32 = S_MOV_B32 %stack.1
+ %2:sreg_32 = S_CSELECT_B32 killed %1, killed %0, implicit undef $scc
+ S_ENDPGM 0, implicit %2
+
+...
+
+---
+name: no_fold_multiple_fi_v_cndmask_b32_e64
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 64, alignment: 4 }
+ - { id: 1, size: 32, alignment: 4 }
+body: |
+ bb.0:
+ liveins: $sgpr8_sgpr9
+ ; GFX9-LABEL: name: no_fold_multiple_fi_v_cndmask_b32_e64
+ ; GFX9: liveins: $sgpr8_sgpr9
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr8_sgpr9
+ ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.1, implicit $exec
+ ; GFX9-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[V_MOV_B32_e32_]], 0, killed [[V_MOV_B32_e32_1]], [[COPY]], implicit $exec
+ ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
+ ;
+ ; GFX10-LABEL: name: no_fold_multiple_fi_v_cndmask_b32_e64
+ ; GFX10: liveins: $sgpr8_sgpr9
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr8_sgpr9
+ ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.1, implicit $exec
+ ; GFX10-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, %stack.0, 0, killed [[V_MOV_B32_e32_]], [[COPY]], implicit $exec
+ ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
+ ;
+ ; GFX12-LABEL: name: no_fold_multiple_fi_v_cndmask_b32_e64
+ ; GFX12: liveins: $sgpr8_sgpr9
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: [[COPY:%[0-9]+]]:sreg_64_xexec = COPY $sgpr8_sgpr9
+ ; GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.1, implicit $exec
+ ; GFX12-NEXT: [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, %stack.0, 0, killed [[V_MOV_B32_e32_]], [[COPY]], implicit $exec
+ ; GFX12-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e64_]]
+ %0:sreg_64_xexec = COPY $sgpr8_sgpr9
+ %1:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_e32 %stack.1, implicit $exec
+ %3:vgpr_32 = V_CNDMASK_B32_e64 0, killed %1, 0, killed %2, %0, implicit $exec
+ S_ENDPGM 0, implicit %3
+
+...
+
+---
+name: no_fold_multiple_fi_v_cndmask_b32_e32
+tracksRegLiveness: true
+stack:
+ - { id: 0, size: 64, alignment: 4 }
+ - { id: 1, size: 32, alignment: 4 }
+body: |
+ bb.0:
+ liveins: $sgpr8_sgpr9
+ ; GFX9-LABEL: name: no_fold_multiple_fi_v_cndmask_b32_e32
+ ; GFX9: liveins: $sgpr8_sgpr9
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: $vcc = COPY $sgpr8_sgpr9
+ ; GFX9-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ ; GFX9-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.1, implicit $exec
+ ; GFX9-NEXT: [[V_CNDMASK_B32_e32_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e32 killed [[V_MOV_B32_e32_]], killed [[V_MOV_B32_e32_1]], implicit $vcc, implicit $exec
+ ; GFX9-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e32_]]
+ ;
+ ; GFX10-LABEL: name: no_fold_multiple_fi_v_cndmask_b32_e32
+ ; GFX10: liveins: $sgpr8_sgpr9
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: $vcc = COPY $sgpr8_sgpr9
+ ; GFX10-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.1, implicit $exec
+ ; GFX10-NEXT: [[V_CNDMASK_B32_e32_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e32 %stack.0, killed [[V_MOV_B32_e32_]], implicit $vcc, implicit $exec
+ ; GFX10-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e32_]]
+ ;
+ ; GFX12-LABEL: name: no_fold_multiple_fi_v_cndmask_b32_e32
+ ; GFX12: liveins: $sgpr8_sgpr9
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: $vcc = COPY $sgpr8_sgpr9
+ ; GFX12-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.1, implicit $exec
+ ; GFX12-NEXT: [[V_CNDMASK_B32_e32_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e32 %stack.0, killed [[V_MOV_B32_e32_]], implicit $vcc, implicit $exec
+ ; GFX12-NEXT: S_ENDPGM 0, implicit [[V_CNDMASK_B32_e32_]]
+ $vcc = COPY $sgpr8_sgpr9
+ %1:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_e32 %stack.1, implicit $exec
+ %3:vgpr_32 = V_CNDMASK_B32_e32 killed %1, killed %2, implicit $vcc, implicit $exec
+ S_ENDPGM 0, implicit %3
+
+...
>From a372f44f105ffde627409890c1a6e8d6f3696ca6 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Tue, 20 May 2025 21:28:58 +0200
Subject: [PATCH 2/2] no multiple fi folding
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 85276bd24bcf4..6ce1a0ce34532 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -6063,6 +6063,12 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
!isInlineConstant(Op, InstDesc.operands()[i]) &&
!Op.isIdenticalTo(*MO))
return false;
+
+ // Do not fold a frame index into an instruction that already has a frame
+ // index. The frame index handling code doesn't handle fixing up operand
+ // constraints if there are multiple indexes.
+ if (Op.isFI() && MO->isFI())
+ return false;
}
} else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() &&
isF16PseudoScalarTrans(MI.getOpcode())) {
More information about the llvm-branch-commits
mailing list