[llvm] [AMDGPU] Simplify commuted operand handling. NFCI. (PR #71965)
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 10 11:11:39 PST 2023
https://github.com/jayfoad updated https://github.com/llvm/llvm-project/pull/71965
From cd283f02bb24415b849b0cbf3bdbf1f9cf71711b Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Fri, 10 Nov 2023 17:43:14 +0000
Subject: [PATCH 1/2] [AMDGPU] Simplify commuted operand handling. NFCI.
SIInstrInfo::commuteInstructionImpl should accept the indices to commute
in either order. This simplifies SIFoldOperands::tryAddToFoldList, which
no longer needs OtherIdx, CommuteIdx0 or CommuteIdx1.
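For illustration, a minimal standalone sketch of the pattern this patch
adopts (hypothetical names, not the actual SIInstrInfo code): the callee
normalizes the index pair with std::swap, so callers may pass the two
indices in either order and skip the match-OpNo-against-both-indices
dance that tryAddToFoldList previously did.

  #include <cassert>
  #include <utility>
  #include <vector>

  // Hypothetical stand-in for commuteInstructionImpl: sort the index pair
  // internally so the caller may pass (OpNo, OtherIdx) in either order.
  static void commuteOperands(std::vector<int> &Operands, unsigned Idx0,
                              unsigned Idx1) {
    if (Idx0 > Idx1)
      std::swap(Idx0, Idx1); // the normalization this patch adds
    assert(Idx1 < Operands.size() && "operand index out of range");
    std::swap(Operands[Idx0], Operands[Idx1]);
  }

  int main() {
    std::vector<int> Ops{10, 20, 30};
    commuteOperands(Ops, 2, 1); // indices deliberately out of order
    assert(Ops[1] == 30 && Ops[2] == 20);
  }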
---
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 65 +++++++++--------------
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 3 ++
2 files changed, 28 insertions(+), 40 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 1ebfa297f4fc339..3f7126c156c7365 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -429,63 +429,48 @@ bool SIFoldOperands::tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
if (isUseMIInFoldList(FoldList, MI))
return false;
- unsigned CommuteOpNo = OpNo;
-
// Operand is not legal, so try to commute the instruction to
// see if this makes it possible to fold.
- unsigned CommuteIdx0 = TargetInstrInfo::CommuteAnyOperandIndex;
- unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
- bool CanCommute = TII->findCommutedOpIndices(*MI, CommuteIdx0, CommuteIdx1);
-
- if (CanCommute) {
- if (CommuteIdx0 == OpNo)
- CommuteOpNo = CommuteIdx1;
- else if (CommuteIdx1 == OpNo)
- CommuteOpNo = CommuteIdx0;
- }
-
+ unsigned CommuteOpNo = TargetInstrInfo::CommuteAnyOperandIndex;
+ bool CanCommute = TII->findCommutedOpIndices(*MI, OpNo, CommuteOpNo);
+ if (!CanCommute)
+ return false;
// One of operands might be an Imm operand, and OpNo may refer to it after
// the call of commuteInstruction() below. Such situations are avoided
// here explicitly as OpNo must be a register operand to be a candidate
// for memory folding.
- if (CanCommute && (!MI->getOperand(CommuteIdx0).isReg() ||
- !MI->getOperand(CommuteIdx1).isReg()))
+ if (!MI->getOperand(OpNo).isReg() || !MI->getOperand(CommuteOpNo).isReg())
return false;
- if (!CanCommute ||
- !TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1))
+ if (!TII->commuteInstruction(*MI, false, OpNo, CommuteOpNo))
return false;
+ int Op32 = -1;
if (!TII->isOperandLegal(*MI, CommuteOpNo, OpToFold)) {
- if ((Opc == AMDGPU::V_ADD_CO_U32_e64 ||
- Opc == AMDGPU::V_SUB_CO_U32_e64 ||
- Opc == AMDGPU::V_SUBREV_CO_U32_e64) && // FIXME
- (OpToFold->isImm() || OpToFold->isFI() || OpToFold->isGlobal())) {
-
- // Verify the other operand is a VGPR, otherwise we would violate the
- // constant bus restriction.
- unsigned OtherIdx = CommuteOpNo == CommuteIdx0 ? CommuteIdx1 : CommuteIdx0;
- MachineOperand &OtherOp = MI->getOperand(OtherIdx);
- if (!OtherOp.isReg() ||
- !TII->getRegisterInfo().isVGPR(*MRI, OtherOp.getReg()))
- return false;
-
- assert(MI->getOperand(1).isDef());
+ if ((Opc != AMDGPU::V_ADD_CO_U32_e64 &&
+ Opc != AMDGPU::V_SUB_CO_U32_e64 &&
+ Opc != AMDGPU::V_SUBREV_CO_U32_e64) || // FIXME
+ (!OpToFold->isImm() && !OpToFold->isFI() && !OpToFold->isGlobal())) {
+ TII->commuteInstruction(*MI, false, OpNo, CommuteOpNo);
+ return false;
+ }
- // Make sure to get the 32-bit version of the commuted opcode.
- unsigned MaybeCommutedOpc = MI->getOpcode();
- int Op32 = AMDGPU::getVOPe32(MaybeCommutedOpc);
+ // Verify the other operand is a VGPR, otherwise we would violate the
+ // constant bus restriction.
+ MachineOperand &OtherOp = MI->getOperand(OpNo);
+ if (!OtherOp.isReg() ||
+ !TII->getRegisterInfo().isVGPR(*MRI, OtherOp.getReg()))
+ return false;
- appendFoldCandidate(FoldList, MI, CommuteOpNo, OpToFold, true, Op32);
- return true;
- }
+ assert(MI->getOperand(1).isDef());
- TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1);
- return false;
+ // Make sure to get the 32-bit version of the commuted opcode.
+ unsigned MaybeCommutedOpc = MI->getOpcode();
+ Op32 = AMDGPU::getVOPe32(MaybeCommutedOpc);
}
- appendFoldCandidate(FoldList, MI, CommuteOpNo, OpToFold, true);
+ appendFoldCandidate(FoldList, MI, CommuteOpNo, OpToFold, true, Op32);
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 0b11fd5f757cbe0..027b695c3bb1a74 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2692,6 +2692,9 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
if (CommutedOpcode == -1)
return nullptr;
+ if (Src0Idx > Src1Idx)
+ std::swap(Src0Idx, Src1Idx);
+
assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) ==
static_cast<int>(Src0Idx) &&
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) ==
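To see why the swap belongs right before these asserts: the asserts
expect Src0Idx and Src1Idx to name src0 and src1 respectively, while
callers such as the updated tryAddToFoldList may now pass the pair in
either order. A hedged illustration with hypothetical operand indices
(src0 at index 1, src1 at index 2):

  #include <cassert>
  #include <utility>

  int main() {
    // Hypothetical layout: src0 at operand index 1, src1 at index 2.
    unsigned Src0Idx = 2, Src1Idx = 1; // caller passed the pair "backwards"
    if (Src0Idx > Src1Idx)
      std::swap(Src0Idx, Src1Idx);
    assert(Src0Idx == 1 && Src1Idx == 2); // canonical order restored
  }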
From 17a0e06a150a14fa278b1785966d5b341f602de3 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Fri, 10 Nov 2023 19:11:23 +0000
Subject: [PATCH 2/2] clang-format
---
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 3f7126c156c7365..3ee4b99e69cddea 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -448,8 +448,7 @@ bool SIFoldOperands::tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
int Op32 = -1;
if (!TII->isOperandLegal(*MI, CommuteOpNo, OpToFold)) {
- if ((Opc != AMDGPU::V_ADD_CO_U32_e64 &&
- Opc != AMDGPU::V_SUB_CO_U32_e64 &&
+ if ((Opc != AMDGPU::V_ADD_CO_U32_e64 && Opc != AMDGPU::V_SUB_CO_U32_e64 &&
Opc != AMDGPU::V_SUBREV_CO_U32_e64) || // FIXME
(!OpToFold->isImm() && !OpToFold->isFI() && !OpToFold->isGlobal())) {
TII->commuteInstruction(*MI, false, OpNo, CommuteOpNo);