[llvm] [AMDGPU] Simplify commuted operand handling. NFCI. (PR #71965)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 10 09:46:16 PST 2023
llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu
Author: Jay Foad (jayfoad)
SIInstrInfo::commuteInstructionImpl should accept the indices to commute in
either order. This simplifies SIFoldOperands::tryAddToFoldList, where the
OtherIdx, CommuteIdx0 and CommuteIdx1 variables are no longer needed.
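
For orientation, the simplified control flow in tryAddToFoldList, condensed
from the diff below (error paths and the VOP32 special case omitted; this is
an excerpt for reading, not a standalone program):

```cpp
// Condensed from the patch. findCommutedOpIndices fills in CommuteOpNo
// as the partner of OpNo, in whichever order the pair comes back.
unsigned CommuteOpNo = TargetInstrInfo::CommuteAnyOperandIndex;
if (!TII->findCommutedOpIndices(*MI, OpNo, CommuteOpNo))
  return false;
// Both indices must be register operands to be fold candidates.
if (!MI->getOperand(OpNo).isReg() || !MI->getOperand(CommuteOpNo).isReg())
  return false;
// commuteInstruction now accepts the index pair in either order.
if (!TII->commuteInstruction(*MI, /*NewMI=*/false, OpNo, CommuteOpNo))
  return false;
```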
---
Full diff: https://github.com/llvm/llvm-project/pull/71965.diff
2 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIFoldOperands.cpp (+25-40)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+3)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 1ebfa297f4fc339..3f7126c156c7365 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -429,63 +429,48 @@ bool SIFoldOperands::tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
if (isUseMIInFoldList(FoldList, MI))
return false;
- unsigned CommuteOpNo = OpNo;
-
// Operand is not legal, so try to commute the instruction to
// see if this makes it possible to fold.
- unsigned CommuteIdx0 = TargetInstrInfo::CommuteAnyOperandIndex;
- unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
- bool CanCommute = TII->findCommutedOpIndices(*MI, CommuteIdx0, CommuteIdx1);
-
- if (CanCommute) {
- if (CommuteIdx0 == OpNo)
- CommuteOpNo = CommuteIdx1;
- else if (CommuteIdx1 == OpNo)
- CommuteOpNo = CommuteIdx0;
- }
-
+ unsigned CommuteOpNo = TargetInstrInfo::CommuteAnyOperandIndex;
+ bool CanCommute = TII->findCommutedOpIndices(*MI, OpNo, CommuteOpNo);
+ if (!CanCommute)
+ return false;
// One of operands might be an Imm operand, and OpNo may refer to it after
// the call of commuteInstruction() below. Such situations are avoided
// here explicitly as OpNo must be a register operand to be a candidate
// for memory folding.
- if (CanCommute && (!MI->getOperand(CommuteIdx0).isReg() ||
- !MI->getOperand(CommuteIdx1).isReg()))
+ if (!MI->getOperand(OpNo).isReg() || !MI->getOperand(CommuteOpNo).isReg())
return false;
- if (!CanCommute ||
- !TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1))
+ if (!TII->commuteInstruction(*MI, false, OpNo, CommuteOpNo))
return false;
+ int Op32 = -1;
if (!TII->isOperandLegal(*MI, CommuteOpNo, OpToFold)) {
- if ((Opc == AMDGPU::V_ADD_CO_U32_e64 ||
- Opc == AMDGPU::V_SUB_CO_U32_e64 ||
- Opc == AMDGPU::V_SUBREV_CO_U32_e64) && // FIXME
- (OpToFold->isImm() || OpToFold->isFI() || OpToFold->isGlobal())) {
-
- // Verify the other operand is a VGPR, otherwise we would violate the
- // constant bus restriction.
- unsigned OtherIdx = CommuteOpNo == CommuteIdx0 ? CommuteIdx1 : CommuteIdx0;
- MachineOperand &OtherOp = MI->getOperand(OtherIdx);
- if (!OtherOp.isReg() ||
- !TII->getRegisterInfo().isVGPR(*MRI, OtherOp.getReg()))
- return false;
-
- assert(MI->getOperand(1).isDef());
+ if ((Opc != AMDGPU::V_ADD_CO_U32_e64 &&
+ Opc != AMDGPU::V_SUB_CO_U32_e64 &&
+ Opc != AMDGPU::V_SUBREV_CO_U32_e64) || // FIXME
+ (!OpToFold->isImm() && !OpToFold->isFI() && !OpToFold->isGlobal())) {
+ TII->commuteInstruction(*MI, false, OpNo, CommuteOpNo);
+ return false;
+ }
- // Make sure to get the 32-bit version of the commuted opcode.
- unsigned MaybeCommutedOpc = MI->getOpcode();
- int Op32 = AMDGPU::getVOPe32(MaybeCommutedOpc);
+ // Verify the other operand is a VGPR, otherwise we would violate the
+ // constant bus restriction.
+ MachineOperand &OtherOp = MI->getOperand(OpNo);
+ if (!OtherOp.isReg() ||
+ !TII->getRegisterInfo().isVGPR(*MRI, OtherOp.getReg()))
+ return false;
- appendFoldCandidate(FoldList, MI, CommuteOpNo, OpToFold, true, Op32);
- return true;
- }
+ assert(MI->getOperand(1).isDef());
- TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1);
- return false;
+ // Make sure to get the 32-bit version of the commuted opcode.
+ unsigned MaybeCommutedOpc = MI->getOpcode();
+ Op32 = AMDGPU::getVOPe32(MaybeCommutedOpc);
}
- appendFoldCandidate(FoldList, MI, CommuteOpNo, OpToFold, true);
+ appendFoldCandidate(FoldList, MI, CommuteOpNo, OpToFold, true, Op32);
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 0b11fd5f757cbe0..027b695c3bb1a74 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -2692,6 +2692,9 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
if (CommutedOpcode == -1)
return nullptr;
+ if (Src0Idx > Src1Idx)
+ std::swap(Src0Idx, Src1Idx);
+
assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) ==
static_cast<int>(Src0Idx) &&
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) ==
``````````
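A note on the SIInstrInfo.cpp hunk: swapping Src0Idx and Src1Idx up front
normalizes the pair, so the pre-existing asserts that Src0Idx names src0 and
Src1Idx names src1 still hold regardless of the order the caller passed the
indices. That normalization is what lets tryAddToFoldList pass
(OpNo, CommuteOpNo) directly instead of tracking CommuteIdx0 and CommuteIdx1.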
https://github.com/llvm/llvm-project/pull/71965