[llvm-branch-commits] [llvm] AMDGPU: Fix tracking subreg defs when folding through reg_sequence (PR #140608)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon May 19 13:07:15 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu
Author: Matt Arsenault (arsenm)
Changes:
We weren't fully respecting the type at the def of an immediate vs.
the type at the use point. Refactor the folding logic to track the
value to fold, along with a subregister to apply to the underlying
value. This is similar to how PeepholeOpt tracks subregisters (though
only for pure copy-like instructions, not constants).
Fixes #139317
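
As an illustration of the underlying problem (not part of the patch): when a
wide immediate is materialized for a REG_SEQUENCE and then folded into a use
that only reads a 32-bit subregister, the folded value has to be the matching
slice of the immediate, not the whole value. Below is a minimal standalone
sketch of that slicing; the names `SubReg` and `extractSubregSliceFromImm` are
hypothetical stand-ins for the real AMDGPU subregister indices and
SIInstrInfo::extractSubregFromImm:

```cpp
// Illustrative sketch only -- not the patch. Models how a use reading a
// 64-bit immediate through a 32-bit subregister sees only one slice of it.
#include <cstdint>
#include <iostream>
#include <optional>

// Stand-ins for AMDGPU subregister indices (hypothetical names).
enum class SubReg { None, Sub0, Sub1 };

// Return the slice of Imm selected by Idx, or nullopt if the index is not
// one this sketch understands.
std::optional<int64_t> extractSubregSliceFromImm(int64_t Imm, SubReg Idx) {
  switch (Idx) {
  case SubReg::None:
    return Imm;                                                     // whole value
  case SubReg::Sub0:
    return static_cast<int64_t>(static_cast<uint32_t>(Imm));        // low 32 bits
  case SubReg::Sub1:
    return static_cast<int64_t>(static_cast<uint32_t>(Imm >> 32));  // high 32 bits
  }
  return std::nullopt;
}

int main() {
  const int64_t Imm = 0x3ff0000000000000; // double 1.0, split across two halves
  std::cout << std::hex
            << *extractSubregSliceFromImm(Imm, SubReg::Sub0) << '\n'   // 0
            << *extractSubregSliceFromImm(Imm, SubReg::Sub1) << '\n';  // 3ff00000
}
```

In the patch below, FoldableDef carries this kind of subregister index
alongside the tracked value (getWithSubReg composes indices as the fold walks
through REG_SEQUENCE users, and getEffectiveImmVal applies it via
SIInstrInfo::extractSubregFromImm).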
---
Patch is 34.59 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/140608.diff
4 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIFoldOperands.cpp (+242-164)
- (modified) llvm/test/CodeGen/AMDGPU/issue139317-bad-opsel-reg-sequence-fold.ll (+2-1)
- (modified) llvm/test/CodeGen/AMDGPU/si-fold-operands-subreg-imm.gfx942.mir (+4-4)
- (modified) llvm/test/CodeGen/AMDGPU/si-fold-operands-subreg-imm.mir (+2-2)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 9bbc8e75fd31b..eb7fb94e25f5c 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -25,52 +25,151 @@ using namespace llvm;
namespace {
-struct FoldCandidate {
- MachineInstr *UseMI;
+/// Track a value we may want to fold into downstream users, applying
+/// subregister extracts along the way.
+struct FoldableDef {
union {
- MachineOperand *OpToFold;
+ MachineOperand *OpToFold = nullptr;
uint64_t ImmToFold;
int FrameIndexToFold;
};
- int ShrinkOpcode;
- unsigned UseOpNo;
+
+ /// Register class of the originally defined value.
+ const TargetRegisterClass *DefRC = nullptr;
+
+ /// Track the original defining instruction for the value.
+ const MachineInstr *DefMI = nullptr;
+
+ /// Subregister to apply to the value at the use point.
+ unsigned DefSubReg = AMDGPU::NoSubRegister;
+
+ /// Kind of value stored in the union.
MachineOperand::MachineOperandType Kind;
- bool Commuted;
- FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp,
- bool Commuted_ = false,
- int ShrinkOp = -1) :
- UseMI(MI), OpToFold(nullptr), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
- Kind(FoldOp->getType()),
- Commuted(Commuted_) {
- if (FoldOp->isImm()) {
- ImmToFold = FoldOp->getImm();
- } else if (FoldOp->isFI()) {
- FrameIndexToFold = FoldOp->getIndex();
+ FoldableDef() = delete;
+ FoldableDef(MachineOperand &FoldOp, const TargetRegisterClass *DefRC,
+ unsigned DefSubReg = AMDGPU::NoSubRegister)
+ : DefRC(DefRC), DefSubReg(DefSubReg), Kind(FoldOp.getType()) {
+
+ if (FoldOp.isImm()) {
+ ImmToFold = FoldOp.getImm();
+ } else if (FoldOp.isFI()) {
+ FrameIndexToFold = FoldOp.getIndex();
} else {
- assert(FoldOp->isReg() || FoldOp->isGlobal());
- OpToFold = FoldOp;
+ assert(FoldOp.isReg() || FoldOp.isGlobal());
+ OpToFold = &FoldOp;
}
+
+ DefMI = FoldOp.getParent();
}
- FoldCandidate(MachineInstr *MI, unsigned OpNo, int64_t FoldImm,
- bool Commuted_ = false, int ShrinkOp = -1)
- : UseMI(MI), ImmToFold(FoldImm), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
- Kind(MachineOperand::MO_Immediate), Commuted(Commuted_) {}
+ FoldableDef(int64_t FoldImm, const TargetRegisterClass *DefRC,
+ unsigned DefSubReg = AMDGPU::NoSubRegister)
+ : ImmToFold(FoldImm), DefRC(DefRC), DefSubReg(DefSubReg),
+ Kind(MachineOperand::MO_Immediate) {}
+
+ /// Copy the current def and apply \p SubReg to the value.
+ FoldableDef getWithSubReg(const SIRegisterInfo &TRI, unsigned SubReg) const {
+ FoldableDef Copy(*this);
+ Copy.DefSubReg = TRI.composeSubRegIndices(DefSubReg, SubReg);
+ return Copy;
+ }
+
+ bool isReg() const { return Kind == MachineOperand::MO_Register; }
+
+ Register getReg() const {
+ assert(isReg());
+ return OpToFold->getReg();
+ }
+
+ unsigned getSubReg() const {
+ assert(isReg());
+ return OpToFold->getSubReg();
+ }
+
+ bool isImm() const { return Kind == MachineOperand::MO_Immediate; }
bool isFI() const {
return Kind == MachineOperand::MO_FrameIndex;
}
- bool isImm() const {
- return Kind == MachineOperand::MO_Immediate;
+ int getFI() const {
+ assert(isFI());
+ return FrameIndexToFold;
}
- bool isReg() const {
- return Kind == MachineOperand::MO_Register;
+ bool isGlobal() const { return OpToFold->isGlobal(); }
+
+ /// Return the effective immediate value defined by this instruction, after
+ /// application of any subregister extracts which may exist between the use
+ /// and def instruction.
+ std::optional<int64_t> getEffectiveImmVal() const {
+ assert(isImm());
+ return SIInstrInfo::extractSubregFromImm(ImmToFold, DefSubReg);
}
- bool isGlobal() const { return Kind == MachineOperand::MO_GlobalAddress; }
+ /// Check if it is legal to fold this effective value into \p MI's \p OpNo
+ /// operand.
+ bool isOperandLegal(const SIInstrInfo &TII, const MachineInstr &MI,
+ unsigned OpIdx) const {
+ switch (Kind) {
+ case MachineOperand::MO_Immediate: {
+ std::optional<int64_t> ImmToFold = getEffectiveImmVal();
+ if (!ImmToFold)
+ return false;
+
+ // TODO: Should verify the subregister index is supported by the class
+ // TODO: Avoid the temporary MachineOperand
+ MachineOperand TmpOp = MachineOperand::CreateImm(*ImmToFold);
+ return TII.isOperandLegal(MI, OpIdx, &TmpOp);
+ }
+ case MachineOperand::MO_FrameIndex: {
+ if (DefSubReg != AMDGPU::NoSubRegister)
+ return false;
+ MachineOperand TmpOp = MachineOperand::CreateFI(FrameIndexToFold);
+ return TII.isOperandLegal(MI, OpIdx, &TmpOp);
+ }
+ default:
+ // TODO: Try to apply DefSubReg, for global address we can extract
+ // low/high.
+ if (DefSubReg != AMDGPU::NoSubRegister)
+ return false;
+ return TII.isOperandLegal(MI, OpIdx, OpToFold);
+ }
+
+ llvm_unreachable("covered MachineOperand kind switch");
+ }
+};
+
+struct FoldCandidate {
+ MachineInstr *UseMI;
+ FoldableDef Def;
+ int ShrinkOpcode;
+ unsigned UseOpNo;
+ bool Commuted;
+
+ FoldCandidate(MachineInstr *MI, unsigned OpNo, FoldableDef Def,
+ bool Commuted_ = false, int ShrinkOp = -1)
+ : UseMI(MI), Def(Def), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
+ Commuted(Commuted_) {}
+
+ bool isFI() const { return Def.Kind == MachineOperand::MO_FrameIndex; }
+
+ int getFI() const {
+ assert(isFI());
+ return Def.FrameIndexToFold;
+ }
+
+ bool isImm() const { return Def.isImm(); }
+
+ bool isReg() const { return Def.isReg(); }
+
+ Register getReg() const {
+ assert(isReg());
+ return Def.OpToFold->getReg();
+ }
+
+ bool isGlobal() const { return Def.Kind == MachineOperand::MO_GlobalAddress; }
bool needsShrink() const { return ShrinkOpcode != -1; }
};
@@ -84,7 +183,7 @@ class SIFoldOperandsImpl {
const SIMachineFunctionInfo *MFI;
bool frameIndexMayFold(const MachineInstr &UseMI, int OpNo,
- const MachineOperand &OpToFold) const;
+ const FoldableDef &OpToFold) const;
// TODO: Just use TII::getVALUOp
unsigned convertToVALUOp(unsigned Opc, bool UseVOP3 = false) const {
@@ -112,11 +211,11 @@ class SIFoldOperandsImpl {
bool canUseImmWithOpSel(FoldCandidate &Fold) const;
- bool tryFoldImmWithOpSel(FoldCandidate &Fold) const;
+ bool tryFoldImmWithOpSel(FoldCandidate &Fold, int64_t ImmVal) const;
bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
MachineInstr *MI, unsigned OpNo,
- MachineOperand *OpToFold) const;
+ const FoldableDef &OpToFold) const;
bool isUseSafeToFold(const MachineInstr &MI,
const MachineOperand &UseMO) const;
@@ -135,12 +234,10 @@ class SIFoldOperandsImpl {
MachineOperand *SplatVal,
const TargetRegisterClass *SplatRC) const;
- bool tryToFoldACImm(MachineOperand &OpToFold, MachineInstr *UseMI,
+ bool tryToFoldACImm(const FoldableDef &OpToFold, MachineInstr *UseMI,
unsigned UseOpIdx,
SmallVectorImpl<FoldCandidate> &FoldList) const;
- void foldOperand(MachineOperand &OpToFold,
- MachineInstr *UseMI,
- int UseOpIdx,
+ void foldOperand(FoldableDef OpToFold, MachineInstr *UseMI, int UseOpIdx,
SmallVectorImpl<FoldCandidate> &FoldList,
SmallVectorImpl<MachineInstr *> &CopiesToReplace) const;
@@ -148,7 +245,7 @@ class SIFoldOperandsImpl {
bool tryConstantFoldOp(MachineInstr *MI) const;
bool tryFoldCndMask(MachineInstr &MI) const;
bool tryFoldZeroHighBits(MachineInstr &MI) const;
- bool foldInstOperand(MachineInstr &MI, MachineOperand &OpToFold) const;
+ bool foldInstOperand(MachineInstr &MI, const FoldableDef &OpToFold) const;
bool foldCopyToAGPRRegSequence(MachineInstr *CopyMI) const;
bool tryFoldFoldableCopy(MachineInstr &MI,
@@ -240,8 +337,8 @@ static unsigned macToMad(unsigned Opc) {
// TODO: Add heuristic that the frame index might not fit in the addressing mode
// immediate offset to avoid materializing in loops.
-bool SIFoldOperandsImpl::frameIndexMayFold(
- const MachineInstr &UseMI, int OpNo, const MachineOperand &OpToFold) const {
+bool SIFoldOperandsImpl::frameIndexMayFold(const MachineInstr &UseMI, int OpNo,
+ const FoldableDef &OpToFold) const {
if (!OpToFold.isFI())
return false;
@@ -381,7 +478,8 @@ bool SIFoldOperandsImpl::canUseImmWithOpSel(FoldCandidate &Fold) const {
return true;
}
-bool SIFoldOperandsImpl::tryFoldImmWithOpSel(FoldCandidate &Fold) const {
+bool SIFoldOperandsImpl::tryFoldImmWithOpSel(FoldCandidate &Fold,
+ int64_t ImmVal) const {
MachineInstr *MI = Fold.UseMI;
MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
unsigned Opcode = MI->getOpcode();
@@ -391,8 +489,8 @@ bool SIFoldOperandsImpl::tryFoldImmWithOpSel(FoldCandidate &Fold) const {
// If the literal can be inlined as-is, apply it and short-circuit the
// tests below. The main motivation for this is to avoid unintuitive
// uses of opsel.
- if (AMDGPU::isInlinableLiteralV216(Fold.ImmToFold, OpType)) {
- Old.ChangeToImmediate(Fold.ImmToFold);
+ if (AMDGPU::isInlinableLiteralV216(ImmVal, OpType)) {
+ Old.ChangeToImmediate(ImmVal);
return true;
}
@@ -415,10 +513,10 @@ bool SIFoldOperandsImpl::tryFoldImmWithOpSel(FoldCandidate &Fold) const {
MachineOperand &Mod = MI->getOperand(ModIdx);
unsigned ModVal = Mod.getImm();
- uint16_t ImmLo = static_cast<uint16_t>(
- Fold.ImmToFold >> (ModVal & SISrcMods::OP_SEL_0 ? 16 : 0));
- uint16_t ImmHi = static_cast<uint16_t>(
- Fold.ImmToFold >> (ModVal & SISrcMods::OP_SEL_1 ? 16 : 0));
+ uint16_t ImmLo =
+ static_cast<uint16_t>(ImmVal >> (ModVal & SISrcMods::OP_SEL_0 ? 16 : 0));
+ uint16_t ImmHi =
+ static_cast<uint16_t>(ImmVal >> (ModVal & SISrcMods::OP_SEL_1 ? 16 : 0));
uint32_t Imm = (static_cast<uint32_t>(ImmHi) << 16) | ImmLo;
unsigned NewModVal = ModVal & ~(SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1);
@@ -510,16 +608,20 @@ bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold) const {
assert(Old.isReg());
if (Fold.isImm() && canUseImmWithOpSel(Fold)) {
- if (tryFoldImmWithOpSel(Fold))
+ std::optional<int64_t> ImmVal = Fold.Def.getEffectiveImmVal();
+ if (!ImmVal)
+ return false;
+
+ if (tryFoldImmWithOpSel(Fold, *ImmVal))
return true;
// We can't represent the candidate as an inline constant. Try as a literal
// with the original opsel, checking constant bus limitations.
- MachineOperand New = MachineOperand::CreateImm(Fold.ImmToFold);
+ MachineOperand New = MachineOperand::CreateImm(*ImmVal);
int OpNo = MI->getOperandNo(&Old);
if (!TII->isOperandLegal(*MI, OpNo, &New))
return false;
- Old.ChangeToImmediate(Fold.ImmToFold);
+ Old.ChangeToImmediate(*ImmVal);
return true;
}
@@ -575,22 +677,34 @@ bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold) const {
MI->setDesc(TII->get(NewMFMAOpc));
MI->untieRegOperand(0);
}
- Old.ChangeToImmediate(Fold.ImmToFold);
+
+ std::optional<int64_t> ImmVal = Fold.Def.getEffectiveImmVal();
+ if (!ImmVal)
+ return false;
+
+ // TODO: Should we try to avoid adding this to the candidate list?
+ MachineOperand New = MachineOperand::CreateImm(*ImmVal);
+ int OpNo = MI->getOperandNo(&Old);
+ if (!TII->isOperandLegal(*MI, OpNo, &New))
+ return false;
+
+ Old.ChangeToImmediate(*ImmVal);
return true;
}
if (Fold.isGlobal()) {
- Old.ChangeToGA(Fold.OpToFold->getGlobal(), Fold.OpToFold->getOffset(),
- Fold.OpToFold->getTargetFlags());
+ Old.ChangeToGA(Fold.Def.OpToFold->getGlobal(),
+ Fold.Def.OpToFold->getOffset(),
+ Fold.Def.OpToFold->getTargetFlags());
return true;
}
if (Fold.isFI()) {
- Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
+ Old.ChangeToFrameIndex(Fold.getFI());
return true;
}
- MachineOperand *New = Fold.OpToFold;
+ MachineOperand *New = Fold.Def.OpToFold;
// Rework once the VS_16 register class is updated to include proper
// 16-bit SGPRs instead of 32-bit ones.
if (Old.getSubReg() == AMDGPU::lo16 && TRI->isSGPRReg(*MRI, New->getReg()))
@@ -613,26 +727,19 @@ static void appendFoldCandidate(SmallVectorImpl<FoldCandidate> &FoldList,
static void appendFoldCandidate(SmallVectorImpl<FoldCandidate> &FoldList,
MachineInstr *MI, unsigned OpNo,
- MachineOperand *FoldOp, bool Commuted = false,
- int ShrinkOp = -1) {
- appendFoldCandidate(FoldList,
- FoldCandidate(MI, OpNo, FoldOp, Commuted, ShrinkOp));
-}
-
-static void appendFoldCandidate(SmallVectorImpl<FoldCandidate> &FoldList,
- MachineInstr *MI, unsigned OpNo, int64_t ImmVal,
+ const FoldableDef &FoldOp,
bool Commuted = false, int ShrinkOp = -1) {
appendFoldCandidate(FoldList,
- FoldCandidate(MI, OpNo, ImmVal, Commuted, ShrinkOp));
+ FoldCandidate(MI, OpNo, FoldOp, Commuted, ShrinkOp));
}
bool SIFoldOperandsImpl::tryAddToFoldList(
SmallVectorImpl<FoldCandidate> &FoldList, MachineInstr *MI, unsigned OpNo,
- MachineOperand *OpToFold) const {
+ const FoldableDef &OpToFold) const {
const unsigned Opc = MI->getOpcode();
auto tryToFoldAsFMAAKorMK = [&]() {
- if (!OpToFold->isImm())
+ if (!OpToFold.isImm())
return false;
const bool TryAK = OpNo == 3;
@@ -665,8 +772,8 @@ bool SIFoldOperandsImpl::tryAddToFoldList(
return false;
};
- bool IsLegal = TII->isOperandLegal(*MI, OpNo, OpToFold);
- if (!IsLegal && OpToFold->isImm()) {
+ bool IsLegal = OpToFold.isOperandLegal(*TII, *MI, OpNo);
+ if (!IsLegal && OpToFold.isImm()) {
FoldCandidate Fold(MI, OpNo, OpToFold);
IsLegal = canUseImmWithOpSel(Fold);
}
@@ -700,7 +807,7 @@ bool SIFoldOperandsImpl::tryAddToFoldList(
}
// Special case for s_setreg_b32
- if (OpToFold->isImm()) {
+ if (OpToFold.isImm()) {
unsigned ImmOpc = 0;
if (Opc == AMDGPU::S_SETREG_B32)
ImmOpc = AMDGPU::S_SETREG_IMM32_B32;
@@ -741,10 +848,10 @@ bool SIFoldOperandsImpl::tryAddToFoldList(
return false;
int Op32 = -1;
- if (!TII->isOperandLegal(*MI, CommuteOpNo, OpToFold)) {
+ if (!OpToFold.isOperandLegal(*TII, *MI, CommuteOpNo)) {
if ((Opc != AMDGPU::V_ADD_CO_U32_e64 && Opc != AMDGPU::V_SUB_CO_U32_e64 &&
Opc != AMDGPU::V_SUBREV_CO_U32_e64) || // FIXME
- (!OpToFold->isImm() && !OpToFold->isFI() && !OpToFold->isGlobal())) {
+ (!OpToFold.isImm() && !OpToFold.isFI() && !OpToFold.isGlobal())) {
TII->commuteInstruction(*MI, false, OpNo, CommuteOpNo);
return false;
}
@@ -763,7 +870,8 @@ bool SIFoldOperandsImpl::tryAddToFoldList(
Op32 = AMDGPU::getVOPe32(MaybeCommutedOpc);
}
- appendFoldCandidate(FoldList, MI, CommuteOpNo, OpToFold, true, Op32);
+ appendFoldCandidate(FoldList, MI, CommuteOpNo, OpToFold, /*Commuted=*/true,
+ Op32);
return true;
}
@@ -938,7 +1046,7 @@ MachineOperand *SIFoldOperandsImpl::tryFoldRegSeqSplat(
}
bool SIFoldOperandsImpl::tryToFoldACImm(
- MachineOperand &OpToFold, MachineInstr *UseMI, unsigned UseOpIdx,
+ const FoldableDef &OpToFold, MachineInstr *UseMI, unsigned UseOpIdx,
SmallVectorImpl<FoldCandidate> &FoldList) const {
const MCInstrDesc &Desc = UseMI->getDesc();
if (UseOpIdx >= Desc.getNumOperands())
@@ -949,27 +1057,9 @@ bool SIFoldOperandsImpl::tryToFoldACImm(
return false;
MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
- if (OpToFold.isImm()) {
- if (unsigned UseSubReg = UseOp.getSubReg()) {
- std::optional<int64_t> SubImm =
- SIInstrInfo::extractSubregFromImm(OpToFold.getImm(), UseSubReg);
- if (!SubImm)
- return false;
-
- // TODO: Avoid the temporary MachineOperand
- MachineOperand TmpOp = MachineOperand::CreateImm(*SubImm);
- if (TII->isOperandLegal(*UseMI, UseOpIdx, &TmpOp)) {
- appendFoldCandidate(FoldList, UseMI, UseOpIdx, *SubImm);
- return true;
- }
-
- return false;
- }
-
- if (TII->isOperandLegal(*UseMI, UseOpIdx, &OpToFold)) {
- appendFoldCandidate(FoldList, UseMI, UseOpIdx, &OpToFold);
- return true;
- }
+ if (OpToFold.isImm() && OpToFold.isOperandLegal(*TII, *UseMI, UseOpIdx)) {
+ appendFoldCandidate(FoldList, UseMI, UseOpIdx, OpToFold);
+ return true;
}
// TODO: Verify the following code handles subregisters correctly.
@@ -985,11 +1075,17 @@ bool SIFoldOperandsImpl::tryToFoldACImm(
return false;
// Maybe it is just a COPY of an immediate itself.
+
+ // FIXME: Remove this handling. There is already special case folding of
+ // immediate into copy in foldOperand. This is looking for the def of the
+ // value the folding started from in the first place.
MachineInstr *Def = MRI->getVRegDef(UseReg);
if (Def && TII->isFoldableCopy(*Def)) {
MachineOperand &DefOp = Def->getOperand(1);
if (DefOp.isImm() && TII->isOperandLegal(*UseMI, UseOpIdx, &DefOp)) {
- appendFoldCandidate(FoldList, UseMI, UseOpIdx, &DefOp);
+ FoldableDef FoldableImm(DefOp.getImm(), OpToFold.DefRC,
+ OpToFold.DefSubReg);
+ appendFoldCandidate(FoldList, UseMI, UseOpIdx, FoldableImm);
return true;
}
}
@@ -998,7 +1094,7 @@ bool SIFoldOperandsImpl::tryToFoldACImm(
}
void SIFoldOperandsImpl::foldOperand(
- MachineOperand &OpToFold, MachineInstr *UseMI, int UseOpIdx,
+ FoldableDef OpToFold, MachineInstr *UseMI, int UseOpIdx,
SmallVectorImpl<FoldCandidate> &FoldList,
SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {
const MachineOperand *UseOp = &UseMI->getOperand(UseOpIdx);
@@ -1038,18 +1134,20 @@ void SIFoldOperandsImpl::foldOperand(
if (SplatVal) {
if (MachineOperand *Foldable =
tryFoldRegSeqSplat(RSUseMI, OpNo, SplatVal, SplatRC)) {
- appendFoldCandidate(FoldList, RSUseMI, OpNo, Foldable);
+ FoldableDef SplatDef(*Foldable, SplatRC);
+ appendFoldCandidate(FoldList, RSUseMI, OpNo, SplatDef);
continue;
}
}
+ // TODO: Handle general compose
if (RSUse->getSubReg() != RegSeqDstSubReg)
continue;
- if (tryToFoldACImm(UseMI->getOperand(0), RSUseMI, OpNo, FoldList))
- continue;
-
- foldOperand(OpToFold, RSUseMI, OpNo, FoldList, CopiesToReplace);
+ // FIXME: We should avoid recursing here. There should be a cleaner split
+ // between the in-place mutations and adding to the fold list.
+ foldOperand(OpToFold, RSUseMI, RSUseMI->getOperandNo(RSUse), FoldList,
+ CopiesToReplace);
}
return;
@@ -1077,7 +1175,7 @@ void SIFoldOperandsImpl::foldOperand(
// A frame index will resolve to a positive constant, so it should always be
// safe to fold the addressing mode, even pre-GFX9.
- UseMI->getOperand(UseOpIdx).ChangeToFrameIndex(OpToFold.getIndex());
+ UseMI->getOperand(UseOpIdx).ChangeToFrameIndex(OpToFold.getFI());
const unsigned Opc = UseMI->getOpcode();
if (TII->isFLATScratch(*UseMI) &&
@@ -1107,11 +1205,12 @@ void SIFoldOperandsImpl::foldOperand(
return;
const TargetRegisterClass *DestRC = TRI->getRegClassForReg(*MRI, DestReg);
- if (!DestReg.isPhysical()) {
- if (DestRC == &AMDGPU::AGPR_32RegClass &&
- TII->isInlineConstant(OpToFold, AMDGPU::OPERAND_REG_INLINE_C_INT32)) {
+ if (!DestReg.isPhysical() && DestRC == &AMDGPU::AGPR_32RegClass) {
+ std::optional<int64_t> UseImmVal = OpToFold.getEffectiveImmVal();
+ if (UseImmVal && TII->isInlineConstant(
+ ...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/140608