[llvm-branch-commits] [llvm] AMDGPU: Fix tracking subreg defs when folding through reg_sequence (PR #140608)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Mon May 19 13:07:15 PDT 2025


llvmbot wrote:



@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes:

We weren't fully respecting the type of an immediate's def vs. the type at
the use point. Refactor the folding logic to track the value to fold, along
with a subregister to apply to the underlying value. This is similar to how
PeepholeOpt tracks subregisters (though PeepholeOpt only handles pure
copy-like instructions, not constants).

Fixes #139317
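
To make the description concrete, here is a schematic of the kind of pattern
involved (pseudo-MIR, not taken from the issue or the new tests; register
classes, opcodes, and the constant are illustrative and encoding constraints
are ignored). A 64-bit immediate def feeds a REG_SEQUENCE, and a downstream
use reads only one half of the result, so the fold has to compose the
subregister chain and extract the matching half of the constant rather than
reuse the full 64-bit value:

```
%imm:sreg_64 = S_MOV_B64 180388626439
%rs:sreg_64  = REG_SEQUENCE %imm.sub0, %subreg.sub0, %imm.sub1, %subreg.sub1
%use:sreg_32 = S_ADD_I32 %rs.sub1, %x:sreg_32, implicit-def $scc
```

Here 180388626439 is 0x0000002A_00000007; folding %imm into the S_ADD_I32
through %rs.sub1 must substitute 42 (the sub1 half), not 7 and not the full
64-bit value. Tracking that composition is what the new FoldableDef provides.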

---

Patch is 34.59 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/140608.diff


4 Files Affected:

- (modified) llvm/lib/Target/AMDGPU/SIFoldOperands.cpp (+242-164) 
- (modified) llvm/test/CodeGen/AMDGPU/issue139317-bad-opsel-reg-sequence-fold.ll (+2-1) 
- (modified) llvm/test/CodeGen/AMDGPU/si-fold-operands-subreg-imm.gfx942.mir (+4-4) 
- (modified) llvm/test/CodeGen/AMDGPU/si-fold-operands-subreg-imm.mir (+2-2) 


``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 9bbc8e75fd31b..eb7fb94e25f5c 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -25,52 +25,151 @@ using namespace llvm;
 
 namespace {
 
-struct FoldCandidate {
-  MachineInstr *UseMI;
+/// Track a value we may want to fold into downstream users, applying
+/// subregister extracts along the way.
+struct FoldableDef {
   union {
-    MachineOperand *OpToFold;
+    MachineOperand *OpToFold = nullptr;
     uint64_t ImmToFold;
     int FrameIndexToFold;
   };
-  int ShrinkOpcode;
-  unsigned UseOpNo;
+
+  /// Register class of the originally defined value.
+  const TargetRegisterClass *DefRC = nullptr;
+
+  /// Track the original defining instruction for the value.
+  const MachineInstr *DefMI = nullptr;
+
+  /// Subregister to apply to the value at the use point.
+  unsigned DefSubReg = AMDGPU::NoSubRegister;
+
+  /// Kind of value stored in the union.
   MachineOperand::MachineOperandType Kind;
-  bool Commuted;
 
-  FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp,
-                bool Commuted_ = false,
-                int ShrinkOp = -1) :
-    UseMI(MI), OpToFold(nullptr), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
-    Kind(FoldOp->getType()),
-    Commuted(Commuted_) {
-    if (FoldOp->isImm()) {
-      ImmToFold = FoldOp->getImm();
-    } else if (FoldOp->isFI()) {
-      FrameIndexToFold = FoldOp->getIndex();
+  FoldableDef() = delete;
+  FoldableDef(MachineOperand &FoldOp, const TargetRegisterClass *DefRC,
+              unsigned DefSubReg = AMDGPU::NoSubRegister)
+      : DefRC(DefRC), DefSubReg(DefSubReg), Kind(FoldOp.getType()) {
+
+    if (FoldOp.isImm()) {
+      ImmToFold = FoldOp.getImm();
+    } else if (FoldOp.isFI()) {
+      FrameIndexToFold = FoldOp.getIndex();
     } else {
-      assert(FoldOp->isReg() || FoldOp->isGlobal());
-      OpToFold = FoldOp;
+      assert(FoldOp.isReg() || FoldOp.isGlobal());
+      OpToFold = &FoldOp;
     }
+
+    DefMI = FoldOp.getParent();
   }
 
-  FoldCandidate(MachineInstr *MI, unsigned OpNo, int64_t FoldImm,
-                bool Commuted_ = false, int ShrinkOp = -1)
-      : UseMI(MI), ImmToFold(FoldImm), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
-        Kind(MachineOperand::MO_Immediate), Commuted(Commuted_) {}
+  FoldableDef(int64_t FoldImm, const TargetRegisterClass *DefRC,
+              unsigned DefSubReg = AMDGPU::NoSubRegister)
+      : ImmToFold(FoldImm), DefRC(DefRC), DefSubReg(DefSubReg),
+        Kind(MachineOperand::MO_Immediate) {}
+
+  /// Copy the current def and apply \p SubReg to the value.
+  FoldableDef getWithSubReg(const SIRegisterInfo &TRI, unsigned SubReg) const {
+    FoldableDef Copy(*this);
+    Copy.DefSubReg = TRI.composeSubRegIndices(DefSubReg, SubReg);
+    return Copy;
+  }
+
+  bool isReg() const { return Kind == MachineOperand::MO_Register; }
+
+  Register getReg() const {
+    assert(isReg());
+    return OpToFold->getReg();
+  }
+
+  unsigned getSubReg() const {
+    assert(isReg());
+    return OpToFold->getSubReg();
+  }
+
+  bool isImm() const { return Kind == MachineOperand::MO_Immediate; }
 
   bool isFI() const {
     return Kind == MachineOperand::MO_FrameIndex;
   }
 
-  bool isImm() const {
-    return Kind == MachineOperand::MO_Immediate;
+  int getFI() const {
+    assert(isFI());
+    return FrameIndexToFold;
   }
 
-  bool isReg() const {
-    return Kind == MachineOperand::MO_Register;
+  bool isGlobal() const { return OpToFold->isGlobal(); }
+
+  /// Return the effective immediate value defined by this instruction, after
+  /// application of any subregister extracts which may exist between the use
+  /// and def instruction.
+  std::optional<int64_t> getEffectiveImmVal() const {
+    assert(isImm());
+    return SIInstrInfo::extractSubregFromImm(ImmToFold, DefSubReg);
   }
 
-  bool isGlobal() const { return Kind == MachineOperand::MO_GlobalAddress; }
+  /// Check if it is legal to fold this effective value into \p MI's \p OpNo
+  /// operand.
+  bool isOperandLegal(const SIInstrInfo &TII, const MachineInstr &MI,
+                      unsigned OpIdx) const {
+    switch (Kind) {
+    case MachineOperand::MO_Immediate: {
+      std::optional<int64_t> ImmToFold = getEffectiveImmVal();
+      if (!ImmToFold)
+        return false;
+
+      // TODO: Should verify the subregister index is supported by the class
+      // TODO: Avoid the temporary MachineOperand
+      MachineOperand TmpOp = MachineOperand::CreateImm(*ImmToFold);
+      return TII.isOperandLegal(MI, OpIdx, &TmpOp);
+    }
+    case MachineOperand::MO_FrameIndex: {
+      if (DefSubReg != AMDGPU::NoSubRegister)
+        return false;
+      MachineOperand TmpOp = MachineOperand::CreateFI(FrameIndexToFold);
+      return TII.isOperandLegal(MI, OpIdx, &TmpOp);
+    }
+    default:
+      // TODO: Try to apply DefSubReg, for global address we can extract
+      // low/high.
+      if (DefSubReg != AMDGPU::NoSubRegister)
+        return false;
+      return TII.isOperandLegal(MI, OpIdx, OpToFold);
+    }
+
+    llvm_unreachable("covered MachineOperand kind switch");
+  }
+};
+
+struct FoldCandidate {
+  MachineInstr *UseMI;
+  FoldableDef Def;
+  int ShrinkOpcode;
+  unsigned UseOpNo;
+  bool Commuted;
+
+  FoldCandidate(MachineInstr *MI, unsigned OpNo, FoldableDef Def,
+                bool Commuted_ = false, int ShrinkOp = -1)
+      : UseMI(MI), Def(Def), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
+        Commuted(Commuted_) {}
+
+  bool isFI() const { return Def.Kind == MachineOperand::MO_FrameIndex; }
+
+  int getFI() const {
+    assert(isFI());
+    return Def.FrameIndexToFold;
+  }
+
+  bool isImm() const { return Def.isImm(); }
+
+  bool isReg() const { return Def.isReg(); }
+
+  Register getReg() const {
+    assert(isReg());
+    return Def.OpToFold->getReg();
+  }
+
+  bool isGlobal() const { return Def.Kind == MachineOperand::MO_GlobalAddress; }
 
   bool needsShrink() const { return ShrinkOpcode != -1; }
 };
@@ -84,7 +183,7 @@ class SIFoldOperandsImpl {
   const SIMachineFunctionInfo *MFI;
 
   bool frameIndexMayFold(const MachineInstr &UseMI, int OpNo,
-                         const MachineOperand &OpToFold) const;
+                         const FoldableDef &OpToFold) const;
 
   // TODO: Just use TII::getVALUOp
   unsigned convertToVALUOp(unsigned Opc, bool UseVOP3 = false) const {
@@ -112,11 +211,11 @@ class SIFoldOperandsImpl {
 
   bool canUseImmWithOpSel(FoldCandidate &Fold) const;
 
-  bool tryFoldImmWithOpSel(FoldCandidate &Fold) const;
+  bool tryFoldImmWithOpSel(FoldCandidate &Fold, int64_t ImmVal) const;
 
   bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
                         MachineInstr *MI, unsigned OpNo,
-                        MachineOperand *OpToFold) const;
+                        const FoldableDef &OpToFold) const;
   bool isUseSafeToFold(const MachineInstr &MI,
                        const MachineOperand &UseMO) const;
 
@@ -135,12 +234,10 @@ class SIFoldOperandsImpl {
                                      MachineOperand *SplatVal,
                                      const TargetRegisterClass *SplatRC) const;
 
-  bool tryToFoldACImm(MachineOperand &OpToFold, MachineInstr *UseMI,
+  bool tryToFoldACImm(const FoldableDef &OpToFold, MachineInstr *UseMI,
                       unsigned UseOpIdx,
                       SmallVectorImpl<FoldCandidate> &FoldList) const;
-  void foldOperand(MachineOperand &OpToFold,
-                   MachineInstr *UseMI,
-                   int UseOpIdx,
+  void foldOperand(FoldableDef OpToFold, MachineInstr *UseMI, int UseOpIdx,
                    SmallVectorImpl<FoldCandidate> &FoldList,
                    SmallVectorImpl<MachineInstr *> &CopiesToReplace) const;
 
@@ -148,7 +245,7 @@ class SIFoldOperandsImpl {
   bool tryConstantFoldOp(MachineInstr *MI) const;
   bool tryFoldCndMask(MachineInstr &MI) const;
   bool tryFoldZeroHighBits(MachineInstr &MI) const;
-  bool foldInstOperand(MachineInstr &MI, MachineOperand &OpToFold) const;
+  bool foldInstOperand(MachineInstr &MI, const FoldableDef &OpToFold) const;
 
   bool foldCopyToAGPRRegSequence(MachineInstr *CopyMI) const;
   bool tryFoldFoldableCopy(MachineInstr &MI,
@@ -240,8 +337,8 @@ static unsigned macToMad(unsigned Opc) {
 
 // TODO: Add heuristic that the frame index might not fit in the addressing mode
 // immediate offset to avoid materializing in loops.
-bool SIFoldOperandsImpl::frameIndexMayFold(
-    const MachineInstr &UseMI, int OpNo, const MachineOperand &OpToFold) const {
+bool SIFoldOperandsImpl::frameIndexMayFold(const MachineInstr &UseMI, int OpNo,
+                                           const FoldableDef &OpToFold) const {
   if (!OpToFold.isFI())
     return false;
 
@@ -381,7 +478,8 @@ bool SIFoldOperandsImpl::canUseImmWithOpSel(FoldCandidate &Fold) const {
   return true;
 }
 
-bool SIFoldOperandsImpl::tryFoldImmWithOpSel(FoldCandidate &Fold) const {
+bool SIFoldOperandsImpl::tryFoldImmWithOpSel(FoldCandidate &Fold,
+                                             int64_t ImmVal) const {
   MachineInstr *MI = Fold.UseMI;
   MachineOperand &Old = MI->getOperand(Fold.UseOpNo);
   unsigned Opcode = MI->getOpcode();
@@ -391,8 +489,8 @@ bool SIFoldOperandsImpl::tryFoldImmWithOpSel(FoldCandidate &Fold) const {
   // If the literal can be inlined as-is, apply it and short-circuit the
   // tests below. The main motivation for this is to avoid unintuitive
   // uses of opsel.
-  if (AMDGPU::isInlinableLiteralV216(Fold.ImmToFold, OpType)) {
-    Old.ChangeToImmediate(Fold.ImmToFold);
+  if (AMDGPU::isInlinableLiteralV216(ImmVal, OpType)) {
+    Old.ChangeToImmediate(ImmVal);
     return true;
   }
 
@@ -415,10 +513,10 @@ bool SIFoldOperandsImpl::tryFoldImmWithOpSel(FoldCandidate &Fold) const {
   MachineOperand &Mod = MI->getOperand(ModIdx);
   unsigned ModVal = Mod.getImm();
 
-  uint16_t ImmLo = static_cast<uint16_t>(
-      Fold.ImmToFold >> (ModVal & SISrcMods::OP_SEL_0 ? 16 : 0));
-  uint16_t ImmHi = static_cast<uint16_t>(
-      Fold.ImmToFold >> (ModVal & SISrcMods::OP_SEL_1 ? 16 : 0));
+  uint16_t ImmLo =
+      static_cast<uint16_t>(ImmVal >> (ModVal & SISrcMods::OP_SEL_0 ? 16 : 0));
+  uint16_t ImmHi =
+      static_cast<uint16_t>(ImmVal >> (ModVal & SISrcMods::OP_SEL_1 ? 16 : 0));
   uint32_t Imm = (static_cast<uint32_t>(ImmHi) << 16) | ImmLo;
   unsigned NewModVal = ModVal & ~(SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1);
 
@@ -510,16 +608,20 @@ bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold) const {
   assert(Old.isReg());
 
   if (Fold.isImm() && canUseImmWithOpSel(Fold)) {
-    if (tryFoldImmWithOpSel(Fold))
+    std::optional<int64_t> ImmVal = Fold.Def.getEffectiveImmVal();
+    if (!ImmVal)
+      return false;
+
+    if (tryFoldImmWithOpSel(Fold, *ImmVal))
       return true;
 
     // We can't represent the candidate as an inline constant. Try as a literal
     // with the original opsel, checking constant bus limitations.
-    MachineOperand New = MachineOperand::CreateImm(Fold.ImmToFold);
+    MachineOperand New = MachineOperand::CreateImm(*ImmVal);
     int OpNo = MI->getOperandNo(&Old);
     if (!TII->isOperandLegal(*MI, OpNo, &New))
       return false;
-    Old.ChangeToImmediate(Fold.ImmToFold);
+    Old.ChangeToImmediate(*ImmVal);
     return true;
   }
 
@@ -575,22 +677,34 @@ bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold) const {
       MI->setDesc(TII->get(NewMFMAOpc));
       MI->untieRegOperand(0);
     }
-    Old.ChangeToImmediate(Fold.ImmToFold);
+
+    std::optional<int64_t> ImmVal = Fold.Def.getEffectiveImmVal();
+    if (!ImmVal)
+      return false;
+
+    // TODO: Should we try to avoid adding this to the candidate list?
+    MachineOperand New = MachineOperand::CreateImm(*ImmVal);
+    int OpNo = MI->getOperandNo(&Old);
+    if (!TII->isOperandLegal(*MI, OpNo, &New))
+      return false;
+
+    Old.ChangeToImmediate(*ImmVal);
     return true;
   }
 
   if (Fold.isGlobal()) {
-    Old.ChangeToGA(Fold.OpToFold->getGlobal(), Fold.OpToFold->getOffset(),
-                   Fold.OpToFold->getTargetFlags());
+    Old.ChangeToGA(Fold.Def.OpToFold->getGlobal(),
+                   Fold.Def.OpToFold->getOffset(),
+                   Fold.Def.OpToFold->getTargetFlags());
     return true;
   }
 
   if (Fold.isFI()) {
-    Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
+    Old.ChangeToFrameIndex(Fold.getFI());
     return true;
   }
 
-  MachineOperand *New = Fold.OpToFold;
+  MachineOperand *New = Fold.Def.OpToFold;
   // Rework once the VS_16 register class is updated to include proper
   // 16-bit SGPRs instead of 32-bit ones.
   if (Old.getSubReg() == AMDGPU::lo16 && TRI->isSGPRReg(*MRI, New->getReg()))
@@ -613,26 +727,19 @@ static void appendFoldCandidate(SmallVectorImpl<FoldCandidate> &FoldList,
 
 static void appendFoldCandidate(SmallVectorImpl<FoldCandidate> &FoldList,
                                 MachineInstr *MI, unsigned OpNo,
-                                MachineOperand *FoldOp, bool Commuted = false,
-                                int ShrinkOp = -1) {
-  appendFoldCandidate(FoldList,
-                      FoldCandidate(MI, OpNo, FoldOp, Commuted, ShrinkOp));
-}
-
-static void appendFoldCandidate(SmallVectorImpl<FoldCandidate> &FoldList,
-                                MachineInstr *MI, unsigned OpNo, int64_t ImmVal,
+                                const FoldableDef &FoldOp,
                                 bool Commuted = false, int ShrinkOp = -1) {
   appendFoldCandidate(FoldList,
-                      FoldCandidate(MI, OpNo, ImmVal, Commuted, ShrinkOp));
+                      FoldCandidate(MI, OpNo, FoldOp, Commuted, ShrinkOp));
 }
 
 bool SIFoldOperandsImpl::tryAddToFoldList(
     SmallVectorImpl<FoldCandidate> &FoldList, MachineInstr *MI, unsigned OpNo,
-    MachineOperand *OpToFold) const {
+    const FoldableDef &OpToFold) const {
   const unsigned Opc = MI->getOpcode();
 
   auto tryToFoldAsFMAAKorMK = [&]() {
-    if (!OpToFold->isImm())
+    if (!OpToFold.isImm())
       return false;
 
     const bool TryAK = OpNo == 3;
@@ -665,8 +772,8 @@ bool SIFoldOperandsImpl::tryAddToFoldList(
     return false;
   };
 
-  bool IsLegal = TII->isOperandLegal(*MI, OpNo, OpToFold);
-  if (!IsLegal && OpToFold->isImm()) {
+  bool IsLegal = OpToFold.isOperandLegal(*TII, *MI, OpNo);
+  if (!IsLegal && OpToFold.isImm()) {
     FoldCandidate Fold(MI, OpNo, OpToFold);
     IsLegal = canUseImmWithOpSel(Fold);
   }
@@ -700,7 +807,7 @@ bool SIFoldOperandsImpl::tryAddToFoldList(
     }
 
     // Special case for s_setreg_b32
-    if (OpToFold->isImm()) {
+    if (OpToFold.isImm()) {
       unsigned ImmOpc = 0;
       if (Opc == AMDGPU::S_SETREG_B32)
         ImmOpc = AMDGPU::S_SETREG_IMM32_B32;
@@ -741,10 +848,10 @@ bool SIFoldOperandsImpl::tryAddToFoldList(
       return false;
 
     int Op32 = -1;
-    if (!TII->isOperandLegal(*MI, CommuteOpNo, OpToFold)) {
+    if (!OpToFold.isOperandLegal(*TII, *MI, CommuteOpNo)) {
       if ((Opc != AMDGPU::V_ADD_CO_U32_e64 && Opc != AMDGPU::V_SUB_CO_U32_e64 &&
            Opc != AMDGPU::V_SUBREV_CO_U32_e64) || // FIXME
-          (!OpToFold->isImm() && !OpToFold->isFI() && !OpToFold->isGlobal())) {
+          (!OpToFold.isImm() && !OpToFold.isFI() && !OpToFold.isGlobal())) {
         TII->commuteInstruction(*MI, false, OpNo, CommuteOpNo);
         return false;
       }
@@ -763,7 +870,8 @@ bool SIFoldOperandsImpl::tryAddToFoldList(
       Op32 = AMDGPU::getVOPe32(MaybeCommutedOpc);
     }
 
-    appendFoldCandidate(FoldList, MI, CommuteOpNo, OpToFold, true, Op32);
+    appendFoldCandidate(FoldList, MI, CommuteOpNo, OpToFold, /*Commuted=*/true,
+                        Op32);
     return true;
   }
 
@@ -938,7 +1046,7 @@ MachineOperand *SIFoldOperandsImpl::tryFoldRegSeqSplat(
 }
 
 bool SIFoldOperandsImpl::tryToFoldACImm(
-    MachineOperand &OpToFold, MachineInstr *UseMI, unsigned UseOpIdx,
+    const FoldableDef &OpToFold, MachineInstr *UseMI, unsigned UseOpIdx,
     SmallVectorImpl<FoldCandidate> &FoldList) const {
   const MCInstrDesc &Desc = UseMI->getDesc();
   if (UseOpIdx >= Desc.getNumOperands())
@@ -949,27 +1057,9 @@ bool SIFoldOperandsImpl::tryToFoldACImm(
     return false;
 
   MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
-  if (OpToFold.isImm()) {
-    if (unsigned UseSubReg = UseOp.getSubReg()) {
-      std::optional<int64_t> SubImm =
-          SIInstrInfo::extractSubregFromImm(OpToFold.getImm(), UseSubReg);
-      if (!SubImm)
-        return false;
-
-      // TODO: Avoid the temporary MachineOperand
-      MachineOperand TmpOp = MachineOperand::CreateImm(*SubImm);
-      if (TII->isOperandLegal(*UseMI, UseOpIdx, &TmpOp)) {
-        appendFoldCandidate(FoldList, UseMI, UseOpIdx, *SubImm);
-        return true;
-      }
-
-      return false;
-    }
-
-    if (TII->isOperandLegal(*UseMI, UseOpIdx, &OpToFold)) {
-      appendFoldCandidate(FoldList, UseMI, UseOpIdx, &OpToFold);
-      return true;
-    }
+  if (OpToFold.isImm() && OpToFold.isOperandLegal(*TII, *UseMI, UseOpIdx)) {
+    appendFoldCandidate(FoldList, UseMI, UseOpIdx, OpToFold);
+    return true;
   }
 
   // TODO: Verify the following code handles subregisters correctly.
@@ -985,11 +1075,17 @@ bool SIFoldOperandsImpl::tryToFoldACImm(
     return false;
 
   // Maybe it is just a COPY of an immediate itself.
+
+  // FIXME: Remove this handling. There is already special case folding of
+  // immediate into copy in foldOperand. This is looking for the def of the
+  // value the folding started from in the first place.
   MachineInstr *Def = MRI->getVRegDef(UseReg);
   if (Def && TII->isFoldableCopy(*Def)) {
     MachineOperand &DefOp = Def->getOperand(1);
     if (DefOp.isImm() && TII->isOperandLegal(*UseMI, UseOpIdx, &DefOp)) {
-      appendFoldCandidate(FoldList, UseMI, UseOpIdx, &DefOp);
+      FoldableDef FoldableImm(DefOp.getImm(), OpToFold.DefRC,
+                              OpToFold.DefSubReg);
+      appendFoldCandidate(FoldList, UseMI, UseOpIdx, FoldableImm);
       return true;
     }
   }
@@ -998,7 +1094,7 @@ bool SIFoldOperandsImpl::tryToFoldACImm(
 }
 
 void SIFoldOperandsImpl::foldOperand(
-    MachineOperand &OpToFold, MachineInstr *UseMI, int UseOpIdx,
+    FoldableDef OpToFold, MachineInstr *UseMI, int UseOpIdx,
     SmallVectorImpl<FoldCandidate> &FoldList,
     SmallVectorImpl<MachineInstr *> &CopiesToReplace) const {
   const MachineOperand *UseOp = &UseMI->getOperand(UseOpIdx);
@@ -1038,18 +1134,20 @@ void SIFoldOperandsImpl::foldOperand(
       if (SplatVal) {
         if (MachineOperand *Foldable =
                 tryFoldRegSeqSplat(RSUseMI, OpNo, SplatVal, SplatRC)) {
-          appendFoldCandidate(FoldList, RSUseMI, OpNo, Foldable);
+          FoldableDef SplatDef(*Foldable, SplatRC);
+          appendFoldCandidate(FoldList, RSUseMI, OpNo, SplatDef);
           continue;
         }
       }
 
+      // TODO: Handle general compose
       if (RSUse->getSubReg() != RegSeqDstSubReg)
         continue;
 
-      if (tryToFoldACImm(UseMI->getOperand(0), RSUseMI, OpNo, FoldList))
-        continue;
-
-      foldOperand(OpToFold, RSUseMI, OpNo, FoldList, CopiesToReplace);
+      // FIXME: We should avoid recursing here. There should be a cleaner split
+      // between the in-place mutations and adding to the fold list.
+      foldOperand(OpToFold, RSUseMI, RSUseMI->getOperandNo(RSUse), FoldList,
+                  CopiesToReplace);
     }
 
     return;
@@ -1077,7 +1175,7 @@ void SIFoldOperandsImpl::foldOperand(
 
     // A frame index will resolve to a positive constant, so it should always be
     // safe to fold the addressing mode, even pre-GFX9.
-    UseMI->getOperand(UseOpIdx).ChangeToFrameIndex(OpToFold.getIndex());
+    UseMI->getOperand(UseOpIdx).ChangeToFrameIndex(OpToFold.getFI());
 
     const unsigned Opc = UseMI->getOpcode();
     if (TII->isFLATScratch(*UseMI) &&
@@ -1107,11 +1205,12 @@ void SIFoldOperandsImpl::foldOperand(
       return;
 
     const TargetRegisterClass *DestRC = TRI->getRegClassForReg(*MRI, DestReg);
-    if (!DestReg.isPhysical()) {
-      if (DestRC == &AMDGPU::AGPR_32RegClass &&
-          TII->isInlineConstant(OpToFold, AMDGPU::OPERAND_REG_INLINE_C_INT32)) {
+    if (!DestReg.isPhysical() && DestRC == &AMDGPU::AGPR_32RegClass) {
+      std::optional<int64_t> UseImmVal = OpToFold.getEffectiveImmVal();
+      if (UseImmVal && TII->isInlineConstant(
+  ...
[truncated]

``````````
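
A rough sketch of how the new hooks compose for the example above, based on
the FoldableDef definitions shown in the diff; the surrounding variables
(DefRC, TRI) are stand-ins for the pass's existing state, not new code:

```cpp
// Hypothetical usage following the definitions in the diff above. Def tracks
// the 64-bit immediate; the use reads sub1 of the reg_sequence result, so the
// subreg index is composed onto the tracked def before the value is taken.
FoldableDef Def(/*FoldImm=*/180388626439, /*DefRC=*/DefRC);
FoldableDef Hi = Def.getWithSubReg(*TRI, AMDGPU::sub1);
// getEffectiveImmVal() applies SIInstrInfo::extractSubregFromImm, yielding
// 42 (0x2A) here, or std::nullopt if the slice is not representable, in
// which case the fold is rejected.
std::optional<int64_t> Imm = Hi.getEffectiveImmVal();
```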



https://github.com/llvm/llvm-project/pull/140608


More information about the llvm-branch-commits mailing list