[llvm-branch-commits] [llvm] AMDGPU: Fix tracking subreg defs when folding through reg_sequence (PR #140608)
Jay Foad via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed May 28 02:11:54 PDT 2025
================
@@ -25,52 +25,151 @@ using namespace llvm;
namespace {
-struct FoldCandidate {
- MachineInstr *UseMI;
+/// Track a value we may want to fold into downstream users, applying
+/// subregister extracts along the way.
+struct FoldableDef {
union {
- MachineOperand *OpToFold;
+ MachineOperand *OpToFold = nullptr;
uint64_t ImmToFold;
int FrameIndexToFold;
};
- int ShrinkOpcode;
- unsigned UseOpNo;
+
+ /// Register class of the originally defined value.
+ const TargetRegisterClass *DefRC = nullptr;
+
+ /// Track the original defining instruction for the value.
+ const MachineInstr *DefMI = nullptr;
+
+ /// Subregister to apply to the value at the use point.
+ unsigned DefSubReg = AMDGPU::NoSubRegister;
+
+ /// Kind of value stored in the union.
MachineOperand::MachineOperandType Kind;
- bool Commuted;
- FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp,
- bool Commuted_ = false,
- int ShrinkOp = -1) :
- UseMI(MI), OpToFold(nullptr), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
- Kind(FoldOp->getType()),
- Commuted(Commuted_) {
- if (FoldOp->isImm()) {
- ImmToFold = FoldOp->getImm();
- } else if (FoldOp->isFI()) {
- FrameIndexToFold = FoldOp->getIndex();
+ FoldableDef() = delete;
+ FoldableDef(MachineOperand &FoldOp, const TargetRegisterClass *DefRC,
+ unsigned DefSubReg = AMDGPU::NoSubRegister)
+ : DefRC(DefRC), DefSubReg(DefSubReg), Kind(FoldOp.getType()) {
+
+ if (FoldOp.isImm()) {
+ ImmToFold = FoldOp.getImm();
+ } else if (FoldOp.isFI()) {
+ FrameIndexToFold = FoldOp.getIndex();
} else {
- assert(FoldOp->isReg() || FoldOp->isGlobal());
- OpToFold = FoldOp;
+ assert(FoldOp.isReg() || FoldOp.isGlobal());
+ OpToFold = &FoldOp;
}
+
+ DefMI = FoldOp.getParent();
}
- FoldCandidate(MachineInstr *MI, unsigned OpNo, int64_t FoldImm,
- bool Commuted_ = false, int ShrinkOp = -1)
- : UseMI(MI), ImmToFold(FoldImm), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
- Kind(MachineOperand::MO_Immediate), Commuted(Commuted_) {}
+ FoldableDef(int64_t FoldImm, const TargetRegisterClass *DefRC,
+ unsigned DefSubReg = AMDGPU::NoSubRegister)
+ : ImmToFold(FoldImm), DefRC(DefRC), DefSubReg(DefSubReg),
+ Kind(MachineOperand::MO_Immediate) {}
+
+ /// Copy the current def and apply \p SubReg to the value.
+ FoldableDef getWithSubReg(const SIRegisterInfo &TRI, unsigned SubReg) const {
+ FoldableDef Copy(*this);
+ Copy.DefSubReg = TRI.composeSubRegIndices(DefSubReg, SubReg);
+ return Copy;
+ }
+
+ bool isReg() const { return Kind == MachineOperand::MO_Register; }
+
+ Register getReg() const {
+ assert(isReg());
+ return OpToFold->getReg();
+ }
+
+ unsigned getSubReg() const {
+ assert(isReg());
+ return OpToFold->getSubReg();
+ }
+
+ bool isImm() const { return Kind == MachineOperand::MO_Immediate; }
bool isFI() const {
return Kind == MachineOperand::MO_FrameIndex;
}
- bool isImm() const {
- return Kind == MachineOperand::MO_Immediate;
+ int getFI() const {
+ assert(isFI());
+ return FrameIndexToFold;
}
- bool isReg() const {
- return Kind == MachineOperand::MO_Register;
+ bool isGlobal() const { return OpToFold->isGlobal(); }
+
+ /// Return the effective immediate value defined by this instruction, after
+ /// application of any subregister extracts which may exist between the use
+ /// and def instruction.
+ std::optional<int64_t> getEffectiveImmVal() const {
+ assert(isImm());
+ return SIInstrInfo::extractSubregFromImm(ImmToFold, DefSubReg);
}
- bool isGlobal() const { return Kind == MachineOperand::MO_GlobalAddress; }
+  /// Check if it is legal to fold this effective value into \p MI's \p OpIdx
+  /// operand.
+ bool isOperandLegal(const SIInstrInfo &TII, const MachineInstr &MI,
+ unsigned OpIdx) const {
+ switch (Kind) {
+ case MachineOperand::MO_Immediate: {
+ std::optional<int64_t> ImmToFold = getEffectiveImmVal();
+ if (!ImmToFold)
+ return false;
+
+ // TODO: Should verify the subregister index is supported by the class
+ // TODO: Avoid the temporary MachineOperand
+ MachineOperand TmpOp = MachineOperand::CreateImm(*ImmToFold);
+ return TII.isOperandLegal(MI, OpIdx, &TmpOp);
+ }
+ case MachineOperand::MO_FrameIndex: {
+ if (DefSubReg != AMDGPU::NoSubRegister)
+ return false;
+ MachineOperand TmpOp = MachineOperand::CreateFI(FrameIndexToFold);
+ return TII.isOperandLegal(MI, OpIdx, &TmpOp);
+ }
+ default:
+      // TODO: Try to apply DefSubReg; for a global address we can extract
+      // the low/high parts.
+ if (DefSubReg != AMDGPU::NoSubRegister)
+ return false;
+ return TII.isOperandLegal(MI, OpIdx, OpToFold);
+ }
+
+ llvm_unreachable("covered MachineOperand kind switch");
+ }
+};
+
+struct FoldCandidate {
+ MachineInstr *UseMI;
+ FoldableDef Def;
+ int ShrinkOpcode;
+ unsigned UseOpNo;
+ bool Commuted;
+
+ FoldCandidate(MachineInstr *MI, unsigned OpNo, FoldableDef Def,
+ bool Commuted_ = false, int ShrinkOp = -1)
+ : UseMI(MI), Def(Def), ShrinkOpcode(ShrinkOp), UseOpNo(OpNo),
+ Commuted(Commuted_) {}
+
+ bool isFI() const { return Def.Kind == MachineOperand::MO_FrameIndex; }
+
+ int getFI() const {
+ assert(isFI());
+ return Def.FrameIndexToFold;
+ }
+
+ bool isImm() const { return Def.isImm(); }
+
+ bool isReg() const { return Def.isReg(); }
+
+ Register getReg() const {
+ assert(isReg());
+ return Def.OpToFold->getReg();
+ }
----------------
jayfoad wrote:
```suggestion
Register getReg() const { return Def.getReg(); }
```
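The suggested one-liner works because FoldableDef::getReg() already asserts isReg() before dereferencing OpToFold, so FoldCandidate does not need to repeat the assertion or reach into Def's internals. For readers unfamiliar with the subregister extraction this patch threads through, here is a minimal self-contained sketch of the behaviour getEffectiveImmVal() relies on. It only mimics what SIInstrInfo::extractSubregFromImm conceptually does for the 64-bit-immediate case; the enum values and the sign-extension choice are illustrative assumptions, not the in-tree implementation:

```cpp
// Hypothetical mimic of SIInstrInfo::extractSubregFromImm (names and
// sign-extension behaviour are assumptions for illustration only).
#include <cstdint>
#include <optional>

enum SubRegIdx { NoSubRegister, Sub0, Sub1 }; // stand-ins for AMDGPU:: indices

std::optional<int64_t> extractSubregFromImmSketch(int64_t Imm,
                                                  SubRegIdx SubReg) {
  switch (SubReg) {
  case NoSubRegister:
    return Imm;                             // no extract: whole value
  case Sub0:
    return static_cast<int32_t>(Imm);       // low 32 bits
  case Sub1:
    return static_cast<int32_t>(Imm >> 32); // high 32 bits
  }
  return std::nullopt;                      // unknown index: fold is unsafe
}
```

For example, with Imm = 0x1111222233334444, Sub0 yields 0x33334444 and Sub1 yields 0x11112222. In the patch above, getEffectiveImmVal() returning std::nullopt is what makes isOperandLegal() reject the fold when the subregister extract cannot be applied to the immediate.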
https://github.com/llvm/llvm-project/pull/140608