[llvm] [MCA] Use Bare Reference for InstrPostProcess (PR #160229)
Aiden Grossman via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 23 06:26:53 PDT 2025
https://github.com/boomanaiden154 updated https://github.com/llvm/llvm-project/pull/160229
>From fac775afb14c8f895d164e49b36fba86f6254510 Mon Sep 17 00:00:00 2001
From: Aiden Grossman <aidengrossman at google.com>
Date: Tue, 23 Sep 2025 04:17:43 +0000
Subject: [PATCH] [MCA] Use Bare Ference for InstrPostProcess
This patch makes it so that InstrPostProcess::postProcessInstruction
takes in a reference to a mca::Instruction rather than a reference to a
std::unique_ptr. Without this, InstrPostProcess cannot be used with MCA
instruction recycling because it needs to be called on both newly
created instructions and instructions that have been recycled. We only
have access to a raw pointer for instructions that have been recycled
rather than a reference to the std::unique_ptr that owns them.
This patch adds a call in the existing instruction recycling unit test
to ensure the API remains compatible with this use case.
---
llvm/include/llvm/MCA/CustomBehaviour.h | 3 +--
.../AMDGPU/MCA/AMDGPUCustomBehaviour.cpp | 8 +++----
.../Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h | 5 ++--
.../lib/Target/X86/MCA/X86CustomBehaviour.cpp | 24 +++++++++----------
llvm/lib/Target/X86/MCA/X86CustomBehaviour.h | 7 +++---
llvm/tools/llvm-mca/llvm-mca.cpp | 2 +-
.../tools/llvm-mca/X86/TestIncrementalMCA.cpp | 6 +++++
7 files changed, 28 insertions(+), 27 deletions(-)
diff --git a/llvm/include/llvm/MCA/CustomBehaviour.h b/llvm/include/llvm/MCA/CustomBehaviour.h
index 0ce3993be95ba..8ad674c4ecf13 100644
--- a/llvm/include/llvm/MCA/CustomBehaviour.h
+++ b/llvm/include/llvm/MCA/CustomBehaviour.h
@@ -49,8 +49,7 @@ class InstrPostProcess {
/// object after it has been lowered from the MCInst.
/// This is generally a less disruptive alternative to modifying the
/// scheduling model.
- virtual void postProcessInstruction(std::unique_ptr<Instruction> &Inst,
- const MCInst &MCI) {}
+ virtual void postProcessInstruction(Instruction &Inst, const MCInst &MCI) {}
// The resetState() method gets invoked at the beginning of each code region
// so that targets that override this function can clear any state that they
diff --git a/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp b/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp
index b8f43c4550b7e..afaa19013bfc2 100644
--- a/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp
+++ b/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.cpp
@@ -21,8 +21,8 @@
namespace llvm::mca {
-void AMDGPUInstrPostProcess::postProcessInstruction(
- std::unique_ptr<Instruction> &Inst, const MCInst &MCI) {
+void AMDGPUInstrPostProcess::postProcessInstruction(Instruction &Inst,
+ const MCInst &MCI) {
switch (MCI.getOpcode()) {
case AMDGPU::S_WAITCNT:
case AMDGPU::S_WAITCNT_soft:
@@ -44,7 +44,7 @@ void AMDGPUInstrPostProcess::postProcessInstruction(
// s_waitcnt instructions encode important information as immediate operands
// which are lost during the MCInst -> mca::Instruction lowering.
-void AMDGPUInstrPostProcess::processWaitCnt(std::unique_ptr<Instruction> &Inst,
+void AMDGPUInstrPostProcess::processWaitCnt(Instruction &Inst,
const MCInst &MCI) {
for (int Idx = 0, N = MCI.size(); Idx < N; Idx++) {
MCAOperand Op;
@@ -55,7 +55,7 @@ void AMDGPUInstrPostProcess::processWaitCnt(std::unique_ptr<Instruction> &Inst,
Op = MCAOperand::createImm(MCOp.getImm());
}
Op.setIndex(Idx);
- Inst->addOperand(Op);
+ Inst.addOperand(Op);
}
}
diff --git a/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h b/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h
index 85b9c188b5d1a..cbc7427ce6cdf 100644
--- a/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h
+++ b/llvm/lib/Target/AMDGPU/MCA/AMDGPUCustomBehaviour.h
@@ -26,7 +26,7 @@ namespace llvm {
namespace mca {
class AMDGPUInstrPostProcess : public InstrPostProcess {
- void processWaitCnt(std::unique_ptr<Instruction> &Inst, const MCInst &MCI);
+ void processWaitCnt(Instruction &Inst, const MCInst &MCI);
public:
AMDGPUInstrPostProcess(const MCSubtargetInfo &STI, const MCInstrInfo &MCII)
@@ -34,8 +34,7 @@ class AMDGPUInstrPostProcess : public InstrPostProcess {
~AMDGPUInstrPostProcess() = default;
- void postProcessInstruction(std::unique_ptr<Instruction> &Inst,
- const MCInst &MCI) override;
+ void postProcessInstruction(Instruction &Inst, const MCInst &MCI) override;
};
struct WaitCntInfo {
diff --git a/llvm/lib/Target/X86/MCA/X86CustomBehaviour.cpp b/llvm/lib/Target/X86/MCA/X86CustomBehaviour.cpp
index e2a1bbf383b3c..a69a781bf070b 100644
--- a/llvm/lib/Target/X86/MCA/X86CustomBehaviour.cpp
+++ b/llvm/lib/Target/X86/MCA/X86CustomBehaviour.cpp
@@ -20,24 +20,22 @@
namespace llvm {
namespace mca {
-void X86InstrPostProcess::setMemBarriers(std::unique_ptr<Instruction> &Inst,
- const MCInst &MCI) {
+void X86InstrPostProcess::setMemBarriers(Instruction &Inst, const MCInst &MCI) {
switch (MCI.getOpcode()) {
case X86::MFENCE:
- Inst->setLoadBarrier(true);
- Inst->setStoreBarrier(true);
+ Inst.setLoadBarrier(true);
+ Inst.setStoreBarrier(true);
break;
case X86::LFENCE:
- Inst->setLoadBarrier(true);
+ Inst.setLoadBarrier(true);
break;
case X86::SFENCE:
- Inst->setStoreBarrier(true);
+ Inst.setStoreBarrier(true);
break;
}
}
-void X86InstrPostProcess::useStackEngine(std::unique_ptr<Instruction> &Inst,
- const MCInst &MCI) {
+void X86InstrPostProcess::useStackEngine(Instruction &Inst, const MCInst &MCI) {
// TODO(boomanaiden154): We currently do not handle PUSHF/POPF because we
// have not done the necessary benchmarking to see if they are also
// optimized by the stack engine.
@@ -46,18 +44,18 @@ void X86InstrPostProcess::useStackEngine(std::unique_ptr<Instruction> &Inst,
// delay subsequent rsp using non-stack instructions.
if (X86::isPOP(MCI.getOpcode()) || X86::isPUSH(MCI.getOpcode())) {
auto *StackRegisterDef =
- llvm::find_if(Inst->getDefs(), [](const WriteState &State) {
+ llvm::find_if(Inst.getDefs(), [](const WriteState &State) {
return State.getRegisterID() == X86::RSP;
});
assert(
- StackRegisterDef != Inst->getDefs().end() &&
+ StackRegisterDef != Inst.getDefs().end() &&
"Expected push instruction to implicitly use stack pointer register.");
- Inst->getDefs().erase(StackRegisterDef);
+ Inst.getDefs().erase(StackRegisterDef);
}
}
-void X86InstrPostProcess::postProcessInstruction(
- std::unique_ptr<Instruction> &Inst, const MCInst &MCI) {
+void X86InstrPostProcess::postProcessInstruction(Instruction &Inst,
+ const MCInst &MCI) {
// Set IsALoadBarrier and IsAStoreBarrier flags.
setMemBarriers(Inst, MCI);
useStackEngine(Inst, MCI);
diff --git a/llvm/lib/Target/X86/MCA/X86CustomBehaviour.h b/llvm/lib/Target/X86/MCA/X86CustomBehaviour.h
index c5459e42dfc9f..d6197f3344bbb 100644
--- a/llvm/lib/Target/X86/MCA/X86CustomBehaviour.h
+++ b/llvm/lib/Target/X86/MCA/X86CustomBehaviour.h
@@ -26,12 +26,12 @@ namespace mca {
class X86InstrPostProcess : public InstrPostProcess {
/// Called within X86InstrPostProcess to specify certain instructions
/// as load and store barriers.
- void setMemBarriers(std::unique_ptr<Instruction> &Inst, const MCInst &MCI);
+ void setMemBarriers(Instruction &Inst, const MCInst &MCI);
/// Called within X86InstrPostPorcess to remove some rsp read operands
/// on stack instructions to better simulate the stack engine. We currently
/// do not model features of the stack engine like sync uops.
- void useStackEngine(std::unique_ptr<Instruction> &Inst, const MCInst &MCI);
+ void useStackEngine(Instruction &Inst, const MCInst &MCI);
public:
X86InstrPostProcess(const MCSubtargetInfo &STI, const MCInstrInfo &MCII)
@@ -39,8 +39,7 @@ class X86InstrPostProcess : public InstrPostProcess {
~X86InstrPostProcess() = default;
- void postProcessInstruction(std::unique_ptr<Instruction> &Inst,
- const MCInst &MCI) override;
+ void postProcessInstruction(Instruction &Inst, const MCInst &MCI) override;
};
} // namespace mca
diff --git a/llvm/tools/llvm-mca/llvm-mca.cpp b/llvm/tools/llvm-mca/llvm-mca.cpp
index a4194da4a7b63..a64539c09b81e 100644
--- a/llvm/tools/llvm-mca/llvm-mca.cpp
+++ b/llvm/tools/llvm-mca/llvm-mca.cpp
@@ -668,7 +668,7 @@ int main(int argc, char **argv) {
return 1;
}
- IPP->postProcessInstruction(Inst.get(), MCI);
+ IPP->postProcessInstruction(*Inst.get(), MCI);
InstToInstruments.insert({&MCI, Instruments});
LoweredSequence.emplace_back(std::move(Inst.get()));
}
diff --git a/llvm/unittests/tools/llvm-mca/X86/TestIncrementalMCA.cpp b/llvm/unittests/tools/llvm-mca/X86/TestIncrementalMCA.cpp
index 17809e7beda95..1b0073e026bae 100644
--- a/llvm/unittests/tools/llvm-mca/X86/TestIncrementalMCA.cpp
+++ b/llvm/unittests/tools/llvm-mca/X86/TestIncrementalMCA.cpp
@@ -130,6 +130,10 @@ TEST_F(X86TestBase, TestInstructionRecycling) {
mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA.get(), *IM, /*CallLatency=*/100);
IB.setInstRecycleCallback(GetRecycledInst);
+ // Setup a generic IPP that does not do anything (as it is not target
+ // specific) for testing purposes.
+ auto IPP = std::make_unique<InstrPostProcess>(*STI, *MCII);
+
const SmallVector<mca::Instrument *> Instruments;
// Tile size = 7
for (unsigned i = 0U, E = MCIs.size(); i < E;) {
@@ -147,8 +151,10 @@ TEST_F(X86TestBase, TestInstructionRecycling) {
});
ASSERT_FALSE(bool(RemainingE));
ASSERT_TRUE(RecycledInst);
+ IPP->postProcessInstruction(*RecycledInst, MCIs[i]);
ISM.addRecycledInst(RecycledInst);
} else {
+ IPP->postProcessInstruction(*InstOrErr.get(), MCIs[i]);
ISM.addInst(std::move(InstOrErr.get()));
}
}
More information about the llvm-commits
mailing list