[llvm] [llvm][ARM] Add a cortex-m4f alignment hazard recognizer (PR #126991)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 18 11:18:31 PST 2025
================
@@ -266,3 +272,183 @@ void ARMBankConflictHazardRecognizer::EmitInstruction(SUnit *SU) {
void ARMBankConflictHazardRecognizer::AdvanceCycle() { Accesses.clear(); }
void ARMBankConflictHazardRecognizer::RecedeCycle() { Accesses.clear(); }
+
+#define DEBUG_TYPE "cortex-m4-alignment-hazard-rec"
+
+STATISTIC(NumNoops, "Number of noops inserted");
+
+void ARMCortexM4FAlignmentHazardRecognizer::Reset() { Offset = 0; }
+
+ARMCortexM4FAlignmentHazardRecognizer::ARMCortexM4FAlignmentHazardRecognizer(
+ const MCSubtargetInfo &STI, LookaheadCallback CB)
+ : STI(STI), MBB(nullptr), MF(nullptr), Offset(0), Advanced(false),
+ EmittingNoop(false), GetLookahead(CB) {
+ MaxLookAhead = 1;
+}
+
+void ARMCortexM4FAlignmentHazardRecognizer::EmitInstruction(SUnit *SU) {
+ if (!SU->isInstr())
+ return;
+
+ MachineInstr *MI = SU->getInstr();
+ assert(MI);
+ return EmitInstruction(MI);
+}
+
+void ARMCortexM4FAlignmentHazardRecognizer::EmitInstruction(MachineInstr *MI) {
+ if (MI->isDebugInstr())
+ return;
+
+ unsigned Size = MI->getDesc().getSize();
+ Offset += Size;
+
+  // If this instruction had a hazard, a nop has just been inserted before it.
+  // Mark that nop with an AsmPrinter comment.
+ if (EmittingNoop)
+ if (MachineInstr *Prev = MI->getPrevNode())
+ Prev->setAsmPrinterFlag(ARM::ALIGNMENT_HAZARD);
+
+ EmittingNoop = false;
+}
+
+ScheduleHazardRecognizer::HazardType
+ARMCortexM4FAlignmentHazardRecognizer::getHazardType(SUnit *SU,
+ int /*Ignored*/) {
+ if (!SU->isInstr())
+ return HazardType::NoHazard;
+
+ MachineInstr *MI = SU->getInstr();
+ assert(MI);
+ return getHazardTypeAssumingOffset(MI, Offset);
+}
+
+ScheduleHazardRecognizer::HazardType
+ARMCortexM4FAlignmentHazardRecognizer::getHazardTypeAssumingOffset(
+ MachineInstr *MI, size_t AssumedOffset) {
+ if (Advanced) {
+ Advanced = false;
+ return HazardType::NoHazard;
+ }
+
+ if (AssumedOffset % 4 == 0)
+ return HazardType::NoHazard;
+
+ const MCSchedModel &SCModel = STI.getSchedModel();
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const ARMBaseInstrInfo &TII =
+ *static_cast<const ARMBaseInstrInfo *>(MF->getSubtarget().getInstrInfo());
+ int Latency = SCModel.computeInstrLatency<MCSubtargetInfo, MCInstrInfo,
+ InstrItineraryData, MachineInstr>(
+ STI, TII, *MI);
+ if (!Latency)
+ return HazardType::NoHazard;
+
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
+
+ // https://developer.arm.com/documentation/ka006138/latest
+ //
+ // "A long sequence of T32 single-cycle floating-point instructions aligned on
+ // odd halfword boundaries will experience a performance drop. Specifically,
+ // one stall cycle is inserted for every three instructions executed."
+ bool SingleCycleFP =
+ Latency == 1 && (Domain & (ARMII::DomainNEON | ARMII::DomainVFP));
+ if (SingleCycleFP)
+ return HazardType::NoopHazard;
+
+ // https://documentation-service.arm.com/static/5fce431be167456a35b36ade
+ //
+ // "Neighboring load and store single instructions can pipeline their address
+  // and data phases but in some cases, such as 32-bit opcodes aligned on odd
+ // halfword boundaries, they might not pipeline optimally."
+ if (MCID.getSize() == 4 && (MI->mayLoad() || MI->mayStore()))
+ return HazardType::NoopHazard;
+
+ return HazardType::NoHazard;
+}
+
+void ARMCortexM4FAlignmentHazardRecognizer::AdvanceCycle() { Advanced = true; }
+void ARMCortexM4FAlignmentHazardRecognizer::RecedeCycle() {}
+
+void ARMCortexM4FAlignmentHazardRecognizer::EmitNoop() { Offset += 2; }
+
+unsigned ARMCortexM4FAlignmentHazardRecognizer::PreEmitNoops(SUnit *SU) {
+ if (!SU->isInstr())
+ return 0;
+
+ MachineInstr *MI = SU->getInstr();
+ assert(MI);
+ return PreEmitNoops(MI);
+}
+
+unsigned ARMCortexM4FAlignmentHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
+ const MachineBasicBlock *Parent = MI->getParent();
+ const Function &F = Parent->getParent()->getFunction();
+ if (Parent != MBB) {
+ Offset = 0;
+ MBB = Parent;
+ }
+
+ int MaxLookaheadInsts;
+ int RequiredHazardCount;
+ const MachineLoop *Loop = getLoopFor(MI);
+ GetLookahead(F, Loop, MaxLookaheadInsts, RequiredHazardCount);
+ if (MaxLookaheadInsts < 0)
+ return 0;
+
+ LLVM_DEBUG(MI->dump());
+
+ // Since one stall cycle is inserted for every three such instructions aligned
+ // to an odd-halfword boundary, look for runs of 6 or more, at which point it
+ // becomes profitable to insert the nop:
+ //
+ // Insts | Bytes | Improvement
+ // -------+-------+--------------
+ // 1 T2 | 2 | -1 cycle (slower)
+ // 2 T2 | 2 | -1 cycle
+ // 3 T2 | 2 | 0 cycle (breakeven)
+ // 4 T2 | 2 | 0 cycle
+ // 5 T2 | 2 | 0 cycle
+ // 6 T2 | 2 | 1 cycle (faster)
+ //
+ MachineBasicBlock::iterator Next = MI->getIterator();
+ MachineBasicBlock::const_iterator End = Parent->end();
+ int LookaheadInsts = 0;
+ int HazardCount = 0;
+ size_t LookaheadOffset = Offset;
+ while (Next != End && LookaheadInsts < MaxLookaheadInsts &&
+ (HazardType::NoopHazard ==
+ getHazardTypeAssumingOffset(&*Next, LookaheadOffset))) {
+ LookaheadOffset += Next->getDesc().getSize();
+ Next++;
+ HazardCount++;
+ LookaheadInsts++;
+ }
+
+ if (RequiredHazardCount <= HazardCount) {
+ EmittingNoop = true;
+ NumNoops++;
+ LLVM_DEBUG(dbgs() << "\toffset=0x" << utohexstr(Offset)
+ << "\n\thas an alignment hazard, and requires a noop\n");
+ return 1;
+ }
+
+ LLVM_DEBUG(dbgs() << formatv("\toffset=0x{0}\n\tfound only {1} hazards in "
+ "the next {2} instructions\n",
+ utohexstr(Offset), HazardCount, LookaheadInsts));
+
+ return 0;
+}
+
+const MachineLoop *
+ARMCortexM4FAlignmentHazardRecognizer::getLoopFor(MachineInstr *MI) {
+ // Calculate and cache the MachineLoopInfo.
+ MachineFunction *ParentMF = MI->getParent()->getParent();
+ if (MF != ParentMF) {
+ MF = ParentMF;
+ MDT = MachineDominatorTree(*MF);
+ MLI.~MachineLoopInfo();
+ new (&MLI) MachineLoopInfo(MDT);
----------------
davemgreen wrote:
Can we avoid the placement new, possibly just use a smart pointer instead?
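For example, something like the sketch below is the kind of thing I mean (the member declarations and the tail of the function are assumptions about the surrounding code, not what the patch currently has):

  // Cached analyses, rebuilt whenever the recognizer sees a new function.
  std::unique_ptr<MachineDominatorTree> MDT;
  std::unique_ptr<MachineLoopInfo> MLI;

  const MachineLoop *
  ARMCortexM4FAlignmentHazardRecognizer::getLoopFor(MachineInstr *MI) {
    MachineFunction *ParentMF = MI->getParent()->getParent();
    if (MF != ParentMF) {
      MF = ParentMF;
      // std::make_unique (from <memory>) replaces the manual destructor call
      // and placement new when switching to a new function.
      MDT = std::make_unique<MachineDominatorTree>(*MF);
      MLI = std::make_unique<MachineLoopInfo>(*MDT);
    }
    return MLI->getLoopFor(MI->getParent());
  }

A std::optional<MachineLoopInfo> with emplace() would also avoid the placement new, if the extra heap allocation is a concern.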
https://github.com/llvm/llvm-project/pull/126991