[llvm] [AMDGPU] Add option to prevent insns straddling half cache-line boundaries (PR #150239)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 23 08:49:09 PDT 2025
================
@@ -274,12 +274,72 @@ static void emitVGPRBlockComment(const MachineInstr *MI, const SIInstrInfo *TII,
OS.emitRawComment(" transferring at most " + TransferredRegs);
}
+extern cl::opt<bool> PreventHalfCacheLineStraddling;
+
+/// Returns the encoded size, in bytes, of \p LoweredMCI for subtarget \p STI.
+///
+/// The instruction is encoded into a scratch buffer using a temporary AMDGPU
+/// MCCodeEmitter created on \p OutContext. The encoded bytes and any fixups
+/// produced are discarded; only the byte count is returned.
+static unsigned getMCInstSizeInBytes(const MCInst &LoweredMCI,
+ const GCNSubtarget &STI,
+ MCContext &OutContext) {
+ SmallVector<MCFixup, 4> Fixups;
+ SmallVector<char, 16> CodeBytes;
+
+ std::unique_ptr<MCCodeEmitter> InstEmitter(
+ createAMDGPUMCCodeEmitter(*STI.getInstrInfo(), OutContext));
+ InstEmitter->encodeInstruction(LoweredMCI, CodeBytes, Fixups, STI);
+ return CodeBytes.size();
+}
+
void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
// FIXME: Enable feature predicate checks once all the test pass.
// AMDGPU_MC::verifyInstructionPredicates(MI->getOpcode(),
// getSubtargetInfo().getFeatureBits());
+ auto AvoidHalfCacheLineBoundary = [this](const MachineInstr *MI,
+ const MachineFunction *MF,
+ const MCInst &LoweredMCI) -> void {
+ const GCNSubtarget &STI = MF->getSubtarget<GCNSubtarget>();
+ SIMachineFunctionInfo *MFI = const_cast<SIMachineFunctionInfo *>(
+ MF->getInfo<SIMachineFunctionInfo>());
+
+ unsigned InstSizeInBytes = STI.getInstrInfo()->getInstSizeInBytes(*MI);
+
+ // getInstSizeInBytes conservatively overestimates the size of branches due
+ // to a NOP added for the 0x3f offset bug. Any inaccuracies in instruction
+ // sizes will cause problems when avoiding straddling half cache-line
+ // boundaries. A NOP is usually not added so remove the +4 that was added.
+ if (MI->isBranch() && STI.hasOffset3fBug())
+ InstSizeInBytes -= 4;
+ // Rarely, some MachineInstr do not have accurate instruction sizes. Try to
+ // calculate the size from the lowered MCInst.
+ else if (InstSizeInBytes == 0 && STI.isCPUStringValid(STI.getCPU()) &&
+ !(MI->getOpcode() == AMDGPU::SI_ILLEGAL_COPY ||
+ MI->getOpcode() == AMDGPU::ATOMIC_FENCE))
----------------
arsenm wrote:
Shouldn't require special casing
https://github.com/llvm/llvm-project/pull/150239
More information about the llvm-commits
mailing list