[llvm] [AArch64] Add getStreamingHazardSize() to AArch64Subtarget (PR #113679)
Benjamin Maxwell via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 25 05:12:25 PDT 2024
https://github.com/MacDue created https://github.com/llvm/llvm-project/pull/113679
This adds `getStreamingHazardSize()` to `AArch64Subtarget`. The value it returns is set by the `-aarch64-streaming-hazard-size` option or its alias `-aarch64-stack-hazard-size` (the original name). The option has been renamed to be more general because it will (for the time being) be used to detect whether the current target has streaming-mode memory hazards.
>From 1e80710e4651bd373d5fff1c73f20b694e6520f0 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Wed, 23 Oct 2024 14:27:11 +0000
Subject: [PATCH] [AArch64] Add getStreamingHazardSize() to AArch64Subtarget
The hazard size is defined by the -aarch64-streaming-hazard-size option
or its alias -aarch64-stack-hazard-size (the original name). The option
has been renamed to be more general because it will (for the time being)
be used to detect whether the current target has streaming-mode memory
hazards.
---
.../Target/AArch64/AArch64FrameLowering.cpp | 30 +++++++++++--------
llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 2 ++
llvm/lib/Target/AArch64/AArch64Subtarget.h | 7 ++++-
.../Target/AArch64/AArch64TargetMachine.cpp | 13 +++++++-
4 files changed, 38 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index bbf2f267795457..59faace5bd16cf 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -126,14 +126,15 @@
// and the SME unit try to access the same area of memory, including if the
// access is to an area of the stack. To try to alleviate this we attempt to
// introduce extra padding into the stack frame between FP and GPR accesses,
-// controlled by the StackHazardSize option. Without changing the layout of the
-// stack frame in the diagram above, a stack object of size StackHazardSize is
-// added between GPR and FPR CSRs. Another is added to the stack objects
-// section, and stack objects are sorted so that FPR > Hazard padding slot >
-// GPRs (where possible). Unfortunately some things are not handled well (VLA
-// area, arguments on the stack, object with both GPR and FPR accesses), but if
-// those are controlled by the user then the entire stack frame becomes GPR at
-// the start/end with FPR in the middle, surrounded by Hazard padding.
+// controlled by the aarch64-stack-hazard-size option. Without changing the
+// layout of the stack frame in the diagram above, a stack object of size
+// aarch64-stack-hazard-size is added between GPR and FPR CSRs. Another is added
+// to the stack objects section, and stack objects are sorted so that FPR >
+// Hazard padding slot > GPRs (where possible). Unfortunately some things are
+// not handled well (VLA area, arguments on the stack, object with both GPR and
+// FPR accesses), but if those are controlled by the user then the entire stack
+// frame becomes GPR at the start/end with FPR in the middle, surrounded by
+// Hazard padding.
//
// An example of the prologue:
//
@@ -273,9 +274,6 @@ cl::opt<bool> EnableHomogeneousPrologEpilog(
cl::desc("Emit homogeneous prologue and epilogue for the size "
"optimization (default = off)"));
-// Stack hazard padding size. 0 = disabled.
-static cl::opt<unsigned> StackHazardSize("aarch64-stack-hazard-size",
- cl::init(0), cl::Hidden);
// Stack hazard size for analysis remarks. StackHazardSize takes precedence.
static cl::opt<unsigned>
StackHazardRemarkSize("aarch64-stack-hazard-remark-size", cl::init(0),
@@ -1614,6 +1612,10 @@ static bool isTargetWindows(const MachineFunction &MF) {
return MF.getSubtarget<AArch64Subtarget>().isTargetWindows();
}
+static unsigned getStackHazardSize(const MachineFunction &MF) {
+ return MF.getSubtarget<AArch64Subtarget>().getStreamingHazardSize();
+}
+
// Convenience function to determine whether I is an SVE callee save.
static bool IsSVECalleeSave(MachineBasicBlock::iterator I) {
switch (I->getOpcode()) {
@@ -2985,6 +2987,7 @@ static void computeCalleeSaveRegisterPairs(
bool IsWindows = isTargetWindows(MF);
bool NeedsWinCFI = needsWinCFI(MF);
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ unsigned StackHazardSize = getStackHazardSize(MF);
MachineFrameInfo &MFI = MF.getFrameInfo();
CallingConv::ID CC = MF.getFunction().getCallingConv();
unsigned Count = CSI.size();
@@ -3612,6 +3615,7 @@ static std::optional<int> getLdStFrameID(const MachineInstr &MI,
// which can be used to determine if any hazard padding is needed.
void AArch64FrameLowering::determineStackHazardSlot(
MachineFunction &MF, BitVector &SavedRegs) const {
+ unsigned StackHazardSize = getStackHazardSize(MF);
if (StackHazardSize == 0 || StackHazardSize % 16 != 0 ||
MF.getInfo<AArch64FunctionInfo>()->hasStackHazardSlotIndex())
return;
@@ -3802,7 +3806,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// StackHazardSize if so.
determineStackHazardSlot(MF, SavedRegs);
if (AFI->hasStackHazardSlotIndex())
- CSStackSize += StackHazardSize;
+ CSStackSize += getStackHazardSize(MF);
// Save number of saved regs, so we can easily update CSStackSize later.
unsigned NumSavedRegs = SavedRegs.count();
@@ -3917,6 +3921,7 @@ bool AArch64FrameLowering::assignCalleeSavedSpillSlots(
std::vector<CalleeSavedInfo> &CSI, unsigned &MinCSFrameIndex,
unsigned &MaxCSFrameIndex) const {
bool NeedsWinCFI = needsWinCFI(MF);
+ unsigned StackHazardSize = getStackHazardSize(MF);
// To match the canonical windows frame layout, reverse the list of
// callee saved registers to get them laid out by PrologEpilogInserter
// in the right order. (PrologEpilogInserter allocates stack objects top
@@ -5151,6 +5156,7 @@ void AArch64FrameLowering::emitRemarks(
if (Attrs.hasNonStreamingInterfaceAndBody())
return;
+ unsigned StackHazardSize = getStackHazardSize(MF);
const uint64_t HazardSize =
(StackHazardSize) ? StackHazardSize : StackHazardRemarkSize;
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 32db1e8c2477a8..70f85a3bbce1cf 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -326,6 +326,7 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
unsigned MinSVEVectorSizeInBitsOverride,
unsigned MaxSVEVectorSizeInBitsOverride,
bool IsStreaming, bool IsStreamingCompatible,
+ unsigned StreamingHazardSize,
bool HasMinSize)
: AArch64GenSubtargetInfo(TT, CPU, TuneCPU, FS),
ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
@@ -333,6 +334,7 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
IsLittle(LittleEndian), IsStreaming(IsStreaming),
IsStreamingCompatible(IsStreamingCompatible),
+ StreamingHazardSize(StreamingHazardSize),
MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),
InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU, HasMinSize)),
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 9856415361e50d..a150c007f4abd2 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -84,6 +84,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
bool IsStreaming;
bool IsStreamingCompatible;
+ unsigned StreamingHazardSize;
unsigned MinSVEVectorSizeInBits;
unsigned MaxSVEVectorSizeInBits;
unsigned VScaleForTuning = 2;
@@ -124,7 +125,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
unsigned MinSVEVectorSizeInBitsOverride = 0,
unsigned MaxSVEVectorSizeInBitsOverride = 0,
bool IsStreaming = false, bool IsStreamingCompatible = false,
- bool HasMinSize = false);
+ unsigned StreamingHazardSize = 0, bool HasMinSize = false);
// Getters for SubtargetFeatures defined in tablegen
#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
@@ -172,6 +173,10 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
/// Returns true if the function has a streaming-compatible body.
bool isStreamingCompatible() const { return IsStreamingCompatible; }
+ /// Returns the size of memory region that if accessed by both the CPU and
+ /// the SME unit could result in a hazard. 0 = disabled.
+ unsigned getStreamingHazardSize() const { return StreamingHazardSize; }
+
/// Returns true if the target has NEON and the function at runtime is known
/// to have NEON enabled (e.g. the function is known not to be in streaming-SVE
/// mode, which disables NEON instructions).
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index c7bd0390b65620..b2e30225b8a099 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -202,6 +202,16 @@ static cl::opt<bool> ForceStreamingCompatible(
cl::desc("Force the use of streaming-compatible code for all functions"),
cl::init(false), cl::Hidden);
+static cl::opt<unsigned> StreamingHazardSize(
+ "aarch64-streaming-hazard-size",
+ cl::desc("Hazard size for streaming mode memory accesses. 0 = disabled."),
+ cl::init(0), cl::Hidden);
+
+static cl::alias StreamingStackHazardSize(
+ "aarch64-stack-hazard-size",
+ cl::desc("alias for -aarch64-streaming-hazard-size"),
+ cl::aliasopt(StreamingHazardSize));
+
extern cl::opt<bool> EnableHomogeneousPrologEpilog;
static cl::opt<bool> EnableGISelLoadStoreOptPreLegal(
@@ -472,7 +482,8 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
resetTargetOptions(F);
I = std::make_unique<AArch64Subtarget>(
TargetTriple, CPU, TuneCPU, FS, *this, isLittle, MinSVEVectorSize,
- MaxSVEVectorSize, IsStreaming, IsStreamingCompatible, HasMinSize);
+ MaxSVEVectorSize, IsStreaming, IsStreamingCompatible,
+ StreamingHazardSize, HasMinSize);
}
assert((!IsStreaming || I->hasSME()) && "Expected SME to be available");
More information about the llvm-commits
mailing list