[llvm-branch-commits] [llvm] release/19.x: [AArch64] Add streaming-mode stack hazard optimization remarks (#101695) (PR #102168)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Aug 6 08:47:32 PDT 2024
https://github.com/llvmbot created https://github.com/llvm/llvm-project/pull/102168
Backport a98a0dc
Requested by: @hazzlim
>From e572643c71072b6038298b25b09a6e9cab71f9b3 Mon Sep 17 00:00:00 2001
From: Hari Limaye <hari.limaye at arm.com>
Date: Tue, 6 Aug 2024 11:39:01 +0100
Subject: [PATCH] [AArch64] Add streaming-mode stack hazard optimization
remarks (#101695)
Emit an optimization remark when objects in the stack frame may cause
hazards in a streaming mode function. The analysis requires either the
`aarch64-stack-hazard-size` or `aarch64-stack-hazard-remark-size` flag
to be set by the user, with the former flag taking precedence.
(cherry picked from commit a98a0dcf63f54c54c5601a34c9f8c10cde0162d6)
---
.../llvm/CodeGen/TargetFrameLowering.h | 6 +
llvm/lib/CodeGen/PrologEpilogInserter.cpp | 3 +
.../Target/AArch64/AArch64FrameLowering.cpp | 204 +++++++++++++++++-
.../lib/Target/AArch64/AArch64FrameLowering.h | 6 +-
.../AArch64/ssve-stack-hazard-remarks.ll | 152 +++++++++++++
.../CodeGen/AArch64/sve-stack-frame-layout.ll | 4 +-
6 files changed, 364 insertions(+), 11 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/ssve-stack-hazard-remarks.ll
diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
index 0656c0d739fdf..d8c9d0a432ad8 100644
--- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
@@ -15,6 +15,7 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/Support/TypeSize.h"
#include <vector>
@@ -473,6 +474,11 @@ class TargetFrameLowering {
/// Return the frame base information to be encoded in the DWARF subprogram
/// debug info.
virtual DwarfFrameBase getDwarfFrameBase(const MachineFunction &MF) const;
+
+ /// This method is called at the end of prolog/epilog code insertion, so
+ /// targets can emit remarks based on the final frame layout.
+ virtual void emitRemarks(const MachineFunction &MF,
+ MachineOptimizationRemarkEmitter *ORE) const {};
};
} // End llvm namespace
diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index cd5d877e53d82..f4490873cfdcd 100644
--- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -341,6 +341,9 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
<< ore::NV("Function", MF.getFunction().getName()) << "'";
});
+ // Emit any remarks implemented for the target, based on final frame layout.
+ TFI->emitRemarks(MF, ORE);
+
delete RS;
SaveBlocks.clear();
RestoreBlocks.clear();
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index bd530903bb664..ba46ededc63a8 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -240,6 +240,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
@@ -275,6 +276,10 @@ cl::opt<bool> EnableHomogeneousPrologEpilog(
// Stack hazard padding size. 0 = disabled.
static cl::opt<unsigned> StackHazardSize("aarch64-stack-hazard-size",
cl::init(0), cl::Hidden);
+// Stack hazard size for analysis remarks. StackHazardSize takes precedence.
+static cl::opt<unsigned>
+ StackHazardRemarkSize("aarch64-stack-hazard-remark-size", cl::init(0),
+ cl::Hidden);
// Whether to insert padding into non-streaming functions (for testing).
static cl::opt<bool>
StackHazardInNonStreaming("aarch64-stack-hazard-in-non-streaming",
@@ -2615,9 +2620,16 @@ AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
const auto &MFI = MF.getFrameInfo();
int64_t ObjectOffset = MFI.getObjectOffset(FI);
+ StackOffset SVEStackSize = getSVEStackSize(MF);
+
+ // For VLA-area objects, just emit an offset at the end of the stack frame.
+ // Whilst not quite correct, these objects do live at the end of the frame and
+ // so it is more useful for analysis for the offset to reflect this.
+ if (MFI.isVariableSizedObjectIndex(FI)) {
+ return StackOffset::getFixed(-((int64_t)MFI.getStackSize())) - SVEStackSize;
+ }
// This is correct in the absence of any SVE stack objects.
- StackOffset SVEStackSize = getSVEStackSize(MF);
if (!SVEStackSize)
return StackOffset::getFixed(ObjectOffset - getOffsetOfLocalArea());
@@ -3528,13 +3540,9 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
return true;
}
-// Return the FrameID for a Load/Store instruction by looking at the MMO.
-static std::optional<int> getLdStFrameID(const MachineInstr &MI,
- const MachineFrameInfo &MFI) {
- if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1)
- return std::nullopt;
-
- MachineMemOperand *MMO = *MI.memoperands_begin();
+// Return the FrameID for a MMO.
+static std::optional<int> getMMOFrameID(MachineMemOperand *MMO,
+ const MachineFrameInfo &MFI) {
auto *PSV =
dyn_cast_or_null<FixedStackPseudoSourceValue>(MMO->getPseudoValue());
if (PSV)
@@ -3552,6 +3560,15 @@ static std::optional<int> getLdStFrameID(const MachineInstr &MI,
return std::nullopt;
}
+// Return the FrameID for a Load/Store instruction by looking at the first MMO.
+static std::optional<int> getLdStFrameID(const MachineInstr &MI,
+ const MachineFrameInfo &MFI) {
+ if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1)
+ return std::nullopt;
+
+ return getMMOFrameID(*MI.memoperands_begin(), MFI);
+}
+
// Check if a Hazard slot is needed for the current function, and if so create
// one for it. The index is stored in AArch64FunctionInfo->StackHazardSlotIndex,
// which can be used to determine if any hazard padding is needed.
@@ -5029,3 +5046,174 @@ void AArch64FrameLowering::inlineStackProbe(MachineFunction &MF,
MI->eraseFromParent();
}
}
+
+struct StackAccess {
+ enum AccessType {
+ NotAccessed = 0, // Stack object not accessed by load/store instructions.
+ GPR = 1 << 0, // A general purpose register.
+ PPR = 1 << 1, // A predicate register.
+ FPR = 1 << 2, // A floating point/Neon/SVE register.
+ };
+
+ int Idx;
+ StackOffset Offset;
+ int64_t Size;
+ unsigned AccessTypes;
+
+ StackAccess() : Idx(0), Offset(), Size(0), AccessTypes(NotAccessed) {}
+
+ bool operator<(const StackAccess &Rhs) const {
+ return std::make_tuple(start(), Idx) <
+ std::make_tuple(Rhs.start(), Rhs.Idx);
+ }
+
+ bool isCPU() const {
+ // Predicate register load and store instructions execute on the CPU.
+ return AccessTypes & (AccessType::GPR | AccessType::PPR);
+ }
+ bool isSME() const { return AccessTypes & AccessType::FPR; }
+ bool isMixed() const { return isCPU() && isSME(); }
+
+ int64_t start() const { return Offset.getFixed() + Offset.getScalable(); }
+ int64_t end() const { return start() + Size; }
+
+ std::string getTypeString() const {
+ switch (AccessTypes) {
+ case AccessType::FPR:
+ return "FPR";
+ case AccessType::PPR:
+ return "PPR";
+ case AccessType::GPR:
+ return "GPR";
+ case AccessType::NotAccessed:
+ return "NA";
+ default:
+ return "Mixed";
+ }
+ }
+
+ void print(raw_ostream &OS) const {
+ OS << getTypeString() << " stack object at [SP"
+ << (Offset.getFixed() < 0 ? "" : "+") << Offset.getFixed();
+ if (Offset.getScalable())
+ OS << (Offset.getScalable() < 0 ? "" : "+") << Offset.getScalable()
+ << " * vscale";
+ OS << "]";
+ }
+};
+
+static inline raw_ostream &operator<<(raw_ostream &OS, const StackAccess &SA) {
+ SA.print(OS);
+ return OS;
+}
+
+void AArch64FrameLowering::emitRemarks(
+ const MachineFunction &MF, MachineOptimizationRemarkEmitter *ORE) const {
+
+ SMEAttrs Attrs(MF.getFunction());
+ if (Attrs.hasNonStreamingInterfaceAndBody())
+ return;
+
+ const uint64_t HazardSize =
+ (StackHazardSize) ? StackHazardSize : StackHazardRemarkSize;
+
+ if (HazardSize == 0)
+ return;
+
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ // Bail if function has no stack objects.
+ if (!MFI.hasStackObjects())
+ return;
+
+ std::vector<StackAccess> StackAccesses(MFI.getNumObjects());
+
+ size_t NumFPLdSt = 0;
+ size_t NumNonFPLdSt = 0;
+
+ // Collect stack accesses via Load/Store instructions.
+ for (const MachineBasicBlock &MBB : MF) {
+ for (const MachineInstr &MI : MBB) {
+ if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1)
+ continue;
+ for (MachineMemOperand *MMO : MI.memoperands()) {
+ std::optional<int> FI = getMMOFrameID(MMO, MFI);
+ if (FI && !MFI.isDeadObjectIndex(*FI)) {
+ int FrameIdx = *FI;
+
+ size_t ArrIdx = FrameIdx + MFI.getNumFixedObjects();
+ if (StackAccesses[ArrIdx].AccessTypes == StackAccess::NotAccessed) {
+ StackAccesses[ArrIdx].Idx = FrameIdx;
+ StackAccesses[ArrIdx].Offset =
+ getFrameIndexReferenceFromSP(MF, FrameIdx);
+ StackAccesses[ArrIdx].Size = MFI.getObjectSize(FrameIdx);
+ }
+
+ unsigned RegTy = StackAccess::AccessType::GPR;
+ if (MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector) {
+ if (AArch64::PPRRegClass.contains(MI.getOperand(0).getReg()))
+ RegTy = StackAccess::PPR;
+ else
+ RegTy = StackAccess::FPR;
+ } else if (AArch64InstrInfo::isFpOrNEON(MI)) {
+ RegTy = StackAccess::FPR;
+ }
+
+ StackAccesses[ArrIdx].AccessTypes |= RegTy;
+
+ if (RegTy == StackAccess::FPR)
+ ++NumFPLdSt;
+ else
+ ++NumNonFPLdSt;
+ }
+ }
+ }
+ }
+
+ if (NumFPLdSt == 0 || NumNonFPLdSt == 0)
+ return;
+
+ llvm::sort(StackAccesses);
+ StackAccesses.erase(llvm::remove_if(StackAccesses,
+ [](const StackAccess &S) {
+ return S.AccessTypes ==
+ StackAccess::NotAccessed;
+ }),
+ StackAccesses.end());
+
+ SmallVector<const StackAccess *> MixedObjects;
+ SmallVector<std::pair<const StackAccess *, const StackAccess *>> HazardPairs;
+
+ if (StackAccesses.front().isMixed())
+ MixedObjects.push_back(&StackAccesses.front());
+
+ for (auto It = StackAccesses.begin(), End = std::prev(StackAccesses.end());
+ It != End; ++It) {
+ const auto &First = *It;
+ const auto &Second = *(It + 1);
+
+ if (Second.isMixed())
+ MixedObjects.push_back(&Second);
+
+ if ((First.isSME() && Second.isCPU()) ||
+ (First.isCPU() && Second.isSME())) {
+ uint64_t Distance = static_cast<uint64_t>(Second.start() - First.end());
+ if (Distance < HazardSize)
+ HazardPairs.emplace_back(&First, &Second);
+ }
+ }
+
+ auto EmitRemark = [&](llvm::StringRef Str) {
+ ORE->emit([&]() {
+ auto R = MachineOptimizationRemarkAnalysis(
+ "sme", "StackHazard", MF.getFunction().getSubprogram(), &MF.front());
+ return R << formatv("stack hazard in '{0}': ", MF.getName()).str() << Str;
+ });
+ };
+
+ for (const auto &P : HazardPairs)
+ EmitRemark(formatv("{0} is too close to {1}", *P.first, *P.second).str());
+
+ for (const auto *Obj : MixedObjects)
+ EmitRemark(
+ formatv("{0} accessed by both GP and FP instructions", *Obj).str());
+}
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 0ebab1700e9ce..c197312496208 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -13,8 +13,9 @@
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64FRAMELOWERING_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64FRAMELOWERING_H
-#include "llvm/Support/TypeSize.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/Support/TypeSize.h"
namespace llvm {
@@ -178,6 +179,9 @@ class AArch64FrameLowering : public TargetFrameLowering {
inlineStackProbeLoopExactMultiple(MachineBasicBlock::iterator MBBI,
int64_t NegProbeSize,
Register TargetReg) const;
+
+ void emitRemarks(const MachineFunction &MF,
+ MachineOptimizationRemarkEmitter *ORE) const override;
};
} // End llvm namespace
diff --git a/llvm/test/CodeGen/AArch64/ssve-stack-hazard-remarks.ll b/llvm/test/CodeGen/AArch64/ssve-stack-hazard-remarks.ll
new file mode 100644
index 0000000000000..0b6bf3892a0c2
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ssve-stack-hazard-remarks.ll
@@ -0,0 +1,152 @@
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -pass-remarks-analysis=sme -aarch64-stack-hazard-remark-size=64 -o /dev/null < %s 2>&1 | FileCheck %s --check-prefixes=CHECK
+; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 -pass-remarks-analysis=sme -aarch64-stack-hazard-size=1024 -o /dev/null < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-PADDING
+
+; Don't emit remarks for non-streaming functions.
+define float @csr_x20_stackargs_notsc(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, float %i) {
+; CHECK-NOT: remark: <unknown>:0:0: stack hazard in 'csr_x20_stackargs_notsc':
+; CHECK-PADDING-NOT: remark: <unknown>:0:0: stack hazard in 'csr_x20_stackargs_notsc':
+entry:
+ tail call void asm sideeffect "", "~{x20}"() #1
+ ret float %i
+}
+
+; Don't emit remarks for functions that only access GPR stack objects.
+define i64 @stackargs_gpr(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, i64 %h, i64 %i) #2 {
+; CHECK-NOT: remark: <unknown>:0:0: stack hazard in 'csr_x20_stackargs_gpr':
+; CHECK-PADDING-NOT: remark: <unknown>:0:0: stack hazard in 'csr_x20_stackargs_gpr':
+entry:
+ ret i64 %i
+}
+
+; Don't emit remarks for functions that only access FPR stack objects.
+define double @stackargs_fpr(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i) #2 {
+; CHECK-NOT: remark: <unknown>:0:0: stack hazard in 'csr_x20_stackargs_fpr':
+; CHECK-PADDING-NOT: remark: <unknown>:0:0: stack hazard in 'csr_x20_stackargs_fpr':
+entry:
+ ret double %i
+}
+
+; As this case is handled by addition of stack hazard padding, only emit remarks when this is not switched on.
+define i32 @csr_d8_alloci64(i64 %d) #2 {
+; CHECK: remark: <unknown>:0:0: stack hazard in 'csr_d8_alloci64': FPR stack object at [SP-16] is too close to GPR stack object at [SP-8]
+; CHECK-PADDING-NOT: remark: <unknown>:0:0: stack hazard in 'csr_d8_alloci64':
+entry:
+ %a = alloca i64
+ tail call void asm sideeffect "", "~{d8}"() #1
+ store i64 %d, ptr %a
+ ret i32 0
+}
+
+; As this case is handled by addition of stack hazard padding, only emit remarks when this is not switched on.
+define i32 @csr_d8_allocnxv4i32(i64 %d) #2 {
+; CHECK: remark: <unknown>:0:0: stack hazard in 'csr_d8_allocnxv4i32': FPR stack object at [SP-16] is too close to GPR stack object at [SP-8]
+; CHECK-PADDING-NOT: remark: <unknown>:0:0: stack hazard in 'csr_d8_allocnxv4i32':
+entry:
+ %a = alloca <vscale x 4 x i32>
+ tail call void asm sideeffect "", "~{d8}"() #1
+ store <vscale x 4 x i32> zeroinitializer, ptr %a
+ ret i32 0
+}
+
+define float @csr_x20_stackargs(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, float %i) #2 {
+; CHECK: remark: <unknown>:0:0: stack hazard in 'csr_x20_stackargs': GPR stack object at [SP-16] is too close to FPR stack object at [SP+0]
+; CHECK-PADDING: remark: <unknown>:0:0: stack hazard in 'csr_x20_stackargs': GPR stack object at [SP-16] is too close to FPR stack object at [SP+0]
+entry:
+ tail call void asm sideeffect "", "~{x20}"() #1
+ ret float %i
+}
+
+; In this case, addition of stack hazard padding triggers x29 (fp) spill, so we hazard occurs between FPR argument and GPR spill.
+define float @csr_d8_stackargs(float %a, float %b, float %c, float %d, float %e, float %f, float %g, float %h, float %i) #2 {
+; CHECK-NOT: remark: <unknown>:0:0: stack hazard in 'csr_d8_stackargs':
+; CHECK-PADDING: remark: <unknown>:0:0: stack hazard in 'csr_d8_stackargs': GPR stack object at [SP-8] is too close to FPR stack object at [SP+0]
+entry:
+ tail call void asm sideeffect "", "~{d8}"() #1
+ ret float %i
+}
+
+; SVE calling conventions
+; Predicate register spills end up in FP region, currently.
+
+define i32 @svecc_call(<4 x i16> %P0, ptr %P1, i32 %P2, <vscale x 16 x i8> %P3, i16 %P4) #2 {
+; CHECK: remark: <unknown>:0:0: stack hazard in 'svecc_call': PPR stack object at [SP-48-258 * vscale] is too close to FPR stack object at [SP-48-256 * vscale]
+; CHECK: remark: <unknown>:0:0: stack hazard in 'svecc_call': FPR stack object at [SP-48-16 * vscale] is too close to GPR stack object at [SP-48]
+; CHECK-PADDING: remark: <unknown>:0:0: stack hazard in 'svecc_call': PPR stack object at [SP-1072-258 * vscale] is too close to FPR stack object at [SP-1072-256 * vscale]
+; CHECK-PADDING-NOT: remark: <unknown>:0:0: stack hazard in 'svecc_call':
+entry:
+ tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2
+ %call = call ptr @memset(ptr noundef nonnull %P1, i32 noundef 45, i32 noundef 37)
+ ret i32 -396142473
+}
+
+define i32 @svecc_alloca_call(<4 x i16> %P0, ptr %P1, i32 %P2, <vscale x 16 x i8> %P3, i16 %P4) #2 {
+; CHECK: remark: <unknown>:0:0: stack hazard in 'svecc_alloca_call': PPR stack object at [SP-48-258 * vscale] is too close to FPR stack object at [SP-48-256 * vscale]
+; CHECK: remark: <unknown>:0:0: stack hazard in 'svecc_alloca_call': FPR stack object at [SP-48-16 * vscale] is too close to GPR stack object at [SP-48]
+; CHECK-PADDING: remark: <unknown>:0:0: stack hazard in 'svecc_alloca_call': PPR stack object at [SP-1072-258 * vscale] is too close to FPR stack object at [SP-1072-256 * vscale]
+; CHECK-PADDING-NOT: remark: <unknown>:0:0: stack hazard in 'svecc_alloca_call':
+entry:
+ tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2
+ %0 = alloca [37 x i8], align 16
+ %call = call ptr @memset(ptr noundef nonnull %0, i32 noundef 45, i32 noundef 37)
+ ret i32 -396142473
+}
+declare ptr @memset(ptr, i32, i32)
+
+%struct.mixed_struct = type { i32, float }
+
+define i32 @mixed_stack_object(i32 %a, float %b) #2 {
+; CHECK: remark: <unknown>:0:0: stack hazard in 'mixed_stack_object': Mixed stack object at [SP-8] accessed by both GP and FP instructions
+; CHECK-PADDING: remark: <unknown>:0:0: stack hazard in 'mixed_stack_object': Mixed stack object at [SP-8] accessed by both GP and FP instructions
+entry:
+ %s = alloca %struct.mixed_struct
+ %s.i = getelementptr %struct.mixed_struct, ptr %s, i32 0, i32 0
+ %s.f = getelementptr %struct.mixed_struct, ptr %s, i32 0, i32 1
+ store i32 %a, ptr %s.i
+ store float %b, ptr %s.f
+ ret i32 %a
+}
+
+define i32 @mixed_stack_objects(i32 %a, float %b) #2 {
+; CHECK: remark: <unknown>:0:0: stack hazard in 'mixed_stack_objects': Mixed stack object at [SP-16] is too close to Mixed stack object at [SP-8]
+; CHECK: remark: <unknown>:0:0: stack hazard in 'mixed_stack_objects': Mixed stack object at [SP-16] accessed by both GP and FP instructions
+; CHECK: remark: <unknown>:0:0: stack hazard in 'mixed_stack_objects': Mixed stack object at [SP-8] accessed by both GP and FP instructions
+; CHECK-PADDING: remark: <unknown>:0:0: stack hazard in 'mixed_stack_objects': Mixed stack object at [SP-16] is too close to Mixed stack object at [SP-8]
+; CHECK-PADDING: remark: <unknown>:0:0: stack hazard in 'mixed_stack_objects': Mixed stack object at [SP-16] accessed by both GP and FP instructions
+; CHECK-PADDING: remark: <unknown>:0:0: stack hazard in 'mixed_stack_objects': Mixed stack object at [SP-8] accessed by both GP and FP instructions
+entry:
+ %s0 = alloca %struct.mixed_struct
+ %s0.i = getelementptr %struct.mixed_struct, ptr %s0, i32 0, i32 0
+ %s0.f = getelementptr %struct.mixed_struct, ptr %s0, i32 0, i32 1
+ store i32 %a, ptr %s0.i
+ store float %b, ptr %s0.f
+
+ %s1 = alloca %struct.mixed_struct
+ %s1.i = getelementptr %struct.mixed_struct, ptr %s1, i32 0, i32 0
+ %s1.f = getelementptr %struct.mixed_struct, ptr %s1, i32 0, i32 1
+ store i32 %a, ptr %s1.i
+ store float %b, ptr %s1.f
+
+ ret i32 %a
+}
+
+; VLA-area stack objects are not separated.
+define i32 @csr_d8_allocnxv4i32i32f64_vlai32f64(double %d, i32 %i) #2 {
+; CHECK: remark: <unknown>:0:0: stack hazard in 'csr_d8_allocnxv4i32i32f64_vlai32f64': GPR stack object at [SP-48-16 * vscale] is too close to FPR stack object at [SP-48-16 * vscale]
+; CHECK: remark: <unknown>:0:0: stack hazard in 'csr_d8_allocnxv4i32i32f64_vlai32f64': FPR stack object at [SP-32] is too close to GPR stack object at [SP-24]
+; CHECK-PADDING: remark: <unknown>:0:0: stack hazard in 'csr_d8_allocnxv4i32i32f64_vlai32f64': GPR stack object at [SP-2096-16 * vscale] is too close to FPR stack object at [SP-2096-16 * vscale]
+; CHECK-PADDING-NOT: remark: <unknown>:0:0: stack hazard in 'csr_d8_allocnxv4i32i32f64_vlai32f64':
+entry:
+ %a = alloca <vscale x 4 x i32>
+ %0 = zext i32 %i to i64
+ %vla0 = alloca i32, i64 %0
+ %vla1 = alloca double, i64 %0
+ %c = alloca double
+ tail call void asm sideeffect "", "~{d8}"() #1
+ store <vscale x 4 x i32> zeroinitializer, ptr %a
+ store i32 zeroinitializer, ptr %vla0
+ store double %d, ptr %vla1
+ store double %d, ptr %c
+ ret i32 0
+}
+
+attributes #2 = { "aarch64_pstate_sm_compatible" }
diff --git a/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll b/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
index 431c9dc76508f..ec94198a08ca7 100644
--- a/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
+++ b/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
@@ -150,8 +150,8 @@ entry:
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32], Type: Spill, Align: 8, Size: 8
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-16 x vscale], Type: Variable, Align: 16, Size: vscale x 16
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-40-16 x vscale], Type: Variable, Align: 8, Size: 8
-; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32], Type: VariableSized, Align: 1, Size: 0
-; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32], Type: VariableSized, Align: 1, Size: 0
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-48-16 x vscale], Type: VariableSized, Align: 1, Size: 0
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-48-16 x vscale], Type: VariableSized, Align: 1, Size: 0
define i32 @csr_d8_allocnxv4i32i32f64_vla(double %d, i32 %i) "aarch64_pstate_sm_compatible" {
; CHECK-LABEL: csr_d8_allocnxv4i32i32f64_vla:
More information about the llvm-branch-commits
mailing list