[llvm-branch-commits] [llvm] release/19.x: [StackFrameLayoutAnalysis] Support more SlotTypes (#100562) (PR #101042)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Jul 29 10:26:27 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-x86
Author: None (llvmbot)
<details>
<summary>Changes</summary>
Backport dc1c00f6b13f e31794f99d72
Requested by: @davemgreen
---
Patch is 40.20 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/101042.diff
7 Files Affected:
- (modified) llvm/include/llvm/CodeGen/TargetFrameLowering.h (+7)
- (modified) llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp (+54-18)
- (modified) llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp (+14)
- (modified) llvm/lib/Target/AArch64/AArch64FrameLowering.cpp (+35)
- (modified) llvm/lib/Target/AArch64/AArch64FrameLowering.h (+2)
- (modified) llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll (+478-9)
- (modified) llvm/test/CodeGen/X86/stack-frame-layout-remarks.ll (+6-6)
``````````diff
diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
index 72978b2f746d7..0656c0d739fdf 100644
--- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
@@ -343,6 +343,13 @@ class TargetFrameLowering {
return getFrameIndexReference(MF, FI, FrameReg);
}
+ /// getFrameIndexReferenceFromSP - This method returns the offset from the
+ /// stack pointer to the slot of the specified index. This function serves to
+ /// provide a comparable offset from a single reference point (the value of
+ /// the stack-pointer at function entry) that can be used for analysis.
+ virtual StackOffset getFrameIndexReferenceFromSP(const MachineFunction &MF,
+ int FI) const;
+
/// Returns the callee-saved registers as computed by determineCalleeSaves
/// in the BitVector \p SavedRegs.
virtual void getCalleeSaves(const MachineFunction &MF,
diff --git a/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp b/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp
index 940aecd1cb363..0a7a6bad4e86d 100644
--- a/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp
+++ b/llvm/lib/CodeGen/StackFrameLayoutAnalysisPass.cpp
@@ -51,6 +51,8 @@ struct StackFrameLayoutAnalysisPass : public MachineFunctionPass {
enum SlotType {
Spill, // a Spill slot
+ Fixed, // a Fixed slot (e.g. arguments passed on the stack)
+ VariableSized, // a variable sized object
StackProtector, // Stack Protector slot
Variable, // a slot used to store a local data (could be a tmp)
Invalid // It's an error for a slot to have this type
@@ -60,29 +62,42 @@ struct StackFrameLayoutAnalysisPass : public MachineFunctionPass {
int Slot;
int Size;
int Align;
- int Offset;
+ StackOffset Offset;
SlotType SlotTy;
bool Scalable;
- SlotData(const MachineFrameInfo &MFI, const int ValOffset, const int Idx)
+ SlotData(const MachineFrameInfo &MFI, const StackOffset Offset,
+ const int Idx)
: Slot(Idx), Size(MFI.getObjectSize(Idx)),
- Align(MFI.getObjectAlign(Idx).value()),
- Offset(MFI.getObjectOffset(Idx) - ValOffset), SlotTy(Invalid),
- Scalable(false) {
+ Align(MFI.getObjectAlign(Idx).value()), Offset(Offset),
+ SlotTy(Invalid), Scalable(false) {
Scalable = MFI.getStackID(Idx) == TargetStackID::ScalableVector;
if (MFI.isSpillSlotObjectIndex(Idx))
SlotTy = SlotType::Spill;
- else if (Idx == MFI.getStackProtectorIndex())
+ else if (MFI.isFixedObjectIndex(Idx))
+ SlotTy = SlotType::Fixed;
+ else if (MFI.isVariableSizedObjectIndex(Idx))
+ SlotTy = SlotType::VariableSized;
+ else if (MFI.hasStackProtectorIndex() &&
+ Idx == MFI.getStackProtectorIndex())
SlotTy = SlotType::StackProtector;
else
SlotTy = SlotType::Variable;
}
+ bool isVarSize() const { return SlotTy == SlotType::VariableSized; }
+
// We use this to sort in reverse order, so that the layout is displayed
- // correctly. Scalable slots are sorted to the end of the list.
+ // correctly. Variable sized slots are sorted to the end of the list, as
+ // offsets are currently incorrect for these but they reside at the end of
+ // the stack frame. The Slot index is used to ensure deterministic order
+ // when offsets are equal.
bool operator<(const SlotData &Rhs) const {
- return std::make_tuple(!Scalable, Offset) >
- std::make_tuple(!Rhs.Scalable, Rhs.Offset);
+ return std::make_tuple(!isVarSize(),
+ Offset.getFixed() + Offset.getScalable(), Slot) >
+ std::make_tuple(!Rhs.isVarSize(),
+ Rhs.Offset.getFixed() + Rhs.Offset.getScalable(),
+ Rhs.Slot);
}
};
@@ -121,6 +136,10 @@ struct StackFrameLayoutAnalysisPass : public MachineFunctionPass {
switch (Ty) {
case SlotType::Spill:
return "Spill";
+ case SlotType::Fixed:
+ return "Fixed";
+ case SlotType::VariableSized:
+ return "VariableSized";
case SlotType::StackProtector:
return "Protector";
case SlotType::Variable:
@@ -149,15 +168,27 @@ struct StackFrameLayoutAnalysisPass : public MachineFunctionPass {
// For example we store the Offset in YAML as:
// ...
// - Offset: -8
+ // - ScalableOffset: -16
+ // Note: the ScalableOffset entries are added only for slots with non-zero
+ // scalable offsets.
//
- // But we print it to the CLI as
+ // But we print it to the CLI as:
// Offset: [SP-8]
+ //
+ // Or with non-zero scalable offset:
+ // Offset: [SP-8-16 x vscale]
// Negative offsets will print a leading `-`, so only add `+`
std::string Prefix =
- formatv("\nOffset: [SP{0}", (D.Offset < 0) ? "" : "+").str();
- Rem << Prefix << ore::NV("Offset", D.Offset)
- << "], Type: " << ore::NV("Type", getTypeString(D.SlotTy))
+ formatv("\nOffset: [SP{0}", (D.Offset.getFixed() < 0) ? "" : "+").str();
+ Rem << Prefix << ore::NV("Offset", D.Offset.getFixed());
+
+ if (D.Offset.getScalable()) {
+ Rem << ((D.Offset.getScalable() < 0) ? "" : "+")
+ << ore::NV("ScalableOffset", D.Offset.getScalable()) << " x vscale";
+ }
+
+ Rem << "], Type: " << ore::NV("Type", getTypeString(D.SlotTy))
<< ", Align: " << ore::NV("Align", D.Align)
<< ", Size: " << ore::NV("Size", ElementCount::get(D.Size, D.Scalable));
}
@@ -170,17 +201,22 @@ struct StackFrameLayoutAnalysisPass : public MachineFunctionPass {
Rem << "\n " << ore::NV("DataLoc", Loc);
}
+ StackOffset getStackOffset(const MachineFunction &MF,
+ const MachineFrameInfo &MFI,
+ const TargetFrameLowering *FI, int FrameIdx) {
+ if (!FI)
+ return StackOffset::getFixed(MFI.getObjectOffset(FrameIdx));
+
+ return FI->getFrameIndexReferenceFromSP(MF, FrameIdx);
+ }
+
void emitStackFrameLayoutRemarks(MachineFunction &MF,
MachineOptimizationRemarkAnalysis &Rem) {
const MachineFrameInfo &MFI = MF.getFrameInfo();
if (!MFI.hasStackObjects())
return;
- // ValOffset is the offset to the local area from the SP at function entry.
- // To display the true offset from SP, we need to subtract ValOffset from
- // MFI's ObjectOffset.
const TargetFrameLowering *FI = MF.getSubtarget().getFrameLowering();
- const int ValOffset = (FI ? FI->getOffsetOfLocalArea() : 0);
LLVM_DEBUG(dbgs() << "getStackProtectorIndex =="
<< MFI.getStackProtectorIndex() << "\n");
@@ -194,7 +230,7 @@ struct StackFrameLayoutAnalysisPass : public MachineFunctionPass {
Idx != EndIdx; ++Idx) {
if (MFI.isDeadObjectIndex(Idx))
continue;
- SlotInfo.emplace_back(MFI, ValOffset, Idx);
+ SlotInfo.emplace_back(MFI, getStackOffset(MF, MFI, FI, Idx), Idx);
}
// sort the ordering, to match the actual layout in memory
diff --git a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
index 48a2094f5d451..7d054cb7c7c71 100644
--- a/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -61,6 +61,20 @@ TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
MFI.getOffsetAdjustment());
}
+/// Returns the offset from the stack pointer to the slot of the specified
+/// index. This function serves to provide a comparable offset from a single
+/// reference point (the value of the stack-pointer at function entry) that can
+/// be used for analysis. This is the default implementation using
+/// MachineFrameInfo offsets.
+StackOffset
+TargetFrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
+ int FI) const {
+ // To display the true offset from SP, we need to subtract the offset to the
+ // local area from MFI's ObjectOffset.
+ return StackOffset::getFixed(MF.getFrameInfo().getObjectOffset(FI) -
+ getOffsetOfLocalArea());
+}
+
bool TargetFrameLowering::needsFrameIndexResolution(
const MachineFunction &MF) const {
return MF.getFrameInfo().hasStackObjects();
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index b1b83e27c5592..bd530903bb664 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -2603,6 +2603,41 @@ AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
/*ForSimm=*/false);
}
+StackOffset
+AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
+ int FI) const {
+ // This function serves to provide a comparable offset from a single reference
+ // point (the value of SP at function entry) that can be used for analysis,
+ // e.g. the stack-frame-layout analysis pass. It is not guaranteed to be
+ // correct for all objects in the presence of VLA-area objects or dynamic
+ // stack re-alignment.
+
+ const auto &MFI = MF.getFrameInfo();
+
+ int64_t ObjectOffset = MFI.getObjectOffset(FI);
+
+ // This is correct in the absence of any SVE stack objects.
+ StackOffset SVEStackSize = getSVEStackSize(MF);
+ if (!SVEStackSize)
+ return StackOffset::getFixed(ObjectOffset - getOffsetOfLocalArea());
+
+ const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
+ if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
+ return StackOffset::get(-((int64_t)AFI->getCalleeSavedStackSize()),
+ ObjectOffset);
+ }
+
+ bool IsFixed = MFI.isFixedObjectIndex(FI);
+ bool IsCSR =
+ !IsFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize(MFI));
+
+ StackOffset ScalableOffset = {};
+ if (!IsFixed && !IsCSR)
+ ScalableOffset = -SVEStackSize;
+
+ return StackOffset::getFixed(ObjectOffset) + ScalableOffset;
+}
+
StackOffset
AArch64FrameLowering::getNonLocalFrameIndexReference(const MachineFunction &MF,
int FI) const {
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index da315850d6362..0ebab1700e9ce 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -41,6 +41,8 @@ class AArch64FrameLowering : public TargetFrameLowering {
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI,
Register &FrameReg) const override;
+ StackOffset getFrameIndexReferenceFromSP(const MachineFunction &MF,
+ int FI) const override;
StackOffset resolveFrameIndexReference(const MachineFunction &MF, int FI,
Register &FrameReg, bool PreferFP,
bool ForSimm) const;
diff --git a/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll b/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
index 34d85d1f76086..431c9dc76508f 100644
--- a/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
+++ b/llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
@@ -5,9 +5,9 @@
; CHECK-FRAMELAYOUT-LABEL: Function: csr_d8_allocnxv4i32i32f64
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-8], Type: Spill, Align: 8, Size: 8
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 8, Size: 8
-; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-20], Type: Variable, Align: 4, Size: 4
-; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32], Type: Variable, Align: 8, Size: 8
-; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Variable, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-16 x vscale], Type: Variable, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-20-16 x vscale], Type: Variable, Align: 4, Size: 4
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-16 x vscale], Type: Variable, Align: 8, Size: 8
define i32 @csr_d8_allocnxv4i32i32f64(double %d) "aarch64_pstate_sm_compatible" {
; CHECK-LABEL: csr_d8_allocnxv4i32i32f64:
@@ -49,8 +49,8 @@ entry:
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 8, Size: 8
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-20], Type: Variable, Align: 4, Size: 4
; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32], Type: Spill, Align: 16, Size: 8
-; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-40], Type: Variable, Align: 8, Size: 8
-; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Variable, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-16 x vscale], Type: Variable, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-40-16 x vscale], Type: Variable, Align: 8, Size: 8
define i32 @csr_d8_allocnxv4i32i32f64_fp(double %d) "aarch64_pstate_sm_compatible" "frame-pointer"="all" {
; CHECK-LABEL: csr_d8_allocnxv4i32i32f64_fp:
@@ -90,13 +90,174 @@ entry:
ret i32 0
}
+; In the presence of dynamic stack-realignment we emit correct offsets for
+; objects which are not realigned. For realigned objects, e.g. the i32 alloca
+; in this test, we emit the correct offset ignoring the re-alignment (i.e. the
+; offset if the alignment requirement is already satisfied).
+
+; CHECK-FRAMELAYOUT-LABEL: Function: csr_d8_allocnxv4i32i32f64_dynamicrealign
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-8], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-24], Type: Variable, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32], Type: Spill, Align: 16, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-16 x vscale], Type: Variable, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-128-16 x vscale], Type: Variable, Align: 128, Size: 4
+
+define i32 @csr_d8_allocnxv4i32i32f64_dynamicrealign(double %d) "aarch64_pstate_sm_compatible" {
+; CHECK-LABEL: csr_d8_allocnxv4i32i32f64_dynamicrealign:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str d8, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-NEXT: sub x9, sp, #96
+; CHECK-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: add x29, sp, #16
+; CHECK-NEXT: addvl x9, x9, #-1
+; CHECK-NEXT: and sp, x9, #0xffffffffffffff80
+; CHECK-NEXT: .cfi_def_cfa w29, 16
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: .cfi_offset b8, -32
+; CHECK-NEXT: mov z1.s, #0 // =0x0
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: sub x8, x29, #16
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: //APP
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: str wzr, [sp]
+; CHECK-NEXT: stur d0, [x29, #-8]
+; CHECK-NEXT: st1w { z1.s }, p0, [x8, #-1, mul vl]
+; CHECK-NEXT: sub sp, x29, #16
+; CHECK-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr d8, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ %a = alloca <vscale x 4 x i32>
+ %b = alloca i32, align 128
+ %c = alloca double
+ tail call void asm sideeffect "", "~{d8}"() #1
+ store <vscale x 4 x i32> zeroinitializer, ptr %a
+ store i32 zeroinitializer, ptr %b
+ store double %d, ptr %c
+ ret i32 0
+}
+
+; In the presence of VLA-area objects, we emit correct offsets for all objects
+; except for these VLA objects.
+
+; CHECK-FRAMELAYOUT-LABEL: Function: csr_d8_allocnxv4i32i32f64_vla
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-8], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-24], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-16 x vscale], Type: Variable, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-40-16 x vscale], Type: Variable, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32], Type: VariableSized, Align: 1, Size: 0
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32], Type: VariableSized, Align: 1, Size: 0
+
+define i32 @csr_d8_allocnxv4i32i32f64_vla(double %d, i32 %i) "aarch64_pstate_sm_compatible" {
+; CHECK-LABEL: csr_d8_allocnxv4i32i32f64_vla:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str d8, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #8] // 16-byte Folded Spill
+; CHECK-NEXT: add x29, sp, #8
+; CHECK-NEXT: str x19, [sp, #24] // 8-byte Folded Spill
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: mov x19, sp
+; CHECK-NEXT: .cfi_def_cfa w29, 24
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: .cfi_offset w29, -24
+; CHECK-NEXT: .cfi_offset b8, -32
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: ubfiz x8, x0, #2, #32
+; CHECK-NEXT: mov x9, sp
+; CHECK-NEXT: add x8, x8, #15
+; CHECK-NEXT: and x8, x8, #0x7fffffff0
+; CHECK-NEXT: sub x9, x9, x8
+; CHECK-NEXT: mov sp, x9
+; CHECK-NEXT: mov x10, sp
+; CHECK-NEXT: sub x8, x10, x8
+; CHECK-NEXT: mov sp, x8
+; CHECK-NEXT: mov z1.s, #0 // =0x0
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: //APP
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: str wzr, [x8]
+; CHECK-NEXT: sub x8, x29, #8
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: str wzr, [x9]
+; CHECK-NEXT: st1w { z1.s }, p0, [x8, #-1, mul vl]
+; CHECK-NEXT: str d0, [x19, #8]
+; CHECK-NEXT: sub sp, x29, #8
+; CHECK-NEXT: ldp x29, x30, [sp, #8] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp, #24] // 8-byte Folded Reload
+; CHECK-NEXT: ldr d8, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ %a = alloca <vscale x 4 x i32>
+ %0 = zext i32 %i to i64
+ %vla0 = alloca i32, i64 %0
+ %vla1 = alloca i32, i64 %0
+ %c = alloca double
+ tail call void asm sideeffect "", "~{d8}"() #1
+ store <vscale x 4 x i32> zeroinitializer, ptr %a
+ store i32 zeroinitializer, ptr %vla0
+ store i32 zeroinitializer, ptr %vla1
+ store double %d, ptr %c
+ ret i32 0
+}
+
+; CHECK-FRAMELAYOUT-LABEL: Function: csr_d8_allocnxv4i32i32f64_stackargsi32f64
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP+8], Type: Fixed, Align: 8, Size: 4
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP+0], Type: Fixed, Align: 16, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-8], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16], Type: Spill, Align: 8, Size: 8
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-16-16 x vscale], Type: Variable, Align: 16, Size: vscale x 16
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-20-16 x vscale], Type: Variable, Align: 4, Size: 4
+; CHECK-FRAMELAYOUT-NEXT: Offset: [SP-32-16 x vscale], Type: Variable, Align: 8, Size: 8
+
+define i32 @csr_d8_allocnxv4i32i32f64_stackargsi32f64(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8) "aarch64_pstate_sm_compatible" {
+; CHECK-LABEL: csr_d8_allocnxv4i32i32f64_stackargsi32f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str d8, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: str x29, [sp, #8] // 8-byte Folded Spill
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x20, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 32 + 8 * VG
+; CHECK-NEXT: .cfi_offset w29, -8
+; CHECK-NEXT: .cfi_offset b8, -16
+; CHECK-NEXT: mov z1.s, #0 // =0x0
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: //APP
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: str wzr, [sp, #12]
+; CHECK-NEXT: str d0, [sp]
+; CHECK-NEXT: st1w { z1.s }, p0, [x8]
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ldr x29, [sp, #8] // 8-byte Folded Reload
+; CHECK-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
+ %a = alloca <vscale x 4 x i32>
+ %b = alloca i32
+ %c = alloca double
+ tail call void asm sideeffect "", "~{d8}"() #1
+ store <...
[truncated]
``````````
</details>
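For context on how the reworked ordering and output behave, below is a minimal standalone sketch. It is not part of the patch: the `Slot` struct, its field names, and the sample offsets are invented for illustration. It mirrors the comparator introduced in `SlotData::operator<` above and the `Offset: [SP<fixed><scalable> x vscale]` remark format the pass now prints, where the scalable component is shown only when non-zero.

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <tuple>
#include <vector>

// Hypothetical stand-in for the pass's SlotData; only the fields the
// comparator needs are modelled here.
struct Slot {
  int Index;           // frame index, used as a deterministic tie-breaker
  int64_t FixedOff;    // StackOffset::getFixed()
  int64_t ScalableOff; // StackOffset::getScalable()
  bool VarSized;       // variable-sized objects sort to the end
};

int main() {
  std::vector<Slot> Slots = {
      {0, -20, -16, false}, {1, -16, -16, false}, {2, -32, 0, true}};

  // Same ordering as the patched SlotData::operator<: descending by
  // (fixed + scalable) offset, variable-sized slots last, ties broken by
  // slot index.
  std::sort(Slots.begin(), Slots.end(), [](const Slot &L, const Slot &R) {
    return std::make_tuple(!L.VarSized, L.FixedOff + L.ScalableOff, L.Index) >
           std::make_tuple(!R.VarSized, R.FixedOff + R.ScalableOff, R.Index);
  });

  // Print in the CLI remark style described in the patch: a fixed offset
  // from SP, followed by an "N x vscale" term only when the scalable
  // component is non-zero.
  for (const Slot &S : Slots) {
    std::printf("fi#%d: Offset: [SP%+lld", S.Index, (long long)S.FixedOff);
    if (S.ScalableOff)
      std::printf("%+lld x vscale", (long long)S.ScalableOff);
    std::printf("]%s\n", S.VarSized ? " (variable-sized)" : "");
  }
  return 0;
}
```

With the sample values above this prints the slots from highest to lowest combined offset, with the variable-sized slot last, matching the ordering seen in the updated sve-stack-frame-layout.ll checks.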
https://github.com/llvm/llvm-project/pull/101042