[llvm] r373585 - [AArch64] Static (de)allocation of SVE stack objects.
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 3 04:33:50 PDT 2019
Author: s.desmalen
Date: Thu Oct 3 04:33:50 2019
New Revision: 373585
URL: http://llvm.org/viewvc/llvm-project?rev=373585&view=rev
Log:
[AArch64] Static (de)allocation of SVE stack objects.
Adds support to AArch64FrameLowering to allocate fixed-stack SVE objects.
The focus of this patch is purely to allow the stack frame to
allocate and deallocate space for scalable SVE objects. Determining the
placement of individual SVE objects on the stack at compile time and
resolving frame-index references that include scalable-sized offsets
are both left for subsequent patches.
SVE objects are allocated in the stack frame as a separate region below
the callee-save area and above the alignment gap. This is done so that
SVE objects can be accessed directly from the FP at (runtime) VL-based
offsets and benefit from the VL-scaled addressing modes (a small sketch
of such an access follows the layout diagram below).
The layout looks as follows:
+-------------+
|  stack arg  |
+-------------+
| Callee Saves|
|  X29, X30   | (if available)
|-------------| <- FP (if available)
|      :      |
|  SVE area   |
|      :      |
+-------------+
|/////////////| alignment gap.
|      :      |
| Stack objs  |
|      :      |
+-------------+ <- SP after call and frame-setup
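Not part of the original log message: a minimal C++ sketch of how the
StackOffset and emitFrameOffset changes in this patch let such a mixed
fixed-plus-scalable offset be materialized. It assumes the usual
machine-function context (basic block, debug location, TargetInstrInfo)
is available; the helper name and the destination register are
hypothetical.

#include "AArch64InstrInfo.h"
#include "AArch64StackOffset.h"

using namespace llvm;

// Hypothetical helper: materialize "FP - 16 bytes - 2 * VL bytes" into X0.
// MVT::nxv2i64 contributes 16 scalable bytes per unit, so -2 units equals
// the size of two SVE data vectors.
static void emitSVESlotAddress(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI,
                               const DebugLoc &DL,
                               const TargetInstrInfo *TII) {
  StackOffset Offset =
      StackOffset(-16, MVT::i8) + StackOffset(-2, MVT::nxv2i64);

  // With this patch, emitFrameOffset splits the offset into a SUBXri for the
  // fixed -16 bytes followed by an ADDVL_XXI for the two scalable vectors.
  emitFrameOffset(MBB, MBBI, DL, AArch64::X0, AArch64::FP, Offset, TII,
                  MachineInstr::NoFlags);
}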
SVE and non-SVE stack objects are distinguished using different
StackIDs. The offsets for objects with TargetStackID::SVEVector should be
interpreted as purely scalable offsets within their respective SVE region.
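Not part of the original log message: a minimal sketch, assuming the
in-tree MachineFrameInfo API, of how a frame object is tagged and later
recognized as an SVE object. The helper names are hypothetical; in this
patch the SVE objects handled are fixed-stack objects (negative frame
indices), matching the fixedStack entries with stack-id: sve-vec in the
MIR test added below.

#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include <cstdint>

using namespace llvm;

// Hypothetical helper: create a fixed-stack SVE object. For SVEVector
// objects the recorded offset is a purely scalable offset within the SVE
// region, not a byte offset from the incoming SP.
static int createFixedSVEObject(MachineFrameInfo &MFI, uint64_t Size,
                                int64_t ScalableOffset) {
  int FI = MFI.CreateFixedObject(Size, ScalableOffset, /*IsImmutable=*/false);
  MFI.setStackID(FI, TargetStackID::SVEVector);
  return FI;
}

// This check is how the frame lowering changes in this patch tell SVE and
// non-SVE stack objects apart.
static bool isSVEStackObject(const MachineFrameInfo &MFI, int FI) {
  return MFI.getStackID(FI) == TargetStackID::SVEVector;
}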
Reviewers: thegameg, rovka, t.p.northover, efriedma, rengolin, greened
Reviewed By: efriedma
Differential Revision: https://reviews.llvm.org/D61437
Added:
llvm/trunk/test/CodeGen/AArch64/framelayout-sve.mir
Modified:
llvm/trunk/include/llvm/CodeGen/MIRYamlMapping.h
llvm/trunk/include/llvm/CodeGen/TargetFrameLowering.h
llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp
llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.h
llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
llvm/trunk/lib/Target/AArch64/AArch64MachineFunctionInfo.h
llvm/trunk/lib/Target/AArch64/AArch64StackOffset.h
llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp
llvm/trunk/unittests/Target/AArch64/TestStackOffset.cpp
Modified: llvm/trunk/include/llvm/CodeGen/MIRYamlMapping.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/MIRYamlMapping.h?rev=373585&r1=373584&r2=373585&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/MIRYamlMapping.h (original)
+++ llvm/trunk/include/llvm/CodeGen/MIRYamlMapping.h Thu Oct 3 04:33:50 2019
@@ -314,6 +314,7 @@ struct ScalarEnumerationTraits<TargetSta
static void enumeration(yaml::IO &IO, TargetStackID::Value &ID) {
IO.enumCase(ID, "default", TargetStackID::Default);
IO.enumCase(ID, "sgpr-spill", TargetStackID::SGPRSpill);
+ IO.enumCase(ID, "sve-vec", TargetStackID::SVEVector);
IO.enumCase(ID, "noalloc", TargetStackID::NoAlloc);
}
};
Modified: llvm/trunk/include/llvm/CodeGen/TargetFrameLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/TargetFrameLowering.h?rev=373585&r1=373584&r2=373585&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/TargetFrameLowering.h (original)
+++ llvm/trunk/include/llvm/CodeGen/TargetFrameLowering.h Thu Oct 3 04:33:50 2019
@@ -28,6 +28,7 @@ namespace TargetStackID {
enum Value {
Default = 0,
SGPRSpill = 1,
+ SVEVector = 2,
NoAlloc = 255
};
}
Modified: llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp?rev=373585&r1=373584&r2=373585&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp Thu Oct 3 04:33:50 2019
@@ -55,6 +55,10 @@
// | callee-saved fp/simd/SVE regs     |
// |                                   |
// |-----------------------------------|
+// |                                   |
+// | SVE stack objects                 |
+// |                                   |
+// |-----------------------------------|
// |.empty.space.to.make.part.below....|
// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
// |.the.standard.16-byte.alignment....| compile time; if present)
@@ -202,6 +206,12 @@ static unsigned estimateRSStackSizeLimit
return DefaultSafeSPDisplacement;
}
+/// Returns the size of the entire SVE stackframe (calleesaves + spills).
+static StackOffset getSVEStackSize(const MachineFunction &MF) {
+ const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ return {(int64_t)AFI->getStackSizeSVE(), MVT::nxv1i8};
+}
+
bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
if (!EnableRedZone)
return false;
@@ -214,7 +224,8 @@ bool AArch64FrameLowering::canUseRedZone
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
unsigned NumBytes = AFI->getLocalStackSize();
- return !(MFI.hasCalls() || hasFP(MF) || NumBytes > 128);
+ return !(MFI.hasCalls() || hasFP(MF) || NumBytes > 128 ||
+ getSVEStackSize(MF));
}
/// hasFP - Return true if the specified function should have a dedicated frame
@@ -456,6 +467,11 @@ bool AArch64FrameLowering::shouldCombine
if (canUseRedZone(MF))
return false;
+ // When there is an SVE area on the stack, always allocate the
+ // callee-saves and spills/locals separately.
+ if (getSVEStackSize(MF))
+ return false;
+
return true;
}
@@ -870,6 +886,8 @@ void AArch64FrameLowering::emitPrologue(
// Ideally it should match SP value after prologue.
AFI->setTaggedBasePointerOffset(MFI.getStackSize());
+ const StackOffset &SVEStackSize = getSVEStackSize(MF);
+
// getStackSize() includes all the locals in its size calculation. We don't
// include these locals when computing the stack size of a funclet, as they
// are allocated in the parent's stack frame and accessed via the frame
@@ -880,6 +898,8 @@ void AArch64FrameLowering::emitPrologue(
: (int)MFI.getStackSize();
if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) {
assert(!HasFP && "unexpected function without stack frame but with FP");
+ assert(!SVEStackSize &&
+ "unexpected function without stack frame but with SVE objects");
// All of the stack allocation is for locals.
AFI->setLocalStackSize(NumBytes);
if (!NumBytes)
@@ -926,6 +946,7 @@ void AArch64FrameLowering::emitPrologue(
AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
if (CombineSPBump) {
+ assert(!SVEStackSize && "Cannot combine SP bump with SVE");
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
{-NumBytes, MVT::i8}, TII, MachineInstr::FrameSetup, false,
NeedsWinCFI, &HasWinCFI);
@@ -1083,6 +1104,9 @@ void AArch64FrameLowering::emitPrologue(
NumBytes = 0;
}
+ emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -SVEStackSize, TII,
+ MachineInstr::FrameSetup);
+
// Allocate space for the rest of the frame.
if (NumBytes) {
const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
@@ -1431,8 +1455,11 @@ void AArch64FrameLowering::emitEpilogue(
.setMIFlag(MachineInstr::FrameDestroy);
}
+ const StackOffset &SVEStackSize = getSVEStackSize(MF);
+
// If there is a single SP update, insert it before the ret and we're done.
if (CombineSPBump) {
+ assert(!SVEStackSize && "Cannot combine SP bump with SVE");
emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
{NumBytes + (int64_t)AfterCSRPopSize, MVT::i8}, TII,
MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
@@ -1446,6 +1473,12 @@ void AArch64FrameLowering::emitEpilogue(
NumBytes -= PrologueSaveSize;
assert(NumBytes >= 0 && "Negative stack allocation size!?");
+ // Deallocate the SVE area.
+ if (SVEStackSize)
+ if (!AFI->isStackRealigned())
+ emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, SVEStackSize,
+ TII, MachineInstr::FrameDestroy);
+
if (!hasFP(MF)) {
bool RedZone = canUseRedZone(MF);
// If this was a redzone leaf function, we don't need to restore the
@@ -1595,6 +1628,11 @@ StackOffset AArch64FrameLowering::resolv
bool isCSR =
!isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize());
+ const StackOffset &SVEStackSize = getSVEStackSize(MF);
+ if (SVEStackSize)
+ llvm_unreachable("Accessing frame indices in presence of SVE "
+ "not yet supported");
+
// Use frame pointer to reference fixed objects. Use it for locals if
// there are VLAs or a dynamically realigned SP (and thus the SP isn't
// reliable as a base). Make sure useFPForScavengingIndex() does the
@@ -2175,8 +2213,19 @@ void AArch64FrameLowering::determineCall
<< ' ' << printReg(Reg, RegInfo);
dbgs() << "\n";);
+ bool HasSVEStackObjects = [&MFI]() {
+ for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
+ if (MFI.getStackID(I) == TargetStackID::SVEVector &&
+ MFI.getObjectOffset(I) < 0)
+ return true;
+ // Note: We don't take allocatable stack objects into
+ // account yet, because allocation for those is not yet
+ // implemented.
+ return false;
+ }();
+
// If any callee-saved registers are used, the frame cannot be eliminated.
- bool CanEliminateFrame = SavedRegs.count() == 0;
+ bool CanEliminateFrame = (SavedRegs.count() == 0) && !HasSVEStackObjects;
// The CSR spill slots have not been allocated yet, so estimateStackSize
// won't include them.
@@ -2239,12 +2288,34 @@ bool AArch64FrameLowering::enableStackSl
void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
MachineFunction &MF, RegScavenger *RS) const {
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+
+ assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown &&
+ "Upwards growing stack unsupported");
+
+ // Process all fixed stack SVE objects.
+ int64_t Offset = 0;
+ for (int I = MFI.getObjectIndexBegin(); I != 0; ++I) {
+ unsigned StackID = MFI.getStackID(I);
+ if (StackID == TargetStackID::SVEVector) {
+ int64_t FixedOffset = -MFI.getObjectOffset(I);
+ if (FixedOffset > Offset)
+ Offset = FixedOffset;
+ }
+ }
+
+ unsigned MaxAlign = getStackAlignment();
+ uint64_t SVEStackSize = alignTo(Offset, MaxAlign);
+
+ AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+ AFI->setStackSizeSVE(SVEStackSize);
+ assert(MaxAlign <= 16 && "Cannot align scalable vectors more than 16 bytes");
+
// If this function isn't doing Win64-style C++ EH, we don't need to do
// anything.
if (!MF.hasEHFunclets())
return;
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
- MachineFrameInfo &MFI = MF.getFrameInfo();
WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
MachineBasicBlock &MBB = MF.front();
Modified: llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.h?rev=373585&r1=373584&r2=373585&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.h Thu Oct 3 04:33:50 2019
@@ -87,6 +87,17 @@ public:
int FI) const override;
int getSEHFrameIndexOffset(const MachineFunction &MF, int FI) const;
+ bool isSupportedStackID(TargetStackID::Value ID) const override {
+ switch (ID) {
+ default:
+ return false;
+ case TargetStackID::Default:
+ case TargetStackID::SVEVector:
+ case TargetStackID::NoAlloc:
+ return true;
+ }
+ }
+
private:
bool shouldCombineCSRLocalStackBump(MachineFunction &MF,
unsigned StackBumpBytes) const;
Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp?rev=373585&r1=373584&r2=373585&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp Thu Oct 3 04:33:50 2019
@@ -3046,6 +3046,16 @@ static void emitFrameOffsetAdj(MachineBa
MaxEncoding = 0xfff;
ShiftSize = 12;
break;
+ case AArch64::ADDVL_XXI:
+ case AArch64::ADDPL_XXI:
+ MaxEncoding = 31;
+ ShiftSize = 0;
+ if (Offset < 0) {
+ MaxEncoding = 32;
+ Sign = -1;
+ Offset = -Offset;
+ }
+ break;
default:
llvm_unreachable("Unsupported opcode");
}
@@ -3117,8 +3127,8 @@ void llvm::emitFrameOffset(MachineBasicB
StackOffset Offset, const TargetInstrInfo *TII,
MachineInstr::MIFlag Flag, bool SetNZCV,
bool NeedsWinCFI, bool *HasWinCFI) {
- int64_t Bytes;
- Offset.getForFrameOffset(Bytes);
+ int64_t Bytes, NumPredicateVectors, NumDataVectors;
+ Offset.getForFrameOffset(Bytes, NumPredicateVectors, NumDataVectors);
// First emit non-scalable frame offsets, or a simple 'mov'.
if (Bytes || (!Offset && SrcReg != DestReg)) {
@@ -3133,6 +3143,23 @@ void llvm::emitFrameOffset(MachineBasicB
NeedsWinCFI, HasWinCFI);
SrcReg = DestReg;
}
+
+ assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) &&
+ "SetNZCV not supported with SVE vectors");
+ assert(!(NeedsWinCFI && (NumPredicateVectors || NumDataVectors)) &&
+ "WinCFI not supported with SVE vectors");
+
+ if (NumDataVectors) {
+ emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
+ AArch64::ADDVL_XXI, TII, Flag, NeedsWinCFI, nullptr);
+ SrcReg = DestReg;
+ }
+
+ if (NumPredicateVectors) {
+ assert(DestReg != AArch64::SP && "Unaligned access to SP");
+ emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
+ AArch64::ADDPL_XXI, TII, Flag, NeedsWinCFI, nullptr);
+ }
}
MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
Modified: llvm/trunk/lib/Target/AArch64/AArch64MachineFunctionInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64MachineFunctionInfo.h?rev=373585&r1=373584&r2=373585&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64MachineFunctionInfo.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64MachineFunctionInfo.h Thu Oct 3 04:33:50 2019
@@ -95,6 +95,13 @@ class AArch64FunctionInfo final : public
/// returned struct in a register. This field holds the virtual register into
/// which the sret argument is passed.
unsigned SRetReturnReg = 0;
+ /// SVE stack size (for predicates and data vectors) are maintained here
+ /// rather than in FrameInfo, as the placement and Stack IDs are target
+ /// specific.
+ uint64_t StackSizeSVE = 0;
+
+ /// HasCalculatedStackSizeSVE indicates whether StackSizeSVE is valid.
+ bool HasCalculatedStackSizeSVE = false;
/// Has a value when it is known whether or not the function uses a
/// redzone, and no value otherwise.
@@ -131,6 +138,15 @@ public:
ArgumentStackToRestore = bytes;
}
+ bool hasCalculatedStackSizeSVE() const { return HasCalculatedStackSizeSVE; }
+
+ void setStackSizeSVE(uint64_t S) {
+ HasCalculatedStackSizeSVE = true;
+ StackSizeSVE = S;
+ }
+
+ uint64_t getStackSizeSVE() const { return StackSizeSVE; }
+
bool hasStackFrame() const { return HasStackFrame; }
void setHasStackFrame(bool s) { HasStackFrame = s; }
Modified: llvm/trunk/lib/Target/AArch64/AArch64StackOffset.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64StackOffset.h?rev=373585&r1=373584&r2=373585&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64StackOffset.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64StackOffset.h Thu Oct 3 04:33:50 2019
@@ -35,32 +35,38 @@ namespace llvm {
/// vector and a 64bit GPR.
class StackOffset {
int64_t Bytes;
+ int64_t ScalableBytes;
explicit operator int() const;
public:
using Part = std::pair<int64_t, MVT>;
- StackOffset() : Bytes(0) {}
+ StackOffset() : Bytes(0), ScalableBytes(0) {}
StackOffset(int64_t Offset, MVT::SimpleValueType T) : StackOffset() {
- assert(!MVT(T).isScalableVector() && "Scalable types not supported");
+ assert(MVT(T).getSizeInBits() % 8 == 0 &&
+ "Offset type is not a multiple of bytes");
*this += Part(Offset, T);
}
- StackOffset(const StackOffset &Other) : Bytes(Other.Bytes) {}
+ StackOffset(const StackOffset &Other)
+ : Bytes(Other.Bytes), ScalableBytes(Other.ScalableBytes) {}
StackOffset &operator=(const StackOffset &) = default;
StackOffset &operator+=(const StackOffset::Part &Other) {
- assert(Other.second.getSizeInBits() % 8 == 0 &&
- "Offset type is not a multiple of bytes");
- Bytes += Other.first * (Other.second.getSizeInBits() / 8);
+ int64_t OffsetInBytes = Other.first * (Other.second.getSizeInBits() / 8);
+ if (Other.second.isScalableVector())
+ ScalableBytes += OffsetInBytes;
+ else
+ Bytes += OffsetInBytes;
return *this;
}
StackOffset &operator+=(const StackOffset &Other) {
Bytes += Other.Bytes;
+ ScalableBytes += Other.ScalableBytes;
return *this;
}
@@ -72,6 +78,7 @@ public:
StackOffset &operator-=(const StackOffset &Other) {
Bytes -= Other.Bytes;
+ ScalableBytes -= Other.ScalableBytes;
return *this;
}
@@ -88,16 +95,42 @@ public:
return Res;
}
+ /// Returns the scalable part of the offset in bytes.
+ int64_t getScalableBytes() const { return ScalableBytes; }
+
/// Returns the non-scalable part of the offset in bytes.
int64_t getBytes() const { return Bytes; }
/// Returns the offset in parts to which this frame offset can be
/// decomposed for the purpose of describing a frame offset.
/// For non-scalable offsets this is simply its byte size.
- void getForFrameOffset(int64_t &ByteSized) const { ByteSized = Bytes; }
+ void getForFrameOffset(int64_t &NumBytes, int64_t &NumPredicateVectors,
+ int64_t &NumDataVectors) const {
+ assert(isValid() && "Invalid frame offset");
+
+ NumBytes = Bytes;
+ NumDataVectors = 0;
+ NumPredicateVectors = ScalableBytes / 2;
+ // This method is used to get the offsets to adjust the frame offset.
+ // If the function requires ADDPL to be used and needs more than two ADDPL
+ // instructions, part of the offset is folded into NumDataVectors so that it
+ // uses ADDVL for part of it, reducing the number of ADDPL instructions.
+ if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
+ NumPredicateVectors > 62) {
+ NumDataVectors = NumPredicateVectors / 8;
+ NumPredicateVectors -= NumDataVectors * 8;
+ }
+ }
/// Returns whether the offset is known zero.
- explicit operator bool() const { return Bytes; }
+ explicit operator bool() const { return Bytes || ScalableBytes; }
+
+ bool isValid() const {
+ // The smallest scalable element supported by scaled SVE addressing
+ // modes are predicates, which are 2 scalable bytes in size. So the scalable
+ // byte offset must always be a multiple of 2.
+ return ScalableBytes % 2 == 0;
+ }
};
} // end namespace llvm
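Not part of the diff: a self-contained worked example of the ADDVL/ADDPL
decomposition that getForFrameOffset performs for the scalable part of an
offset. The logic is restated as a standalone function here so it can be
compiled and run in isolation; the real implementation lives in
AArch64StackOffset.h above, and the expected values match the unit tests
added at the end of this commit.

#include <cassert>
#include <cstdint>

// Mirrors the scalable part of StackOffset::getForFrameOffset: ScalableBytes
// is split into ADDVL units (16 scalable bytes each) and ADDPL units
// (2 scalable bytes each). ADDPL alone is used only when its [-64, 62]
// immediate range suffices and the offset is not an exact multiple of a
// full data vector.
static void decomposeScalable(int64_t ScalableBytes, int64_t &NumDataVectors,
                              int64_t &NumPredicateVectors) {
  assert(ScalableBytes % 2 == 0 && "must be a multiple of 2 scalable bytes");
  NumDataVectors = 0;
  NumPredicateVectors = ScalableBytes / 2;
  if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
      NumPredicateVectors > 62) {
    NumDataVectors = NumPredicateVectors / 8;
    NumPredicateVectors -= NumDataVectors * 8;
  }
}

int main() {
  int64_t VL, PL;

  // 32 scalable bytes (two data vectors): expressed purely with ADDVL.
  decomposeScalable(32, VL, PL);
  assert(VL == 2 && PL == 0);

  // 66 scalable bytes: 33 predicate units fit the ADDPL immediate range.
  decomposeScalable(66, VL, PL);
  assert(VL == 0 && PL == 33);

  // 130 scalable bytes: 65 predicate units exceed ADDPL's range, so the
  // offset is split into 8 ADDVL units plus 1 ADDPL unit.
  decomposeScalable(130, VL, PL);
  assert(VL == 8 && PL == 1);

  return 0;
}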
Modified: llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp?rev=373585&r1=373584&r2=373585&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIFrameLowering.cpp Thu Oct 3 04:33:50 2019
@@ -673,6 +673,8 @@ bool SIFrameLowering::isSupportedStackID
case TargetStackID::NoAlloc:
case TargetStackID::SGPRSpill:
return true;
+ case TargetStackID::SVEVector:
+ return false;
}
llvm_unreachable("Invalid TargetStackID::Value");
}
Added: llvm/trunk/test/CodeGen/AArch64/framelayout-sve.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/framelayout-sve.mir?rev=373585&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/framelayout-sve.mir (added)
+++ llvm/trunk/test/CodeGen/AArch64/framelayout-sve.mir Thu Oct 3 04:33:50 2019
@@ -0,0 +1,121 @@
+# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass=prologepilog %s -o - | FileCheck %s
+#
+# Test allocation and deallocation of SVE objects on the stack,
+# as well as using a combination of scalable and non-scalable
+# offsets to access the SVE on the stack.
+#
+# SVE objects are allocated below the (scalar) callee saves,
+# and above spills/locals and the alignment gap, e.g.
+#
+# +-------------+
+# |  stack arg  |
+# +-------------+ <- SP before call
+# | Callee Saves|
+# | Frame record| (if available)
+# |-------------| <- FP (if available)
+# |  SVE area   |
+# +-------------+
+# |/////////////| alignment gap.
+# |      :      |
+# |  Stack objs |
+# |      :      |
+# +-------------+ <- SP after call and frame-setup
+#
+--- |
+
+ define void @test_allocate_sve() nounwind { entry: unreachable }
+ define void @test_allocate_sve_gpr_callee_saves() nounwind { entry: unreachable }
+ define void @test_allocate_sve_gpr_realigned() nounwind { entry: unreachable }
+
+...
+# +----------+
+# | %fixed-  | // scalable SVE object of n * 18 bytes, aligned to 16 bytes,
+# | stack.0  | // to be materialized with 2*ADDVL (<=> 2 * n * 16bytes)
+# +----------+
+# | %stack.0 | // not scalable
+# +----------+ <- SP
+
+# CHECK-LABEL: name: test_allocate_sve
+# CHECK: stackSize: 16
+
+# CHECK: bb.0.entry:
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2
+# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
+
+# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2
+# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
+# CHECK-NEXT: RET_ReallyLR
+name: test_allocate_sve
+fixedStack:
+ - { id: 0, stack-id: sve-vec, size: 18, alignment: 2, offset: -18 }
+stack:
+ - { id: 0, stack-id: default, size: 16, alignment: 8 }
+body: |
+ bb.0.entry:
+ RET_ReallyLR
+---
+...
+# +----------+
+# | x20, x21 | // callee saves
+# +----------+
+# | %fixed-  | // scalable objects
+# | stack.0  |
+# +----------+
+# | %stack.0 | // not scalable
+# +----------+ <- SP
+
+# CHECK-LABEL: name: test_allocate_sve_gpr_callee_saves
+# CHECK: stackSize: 32
+
+# CHECK: bb.0.entry:
+# CHECK-NEXT: $sp = frame-setup STPXpre killed $x21, killed $x20, $sp, -2
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2
+# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
+# CHECK-NEXT: $x20 = IMPLICIT_DEF
+# CHECK-NEXT: $x21 = IMPLICIT_DEF
+# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2
+# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
+# CHECK-NEXT: $sp, $x21, $x20 = frame-destroy LDPXpost $sp, 2
+# CHECK-NEXT: RET_ReallyLR
+name: test_allocate_sve_gpr_callee_saves
+fixedStack:
+ - { id: 0, stack-id: sve-vec, size: 18, alignment: 2, offset: -18 }
+stack:
+ - { id: 0, stack-id: default, size: 16, alignment: 8 }
+body: |
+ bb.0.entry:
+ $x20 = IMPLICIT_DEF
+ $x21 = IMPLICIT_DEF
+ RET_ReallyLR
+---
+...
+# +----------+
+# | lr, fp   | // frame record
+# +----------+ <- FP
+# | %fixed-  | // scalable objects
+# | stack.0  |
+# +----------+
+# |//////////| // alignment gap
+# | %stack.0 | // not scalable
+# +----------+ <- SP
+# CHECK-LABEL: name: test_allocate_sve_gpr_realigned
+# CHECK: stackSize: 32
+
+# CHECK: bb.0.entry:
+# CHECK-NEXT: $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -2
+# CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2
+# CHECK-NEXT: $[[TMP:x[0-9]+]] = frame-setup SUBXri $sp, 16, 0
+# CHECK-NEXT: $sp = ANDXri killed $[[TMP]]
+# CHECK-NEXT: $sp = frame-destroy ADDXri $fp, 0, 0
+# CHECK-NEXT: $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2
+# CHECK-NEXT: RET_ReallyLR
+name: test_allocate_sve_gpr_realigned
+fixedStack:
+ - { id: 0, stack-id: sve-vec, size: 18, alignment: 2, offset: -18 }
+stack:
+ - { id: 0, stack-id: default, size: 16, alignment: 32 }
+body: |
+ bb.0.entry:
+ RET_ReallyLR
+---
Modified: llvm/trunk/unittests/Target/AArch64/TestStackOffset.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/Target/AArch64/TestStackOffset.cpp?rev=373585&r1=373584&r2=373585&view=diff
==============================================================================
--- llvm/trunk/unittests/Target/AArch64/TestStackOffset.cpp (original)
+++ llvm/trunk/unittests/Target/AArch64/TestStackOffset.cpp Thu Oct 3 04:33:50 2019
@@ -20,6 +20,15 @@ TEST(StackOffset, MixedSize) {
StackOffset C(2, MVT::v4i64);
EXPECT_EQ(64, C.getBytes());
+
+ StackOffset D(2, MVT::nxv4i64);
+ EXPECT_EQ(64, D.getScalableBytes());
+
+ StackOffset E(2, MVT::v4i64);
+ EXPECT_EQ(0, E.getScalableBytes());
+
+ StackOffset F(2, MVT::nxv4i64);
+ EXPECT_EQ(0, F.getBytes());
}
TEST(StackOffset, Add) {
@@ -31,6 +40,11 @@ TEST(StackOffset, Add) {
StackOffset D(1, MVT::i32);
D += A;
EXPECT_EQ(12, D.getBytes());
+
+ StackOffset E(1, MVT::nxv1i32);
+ StackOffset F = C + E;
+ EXPECT_EQ(12, F.getBytes());
+ EXPECT_EQ(4, F.getScalableBytes());
}
TEST(StackOffset, Sub) {
@@ -42,6 +56,12 @@ TEST(StackOffset, Sub) {
StackOffset D(1, MVT::i64);
D -= A;
EXPECT_EQ(0, D.getBytes());
+
+ C += StackOffset(2, MVT::nxv1i32);
+ StackOffset E = StackOffset(1, MVT::nxv1i32);
+ StackOffset F = C - E;
+ EXPECT_EQ(4, F.getBytes());
+ EXPECT_EQ(4, F.getScalableBytes());
}
TEST(StackOffset, isZero) {
@@ -49,12 +69,63 @@ TEST(StackOffset, isZero) {
StackOffset B(0, MVT::i32);
EXPECT_TRUE(!A);
EXPECT_TRUE(!(A + B));
+
+ StackOffset C(0, MVT::nxv1i32);
+ EXPECT_TRUE(!(A + C));
+
+ StackOffset D(1, MVT::nxv1i32);
+ EXPECT_FALSE(!(A + D));
+}
+
+TEST(StackOffset, isValid) {
+ EXPECT_FALSE(StackOffset(1, MVT::nxv8i1).isValid());
+ EXPECT_TRUE(StackOffset(2, MVT::nxv8i1).isValid());
+
+#ifndef NDEBUG
+#ifdef GTEST_HAS_DEATH_TEST
+ EXPECT_DEATH(StackOffset(1, MVT::i1),
+ "Offset type is not a multiple of bytes");
+ EXPECT_DEATH(StackOffset(1, MVT::nxv1i1),
+ "Offset type is not a multiple of bytes");
+#endif // defined GTEST_HAS_DEATH_TEST
+#endif // not defined NDEBUG
}
TEST(StackOffset, getForFrameOffset) {
StackOffset A(1, MVT::i64);
StackOffset B(1, MVT::i32);
- int64_t ByteSized;
- (A + B).getForFrameOffset(ByteSized);
+ StackOffset C(1, MVT::nxv4i32);
+
+ // If all offsets can be materialized with only ADDVL,
+ // make sure PLSized is 0.
+ int64_t ByteSized, VLSized, PLSized;
+ (A + B + C).getForFrameOffset(ByteSized, PLSized, VLSized);
EXPECT_EQ(12, ByteSized);
+ EXPECT_EQ(1, VLSized);
+ EXPECT_EQ(0, PLSized);
+
+ // If we need an ADDPL to materialize the offset, and the number of scalable
+ // bytes fits the ADDPL immediate, fold the scalable bytes to fit in PLSized.
+ StackOffset D(1, MVT::nxv16i1);
+ (C + D).getForFrameOffset(ByteSized, PLSized, VLSized);
+ EXPECT_EQ(0, ByteSized);
+ EXPECT_EQ(0, VLSized);
+ EXPECT_EQ(9, PLSized);
+
+ StackOffset E(4, MVT::nxv4i32);
+ StackOffset F(1, MVT::nxv16i1);
+ (E + F).getForFrameOffset(ByteSized, PLSized, VLSized);
+ EXPECT_EQ(0, ByteSized);
+ EXPECT_EQ(0, VLSized);
+ EXPECT_EQ(33, PLSized);
+
+ // If the offset requires an ADDPL instruction to materialize, and would
+ // require more than two instructions, decompose it into both
+ // ADDVL (n x 16 bytes) and ADDPL (n x 2 bytes) instructions.
+ StackOffset G(8, MVT::nxv4i32);
+ StackOffset H(1, MVT::nxv16i1);
+ (G + H).getForFrameOffset(ByteSized, PLSized, VLSized);
+ EXPECT_EQ(0, ByteSized);
+ EXPECT_EQ(8, VLSized);
+ EXPECT_EQ(1, PLSized);
}