[llvm] r374772 - [AArch64] Stackframe accesses to SVE objects.

Sander de Smalen via llvm-commits llvm-commits at lists.llvm.org
Mon Oct 14 06:11:35 PDT 2019


Author: s.desmalen
Date: Mon Oct 14 06:11:34 2019
New Revision: 374772

URL: http://llvm.org/viewvc/llvm-project?rev=374772&view=rev
Log:
[AArch64] Stackframe accesses to SVE objects.

Materialize accesses to SVE frame objects from SP or FP, whichever is
available and beneficial.

This patch still assumes the objects are pre-allocated. The automatic
layout of SVE objects within the stackframe will be added in a separate
patch.

Reviewers: greened, cameron.mcinally, efriedma, rengolin, thegameg, rovka

Reviewed By: cameron.mcinally

Differential Revision: https://reviews.llvm.org/D67749

Modified:
    llvm/trunk/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
    llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp
    llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.h
    llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
    llvm/trunk/test/CodeGen/AArch64/framelayout-sve.mir

Modified: llvm/trunk/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp?rev=374772&r1=374771&r2=374772&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp Mon Oct 14 06:11:34 2019
@@ -674,7 +674,7 @@ bool AArch64ExpandPseudo::expandMI(Machi
      int BaseOffset = -AFI->getTaggedBasePointerOffset();
      unsigned FrameReg;
      StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
-         MF, BaseOffset, false /*isFixed*/, FrameReg,
+         MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
          /*PreferFP=*/false,
          /*ForSimm=*/true);
      Register SrcReg = FrameReg;

Modified: llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp?rev=374772&r1=374771&r2=374772&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.cpp Mon Oct 14 06:11:34 2019
@@ -1610,12 +1610,13 @@ StackOffset AArch64FrameLowering::resolv
   const auto &MFI = MF.getFrameInfo();
   int ObjectOffset = MFI.getObjectOffset(FI);
   bool isFixed = MFI.isFixedObjectIndex(FI);
-  return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, FrameReg,
+  bool isSVE = MFI.getStackID(FI) == TargetStackID::SVEVector;
+  return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, isSVE, FrameReg,
                                      PreferFP, ForSimm);
 }
 
 StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
-    const MachineFunction &MF, int ObjectOffset, bool isFixed,
+    const MachineFunction &MF, int ObjectOffset, bool isFixed, bool isSVE,
     unsigned &FrameReg, bool PreferFP, bool ForSimm) const {
   const auto &MFI = MF.getFrameInfo();
   const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
@@ -1629,16 +1630,17 @@ StackOffset AArch64FrameLowering::resolv
       !isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize());
 
   const StackOffset &SVEStackSize = getSVEStackSize(MF);
-  if (SVEStackSize)
-    llvm_unreachable("Accessing frame indices in presence of SVE "
-                     "not yet supported");
 
   // Use frame pointer to reference fixed objects. Use it for locals if
   // there are VLAs or a dynamically realigned SP (and thus the SP isn't
   // reliable as a base). Make sure useFPForScavengingIndex() does the
   // right thing for the emergency spill slot.
   bool UseFP = false;
-  if (AFI->hasStackFrame()) {
+  if (AFI->hasStackFrame() && !isSVE) {
+    // We shouldn't prefer using the FP when there is an SVE area
+    // in between the FP and the non-SVE locals/spills.
+    PreferFP &= !SVEStackSize;
+
     // Note: Keeping the following as multiple 'if' statements rather than
     // merging to a single expression for readability.
     //
@@ -1666,8 +1668,10 @@ StackOffset AArch64FrameLowering::resolv
         bool CanUseBP = RegInfo->hasBasePointer(MF);
         if (FPOffsetFits && CanUseBP) // Both are ok. Pick the best.
           UseFP = PreferFP;
-        else if (!CanUseBP) // Can't use BP. Forced to use FP.
+        else if (!CanUseBP) { // Can't use BP. Forced to use FP.
+          assert(!SVEStackSize && "Expected BP to be available");
           UseFP = true;
+        }
         // else we can use BP and FP, but the offset from FP won't fit.
         // That will make us scavenge registers which we can probably avoid by
         // using BP. If it won't fit for BP either, we'll scavenge anyway.
@@ -1697,9 +1701,36 @@ StackOffset AArch64FrameLowering::resolv
          "In the presence of dynamic stack pointer realignment, "
          "non-argument/CSR objects cannot be accessed through the frame pointer");
 
+  if (isSVE) {
+    int64_t OffsetToSVEArea =
+        MFI.getStackSize() - AFI->getCalleeSavedStackSize();
+    StackOffset FPOffset = {ObjectOffset, MVT::nxv1i8};
+    StackOffset SPOffset = SVEStackSize +
+                           StackOffset(ObjectOffset, MVT::nxv1i8) +
+                           StackOffset(OffsetToSVEArea, MVT::i8);
+    // Always use the FP for SVE spills if available and beneficial.
+    if (hasFP(MF) &&
+        (SPOffset.getBytes() ||
+         FPOffset.getScalableBytes() < SPOffset.getScalableBytes() ||
+         RegInfo->needsStackRealignment(MF))) {
+      FrameReg = RegInfo->getFrameRegister(MF);
+      return FPOffset;
+    }
+
+    FrameReg = RegInfo->hasBasePointer(MF) ? RegInfo->getBaseRegister()
+                                           : (unsigned)AArch64::SP;
+    return SPOffset;
+  }
+
+  StackOffset ScalableOffset = {};
+  if (UseFP && !(isFixed || isCSR))
+    ScalableOffset = -SVEStackSize;
+  if (!UseFP && (isFixed || isCSR))
+    ScalableOffset = SVEStackSize;
+
   if (UseFP) {
     FrameReg = RegInfo->getFrameRegister(MF);
-    return StackOffset(FPOffset, MVT::i8);
+    return StackOffset(FPOffset, MVT::i8) + ScalableOffset;
   }
 
   // Use the base pointer if we have one.
@@ -1716,7 +1747,7 @@ StackOffset AArch64FrameLowering::resolv
       Offset -= AFI->getLocalStackSize();
   }
 
-  return StackOffset(Offset, MVT::i8);
+  return StackOffset(Offset, MVT::i8) + ScalableOffset;
 }
 
 static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
@@ -2213,24 +2244,20 @@ void AArch64FrameLowering::determineCall
              << ' ' << printReg(Reg, RegInfo);
              dbgs() << "\n";);
 
-  bool HasSVEStackObjects = [&MFI]() {
-    for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
-      if (MFI.getStackID(I) == TargetStackID::SVEVector &&
-          MFI.getObjectOffset(I) < 0)
-        return true;
-    // Note: We don't take allocatable stack objects into
-    // account yet, because allocation for those is not yet
-    // implemented.
-    return false;
-  }();
-
   // If any callee-saved registers are used, the frame cannot be eliminated.
-  bool CanEliminateFrame = (SavedRegs.count() == 0) && !HasSVEStackObjects;
+  unsigned MaxAlign = getStackAlignment();
+  int64_t SVEStackSize =
+      alignTo(determineSVEStackSize(MFI, MaxAlign), MaxAlign);
+  assert(MaxAlign <= 16 && "Cannot align scalable vectors more than 16 bytes");
+  bool CanEliminateFrame = (SavedRegs.count() == 0) && !SVEStackSize;
 
   // The CSR spill slots have not been allocated yet, so estimateStackSize
   // won't include them.
   unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF);
-  bool BigStack = (EstimatedStackSize + CSStackSize) > EstimatedStackSizeLimit;
+
+  // Conservatively always assume BigStack when there are SVE spills.
+  bool BigStack = SVEStackSize ||
+                  (EstimatedStackSize + CSStackSize) > EstimatedStackSizeLimit;
   if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
     AFI->setHasStackFrame(true);
 
@@ -2286,6 +2313,23 @@ bool AArch64FrameLowering::enableStackSl
   return AFI->hasCalleeSaveStackFreeSpace();
 }
 
+int64_t AArch64FrameLowering::determineSVEStackSize(MachineFrameInfo &MFI,
+                                                    unsigned &MaxAlign) const {
+  // Process all fixed stack objects.
+  int64_t Offset = 0;
+  for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
+    if (MFI.getStackID(I) == TargetStackID::SVEVector) {
+      int64_t FixedOffset = -MFI.getObjectOffset(I);
+      if (FixedOffset > Offset)
+        Offset = FixedOffset;
+    }
+
+  // Note: We don't take allocatable stack objects into
+  // account yet, because allocation for those is not yet
+  // implemented.
+  return Offset;
+}
+
 void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
     MachineFunction &MF, RegScavenger *RS) const {
   MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -2293,22 +2337,11 @@ void AArch64FrameLowering::processFuncti
   assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown &&
          "Upwards growing stack unsupported");
 
-  // Process all fixed stack SVE objects.
-  int64_t Offset = 0;
-  for (int I = MFI.getObjectIndexBegin(); I != 0; ++I) {
-    unsigned StackID = MFI.getStackID(I);
-    if (StackID == TargetStackID::SVEVector) {
-      int64_t FixedOffset = -MFI.getObjectOffset(I);
-      if (FixedOffset > Offset)
-        Offset = FixedOffset;
-    }
-  }
-
   unsigned MaxAlign = getStackAlignment();
-  uint64_t SVEStackSize = alignTo(Offset, MaxAlign);
+  int64_t SVEStackSize = determineSVEStackSize(MFI, MaxAlign);
 
   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
-  AFI->setStackSizeSVE(SVEStackSize);
+  AFI->setStackSizeSVE(alignTo(SVEStackSize, MaxAlign));
   assert(MaxAlign <= 16 && "Cannot align scalable vectors more than 16 bytes");
 
   // If this function isn't doing Win64-style C++ EH, we don't need to do

Modified: llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.h?rev=374772&r1=374771&r2=374772&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64FrameLowering.h Mon Oct 14 06:11:34 2019
@@ -45,8 +45,8 @@ public:
                                          bool ForSimm) const;
   StackOffset resolveFrameOffsetReference(const MachineFunction &MF,
                                           int ObjectOffset, bool isFixed,
-                                          unsigned &FrameReg, bool PreferFP,
-                                          bool ForSimm) const;
+                                          bool isSVE, unsigned &FrameReg,
+                                          bool PreferFP, bool ForSimm) const;
   bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MI,
                                  const std::vector<CalleeSavedInfo> &CSI,
@@ -101,6 +101,7 @@ public:
 private:
   bool shouldCombineCSRLocalStackBump(MachineFunction &MF,
                                       unsigned StackBumpBytes) const;
+  int64_t determineSVEStackSize(MachineFrameInfo &MF, unsigned &MaxAlign) const;
 };
 
 } // End llvm namespace

Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp?rev=374772&r1=374771&r2=374772&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp Mon Oct 14 06:11:34 2019
@@ -2198,6 +2198,18 @@ bool AArch64InstrInfo::getMemOpInfo(unsi
     MinOffset = -256;
     MaxOffset = 255;
     break;
+  case AArch64::LDR_PXI:
+  case AArch64::STR_PXI:
+    Scale = Width = 2;
+    MinOffset = -256;
+    MaxOffset = 255;
+    break;
+  case AArch64::LDR_ZXI:
+  case AArch64::STR_ZXI:
+    Scale = Width = 16;
+    MinOffset = -256;
+    MaxOffset = 255;
+    break;
   case AArch64::ST2GOffset:
   case AArch64::STZ2GOffset:
     Scale = 16;
@@ -3340,6 +3352,18 @@ MachineInstr *AArch64InstrInfo::foldMemo
   return nullptr;
 }
 
+static bool isSVEScaledImmInstruction(unsigned Opcode) {
+  switch (Opcode) {
+  case AArch64::LDR_ZXI:
+  case AArch64::STR_ZXI:
+  case AArch64::LDR_PXI:
+  case AArch64::STR_PXI:
+    return true;
+  default:
+    return false;
+  }
+}
+
 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
                                     StackOffset &SOffset,
                                     bool *OutUseUnscaledOp,
@@ -3383,9 +3407,13 @@ int llvm::isAArch64FrameOffsetLegal(cons
     llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
 
   // Construct the complete offset.
+  bool IsMulVL = isSVEScaledImmInstruction(MI.getOpcode());
+  int64_t Offset =
+      IsMulVL ? (SOffset.getScalableBytes()) : (SOffset.getBytes());
+
   const MachineOperand &ImmOpnd =
       MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
-  int Offset = SOffset.getBytes() + ImmOpnd.getImm() * Scale;
+  Offset += ImmOpnd.getImm() * Scale;
 
   // If the offset doesn't match the scale, we rewrite the instruction to
   // use the unscaled instruction instead. Likewise, if we have a negative
@@ -3417,9 +3445,14 @@ int llvm::isAArch64FrameOffsetLegal(cons
   if (OutUnscaledOp && UnscaledOp)
     *OutUnscaledOp = *UnscaledOp;
 
-  SOffset = StackOffset(Offset, MVT::i8);
+  if (IsMulVL)
+    SOffset = StackOffset(Offset, MVT::nxv1i8) +
+              StackOffset(SOffset.getBytes(), MVT::i8);
+  else
+    SOffset = StackOffset(Offset, MVT::i8) +
+              StackOffset(SOffset.getScalableBytes(), MVT::nxv1i8);
   return AArch64FrameOffsetCanUpdate |
-         (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
+         (SOffset ? 0 : AArch64FrameOffsetIsLegal);
 }
 
 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,

Modified: llvm/trunk/test/CodeGen/AArch64/framelayout-sve.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/framelayout-sve.mir?rev=374772&r1=374771&r2=374772&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/framelayout-sve.mir (original)
+++ llvm/trunk/test/CodeGen/AArch64/framelayout-sve.mir Mon Oct 14 06:11:34 2019
@@ -26,9 +26,15 @@
   define void @test_allocate_sve() nounwind { entry: unreachable }
   define void @test_allocate_sve_gpr_callee_saves() nounwind { entry: unreachable }
   define void @test_allocate_sve_gpr_realigned() nounwind { entry: unreachable }
+  define void @test_address_sve() nounwind { entry: unreachable }
+  define void @test_address_sve_fp() nounwind { entry: unreachable }
+  define void @test_stack_arg_sve() nounwind { entry: unreachable }
+  define void @test_address_sve_out_of_range() nounwind { entry: unreachable }
 
 ...
 # +----------+
+# |scratchreg|  // x29 is used as scratch reg.
+# +----------+
 # | %fixed-  |  // scalable SVE object of n * 18 bytes, aligned to 16 bytes,
 # |  stack.0 |  // to be materialized with 2*ADDVL (<=> 2 * n * 16bytes)
 # +----------+
@@ -36,14 +42,16 @@
 # +----------+ <- SP
 
 # CHECK-LABEL: name: test_allocate_sve
-# CHECK:       stackSize: 16
+# CHECK:       stackSize: 32
 
 # CHECK:      bb.0.entry:
+# CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16
 # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2
 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
 
 # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2
 # CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
+# CHECK-NEXT: $sp, $[[SCRATCH]] = frame-destroy LDRXpost $sp, 16
 # CHECK-NEXT: RET_ReallyLR
 name:            test_allocate_sve
 fixedStack:
@@ -57,6 +65,7 @@ body:             |
 ...
 # +----------+
 # | x20, x21 |  // callee saves
+# |scratchreg|  // x29 is used as scratch reg.
 # +----------+
 # | %fixed-  |  // scalable objects
 # |  stack.0 |
@@ -65,17 +74,19 @@ body:             |
 # +----------+ <- SP
 
 # CHECK-LABEL: name: test_allocate_sve_gpr_callee_saves
-# CHECK:       stackSize: 32
+# CHECK:       stackSize: 48
 
 # CHECK:      bb.0.entry:
-# CHECK-NEXT: $sp = frame-setup STPXpre killed $x21, killed $x20, $sp, -2
+# CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -32
+# CHECK-NEXT: frame-setup STPXi killed $x21, killed $x20, $sp, 2
 # CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2
 # CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
 # CHECK-NEXT: $x20 = IMPLICIT_DEF
 # CHECK-NEXT: $x21 = IMPLICIT_DEF
 # CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2
 # CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
-# CHECK-NEXT: $sp, $x21, $x20 = frame-destroy LDPXpost $sp, 2
+# CHECK-NEXT: $x21, $x20 = frame-destroy LDPXi $sp, 2
+# CHECK-NEXT: $sp, $[[SCRATCH]] = frame-destroy LDRXpost $sp, 32
 # CHECK-NEXT: RET_ReallyLR
 name:            test_allocate_sve_gpr_callee_saves
 fixedStack:
@@ -119,3 +130,201 @@ body:             |
   bb.0.entry:
     RET_ReallyLR
 ---
+...
+# +-----------+
+# | x20, x21  |  // callee saves
+# +-----------+
+# | %fstack.0 |  // scalable @ SP + 16b + 32 scalable bytes
+# | %fstack.1 |  // scalable @ SP + 16b + 16 scalable bytes
+# | %fstack.2 |  // scalable @ SP + 16b + 14 scalable bytes
+# +-----------+
+# | %stack.0  |  // not scalable
+# +-----------+ <- SP
+
+# CHECK-LABEL: name: test_address_sve
+# CHECK:       stackSize: 32
+
+# CHECK:      bb.0.entry:
+# CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3
+# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
+
+# CHECK-NEXT: $[[TMP:x[0-9]+]] = ADDXri $sp, 16
+# CHECK-NEXT: STR_ZXI $z0, killed $[[TMP]], 2
+# CHECK-NEXT: $[[TMP:x[0-9]+]] = ADDXri $sp, 16
+# CHECK-NEXT: STR_ZXI $z1, killed $[[TMP]], 1
+# CHECK-NEXT: $[[TMP:x[0-9]+]] = ADDXri $sp, 16
+# CHECK-NEXT: STR_PXI $p0, killed $[[TMP]], 7
+
+# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 3
+# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
+# CHECK-NEXT: $sp, $[[SCRATCH]] = frame-destroy LDRXpost $sp, 16
+# CHECK-NEXT: RET_ReallyLR
+name:            test_address_sve
+frameInfo:
+  maxAlignment:  16
+fixedStack:
+  - { id: 0, stack-id: sve-vec, size: 16, alignment: 8, offset: -16 }
+  - { id: 1, stack-id: sve-vec, size: 16, alignment: 8, offset: -32 }
+  - { id: 2, stack-id: sve-vec, size:  2, alignment: 2, offset: -34 }
+stack:
+  - { id: 0, stack-id: default, size: 16, alignment: 8 }
+body:             |
+  bb.0.entry:
+    liveins: $z0, $z1, $p0
+
+    STR_ZXI $z0, %fixed-stack.0, 0
+    STR_ZXI $z1, %fixed-stack.1, 0
+    STR_PXI $p0, %fixed-stack.2, 0
+
+    RET_ReallyLR
+---
+...
+# +-----------+
+# | x20, x21  |  // callee saves
+# |  lr, fp   |  // frame record
+# +-----------+ <- FP
+# | %fstack.0 |  // scalable @ FP - 16 scalable bytes
+# | %fstack.1 |  // scalable @ FP - 32 scalable bytes
+# | %fstack.2 |  // scalable @ FP - 34 scalable bytes
+# +-----------+
+# | %stack.0  |  // not scalable
+# +-----------+ <- SP
+
+# CHECK-LABEL: name: test_address_sve_fp
+# CHECK:       stackSize: 32
+
+# CHECK:      bb.0.entry:
+# CHECK-NEXT: $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -2
+# CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3
+# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
+
+# CHECK-NEXT: STR_ZXI $z0, $fp, -1
+# CHECK-NEXT: STR_ZXI $z1, $fp, -2
+# CHECK-NEXT: STR_PXI $p0, $fp, -17
+
+# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 3
+# CHECK:      $sp = frame-destroy ADDXri $sp, 16, 0
+# CHECK-NEXT: $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2
+# CHECK-NEXT: RET_ReallyLR
+name:            test_address_sve_fp
+frameInfo:
+  maxAlignment:  16
+  isFrameAddressTaken: true
+fixedStack:
+  - { id: 0, stack-id: sve-vec, size: 16, alignment: 8, offset: -16 }
+  - { id: 1, stack-id: sve-vec, size: 16, alignment: 8, offset: -32 }
+  - { id: 2, stack-id: sve-vec, size:  2, alignment: 2, offset: -34 }
+stack:
+  - { id: 0, stack-id: default, size: 16, alignment: 8 }
+body:             |
+  bb.0.entry:
+    liveins: $z0, $z1, $p0
+
+    STR_ZXI $z0, %fixed-stack.0, 0
+    STR_ZXI $z1, %fixed-stack.1, 0
+    STR_PXI $p0, %fixed-stack.2, 0
+
+    RET_ReallyLR
+---
+...
+# +-----------+
+# | %fstack.0 |  // stack arg @ SP + 16 scalable bytes + 32 bytes.
+# +-----------+
+# |callee save|  // register saved as scratch reg.
+# +-----------+
+# | %fstack.1 |  // vector of 16 scalable bytes
+# +-----------+
+# | %stack.0  |  // not scalable, 16 bytes
+# +-----------+ <- SP
+# CHECK-LABEL: name: test_stack_arg_sve
+# CHECK:       stackSize: 32
+
+# CHECK:      bb.0.entry:
+# CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1
+# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
+
+# CHECK:      $[[TMP:x[0-9]+]] = ADDVL_XXI $sp, 1
+# CHECK-NEXT: $x0 = LDRXui killed $[[TMP]], 4
+
+# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1
+# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
+# CHECK-NEXT: $sp, $[[SCRATCH]] = frame-destroy LDRXpost $sp, 16
+# CHECK-NEXT: RET_ReallyLR
+name:             test_stack_arg_sve
+fixedStack:
+  - { id: 0, stack-id: default, size: 16, alignment: 16, offset: 0 }
+  - { id: 1, stack-id: sve-vec, size: 16, alignment: 16, offset: -16 }
+stack:
+  - { id: 0, stack-id: default, size: 16, alignment: 16 }
+body:             |
+  bb.0.entry:
+    liveins: $x0
+
+    $x0 = LDRXui %fixed-stack.0, 0
+    RET_ReallyLR
+---
+...
+# Test that the address to access an SVE data vector at an offset that
+# does not fit its immediate, is correctly materialized.
+# +-----------+
+# |calleesave |  // register saved as scratch reg.
+# +-----------+
+# | %fstack.0 |  // one SVE data object @ SP + 256 scalable bytes.
+# |:::::::::::|
+# |:         :|
+# |:%fstack.1:|  // Large object
+# |:         :|
+# |:::::::::::|
+# +-----------+ <- SP
+# CHECK-LABEL: name: test_address_sve_out_of_range
+# CHECK:       stackSize: 16
+
+# CHECK:      bb.0.entry:
+# CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -32
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -32
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -32
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -32
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -32
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -32
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -32
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -32
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1
+
+# CHECK-NEXT: $[[TMP2:x[0-9]+]] = ADDVL_XXI $sp, 1
+# CHECK-NEXT: STR_ZXI $z0, killed $[[TMP2]], 255
+
+# CHECK-NEXT: $[[TMP2:x[0-9]+]] = ADDPL_XXI $sp, 1
+# CHECK-NEXT: STR_PXI $p0, killed $[[TMP2]], 255
+
+# CHECK:      $sp = frame-destroy ADDVL_XXI $sp, 31
+# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 31
+# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 31
+# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 31
+# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 31
+# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 31
+# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 31
+# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 31
+# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 9
+# CHECK-NEXT: $sp, $[[SCRATCH]] = frame-destroy LDRXpost $sp, 16
+# CHECK-NEXT: RET_ReallyLR
+name:            test_address_sve_out_of_range
+frameInfo:
+  maxAlignment:  16
+fixedStack:
+  - { id: 0, stack-id: sve-vec, size:   16, alignment: 16, offset: -16 }
+  - { id: 1, stack-id: sve-vec, size: 3584, alignment: 16, offset: -3600 }
+  - { id: 2, stack-id: sve-vec, size:  512, alignment: 16, offset: -4112 }
+
+body:             |
+  bb.0.entry:
+    liveins: $z0, $p0
+
+    STR_ZXI $z0, %fixed-stack.0, 0
+    STR_PXI $p0, %fixed-stack.1, 0
+
+    RET_ReallyLR
+---




More information about the llvm-commits mailing list