[llvm] 9a1c243 - [AArch64][SVE] Allocate locals that are scalable vectors.

Sander de Smalen via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 13 01:47:59 PST 2019


Author: Sander de Smalen
Date: 2019-11-13T09:45:24Z
New Revision: 9a1c243aa5ded10f7b39887b2be073d0bcfbf5c9

URL: https://github.com/llvm/llvm-project/commit/9a1c243aa5ded10f7b39887b2be073d0bcfbf5c9
DIFF: https://github.com/llvm/llvm-project/commit/9a1c243aa5ded10f7b39887b2be073d0bcfbf5c9.diff

LOG: [AArch64][SVE] Allocate locals that are scalable vectors.

This patch adds a target interface to set the StackID for a given type,
which allows scalable vectors (e.g. `<vscale x 16 x i8>`) to be assigned a
'sve-vec' StackID, so that they are allocated in the SVE area of the stack frame.

Reviewers: ostannard, efriedma, rengolin, cameron.mcinally

Reviewed By: efriedma

Differential Revision: https://reviews.llvm.org/D70080

Added: 
    llvm/test/CodeGen/AArch64/sve-alloca-stackid.ll

Modified: 
    llvm/include/llvm/CodeGen/TargetFrameLowering.h
    llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
    llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
    llvm/lib/Target/AArch64/AArch64FrameLowering.h
    llvm/test/CodeGen/AArch64/framelayout-sve.mir

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
index 2100a64c8f54..c7d4c4d7e5d4 100644
--- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h
@@ -363,6 +363,11 @@ class TargetFrameLowering {
     return true;
   }
 
+  /// Returns the StackID that scalable vectors should be associated with.
+  virtual TargetStackID::Value getStackIDForScalableVectors() const {
+    return TargetStackID::Default;
+  }
+
   virtual bool isSupportedStackID(TargetStackID::Value ID) const {
     switch (ID) {
     default:

diff  --git a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index cf6711adad48..fa33400cd4b3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -144,7 +144,8 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
         if (AI->isStaticAlloca() &&
             (TFI->isStackRealignable() || (Align <= StackAlign))) {
           const ConstantInt *CUI = cast<ConstantInt>(AI->getArraySize());
-          uint64_t TySize = MF->getDataLayout().getTypeAllocSize(Ty);
+          uint64_t TySize =
+              MF->getDataLayout().getTypeAllocSize(Ty).getKnownMinSize();
 
           TySize *= CUI->getZExtValue();   // Get total allocated size.
           if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
@@ -159,6 +160,12 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
                 MF->getFrameInfo().CreateStackObject(TySize, Align, false, AI);
           }
 
+          // Scalable vectors may need a special StackID to distinguish
+          // them from other (fixed size) stack objects.
+          if (Ty->isVectorTy() && Ty->getVectorIsScalable())
+            MF->getFrameInfo().setStackID(FrameIndex,
+                                          TFI->getStackIDForScalableVectors());
+
           StaticAllocaMap[AI] = FrameIndex;
           // Update the catch handler information.
           if (Iter != CatchObjects.end()) {

diff  --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 169c35c49607..970d7802b1d1 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -206,6 +206,11 @@ static unsigned estimateRSStackSizeLimit(MachineFunction &MF) {
   return DefaultSafeSPDisplacement;
 }
 
+TargetStackID::Value
+AArch64FrameLowering::getStackIDForScalableVectors() const {
+  return TargetStackID::SVEVector;
+}
+
 /// Returns the size of the entire SVE stackframe (calleesaves + spills).
 static StackOffset getSVEStackSize(const MachineFunction &MF) {
   const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
@@ -2488,11 +2493,12 @@ bool AArch64FrameLowering::enableStackSlotScavenging(
 /// returns true if there are any SVE callee saves.
 static bool getSVECalleeSaveSlotRange(const MachineFrameInfo &MFI,
                                       int &Min, int &Max) {
+  Min = std::numeric_limits<int>::max();
+  Max = std::numeric_limits<int>::min();
+
   if (!MFI.isCalleeSavedInfoValid())
     return false;
 
-  Min = std::numeric_limits<int>::max();
-  Max = std::numeric_limits<int>::min();
   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
   for (auto &CS : CSI) {
     if (AArch64::ZPRRegClass.contains(CS.getReg()) ||
@@ -2526,6 +2532,11 @@ static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
         Offset = FixedOffset;
     }
 
+  auto Assign = [&MFI](int FI, int64_t Offset) {
+    LLVM_DEBUG(dbgs() << "alloc FI(" << FI << ") at SP[" << Offset << "]\n");
+    MFI.setObjectOffset(FI, Offset);
+  };
+
   // Then process all callee saved slots.
   if (getSVECalleeSaveSlotRange(MFI, MinCSFrameIndex, MaxCSFrameIndex)) {
     // Make sure to align the last callee save slot.
@@ -2535,17 +2546,40 @@ static int64_t determineSVEStackObjectOffsets(MachineFrameInfo &MFI,
     for (int I = MinCSFrameIndex; I <= MaxCSFrameIndex; ++I) {
       Offset += MFI.getObjectSize(I);
       Offset = alignTo(Offset, MFI.getObjectAlignment(I));
-      if (AssignOffsets) {
-        LLVM_DEBUG(dbgs() << "alloc FI(" << I << ") at SP[" << Offset
-                          << "]\n");
-        MFI.setObjectOffset(I, -Offset);
-      }
+      if (AssignOffsets)
+        Assign(I, -Offset);
     }
   }
 
-  // Note: We don't take allocatable stack objects into
-  // account yet, because allocation for those is not yet
-  // implemented.
+  // Create a buffer of SVE objects to allocate (kept in frame-index order).
+  SmallVector<int, 8> ObjectsToAllocate;
+  for (int I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I) {
+    unsigned StackID = MFI.getStackID(I);
+    if (StackID != TargetStackID::SVEVector)
+      continue;
+    if (MaxCSFrameIndex >= I && I >= MinCSFrameIndex)
+      continue;
+    if (MFI.isDeadObjectIndex(I))
+      continue;
+
+    ObjectsToAllocate.push_back(I);
+  }
+
+  // Allocate all SVE locals and spills
+  for (unsigned FI : ObjectsToAllocate) {
+    unsigned Align = MFI.getObjectAlignment(FI);
+    // FIXME: Given that the length of SVE vectors is not necessarily a power of
+    // two, we'd need to align every object dynamically at runtime if the
+    // alignment is larger than 16. This is not yet supported.
+    if (Align > 16)
+      report_fatal_error(
+          "Alignment of scalable vectors > 16 bytes is not yet supported");
+
+    Offset = alignTo(Offset + MFI.getObjectSize(FI), Align);
+    if (AssignOffsets)
+      Assign(FI, -Offset);
+  }
+
   return Offset;
 }
 

diff  --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index f84847def34d..3ed849993684 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -72,6 +72,7 @@ class AArch64FrameLowering : public TargetFrameLowering {
   }
 
   bool enableStackSlotScavenging(const MachineFunction &MF) const override;
+  TargetStackID::Value getStackIDForScalableVectors() const override;
 
   void processFunctionBeforeFrameFinalized(MachineFunction &MF,
                                              RegScavenger *RS) const override;

diff  --git a/llvm/test/CodeGen/AArch64/framelayout-sve.mir b/llvm/test/CodeGen/AArch64/framelayout-sve.mir
index 18d6796b172c..75013bc475a1 100644
--- a/llvm/test/CodeGen/AArch64/framelayout-sve.mir
+++ b/llvm/test/CodeGen/AArch64/framelayout-sve.mir
@@ -34,6 +34,7 @@
   define aarch64_sve_vector_pcs void @save_restore_zregs_sve() nounwind { entry: unreachable }
   define aarch64_sve_vector_pcs void @save_restore_sve() nounwind { entry: unreachable }
   define aarch64_sve_vector_pcs void @save_restore_sve_realign() nounwind { entry: unreachable }
+  define aarch64_sve_vector_pcs void @frame_layout() nounwind { entry: unreachable }
 
 ...
 # +----------+
@@ -512,3 +513,69 @@ body:             |
 
     RET_ReallyLR
 ---
+# Frame layout should be:
+# +---------------------+ <- Old SP
+# | callee save z8      |@ -16
+# | callee save z23     |@ -32
+# | callee save p4      |@ -34
+# | callee save p15     |@ -48
+# | id #0 (size 32)     |@ -80
+# | id #1 (size 4)      |@ -84
+# | id #2 (size 16)     |@ -112
+# | id #3 (size 2)      |@ -114
+# | id #4 (size 16)     |@ -144
+# | id #5 (size 2)      |@ -146
+# +- - - - - - - - - - -+ <- New SP @-160
+# CHECK-LABEL: name: frame_layout
+# CHECK:       stack:
+# CHECK:        - { id: 0, name: '', type: default, offset: -80, size: 32, alignment: 16,
+# CHECK-NEXT:       stack-id: sve-vec,
+# CHECK:        - { id: 1, name: '', type: default, offset: -84, size: 4, alignment: 2,
+# CHECK-NEXT:       stack-id: sve-vec,
+# CHECK:        - { id: 2, name: '', type: default, offset: -112, size: 16, alignment: 16,
+# CHECK-NEXT:       stack-id: sve-vec,
+# CHECK:        - { id: 3, name: '', type: default, offset: -114, size: 2, alignment: 2,
+# CHECK-NEXT:       stack-id: sve-vec,
+# CHECK:        - { id: 4, name: '', type: spill-slot, offset: -144, size: 16, alignment: 16,
+# CHECK-NEXT:       stack-id: sve-vec,
+# CHECK:        - { id: 5, name: '', type: spill-slot, offset: -146, size: 2, alignment: 2,
+# CHECK-NEXT:       stack-id: sve-vec,
+# CHECK:        - { id: 6, name: '', type: spill-slot, offset: -16, size: 16, alignment: 16,
+# CHECK-NEXT:       stack-id: sve-vec, callee-saved-register: '$z8',
+# CHECK:        - { id: 7, name: '', type: spill-slot, offset: -32, size: 16, alignment: 16,
+# CHECK-NEXT:       stack-id: sve-vec, callee-saved-register: '$z23',
+# CHECK:        - { id: 8, name: '', type: spill-slot, offset: -34, size: 2, alignment: 2,
+# CHECK-NEXT:       stack-id: sve-vec, callee-saved-register: '$p4',
+# CHECK:        - { id: 9, name: '', type: spill-slot, offset: -48, size: 2, alignment: 16,
+# CHECK-NEXT:       stack-id: sve-vec, callee-saved-register: '$p15',
+# CHECK:        - { id: 10, name: '', type: spill-slot, offset: -16, size: 8, alignment: 16,
+# CHECK-NEXT:       stack-id: default, callee-saved-register: '$fp',
+#
+# CHECK:      bb.0.entry:
+# CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3
+# CHECK-NEXT: STR_PXI killed $p15, $sp, 6
+# CHECK-NEXT: STR_PXI killed $p4, $sp, 7
+# CHECK-NEXT: STR_ZXI killed $z23, $sp, 1
+# CHECK-NEXT: STR_ZXI killed $z8, $sp, 2
+# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -7
+name: frame_layout
+stack:
+  - { id: 0, type: default,    size:  32, alignment: 16, stack-id: sve-vec }
+  - { id: 1, type: default,    size:   4, alignment:  2, stack-id: sve-vec }
+  - { id: 2, type: default,    size:  16, alignment: 16, stack-id: sve-vec }
+  - { id: 3, type: default,    size:   2, alignment:  2, stack-id: sve-vec }
+  - { id: 4, type: spill-slot, size:  16, alignment: 16, stack-id: sve-vec }
+  - { id: 5, type: spill-slot, size:   2, alignment:  2, stack-id: sve-vec }
+body:             |
+  bb.0.entry:
+
+    ; Trigger some callee saves
+    $z8  = IMPLICIT_DEF
+    $z23 = IMPLICIT_DEF
+    $p4  = IMPLICIT_DEF
+    $p15 = IMPLICIT_DEF
+
+    RET_ReallyLR
+
+---

diff  --git a/llvm/test/CodeGen/AArch64/sve-alloca-stackid.ll b/llvm/test/CodeGen/AArch64/sve-alloca-stackid.ll
new file mode 100644
index 000000000000..4aae5bef22e2
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-alloca-stackid.ll
@@ -0,0 +1,17 @@
+; RUN: llc -mtriple=aarch64 -mattr=+sve < %s | FileCheck %s --check-prefix=CHECKCG
+; RUN: llc -mtriple=aarch64 -mattr=+sve -stop-after=finalize-isel < %s | FileCheck %s --check-prefix=CHECKISEL
+
+; CHECKCG-LABEL: foo:
+; CHECKCG: addvl   sp, sp, #-1
+
+; CHECKISEL-LABEL: name: foo
+; CHECKISEL:       stack:
+; CHECKISEL:       id: 0, name: ptr, type: default, offset: 0, size: 16, alignment: 16,
+; CHECKISEL-NEXT:  stack-id: sve-vec
+define i32 @foo(<vscale x 16 x i8> %val) {
+  %ptr = alloca <vscale x 16 x i8>
+  %res = call i32 @bar(<vscale x 16 x i8>* %ptr)
+  ret i32 %res
+}
+
+declare i32 @bar(<vscale x 16 x i8>* %ptr);


        


More information about the llvm-commits mailing list