[llvm] 2e794a4 - [AArch64] Stack frame reordering.

Evgenii Stepanov via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 15 13:06:37 PDT 2020


Author: Evgenii Stepanov
Date: 2020-10-15T12:50:16-07:00
New Revision: 2e794a46b58c5878c5ab71c8517c5417f791860e

URL: https://github.com/llvm/llvm-project/commit/2e794a46b58c5878c5ab71c8517c5417f791860e
DIFF: https://github.com/llvm/llvm-project/commit/2e794a46b58c5878c5ab71c8517c5417f791860e.diff

LOG: [AArch64] Stack frame reordering.

Implement stack frame reordering in the AArch64 backend.

Unlike the X86 implementation, AArch64 does not seem to benefit from
"access density" based frame reordering, mainly because it has a much
smaller variety of addressing modes and because all instructions are 4
bytes: each frame object is either in range of an instruction (and then
the access is "free") or not (and then it costs 4 bytes of code size).

This change improves Memory Tagging codegen by
* Placing an object that has been chosen as the base tagged pointer of
the function at SP + 0. This saves one instruction to set up the pointer
(IRG does not have an offset immediate), and often more, because that
object can now be referenced without materializing its tagged address in
a scratch register.
* Placing objects that go out of scope at the same time next to each
other. This exposes opportunities for instruction merging in
tryMergeAdjacentSTG.
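
Both placements fall out of a single lexicographic tuple comparison:
since false compares less than true, the pinned "first" object and its
group sort to the back of the valid range, which is the end closest to
SP. Below is a minimal standalone sketch of that key; Obj is a
pared-down stand-in for the FrameObject struct in the diff further down.

  // Minimal sketch of the ordering key from FrameObjectCompare below.
  // Later positions in the sorted vector end up closer to SP.
  #include <algorithm>
  #include <iostream>
  #include <tuple>
  #include <vector>

  struct Obj {
    bool IsValid;
    int ObjectIndex;
    int GroupIndex;   // -1 when the object is not in any tagging group.
    bool ObjectFirst; // The slot pinned as the tagged base pointer.
    bool GroupFirst;  // Set for every member of the pinned slot's group.
  };

  int main() {
    std::vector<Obj> Objs = {
        {true, 0, 1, false, false},  // ordinary object in group 1
        {true, 1, -1, false, false}, // ungrouped object
        {true, 2, 0, true, true},    // tagged base pointer slot, group 0
        {true, 3, 0, false, true},   // group-mate of the base pointer
        {false, 4, -1, false, false} // not allocated; must sort last
    };
    std::stable_sort(Objs.begin(), Objs.end(),
                     [](const Obj &A, const Obj &B) {
      return std::make_tuple(!A.IsValid, A.ObjectFirst, A.GroupFirst,
                             A.GroupIndex, A.ObjectIndex) <
             std::make_tuple(!B.IsValid, B.ObjectFirst, B.GroupFirst,
                             B.GroupIndex, B.ObjectIndex);
    });
    // Prints "1 0 3 2 4": the pinned slot (2) sorts last among the valid
    // objects, i.e. closest to SP, with its group-mate (3) adjacent, and
    // the invalid entry (4) at the very end.
    for (const Obj &O : Objs)
      std::cout << O.ObjectIndex << " ";
    std::cout << "\n";
  }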

Differential Revision: https://reviews.llvm.org/D72366

Added: 
    llvm/test/CodeGen/AArch64/settag-merge-order.ll

Modified: 
    llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
    llvm/lib/Target/AArch64/AArch64FrameLowering.h
    llvm/test/CodeGen/AArch64/settag-merge.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index d52267272092..49f84713f26f 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -176,6 +176,10 @@ static cl::opt<bool> StackTaggingMergeSetTag(
     cl::desc("merge settag instruction in function epilog"), cl::init(true),
     cl::Hidden);
 
+static cl::opt<bool> OrderFrameObjects("aarch64-order-frame-objects",
+                                       cl::desc("sort stack allocations"),
+                                       cl::init(true), cl::Hidden);
+
 STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
 
 /// Returns the argument pop size.
@@ -3297,3 +3301,162 @@ unsigned AArch64FrameLowering::getWinEHFuncletFrameSize(
   return alignTo(CSSize + MF.getFrameInfo().getMaxCallFrameSize(),
                  getStackAlign());
 }
+
+namespace {
+struct FrameObject {
+  bool IsValid = false;
+  // Index of the object in MFI.
+  int ObjectIndex = 0;
+  // Group ID this object belongs to.
+  int GroupIndex = -1;
+  // This object should be placed first (closest to SP).
+  bool ObjectFirst = false;
+  // This object's group (which always contains the object with
+  // ObjectFirst==true) should be placed first.
+  bool GroupFirst = false;
+};
+
+class GroupBuilder {
+  SmallVector<int, 8> CurrentMembers;
+  int NextGroupIndex = 0;
+  std::vector<FrameObject> &Objects;
+
+public:
+  GroupBuilder(std::vector<FrameObject> &Objects) : Objects(Objects) {}
+  void AddMember(int Index) { CurrentMembers.push_back(Index); }
+  void EndCurrentGroup() {
+    if (CurrentMembers.size() > 1) {
+      // Create a new group with the current member list. This might remove them
+      // from their pre-existing groups. That's OK, dealing with overlapping
+      // groups is too hard and unlikely to make a difference.
+      LLVM_DEBUG(dbgs() << "group:");
+      for (int Index : CurrentMembers) {
+        Objects[Index].GroupIndex = NextGroupIndex;
+        LLVM_DEBUG(dbgs() << " " << Index);
+      }
+      LLVM_DEBUG(dbgs() << "\n");
+      NextGroupIndex++;
+    }
+    CurrentMembers.clear();
+  }
+};
+
+bool FrameObjectCompare(const FrameObject &A, const FrameObject &B) {
+  // Objects at a lower index are closer to FP; objects at a higher index are
+  // closer to SP.
+  //
+  // For consistency in our comparison, all invalid objects are placed
+  // at the end. This also allows us to stop walking when we hit the
+  // first invalid item after it's all sorted.
+  //
+  // The "first" object goes first (closest to SP), followed by the members of
+  // the "first" group.
+  //
+  // The rest are sorted by the group index to keep the groups together.
+  // Higher numbered groups are more likely to be around longer (i.e. untagged
+  // in the function epilogue and not at some earlier point). Place them closer
+  // to SP.
+  //
+  // If all else equal, sort by the object index to keep the objects in the
+  // original order.
+  return std::make_tuple(!A.IsValid, A.ObjectFirst, A.GroupFirst, A.GroupIndex,
+                         A.ObjectIndex) <
+         std::make_tuple(!B.IsValid, B.ObjectFirst, B.GroupFirst, B.GroupIndex,
+                         B.ObjectIndex);
+}
+} // namespace
+
+void AArch64FrameLowering::orderFrameObjects(
+    const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
+  if (!OrderFrameObjects || ObjectsToAllocate.empty())
+    return;
+
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  std::vector<FrameObject> FrameObjects(MFI.getObjectIndexEnd());
+  for (auto &Obj : ObjectsToAllocate) {
+    FrameObjects[Obj].IsValid = true;
+    FrameObjects[Obj].ObjectIndex = Obj;
+  }
+
+  // Identify stack slots that are tagged at the same time.
+  GroupBuilder GB(FrameObjects);
+  for (auto &MBB : MF) {
+    for (auto &MI : MBB) {
+      if (MI.isDebugInstr())
+        continue;
+      int OpIndex;
+      switch (MI.getOpcode()) {
+      case AArch64::STGloop:
+      case AArch64::STZGloop:
+        OpIndex = 3;
+        break;
+      case AArch64::STGOffset:
+      case AArch64::STZGOffset:
+      case AArch64::ST2GOffset:
+      case AArch64::STZ2GOffset:
+        OpIndex = 1;
+        break;
+      default:
+        OpIndex = -1;
+      }
+
+      int TaggedFI = -1;
+      if (OpIndex >= 0) {
+        const MachineOperand &MO = MI.getOperand(OpIndex);
+        if (MO.isFI()) {
+          int FI = MO.getIndex();
+          if (FI >= 0 && FI < MFI.getObjectIndexEnd() &&
+              FrameObjects[FI].IsValid)
+            TaggedFI = FI;
+        }
+      }
+
+      // If this is a stack tagging instruction for a slot that is not part of a
+      // group yet, either start a new group or add it to the current one.
+      if (TaggedFI >= 0)
+        GB.AddMember(TaggedFI);
+      else
+        GB.EndCurrentGroup();
+    }
+    // Groups should never span multiple basic blocks.
+    GB.EndCurrentGroup();
+  }
+
+  // If the function's tagged base pointer is pinned to a stack slot, we want to
+  // put that slot first when possible. This will likely place it at SP + 0,
+  // and save one instruction when generating the base pointer because IRG does
+  // not allow an immediate offset.
+  const AArch64FunctionInfo &AFI = *MF.getInfo<AArch64FunctionInfo>();
+  Optional<int> TBPI = AFI.getTaggedBasePointerIndex();
+  if (TBPI) {
+    FrameObjects[*TBPI].ObjectFirst = true;
+    FrameObjects[*TBPI].GroupFirst = true;
+    int FirstGroupIndex = FrameObjects[*TBPI].GroupIndex;
+    if (FirstGroupIndex >= 0)
+      for (FrameObject &Object : FrameObjects)
+        if (Object.GroupIndex == FirstGroupIndex)
+          Object.GroupFirst = true;
+  }
+
+  llvm::stable_sort(FrameObjects, FrameObjectCompare);
+
+  int i = 0;
+  for (auto &Obj : FrameObjects) {
+    // All invalid items are sorted at the end, so it's safe to stop.
+    if (!Obj.IsValid)
+      break;
+    ObjectsToAllocate[i++] = Obj.ObjectIndex;
+  }
+
+  LLVM_DEBUG(dbgs() << "Final frame order:\n"; for (auto &Obj
+                                                    : FrameObjects) {
+    if (!Obj.IsValid)
+      break;
+    dbgs() << "  " << Obj.ObjectIndex << ": group " << Obj.GroupIndex;
+    if (Obj.ObjectFirst)
+      dbgs() << ", first";
+    if (Obj.GroupFirst)
+      dbgs() << ", group-first";
+    dbgs() << "\n";
+  });
+}
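
One subtlety worth noting about the grouping pass: GroupBuilder only
commits a group once it has at least two members, and any non-tagging
instruction (debug instructions aside), as well as a basic block
boundary, ends the current run. A toy standalone sketch of that
behavior (plain ints stand in for instructions here; -1 marks a
non-tagging one):

  // Toy model of GroupBuilder's run detection: consecutive tagged frame
  // indices form a group; -1 (a non-tagging instruction) ends the run.
  #include <iostream>
  #include <vector>

  int main() {
    std::vector<int> Stream = {0, 1, -1, 2, 3, 4, -1, 5};
    std::vector<int> Current;
    int NextGroup = 0;
    auto EndGroup = [&] {
      if (Current.size() > 1) { // A single slot does not form a group.
        std::cout << "group " << NextGroup++ << ":";
        for (int FI : Current)
          std::cout << " fi#" << FI;
        std::cout << "\n";
      }
      Current.clear();
    };
    for (int FI : Stream) {
      if (FI >= 0)
        Current.push_back(FI); // corresponds to GB.AddMember(FI)
      else
        EndGroup(); // corresponds to GB.EndCurrentGroup()
    }
    EndGroup(); // a basic block boundary also ends the group
    // Prints:
    //   group 0: fi#0 fi#1
    //   group 1: fi#2 fi#3 fi#4
    // fi#5 never joins a group, so it keeps GroupIndex == -1 and is
    // ordered by its object index alone.
  }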

diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 270353790dcf..ed77e3fb2989 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -118,6 +118,10 @@ class AArch64FrameLowering : public TargetFrameLowering {
     return StackId != TargetStackID::SVEVector;
   }
 
+  void
+  orderFrameObjects(const MachineFunction &MF,
+                    SmallVectorImpl<int> &ObjectsToAllocate) const override;
+
 private:
   bool shouldCombineCSRLocalStackBump(MachineFunction &MF,
                                       uint64_t StackBumpBytes) const;

diff --git a/llvm/test/CodeGen/AArch64/settag-merge-order.ll b/llvm/test/CodeGen/AArch64/settag-merge-order.ll
new file mode 100644
index 000000000000..9043b096f8ac
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/settag-merge-order.ll
@@ -0,0 +1,71 @@
+; RUN: llc < %s -mtriple=aarch64 -mattr=+mte -aarch64-order-frame-objects=1 | FileCheck %s
+
+declare void @use(i8* %p)
+declare void @llvm.aarch64.settag(i8* %p, i64 %a)
+declare void @llvm.aarch64.settag.zero(i8* %p, i64 %a)
+
+; Two loops of size 256; the second loop updates SP.
+; After frame reordering, the two loops can be merged into one.
+define void @stg128_128_gap_128_128() {
+entry:
+; CHECK-LABEL: stg128_128_gap_128_128:
+; CHECK: mov     x8, #512
+; CHECK: st2g    sp, [sp], #32
+; CHECK: sub     x8, x8, #32
+; CHECK: cbnz    x8,
+; CHECK: ret
+  %a = alloca i8, i32 128, align 16
+  %a2 = alloca i8, i32 128, align 16
+  %b = alloca i8, i32 32, align 16
+  %c = alloca i8, i32 128, align 16
+  %c2 = alloca i8, i32 128, align 16
+  call void @use(i8* %b)
+  call void @llvm.aarch64.settag(i8* %a, i64 128)
+  call void @llvm.aarch64.settag(i8* %a2, i64 128)
+  call void @llvm.aarch64.settag(i8* %c, i64 128)
+  call void @llvm.aarch64.settag(i8* %c2, i64 128)
+  ret void
+}
+
+define void @stg2(i1 %flag) {
+entry:
+; CHECK-LABEL: stg2:
+  %a = alloca i8, i32 160, align 16
+  %a2 = alloca i8, i32 160, align 16
+  %b = alloca i8, i32 32, align 16
+  %c = alloca i8, i32 128, align 16
+  %c2 = alloca i8, i32 128, align 16
+  call void @use(i8* %b)
+  br i1 %flag, label %if.then, label %if.else
+
+if.then:
+; CHECK: mov     x8, #320
+; CHECK: st2g    x9, [x9], #32
+; CHECK: sub     x8, x8, #32
+; CHECK: cbnz    x8,
+  call void @llvm.aarch64.settag(i8* %a, i64 160)
+  call void @llvm.aarch64.settag(i8* %a2, i64 160)
+  br label %if.end
+
+if.else:
+; CHECK: mov     x8, #256
+; CHECK: st2g    x9, [x9], #32
+; CHECK: sub     x8, x8, #32
+; CHECK: cbnz    x8,
+  call void @llvm.aarch64.settag(i8* %c, i64 128)
+  call void @llvm.aarch64.settag(i8* %c2, i64 128)
+  br label %if.end
+
+if.end:
+; CHECK: mov     x8, #576
+; CHECK: st2g    sp, [sp], #32
+; CHECK: sub     x8, x8, #32
+; CHECK: cbnz    x8,
+  call void @llvm.aarch64.settag(i8* %a, i64 160)
+  call void @llvm.aarch64.settag(i8* %a2, i64 160)
+  call void @llvm.aarch64.settag(i8* %c, i64 128)
+  call void @llvm.aarch64.settag(i8* %c2, i64 128)
+
+; CHECK: ret
+  ret void
+}

diff --git a/llvm/test/CodeGen/AArch64/settag-merge.ll b/llvm/test/CodeGen/AArch64/settag-merge.ll
index 1bc93a82070f..9604331d4bc8 100644
--- a/llvm/test/CodeGen/AArch64/settag-merge.ll
+++ b/llvm/test/CodeGen/AArch64/settag-merge.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=aarch64 -mattr=+mte | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64 -mattr=+mte -aarch64-order-frame-objects=0 | FileCheck %s
 
 declare void @use(i8* %p)
 declare void @llvm.aarch64.settag(i8* %p, i64 %a)


        

