[llvm] [MachineOutliner] Consider Leaf Descendants as Outlining Candidates (PR #88996)

Xuan Zhang via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 25 13:27:39 PDT 2024


https://github.com/xuanzh-meta updated https://github.com/llvm/llvm-project/pull/88996

>From 6aa6f73279e15eec8db050d4f05c4a26960d166a Mon Sep 17 00:00:00 2001
From: Xuan Zhang <xuanzh at meta.com>
Date: Fri, 12 Apr 2024 09:59:15 -0700
Subject: [PATCH 1/6] efficient implementation of
 MachineOutliner::findCandidates()

---
 llvm/lib/CodeGen/MachineOutliner.cpp          | 11 ++---
 llvm/lib/Support/SuffixTree.cpp               |  5 +++
 .../Analysis/IRSimilarityIdentifier/basic.ll  | 26 ++++++------
 .../IRSimilarityIdentifier/different.ll       |  6 +--
 .../IROutliner/outlining-commutative.ll       | 20 +++++-----
 llvm/test/tools/llvm-sim/single-sim-file.test | 40 +++++++++----------
 llvm/test/tools/llvm-sim/single-sim.test      | 40 +++++++++----------
 7 files changed, 75 insertions(+), 73 deletions(-)

diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp
index dc2f5ef15206e8..e682d42c76747e 100644
--- a/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -616,17 +616,14 @@ void MachineOutliner::findCandidates(
       // * End before the other starts
       // * Start after the other ends
       unsigned EndIdx = StartIdx + StringLen - 1;
-      auto FirstOverlap = find_if(
-          CandidatesForRepeatedSeq, [StartIdx, EndIdx](const Candidate &C) {
-            return EndIdx >= C.getStartIdx() && StartIdx <= C.getEndIdx();
-          });
-      if (FirstOverlap != CandidatesForRepeatedSeq.end()) {
+      if (CandidatesForRepeatedSeq.size() > 0 &&
+          StartIdx <= CandidatesForRepeatedSeq.back().getEndIdx()) {
 #ifndef NDEBUG
         ++NumDiscarded;
         LLVM_DEBUG(dbgs() << "    .. DISCARD candidate @ [" << StartIdx
                           << ", " << EndIdx << "]; overlaps with candidate @ ["
-                          << FirstOverlap->getStartIdx() << ", "
-                          << FirstOverlap->getEndIdx() << "]\n");
+                          << CandidatesForRepeatedSeq.back().getStartIdx() << ", "
+                          << CandidatesForRepeatedSeq.back().getEndIdx() << "]\n");
 #endif
         continue;
       }
diff --git a/llvm/lib/Support/SuffixTree.cpp b/llvm/lib/Support/SuffixTree.cpp
index eaa653078e0900..03ed1d02840aa1 100644
--- a/llvm/lib/Support/SuffixTree.cpp
+++ b/llvm/lib/Support/SuffixTree.cpp
@@ -274,6 +274,11 @@ void SuffixTree::RepeatedSubstringIterator::advance() {
     RS.Length = Length;
     for (unsigned StartIdx : RepeatedSubstringStarts)
       RS.StartIndices.push_back(StartIdx);
+
+    // Sort the start indices so that we can efficiently check if candidates
+    // overlap with each other in MachineOutliner::findCandidates().
+    llvm::sort(RS.StartIndices);
+
     break;
   }
   // At this point, either NewRS is an empty RepeatedSubstring, or it was
diff --git a/llvm/test/Analysis/IRSimilarityIdentifier/basic.ll b/llvm/test/Analysis/IRSimilarityIdentifier/basic.ll
index 1c08cb407c2e3c..b38e7d19973db6 100644
--- a/llvm/test/Analysis/IRSimilarityIdentifier/basic.ll
+++ b/llvm/test/Analysis/IRSimilarityIdentifier/basic.ll
@@ -4,7 +4,7 @@
 ; This is a simple test to make sure the IRSimilarityIdentifier and
 ; IRSimilarityPrinterPass is working.
 
-; CHECK: 4 candidates of length 6.  Found in: 
+; CHECK: 4 candidates of length 6.  Found in:
 ; CHECK-NEXT:  Function: turtle, Basic Block: (unnamed)
 ; CHECK-NEXT:    Start Instruction:   store i32 1, ptr %1, align 4
 ; CHECK-NEXT:      End Instruction:   store i32 6, ptr %6, align 4
@@ -17,7 +17,7 @@
 ; CHECK-NEXT:  Function: dog, Basic Block: entry
 ; CHECK-NEXT:    Start Instruction:   store i32 6, ptr %0, align 4
 ; CHECK-NEXT:      End Instruction:   store i32 5, ptr %5, align 4
-; CHECK-NEXT:4 candidates of length 5.  Found in: 
+; CHECK-NEXT:4 candidates of length 5.  Found in:
 ; CHECK-NEXT:  Function: turtle, Basic Block: (unnamed)
 ; CHECK-NEXT:    Start Instruction:   store i32 2, ptr %2, align 4
 ; CHECK-NEXT:      End Instruction:   store i32 6, ptr %6, align 4
@@ -30,7 +30,7 @@
 ; CHECK-NEXT:  Function: dog, Basic Block: entry
 ; CHECK-NEXT:    Start Instruction:   store i32 1, ptr %1, align 4
 ; CHECK-NEXT:      End Instruction:   store i32 5, ptr %5, align 4
-; CHECK-NEXT:4 candidates of length 4.  Found in: 
+; CHECK-NEXT:4 candidates of length 4.  Found in:
 ; CHECK-NEXT:  Function: turtle, Basic Block: (unnamed)
 ; CHECK-NEXT:    Start Instruction:   store i32 3, ptr %3, align 4
 ; CHECK-NEXT:      End Instruction:   store i32 6, ptr %6, align 4
@@ -43,7 +43,7 @@
 ; CHECK-NEXT:  Function: dog, Basic Block: entry
 ; CHECK-NEXT:    Start Instruction:   store i32 2, ptr %2, align 4
 ; CHECK-NEXT:      End Instruction:   store i32 5, ptr %5, align 4
-; CHECK-NEXT:4 candidates of length 3.  Found in: 
+; CHECK-NEXT:4 candidates of length 3.  Found in:
 ; CHECK-NEXT:  Function: turtle, Basic Block: (unnamed)
 ; CHECK-NEXT:    Start Instruction:   store i32 4, ptr %4, align 4
 ; CHECK-NEXT:      End Instruction:   store i32 6, ptr %6, align 4
@@ -56,7 +56,7 @@
 ; CHECK-NEXT:  Function: dog, Basic Block: entry
 ; CHECK-NEXT:    Start Instruction:   store i32 3, ptr %3, align 4
 ; CHECK-NEXT:      End Instruction:   store i32 5, ptr %5, align 4
-; CHECK-NEXT:4 candidates of length 2.  Found in: 
+; CHECK-NEXT:4 candidates of length 2.  Found in:
 ; CHECK-NEXT:  Function: turtle, Basic Block: (unnamed)
 ; CHECK-NEXT:    Start Instruction:   store i32 5, ptr %5, align 4
 ; CHECK-NEXT:      End Instruction:   store i32 6, ptr %6, align 4
@@ -70,40 +70,40 @@
 ; CHECK-NEXT:    Start Instruction:   store i32 4, ptr %4, align 4
 ; CHECK-NEXT:      End Instruction:   store i32 5, ptr %5, align 4
 
-define linkonce_odr void @fish() {
-entry:
-  %0 = alloca i32, align 4
+define void @turtle() {
   %1 = alloca i32, align 4
   %2 = alloca i32, align 4
   %3 = alloca i32, align 4
   %4 = alloca i32, align 4
   %5 = alloca i32, align 4
-  store i32 6, ptr %0, align 4
+  %6 = alloca i32, align 4
   store i32 1, ptr %1, align 4
   store i32 2, ptr %2, align 4
   store i32 3, ptr %3, align 4
   store i32 4, ptr %4, align 4
   store i32 5, ptr %5, align 4
+  store i32 6, ptr %6, align 4
   ret void
 }
 
-define void @turtle() {
+define void @cat() {
+entry:
+  %0 = alloca i32, align 4
   %1 = alloca i32, align 4
   %2 = alloca i32, align 4
   %3 = alloca i32, align 4
   %4 = alloca i32, align 4
   %5 = alloca i32, align 4
-  %6 = alloca i32, align 4
+  store i32 6, ptr %0, align 4
   store i32 1, ptr %1, align 4
   store i32 2, ptr %2, align 4
   store i32 3, ptr %3, align 4
   store i32 4, ptr %4, align 4
   store i32 5, ptr %5, align 4
-  store i32 6, ptr %6, align 4
   ret void
 }
 
-define void @cat() {
+define linkonce_odr void @fish() {
 entry:
   %0 = alloca i32, align 4
   %1 = alloca i32, align 4
diff --git a/llvm/test/Analysis/IRSimilarityIdentifier/different.ll b/llvm/test/Analysis/IRSimilarityIdentifier/different.ll
index e5c9970b159b9f..70d422077c3e9c 100644
--- a/llvm/test/Analysis/IRSimilarityIdentifier/different.ll
+++ b/llvm/test/Analysis/IRSimilarityIdentifier/different.ll
@@ -14,11 +14,11 @@
 ; CHECK-NEXT:       End Instruction:   store i32 5, ptr %5, align 4
 ; CHECK-NEXT: 2 candidates of length 3.  Found in:
 ; CHECK-NEXT:   Function: turtle, Basic Block: (unnamed)
-; CHECK-NEXT:     Start Instruction:   %b = load i32, ptr %1, align 4
-; CHECK-NEXT:       End Instruction:   %d = load i32, ptr %3, align 4
-; CHECK-NEXT:   Function: turtle, Basic Block: (unnamed)
 ; CHECK-NEXT:     Start Instruction:   %a = load i32, ptr %0, align 4
 ; CHECK-NEXT:       End Instruction:   %c = load i32, ptr %2, align 4
+; CHECK-NEXT:   Function: turtle, Basic Block: (unnamed)
+; CHECK-NEXT:     Start Instruction:   %b = load i32, ptr %1, align 4
+; CHECK-NEXT:       End Instruction:   %d = load i32, ptr %3, align 4
 
 define linkonce_odr void @fish() {
 entry:
diff --git a/llvm/test/Transforms/IROutliner/outlining-commutative.ll b/llvm/test/Transforms/IROutliner/outlining-commutative.ll
index 8862dc295d4351..1534829bad7ba7 100644
--- a/llvm/test/Transforms/IROutliner/outlining-commutative.ll
+++ b/llvm/test/Transforms/IROutliner/outlining-commutative.ll
@@ -123,7 +123,7 @@ define void @outline_from_sub1() {
 ; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    call void @outlined_ir_func_2(ptr [[A]], ptr [[B]], ptr [[C]])
+; CHECK-NEXT:    call void @outlined_ir_func_1(ptr [[A]], ptr [[B]], ptr [[C]])
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -148,7 +148,7 @@ define void @outline_from_sub2() {
 ; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    call void @outlined_ir_func_2(ptr [[A]], ptr [[B]], ptr [[C]])
+; CHECK-NEXT:    call void @outlined_ir_func_1(ptr [[A]], ptr [[B]], ptr [[C]])
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -173,7 +173,7 @@ define void @dontoutline_from_flipped_sub3() {
 ; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    call void @outlined_ir_func_1(ptr [[A]], ptr [[B]], ptr [[C]])
+; CHECK-NEXT:    call void @outlined_ir_func_2(ptr [[A]], ptr [[B]], ptr [[C]])
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -198,7 +198,7 @@ define void @dontoutline_from_flipped_sub4() {
 ; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    call void @outlined_ir_func_1(ptr [[A]], ptr [[B]], ptr [[C]])
+; CHECK-NEXT:    call void @outlined_ir_func_2(ptr [[A]], ptr [[B]], ptr [[C]])
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -237,9 +237,9 @@ entry:
 ; CHECK-NEXT:    [[AL:%.*]] = load i32, ptr [[ARG0]], align 4
 ; CHECK-NEXT:    [[BL:%.*]] = load i32, ptr [[ARG1]], align 4
 ; CHECK-NEXT:    [[CL:%.*]] = load i32, ptr [[ARG2]], align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = sub i32 [[BL]], [[AL]]
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[CL]], [[AL]]
-; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[CL]], [[BL]]
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i32 [[AL]], [[BL]]
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[AL]], [[CL]]
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[BL]], [[CL]]
 
 ; CHECK: define internal void @outlined_ir_func_2(ptr [[ARG0:%.*]], ptr [[ARG1:%.*]], ptr [[ARG2:%.*]]) #0 {
 ; CHECK: entry_to_outline:
@@ -249,6 +249,6 @@ entry:
 ; CHECK-NEXT:    [[AL:%.*]] = load i32, ptr [[ARG0]], align 4
 ; CHECK-NEXT:    [[BL:%.*]] = load i32, ptr [[ARG1]], align 4
 ; CHECK-NEXT:    [[CL:%.*]] = load i32, ptr [[ARG2]], align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = sub i32 [[AL]], [[BL]]
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[AL]], [[CL]]
-; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[BL]], [[CL]]
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i32 [[BL]], [[AL]]
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[CL]], [[AL]]
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[CL]], [[BL]]
diff --git a/llvm/test/tools/llvm-sim/single-sim-file.test b/llvm/test/tools/llvm-sim/single-sim-file.test
index cef14b36085005..4279931f36cdf2 100644
--- a/llvm/test/tools/llvm-sim/single-sim-file.test
+++ b/llvm/test/tools/llvm-sim/single-sim-file.test
@@ -6,52 +6,52 @@
 # CHECK: {
 # CHECK-NEXT: "1": [
 # CHECK-NEXT:  {
-# CHECK-NEXT:   "start": 14,
-# CHECK-NEXT:   "end": 19
-# CHECK-NEXT:  },
-# CHECK-NEXT:  {
 # CHECK-NEXT:   "start": 4,
 # CHECK-NEXT:   "end": 9
+# CHECK-NEXT:  },
+# CHECK-NEXT:  {
+# CHECK-NEXT:   "start": 14,
+# CHECK-NEXT:   "end": 19
 # CHECK-NEXT:  }
 # CHECK-NEXT: ],
 # CHECK-NEXT: "2": [
 # CHECK-NEXT:  {
-# CHECK-NEXT:   "start": 15,
-# CHECK-NEXT:   "end": 19
-# CHECK-NEXT:  },
-# CHECK-NEXT:  {
 # CHECK-NEXT:   "start": 5,
 # CHECK-NEXT:   "end": 9
+# CHECK-NEXT:  },
+# CHECK-NEXT:  {
+# CHECK-NEXT:   "start": 15,
+# CHECK-NEXT:   "end": 19
 # CHECK-NEXT:  }
 # CHECK-NEXT: ],
 # CHECK-NEXT: "3": [
 # CHECK-NEXT:  {
-# CHECK-NEXT:   "start": 16,
-# CHECK-NEXT:   "end": 19
-# CHECK-NEXT:  },
-# CHECK-NEXT:  {
 # CHECK-NEXT:   "start": 6,
 # CHECK-NEXT:   "end": 9
+# CHECK-NEXT:  },
+# CHECK-NEXT:  {
+# CHECK-NEXT:   "start": 16,
+# CHECK-NEXT:   "end": 19
 # CHECK-NEXT:  }
 # CHECK-NEXT: ],
 # CHECK-NEXT: "4": [
 # CHECK-NEXT:  {
-# CHECK-NEXT:   "start": 17,
-# CHECK-NEXT:   "end": 19
-# CHECK-NEXT:  },
-# CHECK-NEXT:  {
 # CHECK-NEXT:   "start": 7,
 # CHECK-NEXT:   "end": 9
+# CHECK-NEXT:  },
+# CHECK-NEXT:  {
+# CHECK-NEXT:   "start": 17,
+# CHECK-NEXT:   "end": 19
 # CHECK-NEXT:  }
 # CHECK-NEXT: ],
 # CHECK-NEXT: "5": [
 # CHECK-NEXT:  {
-# CHECK-NEXT:   "start": 18,
-# CHECK-NEXT:   "end": 19
-# CHECK-NEXT:  },
-# CHECK-NEXT:  {
 # CHECK-NEXT:   "start": 8,
 # CHECK-NEXT:   "end": 9
+# CHECK-NEXT:  },
+# CHECK-NEXT:  {
+# CHECK-NEXT:   "start": 18,
+# CHECK-NEXT:   "end": 19
 # CHECK-NEXT:  }
 # CHECK-NEXT: ]
 # CHECK-NEXT:}
diff --git a/llvm/test/tools/llvm-sim/single-sim.test b/llvm/test/tools/llvm-sim/single-sim.test
index 0095ec6acbc588..3300b5cbda31a5 100644
--- a/llvm/test/tools/llvm-sim/single-sim.test
+++ b/llvm/test/tools/llvm-sim/single-sim.test
@@ -5,52 +5,52 @@
 # CHECK: {
 # CHECK-NEXT: "1": [
 # CHECK-NEXT:  {
-# CHECK-NEXT:   "start": 14,
-# CHECK-NEXT:   "end": 19
-# CHECK-NEXT:  },
-# CHECK-NEXT:  {
 # CHECK-NEXT:   "start": 4,
 # CHECK-NEXT:   "end": 9
+# CHECK-NEXT:  },
+# CHECK-NEXT:  {
+# CHECK-NEXT:   "start": 14,
+# CHECK-NEXT:   "end": 19
 # CHECK-NEXT:  }
 # CHECK-NEXT: ],
 # CHECK-NEXT: "2": [
 # CHECK-NEXT:  {
-# CHECK-NEXT:   "start": 15,
-# CHECK-NEXT:   "end": 19
-# CHECK-NEXT:  },
-# CHECK-NEXT:  {
 # CHECK-NEXT:   "start": 5,
 # CHECK-NEXT:   "end": 9
+# CHECK-NEXT:  },
+# CHECK-NEXT:  {
+# CHECK-NEXT:   "start": 15,
+# CHECK-NEXT:   "end": 19
 # CHECK-NEXT:  }
 # CHECK-NEXT: ],
 # CHECK-NEXT: "3": [
 # CHECK-NEXT:  {
-# CHECK-NEXT:   "start": 16,
-# CHECK-NEXT:   "end": 19
-# CHECK-NEXT:  },
-# CHECK-NEXT:  {
 # CHECK-NEXT:   "start": 6,
 # CHECK-NEXT:   "end": 9
+# CHECK-NEXT:  },
+# CHECK-NEXT:  {
+# CHECK-NEXT:   "start": 16,
+# CHECK-NEXT:   "end": 19
 # CHECK-NEXT:  }
 # CHECK-NEXT: ],
 # CHECK-NEXT: "4": [
 # CHECK-NEXT:  {
-# CHECK-NEXT:   "start": 17,
-# CHECK-NEXT:   "end": 19
-# CHECK-NEXT:  },
-# CHECK-NEXT:  {
 # CHECK-NEXT:   "start": 7,
 # CHECK-NEXT:   "end": 9
+# CHECK-NEXT:  },
+# CHECK-NEXT:  {
+# CHECK-NEXT:   "start": 17,
+# CHECK-NEXT:   "end": 19
 # CHECK-NEXT:  }
 # CHECK-NEXT: ],
 # CHECK-NEXT: "5": [
 # CHECK-NEXT:  {
-# CHECK-NEXT:   "start": 18,
-# CHECK-NEXT:   "end": 19
-# CHECK-NEXT:  },
-# CHECK-NEXT:  {
 # CHECK-NEXT:   "start": 8,
 # CHECK-NEXT:   "end": 9
+# CHECK-NEXT:  },
+# CHECK-NEXT:  {
+# CHECK-NEXT:   "start": 18,
+# CHECK-NEXT:   "end": 19
 # CHECK-NEXT:  }
 # CHECK-NEXT: ]
 # CHECK-NEXT:}

>From ef79176889a70c4c0c653f35e3cb464125600599 Mon Sep 17 00:00:00 2001
From: Xuan Zhang <xuanzh at meta.com>
Date: Wed, 24 Apr 2024 11:35:36 -0700
Subject: [PATCH 2/6] fix additional test case

---
 .../CodeGen/AArch64/machine-outliner-overlap.mir     | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-overlap.mir b/llvm/test/CodeGen/AArch64/machine-outliner-overlap.mir
index 649bb33828c32c..c6bd4c1d04d871 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner-overlap.mir
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-overlap.mir
@@ -8,27 +8,27 @@
 # CHECK-NEXT:    Candidates discarded: 0
 # CHECK-NEXT:    Candidates kept: 2
 # CHECK-DAG:  Sequence length: 8
-# CHECK-NEXT:    .. DISCARD candidate @ [5, 12]; overlaps with candidate @ [12, 19]
+# CHECK-NEXT:    .. DISCARD candidate @ [12, 19]; overlaps with candidate @ [5, 12]
 # CHECK-NEXT:    Candidates discarded: 1
 # CHECK-NEXT:    Candidates kept: 1
 # CHECK-DAG:   Sequence length: 9
-# CHECK-NEXT:    .. DISCARD candidate @ [4, 12]; overlaps with candidate @ [11, 19]
+# CHECK-NEXT:    .. DISCARD candidate @ [11, 19]; overlaps with candidate @ [4, 12]
 # CHECK-NEXT:    Candidates discarded: 1
 # CHECK-NEXT:    Candidates kept: 1
 # CHECK-DAG:   Sequence length: 10
-# CHECK-NEXT:    .. DISCARD candidate @ [3, 12]; overlaps with candidate @ [10, 19]
+# CHECK-NEXT:    .. DISCARD candidate @ [10, 19]; overlaps with candidate @ [3, 12]
 # CHECK-NEXT:    Candidates discarded: 1
 # CHECK-NEXT:    Candidates kept: 1
 # CHECK-DAG:   Sequence length: 11
-# CHECK-NEXT:    .. DISCARD candidate @ [2, 12]; overlaps with candidate @ [9, 19]
+# CHECK-NEXT:    .. DISCARD candidate @ [9, 19]; overlaps with candidate @ [2, 12]
 # CHECK-NEXT:    Candidates discarded: 1
 # CHECK-NEXT:    Candidates kept: 1
 # CHECK-DAG:   Sequence length: 12
-# CHECK-NEXT:    .. DISCARD candidate @ [1, 12]; overlaps with candidate @ [8, 19]
+# CHECK-NEXT:    .. DISCARD candidate @ [8, 19]; overlaps with candidate @ [1, 12]
 # CHECK-NEXT:    Candidates discarded: 1
 # CHECK-NEXT:    Candidates kept: 1
 # CHECK-DAG:   Sequence length: 13
-# CHECK-NEXT:    .. DISCARD candidate @ [0, 12]; overlaps with candidate @ [7, 19]
+# CHECK-NEXT:    .. DISCARD candidate @ [7, 19]; overlaps with candidate @ [0, 12]
 # CHECK-NEXT:    Candidates discarded: 1
 # CHECK-NEXT:    Candidates kept: 1
 

>From 24bbfa7a393e574988b9ac01deb5bc2320e43984 Mon Sep 17 00:00:00 2001
From: Xuan Zhang <xuanzh at meta.com>
Date: Fri, 12 Apr 2024 10:55:13 -0700
Subject: [PATCH 3/6] outlining order based on priority instead of benefits

---
 llvm/lib/CodeGen/MachineOutliner.cpp          | 10 +-
 .../machine-outliner-sort-per-priority.ll     | 96 +++++++++++++++++++
 2 files changed, 104 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/machine-outliner-sort-per-priority.ll

diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp
index e682d42c76747e..341c94e7adf2ee 100644
--- a/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -825,10 +825,16 @@ bool MachineOutliner::outline(Module &M,
                     << "\n");
   bool OutlinedSomething = false;
 
-  // Sort by benefit. The most beneficial functions should be outlined first.
+  // Sort by priority where priority := getNotOutlinedCost / getOutliningCost.
+  // The function with highest priority should be outlined first.
   stable_sort(FunctionList,
               [](const OutlinedFunction &LHS, const OutlinedFunction &RHS) {
-                return LHS.getBenefit() > RHS.getBenefit();
+                if (LHS.getBenefit() == 0)
+                  return false;
+                if (LHS.getBenefit() > 0 && RHS.getBenefit() == 0)
+                  return true;
+                return LHS.getNotOutlinedCost() * RHS.getOutliningCost() >
+                       RHS.getNotOutlinedCost() * LHS.getOutliningCost();
               });
 
   // Walk over each function, outlining them as we go along. Functions are
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-sort-per-priority.ll b/llvm/test/CodeGen/AArch64/machine-outliner-sort-per-priority.ll
new file mode 100644
index 00000000000000..00efc3c6e71c89
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-sort-per-priority.ll
@@ -0,0 +1,96 @@
+; This tests the order in which functions are outlined in MachineOutliner
+; There are TWO key OutlinedFunction in FunctionList
+;
+; ===================== First One =====================
+;   ```
+;     mov     w0, #1
+;     mov     w1, #2
+;     mov     w2, #3
+;     mov     w3, #4
+;     mov     w4, #5
+;   ```
+; It has:
+;   - `SequenceSize=20` and `OccurrenceCount=6`
+;   - each Candidate has `CallOverhead=12` and `FrameOverhead=4`
+;   - `NotOutlinedCost=20*6=120` and `OutliningCost=12*6+20+4=96`
+;   - `Benefit=120-96=24` and `Priority=120/96=1.25`
+;
+; ===================== Second One =====================
+;   ```
+;     mov     w6, #6
+;     mov     w7, #7
+;     b
+;   ```
+; It has:
+;   - `SequenceSize=12` and `OccurrenceCount=4`
+;   - each Candidate has `CallOverhead=4` and `FrameOverhead=0`
+;   - `NotOutlinedCost=12*4=48` and `OutliningCost=4*4+12+0=28`
+;   - `Benefit=120-96=20` and `Priority=48/28=1.71`
+;
+; Note that the first one has higher benefit, but lower priority.
+; Hence, when outlining per priority, the second one will be outlined first.
+
+; RUN: llc %s -enable-machine-outliner=always -filetype=obj -o %t
+; RUN: llvm-objdump -d %t | FileCheck %s --check-prefix=CHECK-SORT-BY-PRIORITY
+
+; RUN: llc %s -enable-machine-outliner=always -outliner-benefit-threshold=22 -filetype=obj -o %t
+; RUN: llvm-objdump -d %t | FileCheck %s --check-prefix=CHECK-THRESHOLD
+
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-macosx14.0.0"
+
+declare i32 @_Z3fooiiii(i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef)
+
+define i32 @_Z2f1v() minsize {
+  %1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 11, i32 noundef 6, i32 noundef 7)
+  ret i32 %1
+}
+
+define i32 @_Z2f2v() minsize {
+  %1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 12, i32 noundef 6, i32 noundef 7)
+  ret i32 %1
+}
+
+define i32 @_Z2f3v() minsize {
+  %1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 13, i32 noundef 6, i32 noundef 7)
+  ret i32 %1
+}
+
+define i32 @_Z2f4v() minsize {
+  %1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 14, i32 noundef 6, i32 noundef 7)
+  ret i32 %1
+}
+
+define i32 @_Z2f5v() minsize {
+  %1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 15, i32 noundef 8, i32 noundef 9)
+  ret i32 %1
+}
+
+define i32 @_Z2f6v() minsize {
+  %1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 16, i32 noundef 9, i32 noundef 8)
+  ret i32 %1
+}
+
+; CHECK-SORT-BY-PRIORITY: <_OUTLINED_FUNCTION_0>:
+; CHECK-SORT-BY-PRIORITY-NEXT: mov     w6, #0x6
+; CHECK-SORT-BY-PRIORITY-NEXT: mov     w7, #0x7
+; CHECK-SORT-BY-PRIORITY-NEXT: b
+
+; CHECK-SORT-BY-PRIORITY: <_OUTLINED_FUNCTION_1>:
+; CHECK-SORT-BY-PRIORITY-NEXT: mov     w0, #0x1
+; CHECK-SORT-BY-PRIORITY-NEXT: mov     w1, #0x2
+; CHECK-SORT-BY-PRIORITY-NEXT: mov     w2, #0x3
+; CHECK-SORT-BY-PRIORITY-NEXT: mov     w3, #0x4
+; CHECK-SORT-BY-PRIORITY-NEXT: mov     w4, #0x5
+; CHECK-SORT-BY-PRIORITY-NEXT: ret
+
+; CHECK-THRESHOLD: <_OUTLINED_FUNCTION_0>:
+; CHECK-THRESHOLD-NEXT: mov     w0, #0x1
+; CHECK-THRESHOLD-NEXT: mov     w1, #0x2
+; CHECK-THRESHOLD-NEXT: mov     w2, #0x3
+; CHECK-THRESHOLD-NEXT: mov     w3, #0x4
+; CHECK-THRESHOLD-NEXT: mov     w4, #0x5
+; CHECK-THRESHOLD-NEXT: ret
+
+; CHECK-THRESHOLD-NOT: <_OUTLINED_FUNCTION_1>:

>From 74abcba07ba388b61fc833db980758819bb51ad6 Mon Sep 17 00:00:00 2001
From: Xuan Zhang <xuanzh at meta.com>
Date: Thu, 25 Apr 2024 09:58:00 -0700
Subject: [PATCH 4/6] fix test cases for ARM

---
 .../CodeGen/ARM/machine-outliner-calls.mir    | 80 +++++++------------
 .../CodeGen/ARM/machine-outliner-default.mir  | 33 ++++----
 .../ARM/machine-outliner-stack-fixup-arm.mir  | 56 ++++++-------
 .../machine-outliner-stack-fixup-thumb.mir    | 74 +++++++----------
 4 files changed, 94 insertions(+), 149 deletions(-)

diff --git a/llvm/test/CodeGen/ARM/machine-outliner-calls.mir b/llvm/test/CodeGen/ARM/machine-outliner-calls.mir
index a92c9dd28be5ae..7634ecd6e863ae 100644
--- a/llvm/test/CodeGen/ARM/machine-outliner-calls.mir
+++ b/llvm/test/CodeGen/ARM/machine-outliner-calls.mir
@@ -26,15 +26,15 @@ body:             |
   ; CHECK:   frame-setup CFI_INSTRUCTION def_cfa_offset 8
   ; CHECK:   frame-setup CFI_INSTRUCTION offset $lr, -4
   ; CHECK:   frame-setup CFI_INSTRUCTION offset $r4, -8
-  ; CHECK:   BL @OUTLINED_FUNCTION_0
+  ; CHECK:   BL @OUTLINED_FUNCTION_2
   ; CHECK: bb.1:
-  ; CHECK:   BL @OUTLINED_FUNCTION_0
+  ; CHECK:   BL @OUTLINED_FUNCTION_2
   ; CHECK: bb.2:
-  ; CHECK:   BL @OUTLINED_FUNCTION_0
+  ; CHECK:   BL @OUTLINED_FUNCTION_2
   ; CHECK: bb.3:
-  ; CHECK:   BL @OUTLINED_FUNCTION_0
+  ; CHECK:   BL @OUTLINED_FUNCTION_2
   ; CHECK: bb.4:
-  ; CHECK:   BL @OUTLINED_FUNCTION_0
+  ; CHECK:   BL @OUTLINED_FUNCTION_2
   ; CHECK: bb.5:
   ; CHECK:   $sp = frame-destroy LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r4, def $lr
   ; CHECK:   BX_RET 14 /* CC::al */, $noreg
@@ -139,13 +139,13 @@ body:             |
   ; CHECK:   frame-setup CFI_INSTRUCTION def_cfa_offset 8
   ; CHECK:   frame-setup CFI_INSTRUCTION offset $lr, -4
   ; CHECK:   frame-setup CFI_INSTRUCTION offset $r4, -8
-  ; CHECK:   BL @OUTLINED_FUNCTION_1
+  ; CHECK:   BL @OUTLINED_FUNCTION_0
   ; CHECK: bb.1:
-  ; CHECK:   BL @OUTLINED_FUNCTION_1
+  ; CHECK:   BL @OUTLINED_FUNCTION_0
   ; CHECK: bb.2:
-  ; CHECK:   BL @OUTLINED_FUNCTION_1
+  ; CHECK:   BL @OUTLINED_FUNCTION_0
   ; CHECK: bb.3:
-  ; CHECK:   BL @OUTLINED_FUNCTION_1
+  ; CHECK:   BL @OUTLINED_FUNCTION_0
   ; CHECK: bb.4:
   ; CHECK:   $sp = frame-destroy LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r4, def $lr
   ; CHECK:   BX_RET 14 /* CC::al */, $noreg
@@ -245,19 +245,19 @@ body:             |
   ; CHECK:   frame-setup CFI_INSTRUCTION offset $lr, -4
   ; CHECK:   frame-setup CFI_INSTRUCTION offset $r4, -8
   ; CHECK:   BL @"\01mcount", csr_aapcs, implicit-def dead $lr, implicit $sp
-  ; CHECK:   BL @OUTLINED_FUNCTION_2
+  ; CHECK:   BL @OUTLINED_FUNCTION_1
   ; CHECK: bb.1:
   ; CHECK:   BL @"\01mcount", csr_aapcs, implicit-def dead $lr, implicit $sp
-  ; CHECK:   BL @OUTLINED_FUNCTION_2
+  ; CHECK:   BL @OUTLINED_FUNCTION_1
   ; CHECK: bb.2:
   ; CHECK:   BL @"\01mcount", csr_aapcs, implicit-def dead $lr, implicit $sp
-  ; CHECK:   BL @OUTLINED_FUNCTION_2
+  ; CHECK:   BL @OUTLINED_FUNCTION_1
   ; CHECK: bb.3:
   ; CHECK:   BL @"\01mcount", csr_aapcs, implicit-def dead $lr, implicit $sp
-  ; CHECK:   BL @OUTLINED_FUNCTION_2
+  ; CHECK:   BL @OUTLINED_FUNCTION_1
   ; CHECK: bb.4:
   ; CHECK:   BL @"\01mcount", csr_aapcs, implicit-def dead $lr, implicit $sp
-  ; CHECK:   BL @OUTLINED_FUNCTION_2
+  ; CHECK:   BL @OUTLINED_FUNCTION_1
   ; CHECK: bb.5:
   ; CHECK:   $sp = frame-destroy LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r4, def $lr
   ; CHECK:   BX_RET 14 /* CC::al */, $noreg
@@ -307,38 +307,17 @@ body:             |
   bb.0:
     BX_RET 14, $noreg
 
-
   ; CHECK-LABEL: name: OUTLINED_FUNCTION_0
   ; CHECK: bb.0:
-  ; CHECK:   liveins: $r11, $r10, $r9, $r8, $r7, $r6, $r5, $d15, $d14, $d13, $d12, $d11, $d10, $d9, $d8, $lr
-  ; CHECK:   early-clobber $sp = frame-setup STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg
-  ; CHECK:   frame-setup CFI_INSTRUCTION def_cfa_offset 8
-  ; CHECK:   frame-setup CFI_INSTRUCTION offset $lr, -8
-  ; CHECK:   BL @bar, implicit-def dead $lr, implicit $sp
-  ; CHECK:   $r0 = MOVi 1, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   $r1 = MOVi 1, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   $r2 = MOVi 1, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   $r3 = MOVi 1, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   $r4 = MOVi 1, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   $lr, $sp = frame-destroy LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg
-  ; CHECK:   MOVPCLR 14 /* CC::al */, $noreg
-
-  ; CHECK-LABEL: name: OUTLINED_FUNCTION_1
-  ; CHECK: bb.0:
-  ; CHECK:   liveins: $r11, $r10, $r9, $r8, $r7, $r6, $r5, $d15, $d14, $d13, $d12, $d11, $d10, $d9, $d8, $lr
-  ; CHECK:   early-clobber $sp = frame-setup STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg
-  ; CHECK:   frame-setup CFI_INSTRUCTION def_cfa_offset 8
-  ; CHECK:   frame-setup CFI_INSTRUCTION offset $lr, -8
-  ; CHECK:   BL @bar, implicit-def dead $lr, implicit $sp
+  ; CHECK:   liveins: $r11, $r10, $r9, $r8, $r7, $r6, $r5, $d15, $d14, $d13, $d12, $d11, $d10, $d9, $d8
   ; CHECK:   $r0 = MOVi 2, 14 /* CC::al */, $noreg, $noreg
   ; CHECK:   $r1 = MOVi 2, 14 /* CC::al */, $noreg, $noreg
   ; CHECK:   $r2 = MOVi 2, 14 /* CC::al */, $noreg, $noreg
   ; CHECK:   $r3 = MOVi 2, 14 /* CC::al */, $noreg, $noreg
   ; CHECK:   $r4 = MOVi 2, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   $lr, $sp = frame-destroy LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg
   ; CHECK:   TAILJMPd @bar, implicit $sp
 
-  ; CHECK-LABEL: name: OUTLINED_FUNCTION_2
+  ; CHECK-LABEL: name: OUTLINED_FUNCTION_1
   ; CHECK: bb.0:
   ; CHECK:   liveins: $r11, $r10, $r9, $r8, $r7, $r6, $r5, $d15, $d14, $d13, $d12, $d11, $d10, $d9, $d8
   ; CHECK:   $r0 = MOVi 3, 14 /* CC::al */, $noreg, $noreg
@@ -348,31 +327,28 @@ body:             |
   ; CHECK:   $r4 = MOVi 3, 14 /* CC::al */, $noreg, $noreg
   ; CHECK:   MOVPCLR 14 /* CC::al */, $noreg
 
+  ; CHECK-LABEL: name: OUTLINED_FUNCTION_2
+  ; CHECK: bb.0:
+  ; CHECK:   liveins: $r11, $r10, $r9, $r8, $r7, $r6, $r5, $d15, $d14, $d13, $d12, $d11, $d10, $d9, $d8
+  ; CHECK:   $r0 = MOVi 1, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK:   $r1 = MOVi 1, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK:   $r2 = MOVi 1, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK:   $r3 = MOVi 1, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK:   $r4 = MOVi 1, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK:   MOVPCLR 14 /* CC::al */, $noreg
+
   ; CHECK-LABEL: name: OUTLINED_FUNCTION_3
   ; CHECK: bb.0:
-  ; CHECK:   liveins: $r11, $r10, $r9, $r8, $r6, $r5, $r4, $d15, $d14, $d13, $d12, $d11, $d10, $d9, $d8, $lr
-  ; CHECK:   early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg
-  ; CHECK:   frame-setup CFI_INSTRUCTION def_cfa_offset 8
-  ; CHECK:   frame-setup CFI_INSTRUCTION offset $lr, -8
-  ; CHECK:   tBL 14 /* CC::al */, $noreg, @bar, implicit-def dead $lr, implicit $sp
+  ; CHECK:   liveins: $r11, $r10, $r9, $r8, $r6, $r5, $r4, $d15, $d14, $d13, $d12, $d11, $d10, $d9, $d8
   ; CHECK:   $r0 = t2MOVi 2, 14 /* CC::al */, $noreg, $noreg
   ; CHECK:   $r1 = t2MOVi 2, 14 /* CC::al */, $noreg, $noreg
   ; CHECK:   $r2 = t2MOVi 2, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   $lr, $sp = frame-destroy t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg
   ; CHECK:   tTAILJMPdND @bar, 14 /* CC::al */, $noreg, implicit $sp
 
   ; CHECK-LABEL: name: OUTLINED_FUNCTION_4
   ; CHECK: bb.0:
-  ; CHECK:   liveins: $r11, $r10, $r9, $r8, $r6, $r5, $r4, $d15, $d14, $d13, $d12, $d11, $d10, $d9, $d8, $lr
-  ; CHECK:   early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg
-  ; CHECK:   frame-setup CFI_INSTRUCTION def_cfa_offset 8
-  ; CHECK:   frame-setup CFI_INSTRUCTION offset $lr, -8
-  ; CHECK:   tBL 14 /* CC::al */, $noreg, @bar, implicit-def dead $lr, implicit $sp
+  ; CHECK:   liveins: $r11, $r10, $r9, $r8, $r6, $r5, $r4, $d15, $d14, $d13, $d12, $d11, $d10, $d9, $d8
   ; CHECK:   $r0 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg
   ; CHECK:   $r1 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg
   ; CHECK:   $r2 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   $lr, $sp = frame-destroy t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg
   ; CHECK:   tBX_RET 14 /* CC::al */, $noreg
-
-
-
diff --git a/llvm/test/CodeGen/ARM/machine-outliner-default.mir b/llvm/test/CodeGen/ARM/machine-outliner-default.mir
index 6d0218dbfe636d..de2b8f55969765 100644
--- a/llvm/test/CodeGen/ARM/machine-outliner-default.mir
+++ b/llvm/test/CodeGen/ARM/machine-outliner-default.mir
@@ -19,17 +19,17 @@ body:             |
   ; CHECK: bb.0:
   ; CHECK:   liveins: $lr
   ; CHECK:   early-clobber $sp = frame-setup STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg
-  ; CHECK:   BL @OUTLINED_FUNCTION_0
+  ; CHECK:   BL @OUTLINED_FUNCTION_1
   ; CHECK:   $lr, $sp = frame-destroy LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg
   ; CHECK: bb.1:
   ; CHECK:   liveins: $lr, $r6, $r7, $r8, $r9, $r10, $r11
   ; CHECK:   early-clobber $sp = frame-setup STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg
-  ; CHECK:   BL @OUTLINED_FUNCTION_0
+  ; CHECK:   BL @OUTLINED_FUNCTION_1
   ; CHECK:   $lr, $sp = frame-destroy LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg
   ; CHECK: bb.2:
   ; CHECK:   liveins: $lr, $r6, $r7, $r8, $r9, $r10, $r11
   ; CHECK:   early-clobber $sp = frame-setup STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg
-  ; CHECK:   BL @OUTLINED_FUNCTION_0
+  ; CHECK:   BL @OUTLINED_FUNCTION_1
   ; CHECK:   $lr, $sp = frame-destroy LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg
   ; CHECK: bb.3:
   ; CHECK:   liveins: $lr, $r6, $r7, $r8, $r9, $r10, $r11
@@ -73,17 +73,17 @@ body:             |
   ; CHECK: bb.0:
   ; CHECK:   liveins: $lr
   ; CHECK:   early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg
-  ; CHECK:   tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_1
+  ; CHECK:   tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_0
   ; CHECK:   $lr, $sp = frame-destroy t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg
   ; CHECK: bb.1:
   ; CHECK:   liveins: $lr, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11
   ; CHECK:   early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg
-  ; CHECK:   tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_1
+  ; CHECK:   tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_0
   ; CHECK:   $lr, $sp = frame-destroy t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg
   ; CHECK: bb.2:
   ; CHECK:   liveins: $lr, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11
   ; CHECK:   early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg
-  ; CHECK:   tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_1
+  ; CHECK:   tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_0
   ; CHECK:   $lr, $sp = frame-destroy t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg
   ; CHECK: bb.3:
   ; CHECK:   liveins: $lr, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11
@@ -114,6 +114,15 @@ body:             |
 
   ; CHECK-LABEL: name: OUTLINED_FUNCTION_0
   ; CHECK: bb.0:
+  ; CHECK:   liveins: $lr, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11
+  ; CHECK:   $r0 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK:   $r1 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK:   $r2 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK:   $r3 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg
+  ; CHECK:   tBX_RET 14 /* CC::al */, $noreg
+
+  ; CHECK-LABEL: name: OUTLINED_FUNCTION_1
+  ; CHECK: bb.0:
   ; CHECK:   liveins: $lr, $r6, $r7, $r8, $r9, $r10, $r11
   ; CHECK:   $r0 = MOVi 1, 14 /* CC::al */, $noreg, $noreg
   ; CHECK:   $r1 = MOVi 1, 14 /* CC::al */, $noreg, $noreg
@@ -122,15 +131,3 @@ body:             |
   ; CHECK:   $r4 = MOVi 1, 14 /* CC::al */, $noreg, $noreg
   ; CHECK:   $r5 = MOVi 1, 14 /* CC::al */, $noreg, $noreg
   ; CHECK:   MOVPCLR 14 /* CC::al */, $noreg
-
-  ; CHECK-LABEL: name: OUTLINED_FUNCTION_1
-  ; CHECK: bb.0:
-  ; CHECK:   liveins: $lr, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11
-  ; CHECK:   $r0 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   $r1 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   $r2 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   $r3 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg
-  ; CHECK:   tBX_RET 14 /* CC::al */, $noreg
-
-
-
diff --git a/llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-arm.mir b/llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-arm.mir
index ae5caa5b7c06db..e71edc8ceb3f60 100644
--- a/llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-arm.mir
+++ b/llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-arm.mir
@@ -18,6 +18,7 @@ body:             |
     liveins: $r0
     ; CHECK-LABEL: name:           CheckAddrMode_i12
     ; CHECK: $r1 = MOVr killed $r0, 14 /* CC::al */, $noreg, $noreg
+    ; CHECK-NEXT: BL @foo, implicit-def dead $lr, implicit $sp
     ; CHECK-NEXT: BL @OUTLINED_FUNCTION_[[I12:[0-9]+]]
     ; CHECK-NEXT: $r6 = LDRi12 $sp, 4088, 14 /* CC::al */, $noreg
     $r1 = MOVr killed $r0, 14, $noreg, $noreg
@@ -47,6 +48,7 @@ body:             |
     liveins: $r1
     ; CHECK-LABEL: name:           CheckAddrMode3
     ; CHECK: $r0 = MOVr killed $r1, 14 /* CC::al */, $noreg, $noreg
+    ; CHECK-NEXT: BL @foo, implicit-def dead $lr, implicit $sp
     ; CHECK-NEXT: BL @OUTLINED_FUNCTION_[[I3:[0-9]+]]
     ; CHECK-NEXT: $r6 = LDRSH $sp, $noreg, 248, 14 /* CC::al */, $noreg
     $r0 = MOVr killed $r1, 14, $noreg, $noreg
@@ -76,6 +78,7 @@ body:             |
     liveins: $r2
     ; CHECK-LABEL: name:           CheckAddrMode5
     ; CHECK: $r0 = MOVr killed $r2, 14 /* CC::al */, $noreg, $noreg
+    ; CHECK-NEXT: BL @foo, implicit-def dead $lr, implicit $sp
     ; CHECK-NEXT: BL @OUTLINED_FUNCTION_[[I5:[0-9]+]]
     ; CHECK-NEXT: $d5 = VLDRD $sp, 254, 14 /* CC::al */, $noreg
     $r0 = MOVr killed $r2, 14, $noreg, $noreg
@@ -110,6 +113,7 @@ body:             |
     liveins: $r3
     ; CHECK-LABEL: name:           CheckAddrMode5FP16
     ; CHECK: $r0 = MOVr killed $r3, 14 /* CC::al */, $noreg, $noreg
+    ; CHECK-NEXT: BL @foo, implicit-def dead $lr, implicit $sp
     ; CHECK-NEXT: BL @OUTLINED_FUNCTION_[[I5FP16:[0-9]+]]
     ; CHECK-NEXT: $s6 = VLDRH $sp, 252, 14, $noreg
     $r0 = MOVr killed $r3, 14, $noreg, $noreg
@@ -146,41 +150,29 @@ body:             |
     BX_RET 14, $noreg
 
     ;CHECK: name:           OUTLINED_FUNCTION_[[I5]]
-    ;CHECK: early-clobber $sp = frame-setup STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg
-    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8
-    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -8
-    ;CHECK-NEXT: BL @foo, implicit-def dead $lr, implicit $sp
-    ;CHECK-NEXT: $d0 = VLDRD $sp, 2, 14 /* CC::al */, $noreg
-    ;CHECK-NEXT: $d1 = VLDRD $sp, 10, 14 /* CC::al */, $noreg
-    ;CHECK-NEXT: $d4 = VLDRD $sp, 255, 14 /* CC::al */, $noreg
-    ;CHECK-NEXT: $lr, $sp = frame-destroy LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg
+    ;CHECK: liveins: $r10, $r9, $r8, $r7, $r6, $r5, $r4, $d15, $d14, $d13, $d12, $d11, $d10, $d9, $d8
+    ;CHECK: $d0 = VLDRD $sp, 0, 14 /* CC::al */, $noreg
+    ;CHECK-NEXT: $d1 = VLDRD $sp, 8, 14 /* CC::al */, $noreg
+    ;CHECK-NEXT: $d4 = VLDRD $sp, 253, 14 /* CC::al */, $noreg
+    ;CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg
 
     ;CHECK: name:           OUTLINED_FUNCTION_[[I5FP16]]
-    ;CHECK: early-clobber $sp = frame-setup STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg
-    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8
-    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -8
-    ;CHECK-NEXT: BL @foo, implicit-def dead $lr, implicit $sp
-    ;CHECK-NEXT: $s1 = VLDRH $sp, 4, 14, $noreg
-    ;CHECK-NEXT: $s2 = VLDRH $sp, 12, 14, $noreg
-    ;CHECK-NEXT: $s5 = VLDRH $sp, 244, 14, $noreg
-    ;CHECK-NEXT: $lr, $sp = frame-destroy LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg
+    ;CHECK: liveins: $r10, $r9, $r8, $r7, $r6, $r5, $r4, $d15, $d14, $d13, $d12, $d11, $d10, $d9, $d8
+    ;CHECK: $s1 = VLDRH $sp, 0, 14, $noreg
+    ;CHECK-NEXT: $s2 = VLDRH $sp, 8, 14, $noreg
+    ;CHECK-NEXT: $s5 = VLDRH $sp, 240, 14, $noreg
+    ;CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg
 
     ;CHECK: name:           OUTLINED_FUNCTION_[[I12]]
-    ;CHECK: early-clobber $sp = frame-setup STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg
-    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8
-    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -8
-    ;CHECK-NEXT: BL @foo, implicit-def dead $lr, implicit $sp
-    ;CHECK-NEXT: $r1 = LDRi12 $sp, 8, 14 /* CC::al */, $noreg
-    ;CHECK-NEXT: $r2 = LDRi12 $sp, 16, 14 /* CC::al */, $noreg
-    ;CHECK-NEXT: $r5 = LDRi12 $sp, 4094, 14 /* CC::al */, $noreg
-    ;CHECK-NEXT: $lr, $sp = frame-destroy LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg
+    ;CHECK: liveins: $r10, $r9, $r8, $r7, $d8, $r4, $d15, $d14, $d13, $d12, $d11, $d10, $d9
+    ;CHECK: $r1 = LDRi12 $sp, 0, 14 /* CC::al */, $noreg
+    ;CHECK-NEXT: $r2 = LDRi12 $sp, 8, 14 /* CC::al */, $noreg
+    ;CHECK-NEXT: $r5 = LDRi12 $sp, 4086, 14 /* CC::al */, $noreg
+    ;CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg
 
     ;CHECK: name:           OUTLINED_FUNCTION_[[I3]]
-    ;CHECK: early-clobber $sp = frame-setup STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg
-    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8
-    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -8
-    ;CHECK-NEXT: BL @foo, implicit-def dead $lr, implicit $sp
-    ;CHECK-NEXT: $r1 = LDRSH $sp, $noreg, 8, 14 /* CC::al */, $noreg
-    ;CHECK-NEXT: $r2 = LDRSH $sp, $noreg, 16, 14 /* CC::al */, $noreg
-    ;CHECK-NEXT: $r5 = LDRSH $sp, $noreg, 255, 14 /* CC::al */, $noreg
-    ;CHECK-NEXT: $lr, $sp = frame-destroy LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg
+    ;CHECK: liveins: $r10, $r9, $r8, $r7, $d8, $r4, $d15, $d14, $d13, $d12, $d11, $d10, $d9
+    ;CHECK: $r1 = LDRSH $sp, $noreg, 0, 14 /* CC::al */, $noreg
+    ;CHECK-NEXT: $r2 = LDRSH $sp, $noreg, 8, 14 /* CC::al */, $noreg
+    ;CHECK-NEXT: $r5 = LDRSH $sp, $noreg, 247, 14 /* CC::al */, $noreg
+    ;CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg
diff --git a/llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-thumb.mir b/llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-thumb.mir
index 56184448424489..f2cdaebdfa8baa 100644
--- a/llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-thumb.mir
+++ b/llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-thumb.mir
@@ -19,7 +19,7 @@ body:             |
   bb.0:
     liveins: $r1
     ;CHECK-LABEL: name:           CheckAddrModeT2_i12
-    ;CHECK: $r0 = tMOVr killed $r1, 14 /* CC::al */, $noreg
+    ;CHECK: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_[[SHARED:[0-9+]]]
     ;CHECK-NEXT: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_[[I12:[0-9]+]]
     ;CHECK-NEXT: $r0 = t2LDRi12 $sp, 4088, 14 /* CC::al */, $noreg
     $r0 = tMOVr killed $r1, 14, $noreg
@@ -49,7 +49,7 @@ body:             |
   bb.0:
     liveins: $r1
     ;CHECK-LABEL: name:           CheckAddrModeT2_i8
-    ;CHECK: $r0 = tMOVr $r1, 14 /* CC::al */, $noreg
+    ;CHECK: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_[[SHARED:[0-9+]]]
     ;CHECK-NEXT: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_[[I8:[0-9]+]]
     ;CHECK-NEXT: t2STRHT $r0, $sp, 248, 14 /* CC::al */, $noreg
     $r0 = tMOVr $r1, 14, $noreg
@@ -79,7 +79,7 @@ body:             |
   bb.0:
     liveins: $r1
     ;CHECK-LABEL: name:           CheckAddrModeT2_i8s4
-    ;CHECK: $r0 = tMOVr $r1, 14 /* CC::al */, $noreg
+    ;CHECK: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_[[SHARED:[0-9+]]]
     ;CHECK-NEXT: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_[[I8S4:[0-9]+]]
     ;CHECK-NEXT: t2STRDi8 $r0, $r1, $sp, 1020, 14 /* CC::al */, $noreg
     $r0 = tMOVr $r1, 14, $noreg
@@ -109,7 +109,7 @@ body:             |
   bb.0:
     liveins: $r1
     ;CHECK-LABEL: name:           CheckAddrModeT2_ldrex
-    ;CHECK: $r0 = tMOVr $r1, 14 /* CC::al */, $noreg
+    ;CHECK: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_[[SHARED:[0-9+]]]
     ;CHECK-NEXT: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_[[LDREX:[0-9]+]]
     ;CHECK-NEXT: $r1 = t2LDREX $sp, 254, 14 /* CC::al */, $noreg
     $r0 = tMOVr $r1, 14, $noreg
@@ -144,8 +144,10 @@ body:             |
   bb.0:
     liveins: $r0, $r1
     ;CHECK-LABEL: name:           CheckAddrModeT1_s
-    ;CHECK: $r0 = tMOVr $r1, 14 /* CC::al */, $noreg
-    ;CHECK-NEXT: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_[[T1_S:[0-9]+]]
+    ;CHECK: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_[[SHARED:[0-9+]]]
+    ;CHECK-NEXT: tSTRspi $r0, $sp, 0, 14 /* CC::al */, $noreg
+    ;CHECK-NEXT: tSTRspi $r0, $sp, 4, 14 /* CC::al */, $noreg
+    ;CHECK-NEXT: tSTRspi $r0, $sp, 253, 14 /* CC::al */, $noreg
     ;CHECK-NEXT: tSTRspi $r0, $sp, 254, 14 /* CC::al */, $noreg
     $r0 = tMOVr $r1, 14, $noreg
     tBL 14, $noreg, @foo, implicit-def dead $lr, implicit $sp
@@ -181,51 +183,29 @@ body:             |
     BX_RET 14, $noreg
 
     ;CHECK: name:           OUTLINED_FUNCTION_[[LDREX]]
-    ;CHECK: early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg
-    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8
-    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -8
-    ;CHECK-NEXT: tBL 14 /* CC::al */, $noreg, @foo, implicit-def dead $lr, implicit $sp
-    ;CHECK-NEXT: $r1 = t2LDREX $sp, 2, 14 /* CC::al */, $noreg
-    ;CHECK-NEXT: $r1 = t2LDREX $sp, 10, 14 /* CC::al */, $noreg
-    ;CHECK-NEXT: $r1 = t2LDREX $sp, 255, 14 /* CC::al */, $noreg
-    ;CHECK-NEXT: $lr, $sp = frame-destroy t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg
+    ;CHECK: $r1 = t2LDREX $sp, 0, 14 /* CC::al */, $noreg
+    ;CHECK-NEXT: $r1 = t2LDREX $sp, 8, 14 /* CC::al */, $noreg
+    ;CHECK-NEXT: $r1 = t2LDREX $sp, 253, 14 /* CC::al */, $noreg
+    ;CHECK-NEXT: tBX_RET 14 /* CC::al */, $noreg
 
     ;CHECK: name:           OUTLINED_FUNCTION_[[I8]]
-    ;CHECK: early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg
-    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8
-    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -8
-    ;CHECK-NEXT: tBL 14 /* CC::al */, $noreg, @foo, implicit-def dead $lr, implicit $sp
-    ;CHECK-NEXT: t2STRHT $r0, $sp, 8, 14 /* CC::al */, $noreg
-    ;CHECK-NEXT: t2STRHT $r0, $sp, 12, 14 /* CC::al */, $noreg
-    ;CHECK-NEXT: t2STRHT $r0, $sp, 255, 14 /* CC::al */, $noreg
-    ;CHECK-NEXT: $lr, $sp = frame-destroy t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg
+    ;CHECK: t2STRHT $r0, $sp, 0, 14 /* CC::al */, $noreg
+    ;CHECK-NEXT: t2STRHT $r0, $sp, 4, 14 /* CC::al */, $noreg
+    ;CHECK-NEXT: t2STRHT $r0, $sp, 247, 14 /* CC::al */, $noreg
+    ;CHECK-NEXT: tBX_RET 14 /* CC::al */, $noreg
 
     ;CHECK: name:           OUTLINED_FUNCTION_[[I8S4]]
-    ;CHECK: early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg
-    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8
-    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -8
-    ;CHECK-NEXT: tBL 14 /* CC::al */, $noreg, @foo, implicit-def dead $lr, implicit $sp
+    ;CHECK: t2STRDi8 $r0, $r1, $sp, 0, 14 /* CC::al */, $noreg
     ;CHECK-NEXT: t2STRDi8 $r0, $r1, $sp, 8, 14 /* CC::al */, $noreg
-    ;CHECK-NEXT: t2STRDi8 $r0, $r1, $sp, 16, 14 /* CC::al */, $noreg
-    ;CHECK-NEXT: t2STRDi8 $r0, $r1, $sp, 1020, 14 /* CC::al */, $noreg
-    ;CHECK-NEXT: $lr, $sp = frame-destroy t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg
+    ;CHECK-NEXT: t2STRDi8 $r0, $r1, $sp, 1012, 14 /* CC::al */, $noreg
+    ;CHECK-NEXT: tBX_RET 14 /* CC::al */, $nore
 
     ;CHECK: name:           OUTLINED_FUNCTION_[[I12]]
-    ;CHECK: early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg
-    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8
-    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -8
-    ;CHECK-NEXT: tBL 14 /* CC::al */, $noreg, @foo, implicit-def dead $lr, implicit $sp
-    ;CHECK-NEXT: $r0 = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg
-    ;CHECK-NEXT: $r0 = t2LDRi12 $sp, 12, 14 /* CC::al */, $noreg
-    ;CHECK-NEXT: $r0 = t2LDRi12 $sp, 4094, 14 /* CC::al */, $noreg
-    ;CHECK-NEXT: $lr, $sp = frame-destroy t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg
-
-    ;CHECK: name:           OUTLINED_FUNCTION_[[T1_S]]
-    ;CHECK: early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg
-    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8
-    ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -8
-    ;CHECK-NEXT: tBL 14 /* CC::al */, $noreg, @foo, implicit-def dead $lr, implicit $sp
-    ;CHECK-NEXT: tSTRspi $r0, $sp, 2, 14 /* CC::al */, $noreg
-    ;CHECK-NEXT: tSTRspi $r0, $sp, 6, 14 /* CC::al */, $noreg
-    ;CHECK-NEXT: tSTRspi $r0, $sp, 255, 14 /* CC::al */, $noreg
-    ;CHECK-NEXT: $lr, $sp = frame-destroy t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg
+    ;CHECK: $r0 = t2LDRi12 $sp, 0, 14 /* CC::al */, $noreg
+    ;CHECK-NEXT: $r0 = t2LDRi12 $sp, 4, 14 /* CC::al */, $noreg
+    ;CHECK-NEXT: $r0 = t2LDRi12 $sp, 4086, 14 /* CC::al */, $noreg
+    ;CHECK-NEXT: tBX_RET 14 /* CC::al */, $noreg
+
+    ;CHECK: name:           OUTLINED_FUNCTION_[[SHARED]]
+    ;CHECK: $r0 = tMOVr killed $r1, 14 /* CC::al */, $noreg
+    ;CHECK-NEXT: tTAILJMPdND @foo, 14 /* CC::al */, $noreg, implicit $sp

>From 8cedcaba5cb3714e70b211eef55a495de85d5a9f Mon Sep 17 00:00:00 2001
From: Xuan Zhang <xuanzh at meta.com>
Date: Fri, 12 Apr 2024 10:57:51 -0700
Subject: [PATCH 5/6] consider leaf descendants to include more candidates for
 outlining

---
 llvm/include/llvm/Support/SuffixTree.h        |  33 ++++-
 llvm/include/llvm/Support/SuffixTreeNode.h    |  25 +++-
 llvm/lib/CodeGen/MachineOutliner.cpp          |   8 +-
 llvm/lib/Support/SuffixTree.cpp               |  83 ++++++++++-
 llvm/lib/Support/SuffixTreeNode.cpp           |   5 +
 .../machine-outliner-cfi-tail-some.mir        |   2 +-
 .../machine-outliner-leaf-descendants.ll      | 124 ++++++++++++++++
 .../machine-outliner-retaddr-sign-sp-mod.mir  |   2 +-
 .../machine-outliner-retaddr-sign-thunk.ll    |   4 +-
 .../AArch64/machine-outliner-throw2.ll        |   4 +-
 .../CodeGen/AArch64/machine-outliner-thunk.ll |   2 +-
 .../test/CodeGen/AArch64/machine-outliner.mir |   2 +-
 llvm/unittests/Support/SuffixTreeTest.cpp     | 134 ++++++++++++++++++
 13 files changed, 413 insertions(+), 15 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/machine-outliner-leaf-descendants.ll

diff --git a/llvm/include/llvm/Support/SuffixTree.h b/llvm/include/llvm/Support/SuffixTree.h
index 4940fbbf308d8b..90f8aae60a575b 100644
--- a/llvm/include/llvm/Support/SuffixTree.h
+++ b/llvm/include/llvm/Support/SuffixTree.h
@@ -42,6 +42,9 @@ class SuffixTree {
   /// Each element is an integer representing an instruction in the module.
   ArrayRef<unsigned> Str;
 
+  /// Whether to consider leaf descendants or only leaf children.
+  bool OutlinerLeafDescendants;
+
   /// A repeated substring in the tree.
   struct RepeatedSubstring {
     /// The length of the string.
@@ -130,11 +133,27 @@ class SuffixTree {
   /// this step.
   unsigned extend(unsigned EndIdx, unsigned SuffixesToAdd);
 
+  /// This vector contains all leaf nodes of this suffix tree. These leaf nodes
+  /// are identified using post-order depth-first traversal, so that the order
+  /// of these leaf nodes in the vector matches the order of the leaves in the
+  /// tree from left to right if one were to draw the tree on paper.
+  std::vector<SuffixTreeLeafNode *> LeafNodes;
+
+  /// Perform a post-order depth-first traversal of the tree and perform two
+  /// tasks during the traversal. The first is to populate LeafNodes, adding
+  /// nodes in order of the traversal. The second is to keep track of the leaf
+  /// descendants of every internal node by assigning values to LeftLeafIndex
+  /// and RightLefIndex fields of SuffixTreeNode for all internal nodes.
+  void setLeafNodes();
+
 public:
   /// Construct a suffix tree from a sequence of unsigned integers.
   ///
   /// \param Str The string to construct the suffix tree for.
-  SuffixTree(const ArrayRef<unsigned> &Str);
+  /// \param OutlinerLeafDescendants Whether to consider leaf descendants or
+  /// only leaf children (used by Machine Outliner).
+  SuffixTree(const ArrayRef<unsigned> &Str,
+             bool OutlinerLeafDescendants = false);
 
   /// Iterator for finding all repeated substrings in the suffix tree.
   struct RepeatedSubstringIterator {
@@ -154,6 +173,12 @@ class SuffixTree {
     /// instruction lengths.
     const unsigned MinLength = 2;
 
+    /// Vector of leaf nodes of the suffix tree.
+    const std::vector<SuffixTreeLeafNode *> &LeafNodes;
+
+    /// Whether to consider leaf descendants or only leaf children.
+    bool OutlinerLeafDescendants = !LeafNodes.empty();
+
     /// Move the iterator to the next repeated substring.
     void advance();
 
@@ -179,7 +204,9 @@ class SuffixTree {
       return !(*this == Other);
     }
 
-    RepeatedSubstringIterator(SuffixTreeInternalNode *N) : N(N) {
+    RepeatedSubstringIterator(
+        SuffixTreeInternalNode *N, const std::vector<SuffixTreeLeafNode *> &LeafNodes = {})
+        : N(N), LeafNodes(LeafNodes) {
       // Do we have a non-null node?
       if (!N)
         return;
@@ -191,7 +218,7 @@ class SuffixTree {
   };
 
   typedef RepeatedSubstringIterator iterator;
-  iterator begin() { return iterator(Root); }
+  iterator begin() { return iterator(Root, LeafNodes); }
   iterator end() { return iterator(nullptr); }
 };
 
diff --git a/llvm/include/llvm/Support/SuffixTreeNode.h b/llvm/include/llvm/Support/SuffixTreeNode.h
index 7d0d1cf0c58b95..84b590f2deb0cd 100644
--- a/llvm/include/llvm/Support/SuffixTreeNode.h
+++ b/llvm/include/llvm/Support/SuffixTreeNode.h
@@ -46,6 +46,17 @@ struct SuffixTreeNode {
   /// the root to this node.
   unsigned ConcatLen = 0;
 
+  /// These two indices give a range of indices for its leaf descendants.
+  /// Imagine drawing a tree on paper and assigning a unique index to each leaf
+  /// node in monotonically increasing order from left to right. This way of
+  /// numbering the leaf nodes allows us to associate a continuous range of
+  /// indices with each internal node. For example, if a node has leaf
+  /// descendants with indices i, i+1, ..., j, then its LeftLeafIdx is i and
+  /// its RightLeafIdx is j. These indices are for LeafNodes in the SuffixTree
+  /// class, which is constructed using post-order depth-first traversal.
+  unsigned LeftLeafIdx = EmptyIdx;
+  unsigned RightLeafIdx = EmptyIdx;
+
 public:
   // LLVM RTTI boilerplate.
   NodeKind getKind() const { return Kind; }
@@ -56,6 +67,18 @@ struct SuffixTreeNode {
   /// \returns the end index of this node.
   virtual unsigned getEndIdx() const = 0;
 
+  /// \return the index of this node's left most leaf node.
+  unsigned getLeftLeafIdx() const;
+
+  /// \return the index of this node's right most leaf node.
+  unsigned getRightLeafIdx() const;
+
+  /// Set the index of the left most leaf node of this node to \p Idx.
+  void setLeftLeafIdx(unsigned Idx);
+
+  /// Set the index of the right most leaf node of this node to \p Idx.
+  void setRightLeafIdx(unsigned Idx);
+
   /// Advance this node's StartIdx by \p Inc.
   void incrementStartIdx(unsigned Inc);
 
@@ -168,4 +191,4 @@ struct SuffixTreeLeafNode : SuffixTreeNode {
   virtual ~SuffixTreeLeafNode() = default;
 };
 } // namespace llvm
-#endif // LLVM_SUPPORT_SUFFIXTREE_NODE_H
\ No newline at end of file
+#endif // LLVM_SUPPORT_SUFFIXTREE_NODE_H
diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp
index 341c94e7adf2ee..db197291d2d0d5 100644
--- a/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -121,6 +121,12 @@ static cl::opt<unsigned> OutlinerBenefitThreshold(
     cl::desc(
         "The minimum size in bytes before an outlining candidate is accepted"));
 
+static cl::opt<bool> OutlinerLeafDescendants(
+    "outliner-leaf-descendants", cl::init(true), cl::Hidden,
+    cl::desc("Consider all leaf descendants of internal nodes of the suffix "
+             "tree as candidates for outlining (if false, only leaf children "
+             "are considered)"));
+
 namespace {
 
 /// Maps \p MachineInstrs to unsigned integers and stores the mappings.
@@ -576,7 +582,7 @@ void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) {
 void MachineOutliner::findCandidates(
     InstructionMapper &Mapper, std::vector<OutlinedFunction> &FunctionList) {
   FunctionList.clear();
-  SuffixTree ST(Mapper.UnsignedVec);
+  SuffixTree ST(Mapper.UnsignedVec, OutlinerLeafDescendants);
 
   // First, find all of the repeated substrings in the tree of minimum length
   // 2.
diff --git a/llvm/lib/Support/SuffixTree.cpp b/llvm/lib/Support/SuffixTree.cpp
index 03ed1d02840aa1..481233bfd4790b 100644
--- a/llvm/lib/Support/SuffixTree.cpp
+++ b/llvm/lib/Support/SuffixTree.cpp
@@ -11,9 +11,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/SuffixTree.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/SuffixTreeNode.h"
+#include <stack>
 
 using namespace llvm;
 
@@ -26,7 +28,9 @@ static size_t numElementsInSubstring(const SuffixTreeNode *N) {
   return N->getEndIdx() - N->getStartIdx() + 1;
 }
 
-SuffixTree::SuffixTree(const ArrayRef<unsigned> &Str) : Str(Str) {
+SuffixTree::SuffixTree(const ArrayRef<unsigned> &Str,
+                       bool OutlinerLeafDescendants)
+    : Str(Str), OutlinerLeafDescendants(OutlinerLeafDescendants) {
   Root = insertRoot();
   Active.Node = Root;
 
@@ -46,6 +50,11 @@ SuffixTree::SuffixTree(const ArrayRef<unsigned> &Str) : Str(Str) {
   // Set the suffix indices of each leaf.
   assert(Root && "Root node can't be nullptr!");
   setSuffixIndices();
+
+  // Collect all leaf nodes of the suffix tree. And for each internal node,
+  // record the range of leaf nodes that are descendants of it.
+  if (OutlinerLeafDescendants)
+    setLeafNodes();
 }
 
 SuffixTreeNode *SuffixTree::insertLeaf(SuffixTreeInternalNode &Parent,
@@ -105,6 +114,68 @@ void SuffixTree::setSuffixIndices() {
   }
 }
 
+void SuffixTree::setLeafNodes() {
+  // A stack that keeps track of nodes to visit for post-order DFS traversal.
+  std::stack<SuffixTreeNode *> ToVisit;
+  ToVisit.push(Root);
+
+  // This keeps track of the index of the next leaf node to be added to
+  // the LeafNodes vector of the suffix tree.
+  unsigned LeafCounter = 0;
+
+  // This keeps track of nodes whose children have been added to the stack
+  // during the post-order depth-first traversal of the tree.
+  llvm::SmallPtrSet<SuffixTreeInternalNode *, 32> ChildrenAddedToStack;
+
+  // Traverse the tree in post-order.
+  while (!ToVisit.empty()) {
+    SuffixTreeNode *CurrNode = ToVisit.top();
+    ToVisit.pop();
+    if (auto *CurrInternalNode = dyn_cast<SuffixTreeInternalNode>(CurrNode)) {
+      // The current node is an internal node.
+      if (ChildrenAddedToStack.find(CurrInternalNode) !=
+          ChildrenAddedToStack.end()) {
+        // If the children of the current node has been added to the stack,
+        // then this is the second time we visit this node and at this point,
+        // all of its children have already been processed. Now, we can
+        // set its LeftLeafIdx and RightLeafIdx;
+        auto it = CurrInternalNode->Children.begin();
+        if (it != CurrInternalNode->Children.end()) {
+          // Get the first child to use its RightLeafIdx. The RightLeafIdx is
+          // used as the first child is the initial one added to the stack, so
+          // it's the last one to be processed. This implies that the leaf
+          // descendants of the first child are assigned the largest index
+          // numbers.
+          CurrNode->setRightLeafIdx(it->second->getRightLeafIdx());
+          // get the last child to use its LeftLeafIdx.
+          while (std::next(it) != CurrInternalNode->Children.end())
+            it = std::next(it);
+          CurrNode->setLeftLeafIdx(it->second->getLeftLeafIdx());
+          assert(CurrNode->getLeftLeafIdx() <= CurrNode->getRightLeafIdx() &&
+                 "LeftLeafIdx should not be larger than RightLeafIdx");
+        }
+      } else {
+        // This is the first time we visit this node. This means that its
+        // children have not been added to the stack yet. Hence, we will add
+        // the current node back to the stack and add its children to the
+        // stack for processing.
+        ToVisit.push(CurrNode);
+        for (auto &ChildPair : CurrInternalNode->Children)
+          ToVisit.push(ChildPair.second);
+        ChildrenAddedToStack.insert(CurrInternalNode);
+      }
+    } else {
+      // The current node is a leaf node.
+      // We can simplyset its LeftLeafIdx and RightLeafIdx.
+      CurrNode->setLeftLeafIdx(LeafCounter);
+      CurrNode->setRightLeafIdx(LeafCounter);
+      LeafCounter++;
+      auto *CurrLeafNode = cast<SuffixTreeLeafNode>(CurrNode);
+      LeafNodes.push_back(CurrLeafNode);
+    }
+  }
+}
+
 unsigned SuffixTree::extend(unsigned EndIdx, unsigned SuffixesToAdd) {
   SuffixTreeInternalNode *NeedsLink = nullptr;
 
@@ -230,6 +301,7 @@ void SuffixTree::RepeatedSubstringIterator::advance() {
 
   // Each leaf node represents a repeat of a string.
   SmallVector<unsigned> RepeatedSubstringStarts;
+  SmallVector<SuffixTreeLeafNode *> LeafDescendants;
 
   // Continue visiting nodes until we find one which repeats more than once.
   while (!InternalNodesToVisit.empty()) {
@@ -252,7 +324,7 @@ void SuffixTree::RepeatedSubstringIterator::advance() {
         continue;
       }
 
-      if (Length < MinLength)
+      if (Length < MinLength || OutlinerLeafDescendants)
         continue;
 
       // Have an occurrence of a potentially repeated string. Save it.
@@ -260,6 +332,13 @@ void SuffixTree::RepeatedSubstringIterator::advance() {
       RepeatedSubstringStarts.push_back(Leaf->getSuffixIdx());
     }
 
+    if (OutlinerLeafDescendants && Length >= MinLength) {
+      LeafDescendants.assign(LeafNodes.begin() + Curr->getLeftLeafIdx(),
+                             LeafNodes.begin() + Curr->getRightLeafIdx() + 1);
+      for (SuffixTreeLeafNode *Leaf : LeafDescendants)
+        RepeatedSubstringStarts.push_back(Leaf->getSuffixIdx());
+    }
+
     // The root never represents a repeated substring. If we're looking at
     // that, then skip it.
     if (Curr->isRoot())
diff --git a/llvm/lib/Support/SuffixTreeNode.cpp b/llvm/lib/Support/SuffixTreeNode.cpp
index 113b990fd352fc..9f1f94a39895e8 100644
--- a/llvm/lib/Support/SuffixTreeNode.cpp
+++ b/llvm/lib/Support/SuffixTreeNode.cpp
@@ -38,3 +38,8 @@ unsigned SuffixTreeLeafNode::getEndIdx() const {
 
 unsigned SuffixTreeLeafNode::getSuffixIdx() const { return SuffixIdx; }
 void SuffixTreeLeafNode::setSuffixIdx(unsigned Idx) { SuffixIdx = Idx; }
+
+unsigned SuffixTreeNode::getLeftLeafIdx() const { return LeftLeafIdx; }
+unsigned SuffixTreeNode::getRightLeafIdx() const { return RightLeafIdx; }
+void SuffixTreeNode::setLeftLeafIdx(unsigned Idx) { LeftLeafIdx = Idx; }
+void SuffixTreeNode::setRightLeafIdx(unsigned Idx) { RightLeafIdx = Idx; }
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-cfi-tail-some.mir b/llvm/test/CodeGen/AArch64/machine-outliner-cfi-tail-some.mir
index 67d411962ce4f7..3afa1d5559a585 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner-cfi-tail-some.mir
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-cfi-tail-some.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=aarch64-apple-unknown -run-pass=machine-outliner -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple=aarch64-apple-unknown -run-pass=machine-outliner -verify-machineinstrs -outliner-leaf-descendants=false %s -o - | FileCheck %s
 
 # Outlining CFI instructions is unsafe if we cannot outline all of the CFI
 # instructions from a function.  This shows that we choose not to outline the
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-leaf-descendants.ll b/llvm/test/CodeGen/AArch64/machine-outliner-leaf-descendants.ll
new file mode 100644
index 00000000000000..bdaf653a79566d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-leaf-descendants.ll
@@ -0,0 +1,124 @@
+; This test is mainly for the -outliner-leaf-descendants flag for MachineOutliner.
+;
+; ===================== -outliner-leaf-descendants=false =====================
+; MachineOutliner finds THREE key `OutlinedFunction` and outlines them. They are:
+;   ```
+;     mov     w0, #1
+;     mov     w1, #2
+;     mov     w2, #3
+;     mov     w3, #4
+;     mov     w4, #5
+;     mov     w5, #6 or #7 or #8
+;     b
+;   ```
+; Each has:
+;   - `SequenceSize=28` and `OccurrenceCount=2`
+;   - each Candidate has `CallOverhead=4` and `FrameOverhead=0`
+;   - `NotOutlinedCost=28*2=56` and `OutliningCost=4*2+28+0=36`
+;   - `Benefit=56-36=20` and `Priority=56/36=1.56`
+;
+; ===================== -outliner-leaf-descendants=false =====================
+; MachineOutliner finds a FOURTH key `OutlinedFunction`, which is:
+;   ```
+;   mov     w0, #1
+;   mov     w1, #2
+;   mov     w2, #3
+;   mov     w3, #4
+;   mov     w4, #5
+;   ```
+; This corresponds to an internal node that has ZERO leaf children, but SIX leaf descendants.
+; It has:
+;   - `SequenceSize=20` and `OccurrenceCount=6`
+;   - each Candidate has `CallOverhead=12` and `FrameOverhead=4`
+;   - `NotOutlinedCost=20*6=120` and `OutliningCost=12*6+20+4=96`
+;   - `Benefit=120-96=24` and `Priority=120/96=1.25`
+;
+; The FOURTH `OutlinedFunction` has lower _priority_ compared to the first THREE `OutlinedFunction`
+; Hence, if we additionally include the `-sort-per-priority` flag,  the first THREE `OutlinedFunction` are outlined.
+
+; RUN: llc %s -enable-machine-outliner=always -outliner-leaf-descendants=false -filetype=obj -o %t
+; RUN: llvm-objdump -d %t | FileCheck %s --check-prefix=CHECK-BASELINE
+
+; RUN: llc %s -enable-machine-outliner=always -outliner-leaf-descendants=false -outliner-benefit-threshold=22 -filetype=obj -o %t
+; RUN: llvm-objdump -d %t | FileCheck %s --check-prefix=CHECK-NO-CANDIDATE
+
+; RUN: llc %s -enable-machine-outliner=always -outliner-leaf-descendants=true -filetype=obj -o %t
+; RUN: llvm-objdump -d %t | FileCheck %s --check-prefix=CHECK-BASELINE
+
+; RUN: llc %s -enable-machine-outliner=always -outliner-leaf-descendants=true -outliner-benefit-threshold=22 -filetype=obj -o %t
+; RUN: llvm-objdump -d %t | FileCheck %s --check-prefix=CHECK-LEAF-DESCENDANTS
+
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-macosx14.0.0"
+
+declare i32 @_Z3fooiiii(i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef)
+
+define i32 @_Z2f1v() minsize {
+  %1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 6)
+  ret i32 %1
+}
+
+define i32 @_Z2f2v() minsize {
+  %1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 6)
+  ret i32 %1
+}
+
+define i32 @_Z2f3v() minsize {
+  %1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 7)
+  ret i32 %1
+}
+
+define i32 @_Z2f4v() minsize {
+  %1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 7)
+  ret i32 %1
+}
+
+define i32 @_Z2f5v() minsize {
+  %1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 8)
+  ret i32 %1
+}
+
+define i32 @_Z2f6v() minsize {
+  %1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 8)
+  ret i32 %1
+}
+
+; CHECK-BASELINE: <_OUTLINED_FUNCTION_0>:
+; CHECK-BASELINE-NEXT: mov     w0, #0x1
+; CHECK-BASELINE-NEXT: mov     w1, #0x2
+; CHECK-BASELINE-NEXT: mov     w2, #0x3
+; CHECK-BASELINE-NEXT: mov     w3, #0x4
+; CHECK-BASELINE-NEXT: mov     w4, #0x5
+; CHECK-BASELINE-NEXT: mov     w5, #0x6
+; CHECK-BASELINE-NEXT: b
+
+; CHECK-BASELINE: <_OUTLINED_FUNCTION_1>:
+; CHECK-BASELINE-NEXT: mov     w0, #0x1
+; CHECK-BASELINE-NEXT: mov     w1, #0x2
+; CHECK-BASELINE-NEXT: mov     w2, #0x3
+; CHECK-BASELINE-NEXT: mov     w3, #0x4
+; CHECK-BASELINE-NEXT: mov     w4, #0x5
+; CHECK-BASELINE-NEXT: mov     w5, #0x8
+; CHECK-BASELINE-NEXT: b
+
+; CHECK-BASELINE: <_OUTLINED_FUNCTION_2>:
+; CHECK-BASELINE-NEXT: mov     w0, #0x1
+; CHECK-BASELINE-NEXT: mov     w1, #0x2
+; CHECK-BASELINE-NEXT: mov     w2, #0x3
+; CHECK-BASELINE-NEXT: mov     w3, #0x4
+; CHECK-BASELINE-NEXT: mov     w4, #0x5
+; CHECK-BASELINE-NEXT: mov     w5, #0x7
+; CHECK-BASELINE-NEXT: b
+
+; CHECK-LEAF-DESCENDANTS: <_OUTLINED_FUNCTION_0>:
+; CHECK-LEAF-DESCENDANTS-NEXT: mov     w0, #0x1
+; CHECK-LEAF-DESCENDANTS-NEXT: mov     w1, #0x2
+; CHECK-LEAF-DESCENDANTS-NEXT: mov     w2, #0x3
+; CHECK-LEAF-DESCENDANTS-NEXT: mov     w3, #0x4
+; CHECK-LEAF-DESCENDANTS-NEXT: mov     w4, #0x5
+; CHECK-LEAF-DESCENDANTS-NEXT: ret
+
+; CHECK-LEAF-DESCENDANTS-NOT: <_OUTLINED_FUNCTION_1>:
+
+; CHECK-NO-CANDIDATE-NOT: <_OUTLINED_FUNCTION_0>:
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-sp-mod.mir b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-sp-mod.mir
index c1c2720dec6ad6..22e5edef2a9395 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-sp-mod.mir
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-sp-mod.mir
@@ -1,4 +1,4 @@
-# RUN: llc -verify-machineinstrs -run-pass=machine-outliner -run-pass=aarch64-ptrauth %s -o - | FileCheck %s
+# RUN: llc -verify-machineinstrs -run-pass=machine-outliner -run-pass=aarch64-ptrauth -outliner-leaf-descendants=false %s -o - | FileCheck %s
 
 --- |
   target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-thunk.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-thunk.ll
index 9250718fc0d585..618973b9368d1d 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-thunk.ll
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-thunk.ll
@@ -1,6 +1,6 @@
-; RUN: llc -mtriple aarch64-arm-linux-gnu --enable-machine-outliner \
+; RUN: llc -mtriple aarch64-arm-linux-gnu --enable-machine-outliner -outliner-leaf-descendants=false \
 ; RUN: -verify-machineinstrs %s -o - | FileCheck --check-prefixes CHECK,V8A %s
-; RUN-V83A: llc -mtriple aarch64 -enable-machine-outliner \
+; RUN-V83A: llc -mtriple aarch64 -enable-machine-outliner -outliner-leaf-descendants=false \
 ; RUN-V83A: -verify-machineinstrs -mattr=+v8.3a %s -o - > %t
 ; RUN-V83A: FileCheck --check-prefixes CHECK,V83A < %t %s
 
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-throw2.ll b/llvm/test/CodeGen/AArch64/machine-outliner-throw2.ll
index aa6e31d6ff21d7..538e1165e39c1d 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner-throw2.ll
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-throw2.ll
@@ -1,5 +1,5 @@
-; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=aarch64 -frame-pointer=non-leaf < %s | FileCheck %s --check-prefix=NOOMIT
-; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=aarch64 -frame-pointer=none < %s | FileCheck %s --check-prefix=OMITFP
+; RUN: llc -verify-machineinstrs -enable-machine-outliner -outliner-leaf-descendants=false -mtriple=aarch64 -frame-pointer=non-leaf < %s | FileCheck %s --check-prefix=NOOMIT
+; RUN: llc -verify-machineinstrs -enable-machine-outliner -outliner-leaf-descendants=false -mtriple=aarch64 -frame-pointer=none < %s | FileCheck %s --check-prefix=OMITFP
 
 define void @_Z1giii(i32 %x, i32 %y, i32 %z) minsize {
 ; NOOMIT-LABEL: _Z1giii:
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-thunk.ll b/llvm/test/CodeGen/AArch64/machine-outliner-thunk.ll
index 8740aac0549eec..7e34adf16d25d3 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner-thunk.ll
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-thunk.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -enable-machine-outliner -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -enable-machine-outliner -outliner-leaf-descendants=false -verify-machineinstrs | FileCheck %s
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64-pc-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner.mir b/llvm/test/CodeGen/AArch64/machine-outliner.mir
index 83eda744d24a9e..66779addaff0a8 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner.mir
+++ b/llvm/test/CodeGen/AArch64/machine-outliner.mir
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=aarch64--- -run-pass=prologepilog -run-pass=machine-outliner -verify-machineinstrs -frame-pointer=non-leaf %s -o - | FileCheck %s
+# RUN: llc -mtriple=aarch64--- -run-pass=prologepilog -run-pass=machine-outliner -verify-machineinstrs -frame-pointer=non-leaf -outliner-leaf-descendants=false %s -o - | FileCheck %s
 --- |
 
   @x = common global i32 0, align 4
diff --git a/llvm/unittests/Support/SuffixTreeTest.cpp b/llvm/unittests/Support/SuffixTreeTest.cpp
index f5d8112ccf5cec..59f6dc9bf963fa 100644
--- a/llvm/unittests/Support/SuffixTreeTest.cpp
+++ b/llvm/unittests/Support/SuffixTreeTest.cpp
@@ -140,4 +140,138 @@ TEST(SuffixTreeTest, TestExclusion) {
   }
 }
 
+// Tests that the SuffixTree is able to find the following substrings:
+// {1, 1} at indices 0, 1, 2, 3, and 4;
+// {1, 1, 1} at indices 0, 1, 2, and 3;
+// {1, 1, 1, 1}  at indices 0, 1, and 2; and
+// {1, 1, 1, 1, 1} at indices 0 and 1.
+//
+// This is a FIX to the Test TestSingleCharacterRepeat
+TEST(SuffixTreeTest, TestSingleCharacterRepeatWithLeafDescendants) {
+  std::vector<unsigned> RepeatedRepetitionData = {1, 1, 1, 1, 1, 1, 2};
+  std::vector<unsigned>::iterator RRDIt, RRDIt2;
+  SuffixTree ST(RepeatedRepetitionData, true);
+  std::vector<SuffixTree::RepeatedSubstring> SubStrings;
+  for (auto It = ST.begin(); It != ST.end(); It++)
+    SubStrings.push_back(*It);
+  EXPECT_EQ(SubStrings.size(), 4u);
+  for (SuffixTree::RepeatedSubstring &RS : SubStrings) {
+    EXPECT_EQ(RS.StartIndices.size(),
+              RepeatedRepetitionData.size() - RS.Length);
+    for (unsigned StartIdx : SubStrings[0].StartIndices) {
+      RRDIt = RRDIt2 = RepeatedRepetitionData.begin();
+      std::advance(RRDIt, StartIdx);
+      std::advance(RRDIt2, StartIdx + SubStrings[0].Length);
+      ASSERT_TRUE(
+          all_of(make_range<std::vector<unsigned>::iterator>(RRDIt, RRDIt2),
+                 [](unsigned Elt) { return Elt == 1; }));
+    }
+  }
+}
+
+// Tests that the SuffixTree is able to find three substrings
+// {1, 2, 3} at indices 6 and 10;
+// {2, 3} at indices 7 and 11; and
+// {1, 2} at indicies 0 and 3.
+//
+// FIXME: {1, 2} has indices 6 and 10 missing as it is a substring of {1, 2, 3}
+// See Test TestSubstringRepeatsWithLeafDescendants for the FIX
+TEST(SuffixTreeTest, TestSubstringRepeats) {
+  std::vector<unsigned> RepeatedRepetitionData = {1, 2, 100, 1, 2, 101, 1,
+                                                  2, 3, 103, 1, 2, 3,   104};
+  SuffixTree ST(RepeatedRepetitionData);
+  std::vector<SuffixTree::RepeatedSubstring> SubStrings;
+  for (auto It = ST.begin(); It != ST.end(); It++)
+    SubStrings.push_back(*It);
+  EXPECT_EQ(SubStrings.size(), 3u);
+  unsigned Len;
+  for (SuffixTree::RepeatedSubstring &RS : SubStrings) {
+    Len = RS.Length;
+    bool IsExpectedLen = (Len == 3u || Len == 2u);
+    ASSERT_TRUE(IsExpectedLen);
+    bool IsExpectedIndex;
+
+    if (Len == 3u) { // {1, 2, 3}
+      EXPECT_EQ(RS.StartIndices.size(), 2u);
+      for (unsigned StartIdx : RS.StartIndices) {
+        IsExpectedIndex = (StartIdx == 6u || StartIdx == 10u);
+        EXPECT_TRUE(IsExpectedIndex);
+        EXPECT_EQ(RepeatedRepetitionData[StartIdx], 1u);
+        EXPECT_EQ(RepeatedRepetitionData[StartIdx + 1], 2u);
+        EXPECT_EQ(RepeatedRepetitionData[StartIdx + 2], 3u);
+      }
+    } else {
+      if (RepeatedRepetitionData[RS.StartIndices[0]] == 1u) { // {1, 2}
+        EXPECT_EQ(RS.StartIndices.size(), 2u);
+        for (unsigned StartIdx : RS.StartIndices) {
+          IsExpectedIndex = (StartIdx == 0u || StartIdx == 3u);
+          EXPECT_TRUE(IsExpectedIndex);
+          EXPECT_EQ(RepeatedRepetitionData[StartIdx + 1], 2u);
+        }
+      } else { // {2, 3}
+        EXPECT_EQ(RS.StartIndices.size(), 2u);
+        for (unsigned StartIdx : RS.StartIndices) {
+          IsExpectedIndex = (StartIdx == 7u || StartIdx == 11u);
+          EXPECT_TRUE(IsExpectedIndex);
+          EXPECT_EQ(RepeatedRepetitionData[StartIdx], 2u);
+          EXPECT_EQ(RepeatedRepetitionData[StartIdx + 1], 3u);
+        }
+      }
+    }
+  }
+}
+
+// Tests that the SuffixTree is able to find three substrings
+// {1, 2, 3} at indices 6 and 10;
+// {2, 3} at indices 7 and 11; and
+// {1, 2} at indicies 0, 3, 6, and 10.
+//
+// This is a FIX to the Test TestSubstringRepeats
+
+TEST(SuffixTreeTest, TestSubstringRepeatsWithLeafDescendants) {
+  std::vector<unsigned> RepeatedRepetitionData = {1, 2, 100, 1, 2, 101, 1,
+                                                  2, 3, 103, 1, 2, 3,   104};
+  SuffixTree ST(RepeatedRepetitionData, true);
+  std::vector<SuffixTree::RepeatedSubstring> SubStrings;
+  for (auto It = ST.begin(); It != ST.end(); It++)
+    SubStrings.push_back(*It);
+  EXPECT_EQ(SubStrings.size(), 3u);
+  unsigned Len;
+  for (SuffixTree::RepeatedSubstring &RS : SubStrings) {
+    Len = RS.Length;
+    bool IsExpectedLen = (Len == 3u || Len == 2u);
+    ASSERT_TRUE(IsExpectedLen);
+    bool IsExpectedIndex;
+
+    if (Len == 3u) { // {1, 2, 3}
+      EXPECT_EQ(RS.StartIndices.size(), 2u);
+      for (unsigned StartIdx : RS.StartIndices) {
+        IsExpectedIndex = (StartIdx == 6u || StartIdx == 10u);
+        EXPECT_TRUE(IsExpectedIndex);
+        EXPECT_EQ(RepeatedRepetitionData[StartIdx], 1u);
+        EXPECT_EQ(RepeatedRepetitionData[StartIdx + 1], 2u);
+        EXPECT_EQ(RepeatedRepetitionData[StartIdx + 2], 3u);
+      }
+    } else {
+      if (RepeatedRepetitionData[RS.StartIndices[0]] == 1u) { // {1, 2}
+        EXPECT_EQ(RS.StartIndices.size(), 4u);
+        for (unsigned StartIdx : RS.StartIndices) {
+          IsExpectedIndex = (StartIdx == 0u || StartIdx == 3u ||
+                             StartIdx == 6u || StartIdx == 10u);
+          EXPECT_TRUE(IsExpectedIndex);
+          EXPECT_EQ(RepeatedRepetitionData[StartIdx + 1], 2u);
+        }
+      } else { // {2, 3}
+        EXPECT_EQ(RS.StartIndices.size(), 2u);
+        for (unsigned StartIdx : RS.StartIndices) {
+          IsExpectedIndex = (StartIdx == 7u || StartIdx == 11u);
+          EXPECT_TRUE(IsExpectedIndex);
+          EXPECT_EQ(RepeatedRepetitionData[StartIdx], 2u);
+          EXPECT_EQ(RepeatedRepetitionData[StartIdx + 1], 3u);
+        }
+      }
+    }
+  }
+}
+
 } // namespace

>From 081bc050793c44c2e9c33ee371ab6df306c269e7 Mon Sep 17 00:00:00 2001
From: Xuan Zhang <xuanzh at meta.com>
Date: Thu, 25 Apr 2024 12:54:29 -0700
Subject: [PATCH 6/6] fix additional test cases for leaf descendants

---
 .../AArch64/machine-outliner-overlap.mir      | 24 ++++++++++++++++++-
 .../RISCV/machineoutliner-pcrel-lo.mir        |  8 +++----
 2 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-overlap.mir b/llvm/test/CodeGen/AArch64/machine-outliner-overlap.mir
index c6bd4c1d04d871..8b0e03dbee8d8d 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner-overlap.mir
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-overlap.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --include-generated-funcs
-# RUN: llc %s -mtriple aarch64 -debug-only=machine-outliner -run-pass=machine-outliner -o - 2>&1 | FileCheck %s
+# RUN: llc %s -mtriple aarch64 -outliner-leaf-descendants=false -debug-only=machine-outliner -run-pass=machine-outliner -o - 2>&1 | FileCheck %s
+# RUN: llc %s -mtriple aarch64 -debug-only=machine-outliner -run-pass=machine-outliner -o - 2>&1 | FileCheck %s --check-prefix=CHECK-LEAF
 # REQUIRES: asserts
 
 # CHECK: *** Discarding overlapping candidates ***
@@ -54,6 +55,27 @@ body:             |
     ; CHECK-NEXT: BL @OUTLINED_FUNCTION_0, implicit-def $lr, implicit $sp, implicit-def $lr, implicit-def $x8, implicit-def $x9, implicit $sp, implicit $x0, implicit $x9
     ; CHECK-NEXT: RET undef $x9
 
+    ; CHECK-LEAF-LABEL: name: overlap
+    ; CHECK-LEAF: liveins: $x0, $x9
+    ; CHECK-LEAF-NEXT: {{  $}}
+    ; CHECK-LEAF-NEXT: BL @OUTLINED_FUNCTION_0
+    ; CHECK-LEAF-NEXT: BL @OUTLINED_FUNCTION_0
+    ; CHECK-LEAF-NEXT: $x8 = ADDXri $x0, 3, 0
+    ; CHECK-LEAF-NEXT: BL @OUTLINED_FUNCTION_0
+    ; CHECK-LEAF-NEXT: BL @OUTLINED_FUNCTION_0
+    ; CHECK-LEAF-NEXT: $x8 = ADDXri $x0, 3, 0
+    ; CHECK-LEAF-NEXT: BL @OUTLINED_FUNCTION_0
+    ; CHECK-LEAF-NEXT: BL @OUTLINED_FUNCTION_0
+    ; CHECK-LEAF-NEXT: RET undef $x9
+
+    ; CHECK-LEAF-LABEL: name: OUTLINED_FUNCTION_0
+    ; CHECK-LEAF: liveins: $x0, $x9, $lr
+    ; CHECK-LEAF-NEXT: {{  $}}
+    ; CHECK-LEAF-NEXT: $x9 = ADDXri $x9, 16, 0
+    ; CHECK-LEAF-NEXT: $x9 = ADDXri $x9, 16, 0
+    ; CHECK-LEAF-NEXT: $x9 = ADDXri $x9, 16, 0
+    ; CHECK-LEAF-NEXT: RET $lr
+
     ; fixme: outline!
     $x9 = ADDXri $x9, 16, 0
     $x9 = ADDXri $x9, 16, 0
diff --git a/llvm/test/CodeGen/RISCV/machineoutliner-pcrel-lo.mir b/llvm/test/CodeGen/RISCV/machineoutliner-pcrel-lo.mir
index 34f7a93b6a168c..8a83543b0280fd 100644
--- a/llvm/test/CodeGen/RISCV/machineoutliner-pcrel-lo.mir
+++ b/llvm/test/CodeGen/RISCV/machineoutliner-pcrel-lo.mir
@@ -1,11 +1,11 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=riscv32 -x mir -run-pass=machine-outliner -simplify-mir -verify-machineinstrs < %s \
+# RUN: llc -mtriple=riscv32 -x mir -run-pass=machine-outliner -simplify-mir -verify-machineinstrs -outliner-leaf-descendants=false < %s \
 # RUN: | FileCheck %s
-# RUN: llc -mtriple=riscv64 -x mir -run-pass=machine-outliner -simplify-mir -verify-machineinstrs < %s \
+# RUN: llc -mtriple=riscv64 -x mir -run-pass=machine-outliner -simplify-mir -verify-machineinstrs -outliner-leaf-descendants=false < %s \
 # RUN: | FileCheck %s
-# RUN: llc -mtriple=riscv32 -x mir -run-pass=machine-outliner -simplify-mir --function-sections -verify-machineinstrs < %s \
+# RUN: llc -mtriple=riscv32 -x mir -run-pass=machine-outliner -simplify-mir --function-sections -verify-machineinstrs -outliner-leaf-descendants=false < %s \
 # RUN: | FileCheck -check-prefix=CHECK-FS %s
-# RUN: llc -mtriple=riscv64 -x mir -run-pass=machine-outliner -simplify-mir --function-sections -verify-machineinstrs < %s \
+# RUN: llc -mtriple=riscv64 -x mir -run-pass=machine-outliner -simplify-mir --function-sections -verify-machineinstrs -outliner-leaf-descendants=false < %s \
 # RUN: | FileCheck -check-prefix=CHECK-FS %s
 
 --- |



More information about the llvm-commits mailing list