[llvm] [MachineOutliner] Consider Leaf Descendants as Outlining Candidates (PR #88996)

Xuan Zhang via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 16 14:58:08 PDT 2024


https://github.com/xuanzh-meta created https://github.com/llvm/llvm-project/pull/88996

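In the current implementation, only the leaf children of each internal node in the suffix tree are included as candidates for outlining. However, every leaf descendant of an internal node (not just its direct leaf children) marks an occurrence of that node's repeated substring and is therefore also an outlining candidate. The new implementation includes all of them.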



>From 84db82fdb07928e5ba59a4f6e652dbfcfcd3acd7 Mon Sep 17 00:00:00 2001
From: Xuan Zhang <xuanzh at meta.com>
Date: Fri, 12 Apr 2024 09:59:15 -0700
Subject: [PATCH 1/3] efficient implementation of
 MachineOutliner::findCandidates()

---
 llvm/lib/CodeGen/MachineOutliner.cpp          | 11 ++---
 llvm/lib/Support/SuffixTree.cpp               |  5 +++
 .../Analysis/IRSimilarityIdentifier/basic.ll  | 26 ++++++------
 .../IRSimilarityIdentifier/different.ll       |  6 +--
 .../IROutliner/outlining-commutative.ll       | 20 +++++-----
 llvm/test/tools/llvm-sim/single-sim-file.test | 40 +++++++++----------
 llvm/test/tools/llvm-sim/single-sim.test      | 40 +++++++++----------
 7 files changed, 75 insertions(+), 73 deletions(-)

diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp
index dc2f5ef15206e8..e682d42c76747e 100644
--- a/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -616,17 +616,14 @@ void MachineOutliner::findCandidates(
       // * End before the other starts
       // * Start after the other ends
       unsigned EndIdx = StartIdx + StringLen - 1;
-      auto FirstOverlap = find_if(
-          CandidatesForRepeatedSeq, [StartIdx, EndIdx](const Candidate &C) {
-            return EndIdx >= C.getStartIdx() && StartIdx <= C.getEndIdx();
-          });
-      if (FirstOverlap != CandidatesForRepeatedSeq.end()) {
+      if (CandidatesForRepeatedSeq.size() > 0 &&
+          StartIdx <= CandidatesForRepeatedSeq.back().getEndIdx()) {
 #ifndef NDEBUG
         ++NumDiscarded;
         LLVM_DEBUG(dbgs() << "    .. DISCARD candidate @ [" << StartIdx
                           << ", " << EndIdx << "]; overlaps with candidate @ ["
-                          << FirstOverlap->getStartIdx() << ", "
-                          << FirstOverlap->getEndIdx() << "]\n");
+                          << CandidatesForRepeatedSeq.back().getStartIdx() << ", "
+                          << CandidatesForRepeatedSeq.back().getEndIdx() << "]\n");
 #endif
         continue;
       }
diff --git a/llvm/lib/Support/SuffixTree.cpp b/llvm/lib/Support/SuffixTree.cpp
index eaa653078e0900..03ed1d02840aa1 100644
--- a/llvm/lib/Support/SuffixTree.cpp
+++ b/llvm/lib/Support/SuffixTree.cpp
@@ -274,6 +274,11 @@ void SuffixTree::RepeatedSubstringIterator::advance() {
     RS.Length = Length;
     for (unsigned StartIdx : RepeatedSubstringStarts)
       RS.StartIndices.push_back(StartIdx);
+
+    // Sort the start indices so that we can efficiently check if candidates
+    // overlap with each other in MachineOutliner::findCandidates().
+    llvm::sort(RS.StartIndices);
+
     break;
   }
   // At this point, either NewRS is an empty RepeatedSubstring, or it was
diff --git a/llvm/test/Analysis/IRSimilarityIdentifier/basic.ll b/llvm/test/Analysis/IRSimilarityIdentifier/basic.ll
index 1c08cb407c2e3c..b38e7d19973db6 100644
--- a/llvm/test/Analysis/IRSimilarityIdentifier/basic.ll
+++ b/llvm/test/Analysis/IRSimilarityIdentifier/basic.ll
@@ -4,7 +4,7 @@
 ; This is a simple test to make sure the IRSimilarityIdentifier and
 ; IRSimilarityPrinterPass is working.
 
-; CHECK: 4 candidates of length 6.  Found in: 
+; CHECK: 4 candidates of length 6.  Found in:
 ; CHECK-NEXT:  Function: turtle, Basic Block: (unnamed)
 ; CHECK-NEXT:    Start Instruction:   store i32 1, ptr %1, align 4
 ; CHECK-NEXT:      End Instruction:   store i32 6, ptr %6, align 4
@@ -17,7 +17,7 @@
 ; CHECK-NEXT:  Function: dog, Basic Block: entry
 ; CHECK-NEXT:    Start Instruction:   store i32 6, ptr %0, align 4
 ; CHECK-NEXT:      End Instruction:   store i32 5, ptr %5, align 4
-; CHECK-NEXT:4 candidates of length 5.  Found in: 
+; CHECK-NEXT:4 candidates of length 5.  Found in:
 ; CHECK-NEXT:  Function: turtle, Basic Block: (unnamed)
 ; CHECK-NEXT:    Start Instruction:   store i32 2, ptr %2, align 4
 ; CHECK-NEXT:      End Instruction:   store i32 6, ptr %6, align 4
@@ -30,7 +30,7 @@
 ; CHECK-NEXT:  Function: dog, Basic Block: entry
 ; CHECK-NEXT:    Start Instruction:   store i32 1, ptr %1, align 4
 ; CHECK-NEXT:      End Instruction:   store i32 5, ptr %5, align 4
-; CHECK-NEXT:4 candidates of length 4.  Found in: 
+; CHECK-NEXT:4 candidates of length 4.  Found in:
 ; CHECK-NEXT:  Function: turtle, Basic Block: (unnamed)
 ; CHECK-NEXT:    Start Instruction:   store i32 3, ptr %3, align 4
 ; CHECK-NEXT:      End Instruction:   store i32 6, ptr %6, align 4
@@ -43,7 +43,7 @@
 ; CHECK-NEXT:  Function: dog, Basic Block: entry
 ; CHECK-NEXT:    Start Instruction:   store i32 2, ptr %2, align 4
 ; CHECK-NEXT:      End Instruction:   store i32 5, ptr %5, align 4
-; CHECK-NEXT:4 candidates of length 3.  Found in: 
+; CHECK-NEXT:4 candidates of length 3.  Found in:
 ; CHECK-NEXT:  Function: turtle, Basic Block: (unnamed)
 ; CHECK-NEXT:    Start Instruction:   store i32 4, ptr %4, align 4
 ; CHECK-NEXT:      End Instruction:   store i32 6, ptr %6, align 4
@@ -56,7 +56,7 @@
 ; CHECK-NEXT:  Function: dog, Basic Block: entry
 ; CHECK-NEXT:    Start Instruction:   store i32 3, ptr %3, align 4
 ; CHECK-NEXT:      End Instruction:   store i32 5, ptr %5, align 4
-; CHECK-NEXT:4 candidates of length 2.  Found in: 
+; CHECK-NEXT:4 candidates of length 2.  Found in:
 ; CHECK-NEXT:  Function: turtle, Basic Block: (unnamed)
 ; CHECK-NEXT:    Start Instruction:   store i32 5, ptr %5, align 4
 ; CHECK-NEXT:      End Instruction:   store i32 6, ptr %6, align 4
@@ -70,40 +70,40 @@
 ; CHECK-NEXT:    Start Instruction:   store i32 4, ptr %4, align 4
 ; CHECK-NEXT:      End Instruction:   store i32 5, ptr %5, align 4
 
-define linkonce_odr void @fish() {
-entry:
-  %0 = alloca i32, align 4
+define void @turtle() {
   %1 = alloca i32, align 4
   %2 = alloca i32, align 4
   %3 = alloca i32, align 4
   %4 = alloca i32, align 4
   %5 = alloca i32, align 4
-  store i32 6, ptr %0, align 4
+  %6 = alloca i32, align 4
   store i32 1, ptr %1, align 4
   store i32 2, ptr %2, align 4
   store i32 3, ptr %3, align 4
   store i32 4, ptr %4, align 4
   store i32 5, ptr %5, align 4
+  store i32 6, ptr %6, align 4
   ret void
 }
 
-define void @turtle() {
+define void @cat() {
+entry:
+  %0 = alloca i32, align 4
   %1 = alloca i32, align 4
   %2 = alloca i32, align 4
   %3 = alloca i32, align 4
   %4 = alloca i32, align 4
   %5 = alloca i32, align 4
-  %6 = alloca i32, align 4
+  store i32 6, ptr %0, align 4
   store i32 1, ptr %1, align 4
   store i32 2, ptr %2, align 4
   store i32 3, ptr %3, align 4
   store i32 4, ptr %4, align 4
   store i32 5, ptr %5, align 4
-  store i32 6, ptr %6, align 4
   ret void
 }
 
-define void @cat() {
+define linkonce_odr void @fish() {
 entry:
   %0 = alloca i32, align 4
   %1 = alloca i32, align 4
diff --git a/llvm/test/Analysis/IRSimilarityIdentifier/different.ll b/llvm/test/Analysis/IRSimilarityIdentifier/different.ll
index e5c9970b159b9f..70d422077c3e9c 100644
--- a/llvm/test/Analysis/IRSimilarityIdentifier/different.ll
+++ b/llvm/test/Analysis/IRSimilarityIdentifier/different.ll
@@ -14,11 +14,11 @@
 ; CHECK-NEXT:       End Instruction:   store i32 5, ptr %5, align 4
 ; CHECK-NEXT: 2 candidates of length 3.  Found in:
 ; CHECK-NEXT:   Function: turtle, Basic Block: (unnamed)
-; CHECK-NEXT:     Start Instruction:   %b = load i32, ptr %1, align 4
-; CHECK-NEXT:       End Instruction:   %d = load i32, ptr %3, align 4
-; CHECK-NEXT:   Function: turtle, Basic Block: (unnamed)
 ; CHECK-NEXT:     Start Instruction:   %a = load i32, ptr %0, align 4
 ; CHECK-NEXT:       End Instruction:   %c = load i32, ptr %2, align 4
+; CHECK-NEXT:   Function: turtle, Basic Block: (unnamed)
+; CHECK-NEXT:     Start Instruction:   %b = load i32, ptr %1, align 4
+; CHECK-NEXT:       End Instruction:   %d = load i32, ptr %3, align 4
 
 define linkonce_odr void @fish() {
 entry:
diff --git a/llvm/test/Transforms/IROutliner/outlining-commutative.ll b/llvm/test/Transforms/IROutliner/outlining-commutative.ll
index 8862dc295d4351..1534829bad7ba7 100644
--- a/llvm/test/Transforms/IROutliner/outlining-commutative.ll
+++ b/llvm/test/Transforms/IROutliner/outlining-commutative.ll
@@ -123,7 +123,7 @@ define void @outline_from_sub1() {
 ; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    call void @outlined_ir_func_2(ptr [[A]], ptr [[B]], ptr [[C]])
+; CHECK-NEXT:    call void @outlined_ir_func_1(ptr [[A]], ptr [[B]], ptr [[C]])
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -148,7 +148,7 @@ define void @outline_from_sub2() {
 ; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    call void @outlined_ir_func_2(ptr [[A]], ptr [[B]], ptr [[C]])
+; CHECK-NEXT:    call void @outlined_ir_func_1(ptr [[A]], ptr [[B]], ptr [[C]])
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -173,7 +173,7 @@ define void @dontoutline_from_flipped_sub3() {
 ; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    call void @outlined_ir_func_1(ptr [[A]], ptr [[B]], ptr [[C]])
+; CHECK-NEXT:    call void @outlined_ir_func_2(ptr [[A]], ptr [[B]], ptr [[C]])
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -198,7 +198,7 @@ define void @dontoutline_from_flipped_sub4() {
 ; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[C:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    call void @outlined_ir_func_1(ptr [[A]], ptr [[B]], ptr [[C]])
+; CHECK-NEXT:    call void @outlined_ir_func_2(ptr [[A]], ptr [[B]], ptr [[C]])
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -237,9 +237,9 @@ entry:
 ; CHECK-NEXT:    [[AL:%.*]] = load i32, ptr [[ARG0]], align 4
 ; CHECK-NEXT:    [[BL:%.*]] = load i32, ptr [[ARG1]], align 4
 ; CHECK-NEXT:    [[CL:%.*]] = load i32, ptr [[ARG2]], align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = sub i32 [[BL]], [[AL]]
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[CL]], [[AL]]
-; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[CL]], [[BL]]
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i32 [[AL]], [[BL]]
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[AL]], [[CL]]
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[BL]], [[CL]]
 
 ; CHECK: define internal void @outlined_ir_func_2(ptr [[ARG0:%.*]], ptr [[ARG1:%.*]], ptr [[ARG2:%.*]]) #0 {
 ; CHECK: entry_to_outline:
@@ -249,6 +249,6 @@ entry:
 ; CHECK-NEXT:    [[AL:%.*]] = load i32, ptr [[ARG0]], align 4
 ; CHECK-NEXT:    [[BL:%.*]] = load i32, ptr [[ARG1]], align 4
 ; CHECK-NEXT:    [[CL:%.*]] = load i32, ptr [[ARG2]], align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = sub i32 [[AL]], [[BL]]
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[AL]], [[CL]]
-; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[BL]], [[CL]]
+; CHECK-NEXT:    [[TMP0:%.*]] = sub i32 [[BL]], [[AL]]
+; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[CL]], [[AL]]
+; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[CL]], [[BL]]
diff --git a/llvm/test/tools/llvm-sim/single-sim-file.test b/llvm/test/tools/llvm-sim/single-sim-file.test
index cef14b36085005..4279931f36cdf2 100644
--- a/llvm/test/tools/llvm-sim/single-sim-file.test
+++ b/llvm/test/tools/llvm-sim/single-sim-file.test
@@ -6,52 +6,52 @@
 # CHECK: {
 # CHECK-NEXT: "1": [
 # CHECK-NEXT:  {
-# CHECK-NEXT:   "start": 14,
-# CHECK-NEXT:   "end": 19
-# CHECK-NEXT:  },
-# CHECK-NEXT:  {
 # CHECK-NEXT:   "start": 4,
 # CHECK-NEXT:   "end": 9
+# CHECK-NEXT:  },
+# CHECK-NEXT:  {
+# CHECK-NEXT:   "start": 14,
+# CHECK-NEXT:   "end": 19
 # CHECK-NEXT:  }
 # CHECK-NEXT: ],
 # CHECK-NEXT: "2": [
 # CHECK-NEXT:  {
-# CHECK-NEXT:   "start": 15,
-# CHECK-NEXT:   "end": 19
-# CHECK-NEXT:  },
-# CHECK-NEXT:  {
 # CHECK-NEXT:   "start": 5,
 # CHECK-NEXT:   "end": 9
+# CHECK-NEXT:  },
+# CHECK-NEXT:  {
+# CHECK-NEXT:   "start": 15,
+# CHECK-NEXT:   "end": 19
 # CHECK-NEXT:  }
 # CHECK-NEXT: ],
 # CHECK-NEXT: "3": [
 # CHECK-NEXT:  {
-# CHECK-NEXT:   "start": 16,
-# CHECK-NEXT:   "end": 19
-# CHECK-NEXT:  },
-# CHECK-NEXT:  {
 # CHECK-NEXT:   "start": 6,
 # CHECK-NEXT:   "end": 9
+# CHECK-NEXT:  },
+# CHECK-NEXT:  {
+# CHECK-NEXT:   "start": 16,
+# CHECK-NEXT:   "end": 19
 # CHECK-NEXT:  }
 # CHECK-NEXT: ],
 # CHECK-NEXT: "4": [
 # CHECK-NEXT:  {
-# CHECK-NEXT:   "start": 17,
-# CHECK-NEXT:   "end": 19
-# CHECK-NEXT:  },
-# CHECK-NEXT:  {
 # CHECK-NEXT:   "start": 7,
 # CHECK-NEXT:   "end": 9
+# CHECK-NEXT:  },
+# CHECK-NEXT:  {
+# CHECK-NEXT:   "start": 17,
+# CHECK-NEXT:   "end": 19
 # CHECK-NEXT:  }
 # CHECK-NEXT: ],
 # CHECK-NEXT: "5": [
 # CHECK-NEXT:  {
-# CHECK-NEXT:   "start": 18,
-# CHECK-NEXT:   "end": 19
-# CHECK-NEXT:  },
-# CHECK-NEXT:  {
 # CHECK-NEXT:   "start": 8,
 # CHECK-NEXT:   "end": 9
+# CHECK-NEXT:  },
+# CHECK-NEXT:  {
+# CHECK-NEXT:   "start": 18,
+# CHECK-NEXT:   "end": 19
 # CHECK-NEXT:  }
 # CHECK-NEXT: ]
 # CHECK-NEXT:}
diff --git a/llvm/test/tools/llvm-sim/single-sim.test b/llvm/test/tools/llvm-sim/single-sim.test
index 0095ec6acbc588..3300b5cbda31a5 100644
--- a/llvm/test/tools/llvm-sim/single-sim.test
+++ b/llvm/test/tools/llvm-sim/single-sim.test
@@ -5,52 +5,52 @@
 # CHECK: {
 # CHECK-NEXT: "1": [
 # CHECK-NEXT:  {
-# CHECK-NEXT:   "start": 14,
-# CHECK-NEXT:   "end": 19
-# CHECK-NEXT:  },
-# CHECK-NEXT:  {
 # CHECK-NEXT:   "start": 4,
 # CHECK-NEXT:   "end": 9
+# CHECK-NEXT:  },
+# CHECK-NEXT:  {
+# CHECK-NEXT:   "start": 14,
+# CHECK-NEXT:   "end": 19
 # CHECK-NEXT:  }
 # CHECK-NEXT: ],
 # CHECK-NEXT: "2": [
 # CHECK-NEXT:  {
-# CHECK-NEXT:   "start": 15,
-# CHECK-NEXT:   "end": 19
-# CHECK-NEXT:  },
-# CHECK-NEXT:  {
 # CHECK-NEXT:   "start": 5,
 # CHECK-NEXT:   "end": 9
+# CHECK-NEXT:  },
+# CHECK-NEXT:  {
+# CHECK-NEXT:   "start": 15,
+# CHECK-NEXT:   "end": 19
 # CHECK-NEXT:  }
 # CHECK-NEXT: ],
 # CHECK-NEXT: "3": [
 # CHECK-NEXT:  {
-# CHECK-NEXT:   "start": 16,
-# CHECK-NEXT:   "end": 19
-# CHECK-NEXT:  },
-# CHECK-NEXT:  {
 # CHECK-NEXT:   "start": 6,
 # CHECK-NEXT:   "end": 9
+# CHECK-NEXT:  },
+# CHECK-NEXT:  {
+# CHECK-NEXT:   "start": 16,
+# CHECK-NEXT:   "end": 19
 # CHECK-NEXT:  }
 # CHECK-NEXT: ],
 # CHECK-NEXT: "4": [
 # CHECK-NEXT:  {
-# CHECK-NEXT:   "start": 17,
-# CHECK-NEXT:   "end": 19
-# CHECK-NEXT:  },
-# CHECK-NEXT:  {
 # CHECK-NEXT:   "start": 7,
 # CHECK-NEXT:   "end": 9
+# CHECK-NEXT:  },
+# CHECK-NEXT:  {
+# CHECK-NEXT:   "start": 17,
+# CHECK-NEXT:   "end": 19
 # CHECK-NEXT:  }
 # CHECK-NEXT: ],
 # CHECK-NEXT: "5": [
 # CHECK-NEXT:  {
-# CHECK-NEXT:   "start": 18,
-# CHECK-NEXT:   "end": 19
-# CHECK-NEXT:  },
-# CHECK-NEXT:  {
 # CHECK-NEXT:   "start": 8,
 # CHECK-NEXT:   "end": 9
+# CHECK-NEXT:  },
+# CHECK-NEXT:  {
+# CHECK-NEXT:   "start": 18,
+# CHECK-NEXT:   "end": 19
 # CHECK-NEXT:  }
 # CHECK-NEXT: ]
 # CHECK-NEXT:}

>From 010bc6e9596844a69ba1ddf60ff0174d1d3c4bc3 Mon Sep 17 00:00:00 2001
From: Xuan Zhang <xuanzh at meta.com>
Date: Fri, 12 Apr 2024 10:55:13 -0700
Subject: [PATCH 2/3] outlining order based on priority instead of benefits

---
 llvm/lib/CodeGen/MachineOutliner.cpp          | 10 +-
 .../machine-outliner-sort-per-priority.ll     | 96 +++++++++++++++++++
 2 files changed, 104 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/machine-outliner-sort-per-priority.ll

diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp
index e682d42c76747e..341c94e7adf2ee 100644
--- a/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -825,10 +825,16 @@ bool MachineOutliner::outline(Module &M,
                     << "\n");
   bool OutlinedSomething = false;
 
-  // Sort by benefit. The most beneficial functions should be outlined first.
+  // Sort by priority where priority := getNotOutlinedCost / getOutliningCost.
+  // The function with highest priority should be outlined first.
   stable_sort(FunctionList,
               [](const OutlinedFunction &LHS, const OutlinedFunction &RHS) {
-                return LHS.getBenefit() > RHS.getBenefit();
+                if (LHS.getBenefit() == 0)
+                  return false;
+                if (LHS.getBenefit() > 0 && RHS.getBenefit() == 0)
+                  return true;
+                return LHS.getNotOutlinedCost() * RHS.getOutliningCost() >
+                       RHS.getNotOutlinedCost() * LHS.getOutliningCost();
               });
 
   // Walk over each function, outlining them as we go along. Functions are
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-sort-per-priority.ll b/llvm/test/CodeGen/AArch64/machine-outliner-sort-per-priority.ll
new file mode 100644
index 00000000000000..00efc3c6e71c89
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-sort-per-priority.ll
@@ -0,0 +1,96 @@
+; This tests the order in which functions are outlined in MachineOutliner.
+; There are TWO key OutlinedFunctions in FunctionList.
+;
+; ===================== First One =====================
+;   ```
+;     mov     w0, #1
+;     mov     w1, #2
+;     mov     w2, #3
+;     mov     w3, #4
+;     mov     w4, #5
+;   ```
+; It has:
+;   - `SequenceSize=20` and `OccurrenceCount=6`
+;   - each Candidate has `CallOverhead=12` and `FrameOverhead=4`
+;   - `NotOutlinedCost=20*6=120` and `OutliningCost=12*6+20+4=96`
+;   - `Benefit=120-96=24` and `Priority=120/96=1.25`
+;
+; ===================== Second One =====================
+;   ```
+;     mov     w6, #6
+;     mov     w7, #7
+;     b
+;   ```
+; It has:
+;   - `SequenceSize=12` and `OccurrenceCount=4`
+;   - each Candidate has `CallOverhead=4` and `FrameOverhead=0`
+;   - `NotOutlinedCost=12*4=48` and `OutliningCost=4*4+12+0=28`
+;   - `Benefit=48-28=20` and `Priority=48/28=1.71`
+;
+; Note that the first one has higher benefit, but lower priority.
+; Hence, when outlining per priority, the second one will be outlined first.
+
+; RUN: llc %s -enable-machine-outliner=always -filetype=obj -o %t
+; RUN: llvm-objdump -d %t | FileCheck %s --check-prefix=CHECK-SORT-BY-PRIORITY
+
+; RUN: llc %s -enable-machine-outliner=always -outliner-benefit-threshold=22 -filetype=obj -o %t
+; RUN: llvm-objdump -d %t | FileCheck %s --check-prefix=CHECK-THRESHOLD
+
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-macosx14.0.0"
+
+declare i32 @_Z3fooiiii(i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef)
+
+define i32 @_Z2f1v() minsize {
+  %1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 11, i32 noundef 6, i32 noundef 7)
+  ret i32 %1
+}
+
+define i32 @_Z2f2v() minsize {
+  %1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 12, i32 noundef 6, i32 noundef 7)
+  ret i32 %1
+}
+
+define i32 @_Z2f3v() minsize {
+  %1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 13, i32 noundef 6, i32 noundef 7)
+  ret i32 %1
+}
+
+define i32 @_Z2f4v() minsize {
+  %1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 14, i32 noundef 6, i32 noundef 7)
+  ret i32 %1
+}
+
+define i32 @_Z2f5v() minsize {
+  %1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 15, i32 noundef 8, i32 noundef 9)
+  ret i32 %1
+}
+
+define i32 @_Z2f6v() minsize {
+  %1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 16, i32 noundef 9, i32 noundef 8)
+  ret i32 %1
+}
+
+; CHECK-SORT-BY-PRIORITY: <_OUTLINED_FUNCTION_0>:
+; CHECK-SORT-BY-PRIORITY-NEXT: mov     w6, #0x6
+; CHECK-SORT-BY-PRIORITY-NEXT: mov     w7, #0x7
+; CHECK-SORT-BY-PRIORITY-NEXT: b
+
+; CHECK-SORT-BY-PRIORITY: <_OUTLINED_FUNCTION_1>:
+; CHECK-SORT-BY-PRIORITY-NEXT: mov     w0, #0x1
+; CHECK-SORT-BY-PRIORITY-NEXT: mov     w1, #0x2
+; CHECK-SORT-BY-PRIORITY-NEXT: mov     w2, #0x3
+; CHECK-SORT-BY-PRIORITY-NEXT: mov     w3, #0x4
+; CHECK-SORT-BY-PRIORITY-NEXT: mov     w4, #0x5
+; CHECK-SORT-BY-PRIORITY-NEXT: ret
+
+; CHECK-THRESHOLD: <_OUTLINED_FUNCTION_0>:
+; CHECK-THRESHOLD-NEXT: mov     w0, #0x1
+; CHECK-THRESHOLD-NEXT: mov     w1, #0x2
+; CHECK-THRESHOLD-NEXT: mov     w2, #0x3
+; CHECK-THRESHOLD-NEXT: mov     w3, #0x4
+; CHECK-THRESHOLD-NEXT: mov     w4, #0x5
+; CHECK-THRESHOLD-NEXT: ret
+
+; CHECK-THRESHOLD-NOT: <_OUTLINED_FUNCTION_1>:

>From 8ab0551be7279b65fa89f255895f5886a72d4764 Mon Sep 17 00:00:00 2001
From: Xuan Zhang <xuanzh at meta.com>
Date: Fri, 12 Apr 2024 10:57:51 -0700
Subject: [PATCH 3/3] consider leaf descendants to include more candidates for
 outlining

---
 llvm/include/llvm/Support/SuffixTree.h        |  33 ++++-
 llvm/include/llvm/Support/SuffixTreeNode.h    |  25 +++-
 llvm/lib/CodeGen/MachineOutliner.cpp          |   8 +-
 llvm/lib/Support/SuffixTree.cpp               |  83 ++++++++++-
 llvm/lib/Support/SuffixTreeNode.cpp           |   5 +
 .../machine-outliner-cfi-tail-some.mir        |   2 +-
 .../machine-outliner-leaf-descendants.ll      | 124 ++++++++++++++++
 .../machine-outliner-retaddr-sign-sp-mod.mir  |   2 +-
 .../machine-outliner-retaddr-sign-thunk.ll    |   4 +-
 .../AArch64/machine-outliner-throw2.ll        |   4 +-
 .../CodeGen/AArch64/machine-outliner-thunk.ll |   2 +-
 .../test/CodeGen/AArch64/machine-outliner.mir |   2 +-
 llvm/unittests/Support/SuffixTreeTest.cpp     | 134 ++++++++++++++++++
 13 files changed, 413 insertions(+), 15 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/machine-outliner-leaf-descendants.ll

diff --git a/llvm/include/llvm/Support/SuffixTree.h b/llvm/include/llvm/Support/SuffixTree.h
index 4940fbbf308d8b..90f8aae60a575b 100644
--- a/llvm/include/llvm/Support/SuffixTree.h
+++ b/llvm/include/llvm/Support/SuffixTree.h
@@ -42,6 +42,9 @@ class SuffixTree {
   /// Each element is an integer representing an instruction in the module.
   ArrayRef<unsigned> Str;
 
+  /// Whether to consider leaf descendants or only leaf children.
+  bool OutlinerLeafDescendants;
+
   /// A repeated substring in the tree.
   struct RepeatedSubstring {
     /// The length of the string.
@@ -130,11 +133,27 @@ class SuffixTree {
   /// this step.
   unsigned extend(unsigned EndIdx, unsigned SuffixesToAdd);
 
+  /// This vector contains all leaf nodes of this suffix tree. These leaf nodes
+  /// are identified using post-order depth-first traversal, so that the order
+  /// of these leaf nodes in the vector matches the order of the leaves in the
+  /// tree from left to right if one were to draw the tree on paper.
+  std::vector<SuffixTreeLeafNode *> LeafNodes;
+
+  /// Perform a post-order depth-first traversal of the tree and perform two
+  /// tasks during the traversal. The first is to populate LeafNodes, adding
+  /// nodes in order of the traversal. The second is to keep track of the leaf
+  /// descendants of every internal node by assigning values to the LeftLeafIdx
+  /// and RightLeafIdx fields of SuffixTreeNode for all internal nodes.
+  void setLeafNodes();
+
 public:
   /// Construct a suffix tree from a sequence of unsigned integers.
   ///
   /// \param Str The string to construct the suffix tree for.
-  SuffixTree(const ArrayRef<unsigned> &Str);
+  /// \param OutlinerLeafDescendants Whether to consider leaf descendants or
+  /// only leaf children (used by Machine Outliner).
+  SuffixTree(const ArrayRef<unsigned> &Str,
+             bool OutlinerLeafDescendants = false);
 
   /// Iterator for finding all repeated substrings in the suffix tree.
   struct RepeatedSubstringIterator {
@@ -154,6 +173,12 @@ class SuffixTree {
     /// instruction lengths.
     const unsigned MinLength = 2;
 
+    /// Vector of leaf nodes of the suffix tree.
+    const std::vector<SuffixTreeLeafNode *> &LeafNodes;
+
+    /// Whether to consider leaf descendants or only leaf children.
+    bool OutlinerLeafDescendants = !LeafNodes.empty();
+
     /// Move the iterator to the next repeated substring.
     void advance();
 
@@ -179,7 +204,9 @@ class SuffixTree {
       return !(*this == Other);
     }
 
-    RepeatedSubstringIterator(SuffixTreeInternalNode *N) : N(N) {
+    RepeatedSubstringIterator(
+        SuffixTreeInternalNode *N, const std::vector<SuffixTreeLeafNode *> &LeafNodes = {})
+        : N(N), LeafNodes(LeafNodes) {
       // Do we have a non-null node?
       if (!N)
         return;
@@ -191,7 +218,7 @@ class SuffixTree {
   };
 
   typedef RepeatedSubstringIterator iterator;
-  iterator begin() { return iterator(Root); }
+  iterator begin() { return iterator(Root, LeafNodes); }
   iterator end() { return iterator(nullptr); }
 };
 
diff --git a/llvm/include/llvm/Support/SuffixTreeNode.h b/llvm/include/llvm/Support/SuffixTreeNode.h
index 7d0d1cf0c58b95..84b590f2deb0cd 100644
--- a/llvm/include/llvm/Support/SuffixTreeNode.h
+++ b/llvm/include/llvm/Support/SuffixTreeNode.h
@@ -46,6 +46,17 @@ struct SuffixTreeNode {
   /// the root to this node.
   unsigned ConcatLen = 0;
 
+  /// These two indices give a range of indices for its leaf descendants.
+  /// Imagine drawing a tree on paper and assigning a unique index to each leaf
+  /// node in monotonically increasing order from left to right. This way of
+  /// numbering the leaf nodes allows us to associate a continuous range of
+  /// indices with each internal node. For example, if a node has leaf
+  /// descendants with indices i, i+1, ..., j, then its LeftLeafIdx is i and
+  /// its RightLeafIdx is j. These indices are for LeafNodes in the SuffixTree
+  /// class, which is constructed using post-order depth-first traversal.
+  unsigned LeftLeafIdx = EmptyIdx;
+  unsigned RightLeafIdx = EmptyIdx;
+
 public:
   // LLVM RTTI boilerplate.
   NodeKind getKind() const { return Kind; }
@@ -56,6 +67,18 @@ struct SuffixTreeNode {
   /// \returns the end index of this node.
   virtual unsigned getEndIdx() const = 0;
 
+  /// \return the index of this node's left most leaf node.
+  unsigned getLeftLeafIdx() const;
+
+  /// \return the index of this node's right most leaf node.
+  unsigned getRightLeafIdx() const;
+
+  /// Set the index of the left most leaf node of this node to \p Idx.
+  void setLeftLeafIdx(unsigned Idx);
+
+  /// Set the index of the right most leaf node of this node to \p Idx.
+  void setRightLeafIdx(unsigned Idx);
+
   /// Advance this node's StartIdx by \p Inc.
   void incrementStartIdx(unsigned Inc);
 
@@ -168,4 +191,4 @@ struct SuffixTreeLeafNode : SuffixTreeNode {
   virtual ~SuffixTreeLeafNode() = default;
 };
 } // namespace llvm
-#endif // LLVM_SUPPORT_SUFFIXTREE_NODE_H
\ No newline at end of file
+#endif // LLVM_SUPPORT_SUFFIXTREE_NODE_H
diff --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp
index 341c94e7adf2ee..db197291d2d0d5 100644
--- a/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -121,6 +121,12 @@ static cl::opt<unsigned> OutlinerBenefitThreshold(
     cl::desc(
         "The minimum size in bytes before an outlining candidate is accepted"));
 
+static cl::opt<bool> OutlinerLeafDescendants(
+    "outliner-leaf-descendants", cl::init(true), cl::Hidden,
+    cl::desc("Consider all leaf descendants of internal nodes of the suffix "
+             "tree as candidates for outlining (if false, only leaf children "
+             "are considered)"));
+
 namespace {
 
 /// Maps \p MachineInstrs to unsigned integers and stores the mappings.
@@ -576,7 +582,7 @@ void MachineOutliner::emitOutlinedFunctionRemark(OutlinedFunction &OF) {
 void MachineOutliner::findCandidates(
     InstructionMapper &Mapper, std::vector<OutlinedFunction> &FunctionList) {
   FunctionList.clear();
-  SuffixTree ST(Mapper.UnsignedVec);
+  SuffixTree ST(Mapper.UnsignedVec, OutlinerLeafDescendants);
 
   // First, find all of the repeated substrings in the tree of minimum length
   // 2.
diff --git a/llvm/lib/Support/SuffixTree.cpp b/llvm/lib/Support/SuffixTree.cpp
index 03ed1d02840aa1..481233bfd4790b 100644
--- a/llvm/lib/Support/SuffixTree.cpp
+++ b/llvm/lib/Support/SuffixTree.cpp
@@ -11,9 +11,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/SuffixTree.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Support/Allocator.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/SuffixTreeNode.h"
+#include <stack>
 
 using namespace llvm;
 
@@ -26,7 +28,9 @@ static size_t numElementsInSubstring(const SuffixTreeNode *N) {
   return N->getEndIdx() - N->getStartIdx() + 1;
 }
 
-SuffixTree::SuffixTree(const ArrayRef<unsigned> &Str) : Str(Str) {
+SuffixTree::SuffixTree(const ArrayRef<unsigned> &Str,
+                       bool OutlinerLeafDescendants)
+    : Str(Str), OutlinerLeafDescendants(OutlinerLeafDescendants) {
   Root = insertRoot();
   Active.Node = Root;
 
@@ -46,6 +50,11 @@ SuffixTree::SuffixTree(const ArrayRef<unsigned> &Str) : Str(Str) {
   // Set the suffix indices of each leaf.
   assert(Root && "Root node can't be nullptr!");
   setSuffixIndices();
+
+  // Collect all leaf nodes of the suffix tree. And for each internal node,
+  // record the range of leaf nodes that are descendants of it.
+  if (OutlinerLeafDescendants)
+    setLeafNodes();
 }
 
 SuffixTreeNode *SuffixTree::insertLeaf(SuffixTreeInternalNode &Parent,
@@ -105,6 +114,68 @@ void SuffixTree::setSuffixIndices() {
   }
 }
 
+void SuffixTree::setLeafNodes() {
+  // Explicit work stack that drives an iterative post-order DFS from the root.
+  std::stack<SuffixTreeNode *> ToVisit;
+  ToVisit.push(Root);
+
+  // This keeps track of the index to assign to the next leaf node that is
+  // appended to the LeafNodes vector of the suffix tree.
+  unsigned LeafCounter = 0;
+
+  // Internal nodes whose children have already been pushed onto the stack;
+  // popping such a node again means all of its children were processed.
+  llvm::SmallPtrSet<SuffixTreeInternalNode *, 32> ChildrenAddedToStack;
+
+  // Traverse the tree in post-order.
+  while (!ToVisit.empty()) {
+    SuffixTreeNode *CurrNode = ToVisit.top();
+    ToVisit.pop();
+    if (auto *CurrInternalNode = dyn_cast<SuffixTreeInternalNode>(CurrNode)) {
+      // The current node is an internal node.
+      if (ChildrenAddedToStack.find(CurrInternalNode) !=
+          ChildrenAddedToStack.end()) {
+        // If the children of the current node have been added to the stack,
+        // then this is the second time we visit this node and, at this point,
+        // all of its children have already been processed. Now we can
+        // set its LeftLeafIdx and RightLeafIdx.
+        auto it = CurrInternalNode->Children.begin();
+        if (it != CurrInternalNode->Children.end()) {
+          // Take RightLeafIdx from the first child: it is the first child
+          // pushed onto the stack, hence the last one popped and processed,
+          // which means that the leaf descendants of the first child are
+          // assigned the largest index numbers among this node's leaf
+          // descendants.
+          CurrNode->setRightLeafIdx(it->second->getRightLeafIdx());
+          // Advance to the last child (processed first) for the LeftLeafIdx.
+          while (std::next(it) != CurrInternalNode->Children.end())
+            it = std::next(it);
+          CurrNode->setLeftLeafIdx(it->second->getLeftLeafIdx());
+          assert(CurrNode->getLeftLeafIdx() <= CurrNode->getRightLeafIdx() &&
+                 "LeftLeafIdx should not be larger than RightLeafIdx");
+        }
+      } else {
+        // This is the first time we visit this node, so its children have
+        // not been added to the stack yet. Push the current node back first
+        // and then its children on top of it, so that every child is popped
+        // and processed before this node is popped the second time.
+        ToVisit.push(CurrNode);
+        for (auto &ChildPair : CurrInternalNode->Children)
+          ToVisit.push(ChildPair.second);
+        ChildrenAddedToStack.insert(CurrInternalNode);
+      }
+    } else {
+      // The current node is a leaf node.
+      // We can simply set its LeftLeafIdx and RightLeafIdx to its own index.
+      CurrNode->setLeftLeafIdx(LeafCounter);
+      CurrNode->setRightLeafIdx(LeafCounter);
+      LeafCounter++;
+      auto *CurrLeafNode = cast<SuffixTreeLeafNode>(CurrNode);
+      LeafNodes.push_back(CurrLeafNode);
+    }
+  }
+}
+
 unsigned SuffixTree::extend(unsigned EndIdx, unsigned SuffixesToAdd) {
   SuffixTreeInternalNode *NeedsLink = nullptr;
 
@@ -230,6 +301,7 @@ void SuffixTree::RepeatedSubstringIterator::advance() {
 
   // Each leaf node represents a repeat of a string.
   SmallVector<unsigned> RepeatedSubstringStarts;
+  SmallVector<SuffixTreeLeafNode *> LeafDescendants;
 
   // Continue visiting nodes until we find one which repeats more than once.
   while (!InternalNodesToVisit.empty()) {
@@ -252,7 +324,7 @@ void SuffixTree::RepeatedSubstringIterator::advance() {
         continue;
       }
 
-      if (Length < MinLength)
+      if (Length < MinLength || OutlinerLeafDescendants)
         continue;
 
       // Have an occurrence of a potentially repeated string. Save it.
@@ -260,6 +332,13 @@ void SuffixTree::RepeatedSubstringIterator::advance() {
       RepeatedSubstringStarts.push_back(Leaf->getSuffixIdx());
     }
 
+    if (OutlinerLeafDescendants && Length >= MinLength) {
+      LeafDescendants.assign(LeafNodes.begin() + Curr->getLeftLeafIdx(),
+                             LeafNodes.begin() + Curr->getRightLeafIdx() + 1);
+      for (SuffixTreeLeafNode *Leaf : LeafDescendants)
+        RepeatedSubstringStarts.push_back(Leaf->getSuffixIdx());
+    }
+
     // The root never represents a repeated substring. If we're looking at
     // that, then skip it.
     if (Curr->isRoot())
diff --git a/llvm/lib/Support/SuffixTreeNode.cpp b/llvm/lib/Support/SuffixTreeNode.cpp
index 113b990fd352fc..9f1f94a39895e8 100644
--- a/llvm/lib/Support/SuffixTreeNode.cpp
+++ b/llvm/lib/Support/SuffixTreeNode.cpp
@@ -38,3 +38,8 @@ unsigned SuffixTreeLeafNode::getEndIdx() const {
 
 unsigned SuffixTreeLeafNode::getSuffixIdx() const { return SuffixIdx; }
 void SuffixTreeLeafNode::setSuffixIdx(unsigned Idx) { SuffixIdx = Idx; }
+
+unsigned SuffixTreeNode::getLeftLeafIdx() const { return LeftLeafIdx; }
+unsigned SuffixTreeNode::getRightLeafIdx() const { return RightLeafIdx; }
+void SuffixTreeNode::setLeftLeafIdx(unsigned Idx) { LeftLeafIdx = Idx; }
+void SuffixTreeNode::setRightLeafIdx(unsigned Idx) { RightLeafIdx = Idx; }
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-cfi-tail-some.mir b/llvm/test/CodeGen/AArch64/machine-outliner-cfi-tail-some.mir
index 67d411962ce4f7..3afa1d5559a585 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner-cfi-tail-some.mir
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-cfi-tail-some.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=aarch64-apple-unknown -run-pass=machine-outliner -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple=aarch64-apple-unknown -run-pass=machine-outliner -verify-machineinstrs -outliner-leaf-descendants=false %s -o - | FileCheck %s
 
 # Outlining CFI instructions is unsafe if we cannot outline all of the CFI
 # instructions from a function.  This shows that we choose not to outline the
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-leaf-descendants.ll b/llvm/test/CodeGen/AArch64/machine-outliner-leaf-descendants.ll
new file mode 100644
index 00000000000000..bdaf653a79566d
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-leaf-descendants.ll
@@ -0,0 +1,124 @@
+; This test is mainly for the -outliner-leaf-descendants flag for MachineOutliner.
+;
+; ===================== -outliner-leaf-descendants=false =====================
+; MachineOutliner finds THREE key `OutlinedFunction` and outlines them. They are:
+;   ```
+;     mov     w0, #1
+;     mov     w1, #2
+;     mov     w2, #3
+;     mov     w3, #4
+;     mov     w4, #5
+;     mov     w5, #6 or #7 or #8
+;     b
+;   ```
+; Each has:
+;   - `SequenceSize=28` and `OccurrenceCount=2`
+;   - each Candidate has `CallOverhead=4` and `FrameOverhead=0`
+;   - `NotOutlinedCost=28*2=56` and `OutliningCost=4*2+28+0=36`
+;   - `Benefit=56-36=20` and `Priority=56/36=1.56`
+;
+; ===================== -outliner-leaf-descendants=true =====================
+; MachineOutliner finds a FOURTH key `OutlinedFunction`, which is:
+;   ```
+;   mov     w0, #1
+;   mov     w1, #2
+;   mov     w2, #3
+;   mov     w3, #4
+;   mov     w4, #5
+;   ```
+; This corresponds to an internal node that has ZERO leaf children, but SIX leaf descendants.
+; It has:
+;   - `SequenceSize=20` and `OccurrenceCount=6`
+;   - each Candidate has `CallOverhead=12` and `FrameOverhead=4`
+;   - `NotOutlinedCost=20*6=120` and `OutliningCost=12*6+20+4=96`
+;   - `Benefit=120-96=24` and `Priority=120/96=1.25`
+;
+; The FOURTH `OutlinedFunction` has lower _priority_ compared to the first THREE `OutlinedFunction`
+; Hence, if we additionally include the `-sort-per-priority` flag,  the first THREE `OutlinedFunction` are outlined.
+
+; RUN: llc %s -enable-machine-outliner=always -outliner-leaf-descendants=false -filetype=obj -o %t
+; RUN: llvm-objdump -d %t | FileCheck %s --check-prefix=CHECK-BASELINE
+
+; RUN: llc %s -enable-machine-outliner=always -outliner-leaf-descendants=false -outliner-benefit-threshold=22 -filetype=obj -o %t
+; RUN: llvm-objdump -d %t | FileCheck %s --check-prefix=CHECK-NO-CANDIDATE
+
+; RUN: llc %s -enable-machine-outliner=always -outliner-leaf-descendants=true -filetype=obj -o %t
+; RUN: llvm-objdump -d %t | FileCheck %s --check-prefix=CHECK-BASELINE
+
+; RUN: llc %s -enable-machine-outliner=always -outliner-leaf-descendants=true -outliner-benefit-threshold=22 -filetype=obj -o %t
+; RUN: llvm-objdump -d %t | FileCheck %s --check-prefix=CHECK-LEAF-DESCENDANTS
+
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-macosx14.0.0"
+
+declare i32 @_Z3fooiiii(i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef, i32 noundef)
+
+define i32 @_Z2f1v() minsize {
+  %1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 6)
+  ret i32 %1
+}
+
+define i32 @_Z2f2v() minsize {
+  %1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 6)
+  ret i32 %1
+}
+
+define i32 @_Z2f3v() minsize {
+  %1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 7)
+  ret i32 %1
+}
+
+define i32 @_Z2f4v() minsize {
+  %1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 7)
+  ret i32 %1
+}
+
+define i32 @_Z2f5v() minsize {
+  %1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 8)
+  ret i32 %1
+}
+
+define i32 @_Z2f6v() minsize {
+  %1 = tail call i32 @_Z3fooiiii(i32 noundef 1, i32 noundef 2, i32 noundef 3, i32 noundef 4, i32 noundef 5, i32 noundef 8)
+  ret i32 %1
+}
+
+; CHECK-BASELINE: <_OUTLINED_FUNCTION_0>:
+; CHECK-BASELINE-NEXT: mov     w0, #0x1
+; CHECK-BASELINE-NEXT: mov     w1, #0x2
+; CHECK-BASELINE-NEXT: mov     w2, #0x3
+; CHECK-BASELINE-NEXT: mov     w3, #0x4
+; CHECK-BASELINE-NEXT: mov     w4, #0x5
+; CHECK-BASELINE-NEXT: mov     w5, #0x6
+; CHECK-BASELINE-NEXT: b
+
+; CHECK-BASELINE: <_OUTLINED_FUNCTION_1>:
+; CHECK-BASELINE-NEXT: mov     w0, #0x1
+; CHECK-BASELINE-NEXT: mov     w1, #0x2
+; CHECK-BASELINE-NEXT: mov     w2, #0x3
+; CHECK-BASELINE-NEXT: mov     w3, #0x4
+; CHECK-BASELINE-NEXT: mov     w4, #0x5
+; CHECK-BASELINE-NEXT: mov     w5, #0x8
+; CHECK-BASELINE-NEXT: b
+
+; CHECK-BASELINE: <_OUTLINED_FUNCTION_2>:
+; CHECK-BASELINE-NEXT: mov     w0, #0x1
+; CHECK-BASELINE-NEXT: mov     w1, #0x2
+; CHECK-BASELINE-NEXT: mov     w2, #0x3
+; CHECK-BASELINE-NEXT: mov     w3, #0x4
+; CHECK-BASELINE-NEXT: mov     w4, #0x5
+; CHECK-BASELINE-NEXT: mov     w5, #0x7
+; CHECK-BASELINE-NEXT: b
+
+; CHECK-LEAF-DESCENDANTS: <_OUTLINED_FUNCTION_0>:
+; CHECK-LEAF-DESCENDANTS-NEXT: mov     w0, #0x1
+; CHECK-LEAF-DESCENDANTS-NEXT: mov     w1, #0x2
+; CHECK-LEAF-DESCENDANTS-NEXT: mov     w2, #0x3
+; CHECK-LEAF-DESCENDANTS-NEXT: mov     w3, #0x4
+; CHECK-LEAF-DESCENDANTS-NEXT: mov     w4, #0x5
+; CHECK-LEAF-DESCENDANTS-NEXT: ret
+
+; CHECK-LEAF-DESCENDANTS-NOT: <_OUTLINED_FUNCTION_1>:
+
+; CHECK-NO-CANDIDATE-NOT: <_OUTLINED_FUNCTION_0>:
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-sp-mod.mir b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-sp-mod.mir
index c1c2720dec6ad6..22e5edef2a9395 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-sp-mod.mir
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-sp-mod.mir
@@ -1,4 +1,4 @@
-# RUN: llc -verify-machineinstrs -run-pass=machine-outliner -run-pass=aarch64-ptrauth %s -o - | FileCheck %s
+# RUN: llc -verify-machineinstrs -run-pass=machine-outliner -run-pass=aarch64-ptrauth -outliner-leaf-descendants=false %s -o - | FileCheck %s
 
 --- |
   target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-thunk.ll b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-thunk.ll
index 9250718fc0d585..618973b9368d1d 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-thunk.ll
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-retaddr-sign-thunk.ll
@@ -1,6 +1,6 @@
-; RUN: llc -mtriple aarch64-arm-linux-gnu --enable-machine-outliner \
+; RUN: llc -mtriple aarch64-arm-linux-gnu --enable-machine-outliner -outliner-leaf-descendants=false \
 ; RUN: -verify-machineinstrs %s -o - | FileCheck --check-prefixes CHECK,V8A %s
-; RUN-V83A: llc -mtriple aarch64 -enable-machine-outliner \
+; RUN-V83A: llc -mtriple aarch64 -enable-machine-outliner -outliner-leaf-descendants=false \
 ; RUN-V83A: -verify-machineinstrs -mattr=+v8.3a %s -o - > %t
 ; RUN-V83A: FileCheck --check-prefixes CHECK,V83A < %t %s
 
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-throw2.ll b/llvm/test/CodeGen/AArch64/machine-outliner-throw2.ll
index aa6e31d6ff21d7..538e1165e39c1d 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner-throw2.ll
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-throw2.ll
@@ -1,5 +1,5 @@
-; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=aarch64 -frame-pointer=non-leaf < %s | FileCheck %s --check-prefix=NOOMIT
-; RUN: llc -verify-machineinstrs -enable-machine-outliner -mtriple=aarch64 -frame-pointer=none < %s | FileCheck %s --check-prefix=OMITFP
+; RUN: llc -verify-machineinstrs -enable-machine-outliner -outliner-leaf-descendants=false -mtriple=aarch64 -frame-pointer=non-leaf < %s | FileCheck %s --check-prefix=NOOMIT
+; RUN: llc -verify-machineinstrs -enable-machine-outliner -outliner-leaf-descendants=false -mtriple=aarch64 -frame-pointer=none < %s | FileCheck %s --check-prefix=OMITFP
 
 define void @_Z1giii(i32 %x, i32 %y, i32 %z) minsize {
 ; NOOMIT-LABEL: _Z1giii:
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner-thunk.ll b/llvm/test/CodeGen/AArch64/machine-outliner-thunk.ll
index 8740aac0549eec..7e34adf16d25d3 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner-thunk.ll
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-thunk.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -enable-machine-outliner -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -enable-machine-outliner -outliner-leaf-descendants=false -verify-machineinstrs | FileCheck %s
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64-pc-linux-gnu"
diff --git a/llvm/test/CodeGen/AArch64/machine-outliner.mir b/llvm/test/CodeGen/AArch64/machine-outliner.mir
index 83eda744d24a9e..66779addaff0a8 100644
--- a/llvm/test/CodeGen/AArch64/machine-outliner.mir
+++ b/llvm/test/CodeGen/AArch64/machine-outliner.mir
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=aarch64--- -run-pass=prologepilog -run-pass=machine-outliner -verify-machineinstrs -frame-pointer=non-leaf %s -o - | FileCheck %s
+# RUN: llc -mtriple=aarch64--- -run-pass=prologepilog -run-pass=machine-outliner -verify-machineinstrs -frame-pointer=non-leaf -outliner-leaf-descendants=false %s -o - | FileCheck %s
 --- |
 
   @x = common global i32 0, align 4
diff --git a/llvm/unittests/Support/SuffixTreeTest.cpp b/llvm/unittests/Support/SuffixTreeTest.cpp
index f5d8112ccf5cec..59f6dc9bf963fa 100644
--- a/llvm/unittests/Support/SuffixTreeTest.cpp
+++ b/llvm/unittests/Support/SuffixTreeTest.cpp
@@ -140,4 +140,138 @@ TEST(SuffixTreeTest, TestExclusion) {
   }
 }
 
+// Tests that the SuffixTree is able to find the following substrings:
+// {1, 1} at indices 0, 1, 2, 3, and 4;
+// {1, 1, 1} at indices 0, 1, 2, and 3;
+// {1, 1, 1, 1}  at indices 0, 1, and 2; and
+// {1, 1, 1, 1, 1} at indices 0 and 1.
+//
+// This is a FIX to the Test TestSingleCharacterRepeat
+TEST(SuffixTreeTest, TestSingleCharacterRepeatWithLeafDescendants) {
+  std::vector<unsigned> RepeatedRepetitionData = {1, 1, 1, 1, 1, 1, 2};
+  SuffixTree ST(RepeatedRepetitionData, true);
+  std::vector<SuffixTree::RepeatedSubstring> SubStrings;
+  for (auto It = ST.begin(); It != ST.end(); It++)
+    SubStrings.push_back(*It);
+  EXPECT_EQ(SubStrings.size(), 4u);
+  for (SuffixTree::RepeatedSubstring &RS : SubStrings) {
+    // A run of L ones inside the six leading 1s starts at indices 0..6-L,
+    // which is (data size - L) occurrences once leaf descendants are counted.
+    EXPECT_EQ(RS.StartIndices.size(),
+              RepeatedRepetitionData.size() - RS.Length);
+    // Validate the occurrences of the substring being visited (RS) rather than
+    // re-checking SubStrings[0] on every pass: each one must consist of 1s.
+    for (unsigned StartIdx : RS.StartIndices) {
+      auto RRDIt = RepeatedRepetitionData.begin() + StartIdx;
+      ASSERT_TRUE(all_of(make_range(RRDIt, RRDIt + RS.Length),
+                         [](unsigned Elt) { return Elt == 1; }));
+    }
+  }
+}
+
+// Tests that the SuffixTree is able to find three substrings
+// {1, 2, 3} at indices 6 and 10;
+// {2, 3} at indices 7 and 11; and
+// {1, 2} at indices 0 and 3.
+//
+// FIXME: {1, 2} is missing indices 6 and 10, where it occurs as a prefix of
+// {1, 2, 3}. See TestSubstringRepeatsWithLeafDescendants for the fix.
+TEST(SuffixTreeTest, TestSubstringRepeats) {
+  std::vector<unsigned> RepeatedRepetitionData = {1, 2, 100, 1, 2, 101, 1,
+                                                  2, 3, 103, 1, 2, 3,   104};
+  SuffixTree ST(RepeatedRepetitionData);
+  std::vector<SuffixTree::RepeatedSubstring> SubStrings;
+  for (auto It = ST.begin(); It != ST.end(); It++)
+    SubStrings.push_back(*It);
+  EXPECT_EQ(SubStrings.size(), 3u);
+  unsigned Len;
+  for (SuffixTree::RepeatedSubstring &RS : SubStrings) {
+    Len = RS.Length;
+    bool IsExpectedLen = (Len == 3u || Len == 2u);
+    ASSERT_TRUE(IsExpectedLen);
+    bool IsExpectedIndex;
+
+    if (Len == 3u) { // {1, 2, 3}
+      EXPECT_EQ(RS.StartIndices.size(), 2u);
+      for (unsigned StartIdx : RS.StartIndices) {
+        IsExpectedIndex = (StartIdx == 6u || StartIdx == 10u);
+        EXPECT_TRUE(IsExpectedIndex);
+        EXPECT_EQ(RepeatedRepetitionData[StartIdx], 1u);
+        EXPECT_EQ(RepeatedRepetitionData[StartIdx + 1], 2u);
+        EXPECT_EQ(RepeatedRepetitionData[StartIdx + 2], 3u);
+      }
+    } else {
+      if (RepeatedRepetitionData[RS.StartIndices[0]] == 1u) { // {1, 2}
+        EXPECT_EQ(RS.StartIndices.size(), 2u);
+        for (unsigned StartIdx : RS.StartIndices) {
+          IsExpectedIndex = (StartIdx == 0u || StartIdx == 3u);
+          EXPECT_TRUE(IsExpectedIndex);
+          EXPECT_EQ(RepeatedRepetitionData[StartIdx + 1], 2u);
+        }
+      } else { // {2, 3}
+        EXPECT_EQ(RS.StartIndices.size(), 2u);
+        for (unsigned StartIdx : RS.StartIndices) {
+          IsExpectedIndex = (StartIdx == 7u || StartIdx == 11u);
+          EXPECT_TRUE(IsExpectedIndex);
+          EXPECT_EQ(RepeatedRepetitionData[StartIdx], 2u);
+          EXPECT_EQ(RepeatedRepetitionData[StartIdx + 1], 3u);
+        }
+      }
+    }
+  }
+}
+
+// Tests that the SuffixTree is able to find three substrings
+// {1, 2, 3} at indices 6 and 10;
+// {2, 3} at indices 7 and 11; and
+// {1, 2} at indices 0, 3, 6, and 10.
+//
+// This is the fix for the FIXME in TestSubstringRepeats.
+
+TEST(SuffixTreeTest, TestSubstringRepeatsWithLeafDescendants) {
+  std::vector<unsigned> RepeatedRepetitionData = {1, 2, 100, 1, 2, 101, 1,
+                                                  2, 3, 103, 1, 2, 3,   104};
+  SuffixTree ST(RepeatedRepetitionData, true);
+  std::vector<SuffixTree::RepeatedSubstring> SubStrings;
+  for (auto It = ST.begin(); It != ST.end(); It++)
+    SubStrings.push_back(*It);
+  EXPECT_EQ(SubStrings.size(), 3u);
+  unsigned Len;
+  for (SuffixTree::RepeatedSubstring &RS : SubStrings) {
+    Len = RS.Length;
+    bool IsExpectedLen = (Len == 3u || Len == 2u);
+    ASSERT_TRUE(IsExpectedLen);
+    bool IsExpectedIndex;
+
+    if (Len == 3u) { // {1, 2, 3}
+      EXPECT_EQ(RS.StartIndices.size(), 2u);
+      for (unsigned StartIdx : RS.StartIndices) {
+        IsExpectedIndex = (StartIdx == 6u || StartIdx == 10u);
+        EXPECT_TRUE(IsExpectedIndex);
+        EXPECT_EQ(RepeatedRepetitionData[StartIdx], 1u);
+        EXPECT_EQ(RepeatedRepetitionData[StartIdx + 1], 2u);
+        EXPECT_EQ(RepeatedRepetitionData[StartIdx + 2], 3u);
+      }
+    } else {
+      if (RepeatedRepetitionData[RS.StartIndices[0]] == 1u) { // {1, 2}
+        EXPECT_EQ(RS.StartIndices.size(), 4u);
+        for (unsigned StartIdx : RS.StartIndices) {
+          IsExpectedIndex = (StartIdx == 0u || StartIdx == 3u ||
+                             StartIdx == 6u || StartIdx == 10u);
+          EXPECT_TRUE(IsExpectedIndex);
+          EXPECT_EQ(RepeatedRepetitionData[StartIdx + 1], 2u);
+        }
+      } else { // {2, 3}
+        EXPECT_EQ(RS.StartIndices.size(), 2u);
+        for (unsigned StartIdx : RS.StartIndices) {
+          IsExpectedIndex = (StartIdx == 7u || StartIdx == 11u);
+          EXPECT_TRUE(IsExpectedIndex);
+          EXPECT_EQ(RepeatedRepetitionData[StartIdx], 2u);
+          EXPECT_EQ(RepeatedRepetitionData[StartIdx + 1], 3u);
+        }
+      }
+    }
+  }
+}
+
 } // namespace



More information about the llvm-commits mailing list