[llvm] 0d89627 - Support repeated machine outlining

Jin Lin via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 18 10:49:06 PDT 2020


Author: Jin Lin
Date: 2020-03-18T10:48:52-07:00
New Revision: 0d896278c81c231c370965092a6a23899ab5b344

URL: https://github.com/llvm/llvm-project/commit/0d896278c81c231c370965092a6a23899ab5b344
DIFF: https://github.com/llvm/llvm-project/commit/0d896278c81c231c370965092a6a23899ab5b344.diff

LOG: Support repeated machine outlining

Summary: This change allows machine outlining to be applied N times, where N is specified by the compiler option -machine-outline-runs. By default the value of N is 1. The motivation is that repeated machine outlining can further reduce code size. Please refer to the presentation "Improving Swift Binary Size via Link Time Optimization" at the 2019 LLVM Developers' Meeting.

Reviewers: aschwaighofer, tellenbach, paquette

Reviewed By: paquette

Subscribers: tellenbach, hiraditya, llvm-commits, jinlin

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D71027

Added: 
    llvm/test/CodeGen/AArch64/machine-outliner-iterative.mir

Modified: 
    llvm/lib/CodeGen/MachineOutliner.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/MachineOutliner.cpp b/llvm/lib/CodeGen/MachineOutliner.cpp
index 19bcb09530f7..d63f194f7e66 100644
--- a/llvm/lib/CodeGen/MachineOutliner.cpp
+++ b/llvm/lib/CodeGen/MachineOutliner.cpp
@@ -97,6 +97,13 @@ static cl::opt<bool> EnableLinkOnceODROutlining(
     cl::desc("Enable the machine outliner on linkonceodr functions"),
     cl::init(false));
 
+// Set the number of times to repeatedly apply outlining.
+// Defaults to 1, but more repetitions can save additional size.
+static cl::opt<unsigned>
+    NumRepeat("machine-outline-runs", cl::Hidden,
+              cl::desc("The number of times to apply machine outlining"),
+              cl::init(1));
+
 namespace {
 
 /// Represents an undefined index in the suffix tree.
@@ -842,6 +849,9 @@ struct MachineOutliner : public ModulePass {
   /// linkonceodr linkage.
   bool OutlineFromLinkOnceODRs = false;
 
+  /// The current repeat number of machine outlining.
+  unsigned OutlineRepeatedNum = 0;
+
   /// Set to true if the outliner should run on all functions in the module
   /// considered safe for outlining.
   /// Set to true by default for compatibility with llc's -run-pass option.
@@ -900,9 +910,12 @@ struct MachineOutliner : public ModulePass {
                                           InstructionMapper &Mapper,
                                           unsigned Name);
 
-  /// Calls 'doOutline()'.
+  /// Calls runOnceOnModule up to NumRepeat times.
   bool runOnModule(Module &M) override;
 
+  /// Calls 'doOutline()'.
+  bool runOnceOnModule(Module &M, unsigned Iter);
+
   /// Construct a suffix tree on the instructions in \p M and outline repeated
   /// strings from that tree.
   bool doOutline(Module &M, unsigned &OutlinedFunctionNum);
@@ -1099,7 +1112,13 @@ MachineFunction *MachineOutliner::createOutlinedFunction(
   // Create the function name. This should be unique.
   // FIXME: We should have a better naming scheme. This should be stable,
   // regardless of changes to the outliner's cost model/traversal order.
-  std::string FunctionName = ("OUTLINED_FUNCTION_" + Twine(Name)).str();
+  std::string FunctionName;
+  if (OutlineRepeatedNum > 0)
+    FunctionName = ("OUTLINED_FUNCTION_" + Twine(OutlineRepeatedNum + 1) + "_" +
+                    Twine(Name))
+                       .str();
+  else
+    FunctionName = ("OUTLINED_FUNCTION_" + Twine(Name)).str();
 
   // Create the function using an IR-level function.
   LLVMContext &C = M.getContext();
@@ -1438,12 +1457,14 @@ void MachineOutliner::emitInstrCountChangedRemark(
   }
 }
 
-bool MachineOutliner::runOnModule(Module &M) {
+bool MachineOutliner::runOnceOnModule(Module &M, unsigned Iter) {
   // Check if there's anything in the module. If it's empty, then there's
   // nothing to outline.
   if (M.empty())
     return false;
 
+  OutlineRepeatedNum = Iter;
+
   // Number to append to the current outlined function.
   unsigned OutlinedFunctionNum = 0;
 
@@ -1507,3 +1528,23 @@ bool MachineOutliner::doOutline(Module &M, unsigned &OutlinedFunctionNum) {
 
   return OutlinedSomething;
 }
+
+// Apply outlining up to NumRepeat times; stop early if a run changes nothing.
+bool MachineOutliner::runOnModule(Module &M) {
+  if (NumRepeat < 1)
+    report_fatal_error("Expect NumRepeat for machine outlining "
+                       "to be greater than or equal to 1!\n");
+
+  bool Changed = false;
+  for (unsigned I = 0; I < NumRepeat; I++) {
+    if (!runOnceOnModule(M, I)) {
+      LLVM_DEBUG(dbgs() << "Stopped outlining at iteration " << I
+                        << " because no changes were found.\n";);
+      return Changed;
+    }
+    Changed = true;
+  }
+  LLVM_DEBUG(dbgs() << "Stopped outlining because iteration is "
+                       "equal to " << NumRepeat << "\n";);
+  return Changed;
+}

diff  --git a/llvm/test/CodeGen/AArch64/machine-outliner-iterative.mir b/llvm/test/CodeGen/AArch64/machine-outliner-iterative.mir
new file mode 100644
index 000000000000..08aa043f5c68
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/machine-outliner-iterative.mir
@@ -0,0 +1,149 @@
+# RUN: llc -mtriple=aarch64 -run-pass=machine-outliner -machine-outline-runs=2 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix TWO-RUNS
+# RUN: llc -mtriple=aarch64 -run-pass=machine-outliner -machine-outline-runs=1 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix ONE-RUN
+# RUN: llc -mtriple=aarch64 -run-pass=machine-outliner -machine-outline-runs=4 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix FOUR-RUNS
+
+# Example of Repeated Instruction Sequence - Iterative Machine Outlining
+#
+# define void @"$s12"(...) {         define i64 @"$s5" (...) {             define void @"$s13"(...) {
+#   ...                                ...                                   ...
+#   %8 = load i1, i1* %7                                                     %8 = load i1, i1* %7
+#   %9 = load i4, i4*, %6              %9 = load i4, i4*, %6                 %9 = load i4, i4*, %6
+#   store i4 %9, i4* %5                store i4 %9, i4* %5                   store i4 %9, i4* %5
+#   ...                                ...                                   ...
+# }                                  }                                     }
+#
+# After machine outliner (1st time)
+#
+# define void @"$s12"(...) {         define i64 @"$s5" (...) {             define void @"$s13"(...) {
+#   ...                                ...                                   ...
+#   %8 = load i1, i1* %7                                                     %8 = load i1, i1* %7
+#   call void @outlined_function_1_1   call void @outlined_function_1_1      call void @outlined_function_1_1
+#   ...                                ...                                   ...
+# }                                  }                                     }
+#
+# After machine outliner (2nd time)
+#
+# define void @"$s12"(...) {         define i64 @"$s5" (...) {             define void @"$s13"(...) {
+#   ...                                ...                                   ...
+#   call void @outlined_function_2_1   call void @outlined_function_1_1      call void @outlined_function_2_1
+#   ...                                ...                                   ...
+# }                                  }                                     }
+#
+# Check whether the machine outliner can find further outlining opportunities after
+# machine outlining has already been performed.
+#
+--- |
+  declare void @foo() local_unnamed_addr
+
+  declare void @widget() local_unnamed_addr
+
+  ; Function Attrs: minsize noredzone optsize
+  define void @baz.14() #0 {
+    ret void
+  }
+
+  ; Function Attrs: minsize noredzone optsize
+  define void @baz.15() #0 {
+    ret void
+  }
+
+  ; Function Attrs: minsize noredzone optsize
+  define void @baz.16() #0 {
+    ret void
+  }
+
+  attributes #0 = { minsize noredzone optsize }
+...
+---
+name:            baz.14
+tracksRegLiveness: true
+stack:
+  - { id: 0, offset: -8, size: 8 }
+  - { id: 1, offset: -16, size: 8 }
+body:             |
+  bb.0:
+    liveins: $x0, $x19, $lr
+
+    early-clobber $sp = frame-setup STPXpre killed $lr, killed $x19, $sp, -2 :: (store 8 into %stack.1), (store 8 into %stack.0)
+    frame-setup CFI_INSTRUCTION def_cfa_offset 16
+    frame-setup CFI_INSTRUCTION offset $w19, -8
+    frame-setup CFI_INSTRUCTION offset $w30, -16
+    renamable $x19 = COPY $x0
+    renamable $x0 = nuw ADDXri $x0, 48, 0
+    $x1 = ADDXri $sp, 0, 0
+    dead $w2 = MOVi32imm 33, implicit-def $x2
+    $x3 = COPY $xzr
+    BL @foo, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit killed $x1, implicit killed $x2, implicit killed $x3, implicit-def $sp
+    $x0 = COPY killed renamable $x19
+    BL @widget, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp
+    early-clobber $sp, $lr, $x19 = frame-destroy LDPXpost $sp, 2 :: (load 8 from %stack.1), (load 8 from %stack.0)
+    RET_ReallyLR
+
+...
+---
+name:            baz.15
+tracksRegLiveness: true
+stack:
+  - { id: 0, offset: -8, size: 8 }
+  - { id: 1, offset: -16, size: 8 }
+body:             |
+  bb.0:
+    liveins: $x0, $x19, $lr
+
+    early-clobber $sp = frame-setup STPXpre killed $lr, killed $x19, $sp, -2 :: (store 8 into %stack.1), (store 8 into %stack.0)
+    frame-setup CFI_INSTRUCTION def_cfa_offset 16
+    frame-setup CFI_INSTRUCTION offset $w19, -8
+    frame-setup CFI_INSTRUCTION offset $w30, -16
+    renamable $x19 = COPY $x0
+    renamable $x0 = nuw ADDXri killed renamable $x0, 16, 0
+    $x1 = ADDXri $sp, 0, 0
+    dead $w2 = MOVi32imm 33, implicit-def $x2
+    $x3 = COPY $xzr
+    BL @foo, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit $x1, implicit killed $x2, implicit killed $x3, implicit-def $sp
+    $x0 = COPY killed renamable $x19
+    BL @widget, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp
+    early-clobber $sp, $lr, $x19 = frame-destroy LDPXpost $sp, 2 :: (load 8 from %stack.1), (load 8 from %stack.0)
+    RET_ReallyLR
+
+...
+---
+name:            baz.16
+tracksRegLiveness: true
+stack:
+  - { id: 0, offset: -8, size: 8 }
+  - { id: 1, offset: -16, size: 8 }
+body:             |
+  bb.0:
+    liveins: $x0, $x19, $lr
+
+    early-clobber $sp = frame-setup STPXpre killed $lr, killed $x19, $sp, -2 :: (store 8 into %stack.1), (store 8 into %stack.0)
+    frame-setup CFI_INSTRUCTION def_cfa_offset 16
+    frame-setup CFI_INSTRUCTION offset $w19, -8
+    frame-setup CFI_INSTRUCTION offset $w30, -16
+    renamable $x19 = COPY $x0
+    renamable $x0 = nuw ADDXri $x0, 48, 0
+    $x1 = ADDXri $sp, 0, 0
+    dead $w2 = MOVi32imm 33, implicit-def $x2
+    $x3 = COPY $xzr
+    BL @foo, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit killed $x1, implicit killed $x2, implicit killed $x3, implicit-def $sp
+    $x0 = COPY killed renamable $x19
+    BL @widget, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit $x0, implicit-def $sp
+    early-clobber $sp, $lr, $x19 = frame-destroy LDPXpost $sp, 2 :: (load 8 from %stack.1), (load 8 from %stack.0)
+    RET_ReallyLR
+
+...
+
+# TWO-RUNS: name:            OUTLINED_FUNCTION_2_0
+# TWO-RUNS-DAG: bb.0:
+# TWO-RUNS-DAG: renamable $x19 = COPY $x0
+# TWO-RUNS-NEXT: renamable $x0 = nuw ADDXri $x0, 48, 0
+# TWO-RUNS-NEXT: TCRETURNdi @OUTLINED_FUNCTION_0, 0, implicit $sp
+#
+# The machine outliner is expected to stop at the 1st iteration for case ONE-RUN
+# since machine-outline-runs is specified as 1.
+# ONE-RUN-NOT: [[OUTLINED:OUTLINED_FUNCTION_2_[0-9]+]]
+#
+# The machine outliner is expected to stop at the 3rd iteration for case FOUR-RUNS
+# since the MIR has no change at the 3rd iteration.
+# FOUR-RUNS-NOT: [[OUTLINED:OUTLINED_FUNCTION_3_[0-9]+]]
+# FOUR-RUNS-NOT: [[OUTLINED:OUTLINED_FUNCTION_4_[0-9]+]]


        


More information about the llvm-commits mailing list