[Lldb-commits] [clang] [lldb] [llvm] [BOLT] Hash-based function matching (PR #95821)

shaw young via lldb-commits lldb-commits at lists.llvm.org
Mon Jun 24 15:29:24 PDT 2024


https://github.com/shawbyoung updated https://github.com/llvm/llvm-project/pull/95821

>From 9452bd574023a7aef75b609d36e0ffac68e1e03d Mon Sep 17 00:00:00 2001
From: Sayhaan Siddiqui <sayhaan at meta.com>
Date: Mon, 17 Jun 2024 11:11:07 -0700
Subject: [PATCH 01/21] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20?=
 =?UTF-8?q?changes=20to=20main=20this=20commit=20is=20based=20on?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.4

[skip ci]
---
 bolt/include/bolt/Rewrite/DWARFRewriter.h     |   4 +-
 bolt/lib/Core/BinaryEmitter.cpp               |   1 +
 bolt/lib/Rewrite/DWARFRewriter.cpp            |  61 ++---
 clang/include/clang/Driver/Options.td         |   4 +
 clang/lib/Driver/ToolChains/Gnu.cpp           |  29 +++
 cross-project-tests/lit.cfg.py                |  14 +-
 cross-project-tests/lit.site.cfg.py.in        |   4 +
 lldb/test/API/lit.cfg.py                      |   5 +
 lldb/test/API/lit.site.cfg.py.in              |   8 +
 lldb/test/Shell/helper/toolchain.py           |   5 +
 lldb/test/Shell/lit.site.cfg.py.in            |   9 +
 llvm/CMakeLists.txt                           |   4 +
 llvm/include/llvm/MC/MCFragment.h             |  22 ++
 llvm/include/llvm/MC/MCObjectStreamer.h       |   2 +
 llvm/include/llvm/MC/MCStreamer.h             |   6 +
 llvm/lib/MC/MCAssembler.cpp                   | 118 ++++++----
 llvm/lib/MC/MCExpr.cpp                        |  10 +-
 llvm/lib/MC/MCFragment.cpp                    |  12 +
 llvm/lib/MC/MCObjectStreamer.cpp              |   5 +
 llvm/lib/MC/MCStreamer.cpp                    |   2 +
 .../lib/Target/X86/AsmParser/X86AsmParser.cpp |  24 ++
 llvm/test/MC/X86/directive-avoid_end_align.s  | 208 ++++++++++++++++++
 22 files changed, 483 insertions(+), 74 deletions(-)
 create mode 100644 llvm/test/MC/X86/directive-avoid_end_align.s

diff --git a/bolt/include/bolt/Rewrite/DWARFRewriter.h b/bolt/include/bolt/Rewrite/DWARFRewriter.h
index 8dec32de9008e..3cc9d823c815b 100644
--- a/bolt/include/bolt/Rewrite/DWARFRewriter.h
+++ b/bolt/include/bolt/Rewrite/DWARFRewriter.h
@@ -12,6 +12,7 @@
 #include "bolt/Core/DIEBuilder.h"
 #include "bolt/Core/DebugData.h"
 #include "bolt/Core/DebugNames.h"
+#include "bolt/Core/GDBIndex.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/DIE.h"
 #include "llvm/DWP/DWP.h"
@@ -131,7 +132,8 @@ class DWARFRewriter {
   makeFinalLocListsSection(DWARFVersion Version);
 
   /// Finalize type sections in the main binary.
-  CUOffsetMap finalizeTypeSections(DIEBuilder &DIEBlder, DIEStreamer &Streamer);
+  CUOffsetMap finalizeTypeSections(DIEBuilder &DIEBlder, DIEStreamer &Streamer,
+                                   GDBIndex &GDBIndexSection);
 
   /// Process and write out CUs that are passsed in.
   void finalizeCompileUnits(DIEBuilder &DIEBlder, DIEStreamer &Streamer,
diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp
index 5793963f9b80d..c231fffa0d5ff 100644
--- a/bolt/lib/Core/BinaryEmitter.cpp
+++ b/bolt/lib/Core/BinaryEmitter.cpp
@@ -487,6 +487,7 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF,
         // This assumes the second instruction in the macro-op pair will get
         // assigned to its own MCRelaxableFragment. Since all JCC instructions
         // are relaxable, we should be safe.
+        Streamer.emitNeverAlignCodeAtEnd(/*Alignment to avoid=*/64, *BC.STI);
       }
 
       if (!EmitCodeOnly) {
diff --git a/bolt/lib/Rewrite/DWARFRewriter.cpp b/bolt/lib/Rewrite/DWARFRewriter.cpp
index 8814ebbd10aa5..7b62999dfb2b6 100644
--- a/bolt/lib/Rewrite/DWARFRewriter.cpp
+++ b/bolt/lib/Rewrite/DWARFRewriter.cpp
@@ -185,6 +185,7 @@ namespace bolt {
 class DIEStreamer : public DwarfStreamer {
   DIEBuilder *DIEBldr;
   DWARFRewriter &Rewriter;
+  GDBIndex &GDBIndexSection;
 
 private:
   /// Emit the compilation unit header for \p Unit in the debug_info
@@ -247,7 +248,7 @@ class DIEStreamer : public DwarfStreamer {
     const uint64_t TypeSignature = cast<DWARFTypeUnit>(Unit).getTypeHash();
     DIE *TypeDIE = DIEBldr->getTypeDIE(Unit);
     const DIEBuilder::DWARFUnitInfo &UI = DIEBldr->getUnitInfoByDwarfUnit(Unit);
-    Rewriter.addGDBTypeUnitEntry(
+    GDBIndexSection.addGDBTypeUnitEntry(
         {UI.UnitOffset, TypeSignature, TypeDIE->getOffset()});
     if (Unit.getVersion() < 5) {
       // Switch the section to .debug_types section.
@@ -279,11 +280,12 @@ class DIEStreamer : public DwarfStreamer {
 
 public:
   DIEStreamer(DIEBuilder *DIEBldr, DWARFRewriter &Rewriter,
+              GDBIndex &GDBIndexSection,
               DWARFLinkerBase::OutputFileType OutFileType,
               raw_pwrite_stream &OutFile,
               DWARFLinkerBase::MessageHandlerTy Warning)
       : DwarfStreamer(OutFileType, OutFile, Warning), DIEBldr(DIEBldr),
-        Rewriter(Rewriter){};
+        Rewriter(Rewriter), GDBIndexSection(GDBIndexSection) {};
 
   using DwarfStreamer::emitCompileUnitHeader;
 
@@ -326,12 +328,11 @@ static cl::opt<bool> KeepARanges(
         "keep or generate .debug_aranges section if .gdb_index is written"),
     cl::Hidden, cl::cat(BoltCategory));
 
-static cl::opt<bool>
-DeterministicDebugInfo("deterministic-debuginfo",
-  cl::desc("disables parallel execution of tasks that may produce "
-           "nondeterministic debug info"),
-  cl::init(true),
-  cl::cat(BoltCategory));
+static cl::opt<bool> DeterministicDebugInfo(
+    "deterministic-debuginfo",
+    cl::desc("disables parallel execution of tasks that may produce "
+             "nondeterministic debug info"),
+    cl::init(true), cl::cat(BoltCategory));
 
 static cl::opt<std::string> DwarfOutputPath(
     "dwarf-output-path",
@@ -460,10 +461,11 @@ static std::optional<uint64_t> getAsAddress(const DWARFUnit &DU,
 static std::unique_ptr<DIEStreamer>
 createDIEStreamer(const Triple &TheTriple, raw_pwrite_stream &OutFile,
                   StringRef Swift5ReflectionSegmentName, DIEBuilder &DIEBldr,
-                  DWARFRewriter &Rewriter) {
+                  DWARFRewriter &Rewriter, GDBIndex &GDBIndexSection) {
 
   std::unique_ptr<DIEStreamer> Streamer = std::make_unique<DIEStreamer>(
-      &DIEBldr, Rewriter, DWARFLinkerBase::OutputFileType::Object, OutFile,
+      &DIEBldr, Rewriter, GDBIndexSection,
+      DWARFLinkerBase::OutputFileType::Object, OutFile,
       [&](const Twine &Warning, StringRef Context, const DWARFDie *) {});
   Error Err = Streamer->init(TheTriple, Swift5ReflectionSegmentName);
   if (Err)
@@ -484,13 +486,12 @@ emitUnit(DIEBuilder &DIEBldr, DIEStreamer &Streamer, DWARFUnit &Unit) {
   return {U.UnitOffset, U.UnitLength, TypeHash};
 }
 
-static void emitDWOBuilder(const std::string &DWOName,
-                           DIEBuilder &DWODIEBuilder, DWARFRewriter &Rewriter,
-                           DWARFUnit &SplitCU, DWARFUnit &CU,
-                           DWARFRewriter::DWPState &State,
-                           DebugLocWriter &LocWriter,
-                           DebugStrOffsetsWriter &StrOffstsWriter,
-                           DebugStrWriter &StrWriter) {
+static void
+emitDWOBuilder(const std::string &DWOName, DIEBuilder &DWODIEBuilder,
+               DWARFRewriter &Rewriter, DWARFUnit &SplitCU, DWARFUnit &CU,
+               DWARFRewriter::DWPState &State, DebugLocWriter &LocWriter,
+               DebugStrOffsetsWriter &StrOffstsWriter,
+               DebugStrWriter &StrWriter, GDBIndex &GDBIndexSection) {
   // Populate debug_info and debug_abbrev for current dwo into StringRef.
   DWODIEBuilder.generateAbbrevs();
   DWODIEBuilder.finish();
@@ -500,8 +501,9 @@ static void emitDWOBuilder(const std::string &DWOName,
       std::make_shared<raw_svector_ostream>(OutBuffer);
   const object::ObjectFile *File = SplitCU.getContext().getDWARFObj().getFile();
   auto TheTriple = std::make_unique<Triple>(File->makeTriple());
-  std::unique_ptr<DIEStreamer> Streamer = createDIEStreamer(
-      *TheTriple, *ObjOS, "DwoStreamerInitAug2", DWODIEBuilder, Rewriter);
+  std::unique_ptr<DIEStreamer> Streamer =
+      createDIEStreamer(*TheTriple, *ObjOS, "DwoStreamerInitAug2",
+                        DWODIEBuilder, Rewriter, GDBIndexSection);
   DWARFRewriter::UnitMetaVectorType TUMetaVector;
   DWARFRewriter::UnitMeta CUMI = {0, 0, 0};
   if (SplitCU.getContext().getMaxDWOVersion() >= 5) {
@@ -652,6 +654,7 @@ void DWARFRewriter::updateDebugInfo() {
 
   DWARF5AcceleratorTable DebugNamesTable(opts::CreateDebugNames, BC,
                                          *StrWriter);
+  GDBIndex GDBIndexSection(BC);
   DWPState State;
   if (opts::WriteDWP)
     initDWPState(State);
@@ -704,7 +707,8 @@ void DWARFRewriter::updateDebugInfo() {
         TempRangesSectionWriter->finalizeSection();
 
       emitDWOBuilder(DWOName, DWODIEBuilder, *this, **SplitCU, *Unit, State,
-                     DebugLocDWoWriter, DWOStrOffstsWriter, DWOStrWriter);
+                     DebugLocDWoWriter, DWOStrOffstsWriter, DWOStrWriter,
+                     GDBIndexSection);
     }
 
     if (Unit->getVersion() >= 5) {
@@ -729,9 +733,10 @@ void DWARFRewriter::updateDebugInfo() {
       std::make_unique<raw_svector_ostream>(OutBuffer);
   const object::ObjectFile *File = BC.DwCtx->getDWARFObj().getFile();
   auto TheTriple = std::make_unique<Triple>(File->makeTriple());
-  std::unique_ptr<DIEStreamer> Streamer =
-      createDIEStreamer(*TheTriple, *ObjOS, "TypeStreamer", DIEBlder, *this);
-  CUOffsetMap OffsetMap = finalizeTypeSections(DIEBlder, *Streamer);
+  std::unique_ptr<DIEStreamer> Streamer = createDIEStreamer(
+      *TheTriple, *ObjOS, "TypeStreamer", DIEBlder, *this, GDBIndexSection);
+  CUOffsetMap OffsetMap =
+      finalizeTypeSections(DIEBlder, *Streamer, GDBIndexSection);
 
   const bool SingleThreadedMode =
       opts::NoThreads || opts::DeterministicDebugInfo;
@@ -761,7 +766,8 @@ void DWARFRewriter::updateDebugInfo() {
 
   finalizeDebugSections(DIEBlder, DebugNamesTable, *Streamer, *ObjOS,
                         OffsetMap);
-  updateGdbIndexSection(OffsetMap, CUIndex);
+  GDBIndexSection.updateGdbIndexSection(OffsetMap, CUIndex,
+                                        *ARangesSectionWriter);
 }
 
 void DWARFRewriter::updateUnitDebugInfo(
@@ -1429,7 +1435,8 @@ void DWARFRewriter::updateLineTableOffsets(const MCAsmLayout &Layout) {
 }
 
 CUOffsetMap DWARFRewriter::finalizeTypeSections(DIEBuilder &DIEBlder,
-                                                DIEStreamer &Streamer) {
+                                                DIEStreamer &Streamer,
+                                                GDBIndex &GDBIndexSection) {
   // update TypeUnit DW_AT_stmt_list with new .debug_line information.
   auto updateLineTable = [&](const DWARFUnit &Unit) -> void {
     DIE *UnitDIE = DIEBlder.getUnitDIEbyUnit(Unit);
@@ -1449,8 +1456,8 @@ CUOffsetMap DWARFRewriter::finalizeTypeSections(DIEBuilder &DIEBlder,
       std::make_shared<raw_svector_ostream>(OutBuffer);
   const object::ObjectFile *File = BC.DwCtx->getDWARFObj().getFile();
   auto TheTriple = std::make_unique<Triple>(File->makeTriple());
-  std::unique_ptr<DIEStreamer> TypeStreamer =
-      createDIEStreamer(*TheTriple, *ObjOS, "TypeStreamer", DIEBlder, *this);
+  std::unique_ptr<DIEStreamer> TypeStreamer = createDIEStreamer(
+      *TheTriple, *ObjOS, "TypeStreamer", DIEBlder, *this, GDBIndexSection);
 
   // generate debug_info and CUMap
   CUOffsetMap CUMap;
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index d44faa55c456f..63bb86717bb14 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -5483,6 +5483,10 @@ def pg : Flag<["-"], "pg">, HelpText<"Enable mcount instrumentation">,
   MarshallingInfoFlag<CodeGenOpts<"InstrumentForProfiling">>;
 def pipe : Flag<["-", "--"], "pipe">,
   HelpText<"Use pipes between commands, when possible">;
+// Facebook T92898286
+def post_link_optimize : Flag<["--"], "post-link-optimize">,
+  HelpText<"Apply post-link optimizations using BOLT">;
+// End Facebook T92898286
 def prebind__all__twolevel__modules : Flag<["-"], "prebind_all_twolevel_modules">;
 def prebind : Flag<["-"], "prebind">;
 def preload : Flag<["-"], "preload">;
diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp
index b141e5f2adfab..f7611af5763ab 100644
--- a/clang/lib/Driver/ToolChains/Gnu.cpp
+++ b/clang/lib/Driver/ToolChains/Gnu.cpp
@@ -672,12 +672,41 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
     }
   }
 
+  // Facebook T92898286
+  if (Args.hasArg(options::OPT_post_link_optimize))
+    CmdArgs.push_back("-q");
+  // End Facebook T92898286
+
   Args.AddAllArgs(CmdArgs, options::OPT_T);
 
   const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath());
   C.addCommand(std::make_unique<Command>(JA, *this,
                                          ResponseFileSupport::AtFileCurCP(),
                                          Exec, CmdArgs, Inputs, Output));
+  // Facebook T92898286
+  if (!Args.hasArg(options::OPT_post_link_optimize) || !Output.isFilename())
+    return;
+
+  const char *MvExec = Args.MakeArgString(ToolChain.GetProgramPath("mv"));
+  ArgStringList MoveCmdArgs;
+  MoveCmdArgs.push_back(Output.getFilename());
+  const char *PreBoltBin =
+      Args.MakeArgString(Twine(Output.getFilename()) + ".pre-bolt");
+  MoveCmdArgs.push_back(PreBoltBin);
+  C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
+                                         MvExec, MoveCmdArgs, std::nullopt));
+
+  ArgStringList BoltCmdArgs;
+  const char *BoltExec =
+      Args.MakeArgString(ToolChain.GetProgramPath("llvm-bolt"));
+  BoltCmdArgs.push_back(PreBoltBin);
+  BoltCmdArgs.push_back("-reorder-blocks=reverse");
+  BoltCmdArgs.push_back("-update-debug-sections");
+  BoltCmdArgs.push_back("-o");
+  BoltCmdArgs.push_back(Output.getFilename());
+  C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
+                                         BoltExec, BoltCmdArgs, std::nullopt));
+  // End Facebook T92898286
 }
 
 void tools::gnutools::Assembler::ConstructJob(Compilation &C,
diff --git a/cross-project-tests/lit.cfg.py b/cross-project-tests/lit.cfg.py
index 774c4eaf4d976..619634578dfe6 100644
--- a/cross-project-tests/lit.cfg.py
+++ b/cross-project-tests/lit.cfg.py
@@ -84,7 +84,13 @@ def get_required_attr(config, attr_name):
 # use_clang() and use_lld() respectively, so set them to "", if needed.
 if not hasattr(config, "clang_src_dir"):
     config.clang_src_dir = ""
-llvm_config.use_clang(required=("clang" in config.llvm_enabled_projects))
+# Facebook T92898286
+should_test_bolt = get_required_attr(config, "llvm_test_bolt")
+if should_test_bolt:
+    llvm_config.use_clang(required=("clang" in config.llvm_enabled_projects), additional_flags=["--post-link-optimize"])
+else:
+    llvm_config.use_clang(required=("clang" in config.llvm_enabled_projects))
+# End Facebook T92898286
 
 if not hasattr(config, "lld_src_dir"):
     config.lld_src_dir = ""
@@ -293,3 +299,9 @@ def get_clang_default_dwarf_version_string(triple):
 # Allow 'REQUIRES: XXX-registered-target' in tests.
 for arch in config.targets_to_build:
     config.available_features.add(arch.lower() + "-registered-target")
+
+# Facebook T92898286
+# Ensure the user's PYTHONPATH is included.
+if "PYTHONPATH" in os.environ:
+    config.environment["PYTHONPATH"] = os.environ["PYTHONPATH"]
+# End Facebook T92898286
diff --git a/cross-project-tests/lit.site.cfg.py.in b/cross-project-tests/lit.site.cfg.py.in
index 39458dfc79afd..2d53cd377f033 100644
--- a/cross-project-tests/lit.site.cfg.py.in
+++ b/cross-project-tests/lit.site.cfg.py.in
@@ -21,6 +21,10 @@ config.mlir_src_root = "@MLIR_SOURCE_DIR@"
 
 config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@"
 
+# Facebook T92898286
+config.llvm_test_bolt = lit.util.pythonize_bool("@LLVM_TEST_BOLT@")
+# End Facebook T92898286
+
 import lit.llvm
 lit.llvm.initialize(lit_config, config)
 
diff --git a/lldb/test/API/lit.cfg.py b/lldb/test/API/lit.cfg.py
index d934349fe3ca3..d4a62c51458cc 100644
--- a/lldb/test/API/lit.cfg.py
+++ b/lldb/test/API/lit.cfg.py
@@ -248,6 +248,11 @@ def delete_module_cache(path):
 if is_configured("lldb_framework_dir"):
     dotest_cmd += ["--framework", config.lldb_framework_dir]
 
+# Facebook T92898286
+if is_configured("llvm_test_bolt"):
+    dotest_cmd += ["-E", '"--post-link-optimize"']
+# End Facebook T92898286
+
 if (
     "lldb-repro-capture" in config.available_features
     or "lldb-repro-replay" in config.available_features
diff --git a/lldb/test/API/lit.site.cfg.py.in b/lldb/test/API/lit.site.cfg.py.in
index 8b2d09ae41cd2..602f45759e48f 100644
--- a/lldb/test/API/lit.site.cfg.py.in
+++ b/lldb/test/API/lit.site.cfg.py.in
@@ -1,5 +1,9 @@
 @LIT_SITE_CFG_IN_HEADER@
 
+#Facebook T92898286
+import lit.util
+#End Facebook T92898286
+
 config.llvm_src_root = "@LLVM_SOURCE_DIR@"
 config.llvm_obj_root = "@LLVM_BINARY_DIR@"
 config.llvm_tools_dir = lit_config.substitute("@LLVM_TOOLS_DIR@")
@@ -39,6 +43,10 @@ config.libcxx_include_target_dir = "@LIBCXX_GENERATED_INCLUDE_TARGET_DIR@"
 config.lldb_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_LLDB@", "lldb-api")
 config.clang_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_CLANG@", "lldb-api")
 
+# Facebook T92898286
+config.llvm_test_bolt = lit.util.pythonize_bool("@LLVM_TEST_BOLT@")
+# End Facebook T92898286
+
 # Plugins
 lldb_build_intel_pt = '@LLDB_BUILD_INTEL_PT@'
 if lldb_build_intel_pt == '1':
diff --git a/lldb/test/Shell/helper/toolchain.py b/lldb/test/Shell/helper/toolchain.py
index 255955fc70d8c..7b7be06643166 100644
--- a/lldb/test/Shell/helper/toolchain.py
+++ b/lldb/test/Shell/helper/toolchain.py
@@ -165,6 +165,11 @@ def use_support_substitutions(config):
     if config.cmake_sysroot:
         host_flags += ["--sysroot={}".format(config.cmake_sysroot)]
 
+    # Facebook T92898286
+    if config.llvm_test_bolt:
+        host_flags += ["--post-link-optimize"]
+    # End Facebook T92898286
+
     host_flags = " ".join(host_flags)
     config.substitutions.append(("%clang_host", "%clang " + host_flags))
     config.substitutions.append(("%clangxx_host", "%clangxx " + host_flags))
diff --git a/lldb/test/Shell/lit.site.cfg.py.in b/lldb/test/Shell/lit.site.cfg.py.in
index b69e7bce1bc0b..fe8323734b7db 100644
--- a/lldb/test/Shell/lit.site.cfg.py.in
+++ b/lldb/test/Shell/lit.site.cfg.py.in
@@ -1,5 +1,10 @@
 @LIT_SITE_CFG_IN_HEADER@
 
+#Facebook T92898286
+import lit.util
+#End Facebook T92898286
+
+
 config.llvm_src_root = "@LLVM_SOURCE_DIR@"
 config.llvm_obj_root = "@LLVM_BINARY_DIR@"
 config.llvm_tools_dir = lit_config.substitute("@LLVM_TOOLS_DIR@")
@@ -31,6 +36,10 @@ config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@"
 config.lldb_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_LLDB@", "lldb-shell")
 config.clang_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_CLANG@", "lldb-shell")
 
+# Facebook T92898286
+config.llvm_test_bolt = lit.util.pythonize_bool("@LLVM_TEST_BOLT@")
+# End Facebook T92898286
+
 import lit.llvm
 lit.llvm.initialize(lit_config, config)
 
diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index 3208147101c0d..ecd36d2564e4f 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -709,6 +709,10 @@ set(LLVM_LIB_FUZZING_ENGINE "" CACHE PATH
 option(LLVM_USE_SPLIT_DWARF
   "Use -gsplit-dwarf when compiling llvm and --gdb-index when linking." OFF)
 
+# Facebook T92898286
+option(LLVM_TEST_BOLT "Enable BOLT testing in non-BOLT tests that use clang" OFF)
+# End Facebook T92898286
+
 # Define an option controlling whether we should build for 32-bit on 64-bit
 # platforms, where supported.
 if( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT (WIN32 OR ${CMAKE_SYSTEM_NAME} MATCHES "AIX"))
diff --git a/llvm/include/llvm/MC/MCFragment.h b/llvm/include/llvm/MC/MCFragment.h
index a9b19dc56f16a..256d98423e030 100644
--- a/llvm/include/llvm/MC/MCFragment.h
+++ b/llvm/include/llvm/MC/MCFragment.h
@@ -33,6 +33,7 @@ class MCFragment : public ilist_node_with_parent<MCFragment, MCSection> {
 public:
   enum FragmentType : uint8_t {
     FT_Align,
+    FT_NeverAlign,
     FT_Data,
     FT_CompactEncodedInst,
     FT_Fill,
@@ -344,6 +345,27 @@ class MCAlignFragment : public MCFragment {
   }
 };
 
+class MCNeverAlignFragment : public MCFragment {
+  /// The alignment the end of the next fragment should avoid.
+  unsigned Alignment;
+
+  /// When emitting Nops some subtargets have specific nop encodings.
+  const MCSubtargetInfo &STI;
+
+public:
+  MCNeverAlignFragment(unsigned Alignment, const MCSubtargetInfo &STI,
+                       MCSection *Sec = nullptr)
+      : MCFragment(FT_NeverAlign, false, Sec), Alignment(Alignment), STI(STI) {}
+
+  unsigned getAlignment() const { return Alignment; }
+
+  const MCSubtargetInfo &getSubtargetInfo() const { return STI; }
+
+  static bool classof(const MCFragment *F) {
+    return F->getKind() == MCFragment::FT_NeverAlign;
+  }
+};
+
 class MCFillFragment : public MCFragment {
   uint8_t ValueSize;
   /// Value to use for filling bytes.
diff --git a/llvm/include/llvm/MC/MCObjectStreamer.h b/llvm/include/llvm/MC/MCObjectStreamer.h
index e212d54613980..c7d760721e369 100644
--- a/llvm/include/llvm/MC/MCObjectStreamer.h
+++ b/llvm/include/llvm/MC/MCObjectStreamer.h
@@ -157,6 +157,8 @@ class MCObjectStreamer : public MCStreamer {
                             unsigned MaxBytesToEmit = 0) override;
   void emitCodeAlignment(Align ByteAlignment, const MCSubtargetInfo *STI,
                          unsigned MaxBytesToEmit = 0) override;
+  void emitNeverAlignCodeAtEnd(unsigned ByteAlignment,
+                               const MCSubtargetInfo &STI) override;
   void emitValueToOffset(const MCExpr *Offset, unsigned char Value,
                          SMLoc Loc) override;
   void emitDwarfLocDirective(unsigned FileNo, unsigned Line, unsigned Column,
diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h
index b7468cf70a664..dd813192d9ca0 100644
--- a/llvm/include/llvm/MC/MCStreamer.h
+++ b/llvm/include/llvm/MC/MCStreamer.h
@@ -887,6 +887,12 @@ class MCStreamer {
   virtual void emitCodeAlignment(Align Alignment, const MCSubtargetInfo *STI,
                                  unsigned MaxBytesToEmit = 0);
 
+  /// If the end of the fragment following this NeverAlign fragment ever gets
+  /// aligned to \p ByteAlignment, this fragment emits a single nop before the
+  /// following fragment to break this end-alignment.
+  virtual void emitNeverAlignCodeAtEnd(unsigned ByteAlignment,
+                                       const MCSubtargetInfo &STI);
+
   /// Emit some number of copies of \p Value until the byte offset \p
   /// Offset is reached.
   ///
diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp
index ad30b5ce9e631..62baeb93ea7d0 100644
--- a/llvm/lib/MC/MCAssembler.cpp
+++ b/llvm/lib/MC/MCAssembler.cpp
@@ -298,6 +298,43 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout, const MCFixup &Fixup,
   return IsResolved;
 }
 
+/// Check if the branch crosses the boundary.
+///
+/// \param StartAddr start address of the fused/unfused branch.
+/// \param Size size of the fused/unfused branch.
+/// \param BoundaryAlignment alignment requirement of the branch.
+/// \returns true if the branch cross the boundary.
+static bool mayCrossBoundary(uint64_t StartAddr, uint64_t Size,
+                             Align BoundaryAlignment) {
+  uint64_t EndAddr = StartAddr + Size;
+  return (StartAddr >> Log2(BoundaryAlignment)) !=
+         ((EndAddr - 1) >> Log2(BoundaryAlignment));
+}
+
+/// Check if the branch is against the boundary.
+///
+/// \param StartAddr start address of the fused/unfused branch.
+/// \param Size size of the fused/unfused branch.
+/// \param BoundaryAlignment alignment requirement of the branch.
+/// \returns true if the branch is against the boundary.
+static bool isAgainstBoundary(uint64_t StartAddr, uint64_t Size,
+                              Align BoundaryAlignment) {
+  uint64_t EndAddr = StartAddr + Size;
+  return (EndAddr & (BoundaryAlignment.value() - 1)) == 0;
+}
+
+/// Check if the branch needs padding.
+///
+/// \param StartAddr start address of the fused/unfused branch.
+/// \param Size size of the fused/unfused branch.
+/// \param BoundaryAlignment alignment requirement of the branch.
+/// \returns true if the branch needs padding.
+static bool needPadding(uint64_t StartAddr, uint64_t Size,
+                        Align BoundaryAlignment) {
+  return mayCrossBoundary(StartAddr, Size, BoundaryAlignment) ||
+         isAgainstBoundary(StartAddr, Size, BoundaryAlignment);
+}
+
 uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout,
                                           const MCFragment &F) const {
   assert(getBackendPtr() && "Requires assembler backend");
@@ -358,6 +395,41 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout,
     return Size;
   }
 
+  case MCFragment::FT_NeverAlign: {
+    // Disclaimer: NeverAlign fragment size depends on the size of its immediate
+    // successor, but NeverAlign need not be a MCRelaxableFragment.
+    // NeverAlign fragment size is recomputed if the successor is relaxed:
+    // - If RelaxableFragment is relaxed, it gets invalidated by marking its
+    // predecessor as LastValidFragment.
+    // - This forces the assembler to call MCAsmLayout::layoutFragment on that
+    // relaxable fragment, which in turn will always ask the predecessor to
+    // compute its size (see "computeFragmentSize(prev)" in layoutFragment).
+    //
+    // In short, the simplest way to ensure that computeFragmentSize() is sane
+    // is to establish the following rule: it should never examine fragments
+    // after the current fragment in the section. If we logically need to
+    // examine any fragment after the current fragment, we need to do that using
+    // relaxation, inside MCAssembler::layoutSectionOnce.
+    const MCNeverAlignFragment &NAF = cast<MCNeverAlignFragment>(F);
+    const MCFragment *NF = F.getNextNode();
+    uint64_t Offset = Layout.getFragmentOffset(&NAF);
+    size_t NextFragSize = 0;
+    if (const auto *NextFrag = dyn_cast<MCRelaxableFragment>(NF)) {
+      NextFragSize = NextFrag->getContents().size();
+    } else if (const auto *NextFrag = dyn_cast<MCDataFragment>(NF)) {
+      NextFragSize = NextFrag->getContents().size();
+    } else {
+      llvm_unreachable("Didn't find the expected fragment after NeverAlign");
+    }
+    // Check if the next fragment ends at the alignment we want to avoid.
+    if (isAgainstBoundary(Offset, NextFragSize, Align(NAF.getAlignment()))) {
+      // Avoid this alignment by introducing minimum nop.
+      assert(getBackend().getMinimumNopSize() != NAF.getAlignment());
+      return getBackend().getMinimumNopSize();
+    }
+    return 0;
+  }
+
   case MCFragment::FT_Org: {
     const MCOrgFragment &OF = cast<MCOrgFragment>(F);
     MCValue Value;
@@ -581,6 +653,15 @@ static void writeFragment(raw_ostream &OS, const MCAssembler &Asm,
     break;
   }
 
+  case MCFragment::FT_NeverAlign: {
+    const MCNeverAlignFragment &NAF = cast<MCNeverAlignFragment>(F);
+    if (!Asm.getBackend().writeNopData(OS, FragmentSize,
+                                       &NAF.getSubtargetInfo()))
+      report_fatal_error("unable to write nop sequence of " +
+                         Twine(FragmentSize) + " bytes");
+    break;
+  }
+
   case MCFragment::FT_Data:
     ++stats::EmittedDataFragments;
     OS << cast<MCDataFragment>(F).getContents();
@@ -1052,43 +1133,6 @@ bool MCAssembler::relaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) {
   return OldSize != LF.getContents().size();
 }
 
-/// Check if the branch crosses the boundary.
-///
-/// \param StartAddr start address of the fused/unfused branch.
-/// \param Size size of the fused/unfused branch.
-/// \param BoundaryAlignment alignment requirement of the branch.
-/// \returns true if the branch cross the boundary.
-static bool mayCrossBoundary(uint64_t StartAddr, uint64_t Size,
-                             Align BoundaryAlignment) {
-  uint64_t EndAddr = StartAddr + Size;
-  return (StartAddr >> Log2(BoundaryAlignment)) !=
-         ((EndAddr - 1) >> Log2(BoundaryAlignment));
-}
-
-/// Check if the branch is against the boundary.
-///
-/// \param StartAddr start address of the fused/unfused branch.
-/// \param Size size of the fused/unfused branch.
-/// \param BoundaryAlignment alignment requirement of the branch.
-/// \returns true if the branch is against the boundary.
-static bool isAgainstBoundary(uint64_t StartAddr, uint64_t Size,
-                              Align BoundaryAlignment) {
-  uint64_t EndAddr = StartAddr + Size;
-  return (EndAddr & (BoundaryAlignment.value() - 1)) == 0;
-}
-
-/// Check if the branch needs padding.
-///
-/// \param StartAddr start address of the fused/unfused branch.
-/// \param Size size of the fused/unfused branch.
-/// \param BoundaryAlignment alignment requirement of the branch.
-/// \returns true if the branch needs padding.
-static bool needPadding(uint64_t StartAddr, uint64_t Size,
-                        Align BoundaryAlignment) {
-  return mayCrossBoundary(StartAddr, Size, BoundaryAlignment) ||
-         isAgainstBoundary(StartAddr, Size, BoundaryAlignment);
-}
-
 bool MCAssembler::relaxBoundaryAlign(MCAsmLayout &Layout,
                                      MCBoundaryAlignFragment &BF) {
   // BoundaryAlignFragment that doesn't need to align any fragment should not be
diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp
index b065d03651c45..9add574b0e5b8 100644
--- a/llvm/lib/MC/MCExpr.cpp
+++ b/llvm/lib/MC/MCExpr.cpp
@@ -675,14 +675,8 @@ static void AttemptToFoldSymbolOffsetDifference(
     if (FA == FB) {
       Reverse = SA.getOffset() < SB.getOffset();
     } else if (!isa<MCDummyFragment>(FA)) {
-      // Testing FA < FB is slow. Use setLayoutOrder to speed up computation.
-      // The formal layout order will be finalized in MCAssembler::layout.
-      if (FA->getLayoutOrder() == 0 || FB->getLayoutOrder()== 0) {
-        unsigned LayoutOrder = 0;
-        for (MCFragment &F : *FA->getParent())
-          F.setLayoutOrder(++LayoutOrder);
-      }
-      Reverse = FA->getLayoutOrder() < FB->getLayoutOrder();
+      Reverse = std::find_if(std::next(FA->getIterator()), SecA.end(),
+                             [&](auto &I) { return &I == FB; }) != SecA.end();
     }
 
     uint64_t SAOffset = SA.getOffset(), SBOffset = SB.getOffset();
diff --git a/llvm/lib/MC/MCFragment.cpp b/llvm/lib/MC/MCFragment.cpp
index 7d0826802d0af..6e09caaab0957 100644
--- a/llvm/lib/MC/MCFragment.cpp
+++ b/llvm/lib/MC/MCFragment.cpp
@@ -274,6 +274,9 @@ void MCFragment::destroy() {
     case FT_Align:
       delete cast<MCAlignFragment>(this);
       return;
+    case FT_NeverAlign:
+      delete cast<MCNeverAlignFragment>(this);
+      return;
     case FT_Data:
       delete cast<MCDataFragment>(this);
       return;
@@ -342,6 +345,9 @@ LLVM_DUMP_METHOD void MCFragment::dump() const {
   OS << "<";
   switch (getKind()) {
   case MCFragment::FT_Align: OS << "MCAlignFragment"; break;
+  case MCFragment::FT_NeverAlign:
+    OS << "MCNeverAlignFragment";
+    break;
   case MCFragment::FT_Data:  OS << "MCDataFragment"; break;
   case MCFragment::FT_CompactEncodedInst:
     OS << "MCCompactEncodedInstFragment"; break;
@@ -381,6 +387,12 @@ LLVM_DUMP_METHOD void MCFragment::dump() const {
        << " MaxBytesToEmit:" << AF->getMaxBytesToEmit() << ">";
     break;
   }
+  case MCFragment::FT_NeverAlign: {
+    const MCNeverAlignFragment *NAF = cast<MCNeverAlignFragment>(this);
+    OS << "\n       ";
+    OS << " Alignment:" << NAF->getAlignment() << ">";
+    break;
+  }
   case MCFragment::FT_Data:  {
     const auto *DF = cast<MCDataFragment>(this);
     OS << "\n       ";
diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp
index 0ccade91677a4..117475b7dd90b 100644
--- a/llvm/lib/MC/MCObjectStreamer.cpp
+++ b/llvm/lib/MC/MCObjectStreamer.cpp
@@ -658,6 +658,11 @@ void MCObjectStreamer::emitCodeAlignment(Align Alignment,
   cast<MCAlignFragment>(getCurrentFragment())->setEmitNops(true, STI);
 }
 
+void MCObjectStreamer::emitNeverAlignCodeAtEnd(unsigned ByteAlignment,
+                                               const MCSubtargetInfo &STI) {
+  insert(new MCNeverAlignFragment(ByteAlignment, STI));
+}
+
 void MCObjectStreamer::emitValueToOffset(const MCExpr *Offset,
                                          unsigned char Value,
                                          SMLoc Loc) {
diff --git a/llvm/lib/MC/MCStreamer.cpp b/llvm/lib/MC/MCStreamer.cpp
index 199d865ea3496..a97cba6c89972 100644
--- a/llvm/lib/MC/MCStreamer.cpp
+++ b/llvm/lib/MC/MCStreamer.cpp
@@ -1235,6 +1235,8 @@ void MCStreamer::emitValueToAlignment(Align Alignment, int64_t Value,
                                       unsigned MaxBytesToEmit) {}
 void MCStreamer::emitCodeAlignment(Align Alignment, const MCSubtargetInfo *STI,
                                    unsigned MaxBytesToEmit) {}
+void MCStreamer::emitNeverAlignCodeAtEnd(unsigned ByteAlignment,
+                                         const MCSubtargetInfo &STI) {}
 void MCStreamer::emitValueToOffset(const MCExpr *Offset, unsigned char Value,
                                    SMLoc Loc) {}
 void MCStreamer::emitBundleAlignMode(Align Alignment) {}
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 6623106109316..6c6bd2cf31e86 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -1153,6 +1153,7 @@ class X86AsmParser : public MCTargetAsmParser {
   bool parseDirectiveArch();
   bool parseDirectiveNops(SMLoc L);
   bool parseDirectiveEven(SMLoc L);
+  bool parseDirectiveAvoidEndAlign(SMLoc L);
   bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
 
   /// CodeView FPO data directives.
@@ -4601,6 +4602,8 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
     return false;
   } else if (IDVal == ".nops")
     return parseDirectiveNops(DirectiveID.getLoc());
+  else if (IDVal == ".avoid_end_align")
+    return parseDirectiveAvoidEndAlign(DirectiveID.getLoc());
   else if (IDVal == ".even")
     return parseDirectiveEven(DirectiveID.getLoc());
   else if (IDVal == ".cv_fpo_proc")
@@ -4695,6 +4698,27 @@ bool X86AsmParser::parseDirectiveEven(SMLoc L) {
   return false;
 }
 
+/// Directive for NeverAlign fragment testing, not for general usage!
+/// parseDirectiveAvoidEndAlign
+///  ::= .avoid_end_align alignment
+bool X86AsmParser::parseDirectiveAvoidEndAlign(SMLoc L) {
+  int64_t Alignment = 0;
+  SMLoc AlignmentLoc;
+  AlignmentLoc = getTok().getLoc();
+  if (getParser().checkForValidSection() ||
+      getParser().parseAbsoluteExpression(Alignment))
+    return true;
+
+  if (getParser().parseEOL("unexpected token in directive"))
+    return true;
+
+  if (Alignment <= 0)
+    return Error(AlignmentLoc, "expected a positive alignment");
+
+  getParser().getStreamer().emitNeverAlignCodeAtEnd(Alignment, getSTI());
+  return false;
+}
+
 /// ParseDirectiveCode
 ///  ::= .code16 | .code32 | .code64
 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
diff --git a/llvm/test/MC/X86/directive-avoid_end_align.s b/llvm/test/MC/X86/directive-avoid_end_align.s
new file mode 100644
index 0000000000000..1d748401edc12
--- /dev/null
+++ b/llvm/test/MC/X86/directive-avoid_end_align.s
@@ -0,0 +1,208 @@
+# RUN: llvm-mc -triple=x86_64 -filetype=obj %s | llvm-objdump --no-show-raw-insn -d - | FileCheck %s
+# RUN: not llvm-mc -triple=x86_64 --defsym ERR=1 %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR
+
+# avoid_end_align has no effect since test doesn't end at alignment boundary:
+.avoid_end_align 64
+# CHECK-NOT: nop
+  testl %eax, %eax
+# CHECK: testl %eax, %eax
+  je  .LBB0
+
+.fill 58, 1, 0x00
+# NeverAlign followed by MCDataFragment:
+# avoid_end_align inserts nop because `test` would end at alignment boundary:
+.avoid_end_align 64
+# CHECK: 			3e: nop
+  testl %eax, %eax
+# CHECK-NEXT: 3f: testl %eax, %eax
+  je  .LBB0
+# CHECK-NEXT: 41: je
+.LBB0:
+  retq
+
+.p2align 6
+.L0:
+.nops 57
+  int3
+# NeverAlign followed by RelaxableFragment:
+.avoid_end_align 64
+# CHECK: 			ba: nop
+  cmpl $(.L1-.L0), %eax
+# CHECK-NEXT: bb: cmpl
+  je  .L0
+# CHECK-NEXT: c1: je
+.nops 65
+.L1:
+
+###############################################################################
+# Experiment A:
+# Check that NeverAlign doesn't introduce infinite loops in layout.
+# Control:
+# 1. NeverAlign fragment is not added,
+# 2. Short formats of cmp and jcc are used (3 and 2 bytes respectively),
+# 3. cmp and jcc are placed such that to be split by 64B alignment boundary.
+# 4. jcc would be relaxed to a longer format if at least one byte is added
+#    between .L10 and je itself, e.g. by adding a NeverAlign padding byte,
+#    or relaxing cmp instruction.
+# 5. cmp would be relaxed to a longer format if at least one byte is added
+#    between .L11 and .L12, e.g. due to relaxing jcc instruction.
+.p2align 6
+# CHECK:      140: int3
+.fill 2, 1, 0xcc
+.L10:
+.nops 122
+  int3
+# CHECK:      1bc: int3
+# no avoid_end_align here
+# CHECK-NOT:  nop
+  cmp $(.L12-.L11), %eax
+# CHECK:      1bd: cmpl
+.L11:
+  je  .L10
+# CHECK-NEXT: 1c0: je
+.nops 125
+.L12:
+
+# Experiment:
+# Same setup as control, except NeverAlign fragment is added before cmp.
+# Expected effect:
+# 1. NeverAlign pads cmp+jcc by one byte since cmp and jcc are split by a 64B
+#    alignment boundary,
+# 2. This extra byte forces jcc relaxation to a longer format (Control rule #4),
+# 3. This results in an cmp relaxation (Control rule #5),
+# 4. Which in turn makes NeverAlign fragment unnecessary as cmp and jcc
+#    are no longer split by an alignment boundary (cmp crosses the boundary).
+# 5. NeverAlign padding is removed.
+# 6. cmp and jcc instruction remain in relaxed form.
+# 7. Relaxation converges, layout succeeds.
+.p2align 6
+# CHECK:      240: int3
+.fill 2, 1, 0xcc
+.L20:
+.nops 122
+  int3
+# CHECK:      2bc: int3
+.avoid_end_align 64
+# CHECK-NOT: 	nop
+  cmp $(.L22-.L21), %eax
+# CHECK-NEXT: 2bd: cmpl
+.L21:
+  je  .L20
+# CHECK-NEXT: 2c3: je
+.nops 125
+.L22:
+
+###############################################################################
+# Experiment B: similar to exp A, but we check that once NeverAlign padding is
+# removed from the layout (exp A, experiment step 5), the increased distance
+# between the symbols L33 and L34 triggers the relaxation of instruction at
+# label L32.
+#
+# Control 1: using a one-byte instruction at L33 (site of NeverAlign) leads to
+# steps 2-3 of exp A, experiment:
+# 2. This extra byte forces jcc relaxation to a longer format (Control rule #4),
+# 3. This results in an cmp relaxation (Control rule #5),
+# => short cmp under L32
+.p2align 6
+# CHECK:      380: int3
+.fill 2, 1, 0xcc
+.L30:
+.nops 122
+  int3
+# CHECK:      3fc: int3
+  hlt
+#.avoid_end_align 64
+.L33:
+  cmp $(.L32-.L31), %eax
+# CHECK:      3fe: cmpl
+.L31:
+  je  .L30
+# CHECK-NEXT: 404: je
+.nops 114
+.p2align 1
+  int3
+  int3
+# CHECK:      47c: int3
+.L34:
+.nops 9
+.L32:
+  cmp $(.L33-.L34), %eax
+# CHECK:      487: cmp
+# note that the size of cmp is 48a-487 == 3 bytes (distance is exactly -128)
+  int3
+# CHECK-NEXT: 48a: int3
+
+# Control 2: leaving out a byte at L43 (site of NeverAlign), plus
+# relaxed jcc and cmp leads to a relaxed cmp under L42 (-129 as cmp's immediate)
+.p2align 6
+# CHECK:      4c0: int3
+.fill 2, 1, 0xcc
+.L40:
+.nops 122
+  int3
+# CHECK:      53c: int3
+#  int3
+#.avoid_end_align 64
+.L43:
+  cmp $(.L42-.L41+0x100), %eax
+# CHECK:      53d: cmpl
+.L41:
+  je  .L40+0x100
+# CHECK-NEXT: 543: je
+.nops 114
+.p2align 1
+  int3
+  int3
+# CHECK:      5bc: int3
+.L44:
+.nops 9
+.L42:
+  cmp $(.L43-.L44), %eax
+# CHECK:      5c7: cmp
+# note that the size of cmp is 5cd-5c7 == 6 bytes (distance is exactly -129)
+  int3
+# CHECK-NEXT: 5cd: int3
+
+# Experiment
+# Checking if removing NeverAlign padding at L53 as a result of alignment and
+# relaxation of cmp and jcc following it (see exp A), thus reproducing the case
+# in Control 2 (getting a relaxed cmp under L52), is handled correctly.
+.p2align 6
+# CHECK:      600: int3
+.fill 2, 1, 0xcc
+.L50:
+.nops 122
+  int3
+# CHECK:      67c: int3
+.avoid_end_align 64
+.L53:
+# CHECK-NOT: 	nop
+  cmp $(.L52-.L51), %eax
+# CHECK-NEXT: 67d: cmpl
+.L51:
+  je  .L50
+# CHECK-NEXT: 683: je
+.nops 114
+.p2align 1
+  int3
+  int3
+# CHECK:      6fc: int3
+.L54:
+.nops 9
+.L52:
+  cmp $(.L53-.L54), %eax
+# CHECK:      707: cmp
+# note that the size of cmp is 70d-707 == 6 bytes (distance is exactly -129)
+  int3
+# CHECK-NEXT: 70d: int3
+
+.ifdef ERR
+# ERR: {{.*}}.s:[[#@LINE+1]]:17: error: unknown token in expression
+.avoid_end_align
+# ERR: {{.*}}.s:[[#@LINE+1]]:18: error: expected absolute expression
+.avoid_end_align x
+# ERR: {{.*}}.s:[[#@LINE+1]]:18: error: expected a positive alignment
+.avoid_end_align 0
+# ERR: {{.*}}.s:[[#@LINE+1]]:20: error: unexpected token in directive
+.avoid_end_align 64, 0
+.endif

>From 92212c96ea169d26ac10bf8d750539bc5dd72c49 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Mon, 17 Jun 2024 15:39:02 -0700
Subject: [PATCH 02/21] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index f0fcb1c130002..2bca83c9d11ec 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -421,6 +421,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
   StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size());
 
   for (auto& [_, BF] : BC.getBinaryFunctions()) {
+    if (!ProfiledFunctions.count(&BF))
+      continue;
     StrictBinaryFunctionHashes[BF.getHash()] = &BF;
   }
 

>From 2497922ccc46e3189870563b1fe819b67172778d Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Mon, 17 Jun 2024 15:39:39 -0700
Subject: [PATCH 03/21] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index 2bca83c9d11ec..56474a67307ed 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -417,10 +417,10 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
 
   // Uses the strict hash of profiled and binary functions to match functions
   // that are not matched by name or common name.
-  std::unordered_map<size_t, BinaryFunction*> StrictBinaryFunctionHashes;
+  std::unordered_map<size_t, BinaryFunction *> StrictBinaryFunctionHashes;
   StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size());
 
-  for (auto& [_, BF] : BC.getBinaryFunctions()) {
+  for (auto &[_, BF] : BC.getBinaryFunctions()) {
     if (!ProfiledFunctions.count(&BF))
       continue;
     StrictBinaryFunctionHashes[BF.getHash()] = &BF;
@@ -428,7 +428,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
 
   for (auto YamlBF : YamlBP.Functions) {
     auto It = StrictBinaryFunctionHashes.find(YamlBF.Hash);
-    if (It != StrictBinaryFunctionHashes.end() && !ProfiledFunctions.count(It->second)) {
+    if (It != StrictBinaryFunctionHashes.end() &&
+        !ProfiledFunctions.count(It->second)) {
       auto *BF = It->second;
       matchProfileToFunction(YamlBF, *BF);
     }

>From 8e7b2229a69c3795e723404c56e0d4298eef412a Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Mon, 17 Jun 2024 15:55:58 -0700
Subject: [PATCH 04/21] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp       | 2 +-
 bolt/test/X86/profile-passthrough-block.test | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index 56474a67307ed..779d60bce3b66 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -421,7 +421,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
   StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size());
 
   for (auto &[_, BF] : BC.getBinaryFunctions()) {
-    if (!ProfiledFunctions.count(&BF))
+    if (ProfiledFunctions.count(&BF))
       continue;
     StrictBinaryFunctionHashes[BF.getHash()] = &BF;
   }
diff --git a/bolt/test/X86/profile-passthrough-block.test b/bolt/test/X86/profile-passthrough-block.test
index 1b875885260dc..ed2a8117ddfc4 100644
--- a/bolt/test/X86/profile-passthrough-block.test
+++ b/bolt/test/X86/profile-passthrough-block.test
@@ -57,7 +57,7 @@ header:
 functions:
   - name:            main
     fid:             0
-    hash:            0x0000000000000000
+    hash:            0x0000000000000001
     exec:            1
     nblocks:         6
     blocks:

>From ef5f0dac9185dbb7a62345938d4f309c3379a85d Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Mon, 17 Jun 2024 15:58:22 -0700
Subject: [PATCH 05/21] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index 779d60bce3b66..e3d30bfdb74e4 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -427,6 +427,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
   }
 
   for (auto YamlBF : YamlBP.Functions) {
+    if (YamlBF.Used)
+      continue;
     auto It = StrictBinaryFunctionHashes.find(YamlBF.Hash);
     if (It != StrictBinaryFunctionHashes.end() &&
         !ProfiledFunctions.count(It->second)) {

>From 41ce2897a445e47dfe685da66b4af080824e78ed Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Mon, 17 Jun 2024 16:00:27 -0700
Subject: [PATCH 06/21] spr amend

Created using spr 1.3.4
---
 bolt/test/X86/profile-passthrough-block.test | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bolt/test/X86/profile-passthrough-block.test b/bolt/test/X86/profile-passthrough-block.test
index ed2a8117ddfc4..1b875885260dc 100644
--- a/bolt/test/X86/profile-passthrough-block.test
+++ b/bolt/test/X86/profile-passthrough-block.test
@@ -57,7 +57,7 @@ header:
 functions:
   - name:            main
     fid:             0
-    hash:            0x0000000000000001
+    hash:            0x0000000000000000
     exec:            1
     nblocks:         6
     blocks:

>From 99cf8918e945356d12b5d29fe8174c610f305559 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Tue, 18 Jun 2024 14:19:45 -0700
Subject: [PATCH 07/21] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp        | 51 +++++++-------
 .../X86/hashing-based-function-matching.test  | 67 +++++++++++++++++++
 2 files changed, 93 insertions(+), 25 deletions(-)
 create mode 100644 bolt/test/X86/hashing-based-function-matching.test

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index e3d30bfdb74e4..78d46eea5c728 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -363,9 +363,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
     return Profile.Hash == static_cast<uint64_t>(BF.getHash());
   };
 
-  // We have to do 2 passes since LTO introduces an ambiguity in function
-  // names. The first pass assigns profiles that match 100% by name and
-  // by hash. The second pass allows name ambiguity for LTO private functions.
+  // This first pass assigns profiles that match 100% by name and by hash.
   for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) {
     if (!BF)
       continue;
@@ -383,6 +381,30 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
       matchProfileToFunction(YamlBF, Function);
   }
 
+  // Uses the strict hash of profiled and binary functions to match functions
+  // that are not matched by name or common name.
+  std::unordered_map<size_t, BinaryFunction *> StrictBinaryFunctionHashes;
+  StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size());
+
+  for (auto &[_, BF] : BC.getBinaryFunctions()) {
+    if (ProfiledFunctions.count(&BF))
+      continue;
+    BF.computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction);
+    StrictBinaryFunctionHashes[BF.getHash()] = &BF;
+  }
+
+  for (auto YamlBF : YamlBP.Functions) {
+    if (YamlBF.Used)
+      continue;
+    auto It = StrictBinaryFunctionHashes.find(YamlBF.Hash);
+    if (It != StrictBinaryFunctionHashes.end() &&
+        !ProfiledFunctions.count(It->second)) {
+      auto *BF = It->second;
+      matchProfileToFunction(YamlBF, *BF);
+    }
+  }
+
+  // This second pass allows name ambiguity for LTO private functions.
   for (const auto &[CommonName, LTOProfiles] : LTOCommonNameMap) {
     if (!LTOCommonNameFunctionMap.contains(CommonName))
       continue;
@@ -415,33 +437,12 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
     if (!YamlBF.Used && BF && !ProfiledFunctions.count(BF))
       matchProfileToFunction(YamlBF, *BF);
 
-  // Uses the strict hash of profiled and binary functions to match functions
-  // that are not matched by name or common name.
-  std::unordered_map<size_t, BinaryFunction *> StrictBinaryFunctionHashes;
-  StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size());
-
-  for (auto &[_, BF] : BC.getBinaryFunctions()) {
-    if (ProfiledFunctions.count(&BF))
-      continue;
-    StrictBinaryFunctionHashes[BF.getHash()] = &BF;
-  }
-
-  for (auto YamlBF : YamlBP.Functions) {
-    if (YamlBF.Used)
-      continue;
-    auto It = StrictBinaryFunctionHashes.find(YamlBF.Hash);
-    if (It != StrictBinaryFunctionHashes.end() &&
-        !ProfiledFunctions.count(It->second)) {
-      auto *BF = It->second;
-      matchProfileToFunction(YamlBF, *BF);
-    }
-  }
-
   for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions)
     if (!YamlBF.Used && opts::Verbosity >= 1)
       errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name
              << '\n';
 
+
   // Set for parseFunctionProfile().
   NormalizeByInsnCount = usesEvent("cycles") || usesEvent("instructions");
   NormalizeByCalls = usesEvent("branches");
diff --git a/bolt/test/X86/hashing-based-function-matching.test b/bolt/test/X86/hashing-based-function-matching.test
new file mode 100644
index 0000000000000..3c2cd834f90bf
--- /dev/null
+++ b/bolt/test/X86/hashing-based-function-matching.test
@@ -0,0 +1,67 @@
+## Test YAMLProfileReader support for pass-through blocks in non-matching edges:
+## match the profile edge A -> C to the CFG with blocks A -> B -> C.
+
+# REQUIRES: system-linux
+# RUN: split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/main.s -o %t.o
+# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib
+# RUN: llvm-bolt %t.exe -o %t.out --data %t/yaml -v=1 \
+# RUN:   --print-cfg 2>&1 | FileCheck %s
+
+# CHECK: Binary Function "main" after building cfg
+# CHECK: Profile Acc : 100.0%
+# CHECK-NOT: BOLT-WARNING: no successor for block .LFT0 that matches index 3 or block .Ltmp0
+
+#--- main.s
+.globl main
+.type	main, @function
+main:
+  .cfi_startproc
+.LBB00:
+  pushq   %rbp
+  movq    %rsp, %rbp
+  subq    $16, %rsp
+  testq   %rax, %rax
+  js      .LBB03
+.LBB01:
+  jne     .LBB04
+.LBB02:
+  nop
+.LBB03:
+  xorl    %eax, %eax
+  addq    $16, %rsp
+  popq    %rbp
+  retq
+.LBB04:
+  xorl    %eax, %eax
+  addq    $16, %rsp
+  popq    %rbp
+  retq
+## For relocations against .text
+.LBB05:
+  call exit
+  .cfi_endproc
+  .size	main, .-main
+
+#--- yaml
+---
+header:
+  profile-version: 1
+  binary-name:     'profile-passthrough-block.s.tmp.exe'
+  binary-build-id: '<unknown>'
+  profile-flags:   [ lbr ]
+  profile-origin:  branch profile reader
+  profile-events:  ''
+  dfs-order:       false
+  hash-func:       xxh3
+functions:
+  - name:            main2
+    fid:             0
+    hash:            0x72f82deaa6fe65fb
+    exec:            1
+    nblocks:         6
+    blocks:
+      - bid:             1
+        insns:           1
+        succ:            [ { bid: 3, cnt: 1} ]
+...

>From e11820ff0121d9481a24b825f1910935e6d5789d Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Thu, 20 Jun 2024 08:28:02 -0700
Subject: [PATCH 08/21] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp        | 42 +++++++++----------
 .../X86/hashing-based-function-matching.test  |  4 +-
 2 files changed, 22 insertions(+), 24 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index 78d46eea5c728..7209168c0f81d 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -363,6 +363,11 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
     return Profile.Hash == static_cast<uint64_t>(BF.getHash());
   };
 
+  // Computes hash for binary functions.
+  if (!opts::IgnoreHash)
+    for (auto &[_, BF] : BC.getBinaryFunctions())
+      BF.computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction);
+
   // This first pass assigns profiles that match 100% by name and by hash.
   for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) {
     if (!BF)
@@ -372,35 +377,29 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
     // the profile.
     Function.setExecutionCount(BinaryFunction::COUNT_NO_PROFILE);
 
-    // Recompute hash once per function.
-    if (!opts::IgnoreHash)
-      Function.computeHash(YamlBP.Header.IsDFSOrder,
-                           YamlBP.Header.HashFunction);
-
     if (profileMatches(YamlBF, Function))
       matchProfileToFunction(YamlBF, Function);
   }
 
   // Uses the strict hash of profiled and binary functions to match functions
   // that are not matched by name or common name.
-  std::unordered_map<size_t, BinaryFunction *> StrictBinaryFunctionHashes;
-  StrictBinaryFunctionHashes.reserve(BC.getBinaryFunctions().size());
+  if (!opts::IgnoreHash) {
+    std::unordered_map<size_t, BinaryFunction *> StrictHashToBF;
+    StrictHashToBF.reserve(BC.getBinaryFunctions().size());
 
-  for (auto &[_, BF] : BC.getBinaryFunctions()) {
-    if (ProfiledFunctions.count(&BF))
-      continue;
-    BF.computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction);
-    StrictBinaryFunctionHashes[BF.getHash()] = &BF;
-  }
+    for (auto &[_, BF] : BC.getBinaryFunctions()) {
+      StrictHashToBF[BF.getHash()] = &BF;
+    }
 
-  for (auto YamlBF : YamlBP.Functions) {
-    if (YamlBF.Used)
-      continue;
-    auto It = StrictBinaryFunctionHashes.find(YamlBF.Hash);
-    if (It != StrictBinaryFunctionHashes.end() &&
-        !ProfiledFunctions.count(It->second)) {
-      auto *BF = It->second;
-      matchProfileToFunction(YamlBF, *BF);
+    for (auto YamlBF : YamlBP.Functions) {
+      if (YamlBF.Used)
+        continue;
+      auto It = StrictHashToBF.find(YamlBF.Hash);
+      if (It != StrictHashToBF.end() &&
+          !ProfiledFunctions.count(It->second)) {
+        auto *BF = It->second;
+        matchProfileToFunction(YamlBF, *BF);
+      }
     }
   }
 
@@ -442,7 +441,6 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
       errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name
              << '\n';
 
-
   // Set for parseFunctionProfile().
   NormalizeByInsnCount = usesEvent("cycles") || usesEvent("instructions");
   NormalizeByCalls = usesEvent("branches");
diff --git a/bolt/test/X86/hashing-based-function-matching.test b/bolt/test/X86/hashing-based-function-matching.test
index 3c2cd834f90bf..833450047284a 100644
--- a/bolt/test/X86/hashing-based-function-matching.test
+++ b/bolt/test/X86/hashing-based-function-matching.test
@@ -5,7 +5,7 @@
 # RUN: split-file %s %t
 # RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/main.s -o %t.o
 # RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib
-# RUN: llvm-bolt %t.exe -o %t.out --data %t/yaml -v=1 \
+# RUN: llvm-bolt %t.exe -o %t.out --data %t/yaml -v=2 \
 # RUN:   --print-cfg 2>&1 | FileCheck %s
 
 # CHECK: Binary Function "main" after building cfg
@@ -57,7 +57,7 @@ header:
 functions:
   - name:            main2
     fid:             0
-    hash:            0x72f82deaa6fe65fb
+    hash:            0x72F82DEAA6FE65FB
     exec:            1
     nblocks:         6
     blocks:

>From 83858e2fcc565673bf10b030199d30153ba015b3 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Thu, 20 Jun 2024 10:45:17 -0700
Subject: [PATCH 09/21] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index 7209168c0f81d..124af2c25b9e4 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -329,6 +329,9 @@ Error YAMLProfileReader::preprocessProfile(BinaryContext &BC) {
 }
 
 bool YAMLProfileReader::mayHaveProfileData(const BinaryFunction &BF) {
+  if (!opts::IgnoreHash) {
+    return true;
+  }
   for (StringRef Name : BF.getNames())
     if (ProfileFunctionNames.contains(Name))
       return true;

>From 73871266cf7709ae335be22428290cdc1efe410b Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Thu, 20 Jun 2024 14:35:45 -0700
Subject: [PATCH 10/21] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp        | 41 ++++++++++++++++---
 bolt/lib/Rewrite/RewriteInstance.cpp          |  6 ++-
 bolt/lib/Utils/CommandLineOpts.cpp            |  5 +++
 .../X86/hashing-based-function-matching.test  |  6 +--
 4 files changed, 47 insertions(+), 11 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index 124af2c25b9e4..603620aa483d7 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -22,6 +22,7 @@ namespace opts {
 extern cl::opt<unsigned> Verbosity;
 extern cl::OptionCategory BoltOptCategory;
 extern cl::opt<bool> InferStaleProfile;
+extern cl::opt<bool> MatchingFunctionsWithHash;
 
 static llvm::cl::opt<bool>
     IgnoreHash("profile-ignore-hash",
@@ -329,9 +330,6 @@ Error YAMLProfileReader::preprocessProfile(BinaryContext &BC) {
 }
 
 bool YAMLProfileReader::mayHaveProfileData(const BinaryFunction &BF) {
-  if (!opts::IgnoreHash) {
-    return true;
-  }
   for (StringRef Name : BF.getNames())
     if (ProfileFunctionNames.contains(Name))
       return true;
@@ -366,10 +364,26 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
     return Profile.Hash == static_cast<uint64_t>(BF.getHash());
   };
 
+  uint64_t MatchedWithExactName = 0;
+  uint64_t MatchedWithHash = 0;
+  uint64_t MatchedWithLTOCommonName = 0;
+
   // Computes hash for binary functions.
-  if (!opts::IgnoreHash)
+  if (opts::MatchingFunctionsWithHash) {
     for (auto &[_, BF] : BC.getBinaryFunctions())
       BF.computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction);
+  }
+  else {
+    for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) {
+      if (!BF)
+        continue;
+      BinaryFunction &Function = *BF;
+
+      if (!opts::IgnoreHash)
+        Function.computeHash(YamlBP.Header.IsDFSOrder,
+                             YamlBP.Header.HashFunction);
+    }
+  }
 
   // This first pass assigns profiles that match 100% by name and by hash.
   for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) {
@@ -380,8 +394,10 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
     // the profile.
     Function.setExecutionCount(BinaryFunction::COUNT_NO_PROFILE);
 
-    if (profileMatches(YamlBF, Function))
+    if (profileMatches(YamlBF, Function)) {
       matchProfileToFunction(YamlBF, Function);
+      ++MatchedWithExactName;
+    }
   }
 
   // Uses the strict hash of profiled and binary functions to match functions
@@ -402,6 +418,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
           !ProfiledFunctions.count(It->second)) {
         auto *BF = It->second;
         matchProfileToFunction(YamlBF, *BF);
+        ++MatchedWithHash;
       }
     }
   }
@@ -420,6 +437,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
       for (BinaryFunction *BF : Functions) {
         if (!ProfiledFunctions.count(BF) && profileMatches(*YamlBF, *BF)) {
           matchProfileToFunction(*YamlBF, *BF);
+          ++MatchedWithLTOCommonName;
           return true;
         }
       }
@@ -431,8 +449,10 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
     // partially.
     if (!ProfileMatched && LTOProfiles.size() == 1 && Functions.size() == 1 &&
         !LTOProfiles.front()->Used &&
-        !ProfiledFunctions.count(*Functions.begin()))
+        !ProfiledFunctions.count(*Functions.begin())) {
       matchProfileToFunction(*LTOProfiles.front(), **Functions.begin());
+      ++MatchedWithLTOCommonName;
+    }
   }
 
   for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs))
@@ -444,6 +464,15 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
       errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name
              << '\n';
 
+  if (opts::Verbosity >= 2) {
+    outs() << "BOLT-INFO: matched " << MatchedWithExactName
+      << " functions with identical names\n";
+    outs() << "BOLT-INFO: matched " << MatchedWithHash
+      << " functions with hash\n";
+    outs() << "BOLT-INFO: matched " << MatchedWithLTOCommonName
+      << " functions with matching LTO common names\n";
+  }
+
   // Set for parseFunctionProfile().
   NormalizeByInsnCount = usesEvent("cycles") || usesEvent("instructions");
   NormalizeByCalls = usesEvent("branches");
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 1a3a8af21d81b..c157e45e1d586 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -82,6 +82,7 @@ extern cl::opt<bool> Hugify;
 extern cl::opt<bool> Instrument;
 extern cl::opt<JumpTableSupportLevel> JumpTables;
 extern cl::opt<bool> KeepNops;
+extern cl::opt<bool> MatchingFunctionsWithHash;
 extern cl::list<std::string> ReorderData;
 extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions;
 extern cl::opt<bool> TerminalTrap;
@@ -2982,6 +2983,9 @@ void RewriteInstance::selectFunctionsToProcess() {
     if (mustSkip(Function))
       return false;
 
+    if (opts::MatchingFunctionsWithHash)
+      return true;
+
     // If the list is not empty, only process functions from the list.
     if (!opts::ForceFunctionNames.empty() || !ForceFunctionsNR.empty()) {
       // Regex check (-funcs and -funcs-file options).
@@ -2998,6 +3002,7 @@ void RewriteInstance::selectFunctionsToProcess() {
     }
 
     if (opts::Lite) {
+
       // Forcibly include functions specified in the -function-order file.
       if (opts::ReorderFunctions == ReorderFunctions::RT_USER) {
         for (const StringRef Name : Function.getNames())
@@ -3008,7 +3013,6 @@ void RewriteInstance::selectFunctionsToProcess() {
             if (ReorderFunctionsLTOCommonSet.contains(*LTOCommonName))
               return true;
       }
-
       if (ProfileReader && !ProfileReader->mayHaveProfileData(Function))
         return false;
 
diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp
index 41c89bc8aeba4..58d44a98b6218 100644
--- a/bolt/lib/Utils/CommandLineOpts.cpp
+++ b/bolt/lib/Utils/CommandLineOpts.cpp
@@ -128,6 +128,11 @@ cl::opt<bool>
                cl::desc("instrument code to generate accurate profile data"),
                cl::cat(BoltOptCategory));
 
+cl::opt<bool> MatchingFunctionsWithHash("stale-matching-matching-functions-with-hash",
+  cl::desc("Matching functions using hash"),
+  cl::Hidden,
+  cl::cat(BoltCategory));
+
 cl::opt<std::string>
 OutputFilename("o",
   cl::desc("<output file>"),
diff --git a/bolt/test/X86/hashing-based-function-matching.test b/bolt/test/X86/hashing-based-function-matching.test
index 833450047284a..83819f4c539d3 100644
--- a/bolt/test/X86/hashing-based-function-matching.test
+++ b/bolt/test/X86/hashing-based-function-matching.test
@@ -6,11 +6,9 @@
 # RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/main.s -o %t.o
 # RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib
 # RUN: llvm-bolt %t.exe -o %t.out --data %t/yaml -v=2 \
-# RUN:   --print-cfg 2>&1 | FileCheck %s
+# RUN:   --print-cfg --stale-matching-matching-functions-with-hash 2>&1 --profile-ignore-hash=0 | FileCheck %s
 
-# CHECK: Binary Function "main" after building cfg
-# CHECK: Profile Acc : 100.0%
-# CHECK-NOT: BOLT-WARNING: no successor for block .LFT0 that matches index 3 or block .Ltmp0
+# CHECK: BOLT-INFO: matched 1 functions with hash
 
 #--- main.s
 .globl main

>From baec71c1a91b3ac7be09ee9774c8542430320ad0 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Thu, 20 Jun 2024 14:37:10 -0700
Subject: [PATCH 11/21] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index 603620aa483d7..dcc5d23745cb2 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -406,11 +406,10 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
     std::unordered_map<size_t, BinaryFunction *> StrictHashToBF;
     StrictHashToBF.reserve(BC.getBinaryFunctions().size());
 
-    for (auto &[_, BF] : BC.getBinaryFunctions()) {
+    for (auto &[_, BF] : BC.getBinaryFunctions())
       StrictHashToBF[BF.getHash()] = &BF;
-    }
 
-    for (auto YamlBF : YamlBP.Functions) {
+    for (yaml::bolt::BinaryFunctionProfile& YamlBF : YamlBP.Functions) {
       if (YamlBF.Used)
         continue;
       auto It = StrictHashToBF.find(YamlBF.Hash);

>From ff68ace13b1ece9c93322a3f1833f574b493c13b Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Thu, 20 Jun 2024 14:47:04 -0700
Subject: [PATCH 12/21] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 2 +-
 bolt/lib/Rewrite/RewriteInstance.cpp   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index dcc5d23745cb2..d04d54e1a9170 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -402,7 +402,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
 
   // Uses the strict hash of profiled and binary functions to match functions
   // that are not matched by name or common name.
-  if (!opts::IgnoreHash) {
+  if (opts::MatchingFunctionsWithHash) {
     std::unordered_map<size_t, BinaryFunction *> StrictHashToBF;
     StrictHashToBF.reserve(BC.getBinaryFunctions().size());
 
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index c157e45e1d586..7afc7c920318c 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -3002,7 +3002,6 @@ void RewriteInstance::selectFunctionsToProcess() {
     }
 
     if (opts::Lite) {
-
       // Forcibly include functions specified in the -function-order file.
       if (opts::ReorderFunctions == ReorderFunctions::RT_USER) {
         for (const StringRef Name : Function.getNames())
@@ -3013,6 +3012,7 @@ void RewriteInstance::selectFunctionsToProcess() {
             if (ReorderFunctionsLTOCommonSet.contains(*LTOCommonName))
               return true;
       }
+
       if (ProfileReader && !ProfileReader->mayHaveProfileData(Function))
         return false;
 

>From ef2eaaa3810e26f97088981450dee5b623ca1143 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Thu, 20 Jun 2024 14:48:11 -0700
Subject: [PATCH 13/21] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 14 ++++++--------
 bolt/lib/Utils/CommandLineOpts.cpp     |  8 ++++----
 2 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index d04d54e1a9170..fd1a02ff63a3b 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -372,8 +372,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
   if (opts::MatchingFunctionsWithHash) {
     for (auto &[_, BF] : BC.getBinaryFunctions())
       BF.computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction);
-  }
-  else {
+  } else {
     for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) {
       if (!BF)
         continue;
@@ -409,12 +408,11 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
     for (auto &[_, BF] : BC.getBinaryFunctions())
       StrictHashToBF[BF.getHash()] = &BF;
 
-    for (yaml::bolt::BinaryFunctionProfile& YamlBF : YamlBP.Functions) {
+    for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
       if (YamlBF.Used)
         continue;
       auto It = StrictHashToBF.find(YamlBF.Hash);
-      if (It != StrictHashToBF.end() &&
-          !ProfiledFunctions.count(It->second)) {
+      if (It != StrictHashToBF.end() && !ProfiledFunctions.count(It->second)) {
         auto *BF = It->second;
         matchProfileToFunction(YamlBF, *BF);
         ++MatchedWithHash;
@@ -465,11 +463,11 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
 
   if (opts::Verbosity >= 2) {
     outs() << "BOLT-INFO: matched " << MatchedWithExactName
-      << " functions with identical names\n";
+           << " functions with identical names\n";
     outs() << "BOLT-INFO: matched " << MatchedWithHash
-      << " functions with hash\n";
+           << " functions with hash\n";
     outs() << "BOLT-INFO: matched " << MatchedWithLTOCommonName
-      << " functions with matching LTO common names\n";
+           << " functions with matching LTO common names\n";
   }
 
   // Set for parseFunctionProfile().
diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp
index 58d44a98b6218..8144af29cba4e 100644
--- a/bolt/lib/Utils/CommandLineOpts.cpp
+++ b/bolt/lib/Utils/CommandLineOpts.cpp
@@ -128,10 +128,10 @@ cl::opt<bool>
                cl::desc("instrument code to generate accurate profile data"),
                cl::cat(BoltOptCategory));
 
-cl::opt<bool> MatchingFunctionsWithHash("stale-matching-matching-functions-with-hash",
-  cl::desc("Matching functions using hash"),
-  cl::Hidden,
-  cl::cat(BoltCategory));
+cl::opt<bool>
+    MatchingFunctionsWithHash("stale-matching-matching-functions-with-hash",
+                              cl::desc("Matching functions using hash"),
+                              cl::Hidden, cl::cat(BoltCategory));
 
 cl::opt<std::string>
 OutputFilename("o",

>From 3ed600c2f22356177f46e36b88eecc7ebd6e03ff Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Thu, 20 Jun 2024 15:05:10 -0700
Subject: [PATCH 14/21] spr amend

Created using spr 1.3.4
---
 bolt/docs/CommandLineArgumentReference.md | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/bolt/docs/CommandLineArgumentReference.md b/bolt/docs/CommandLineArgumentReference.md
index 8887d1f5d5bd4..0a5af3933b453 100644
--- a/bolt/docs/CommandLineArgumentReference.md
+++ b/bolt/docs/CommandLineArgumentReference.md
@@ -798,6 +798,10 @@
   bytes. Default value: 0, i.e. split iff the size is reduced. Note that on some
   architectures the size can increase after splitting.
 
+- `--stale-matching-matching-functions-with-hash`
+
+  Turns on matching functions with exact hash
+
 - `--stale-matching-max-func-size=<uint>`
 
   The maximum size of a function to consider for inference.
@@ -1161,4 +1165,4 @@
 
 - `--print-options`
 
-  Print non-default options after command line parsing
\ No newline at end of file
+  Print non-default options after command line parsing

>From 4b13659daa0b5946e1328bfb85b6e6d32bc26c1c Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Thu, 20 Jun 2024 15:41:14 -0700
Subject: [PATCH 15/21] spr amend

Created using spr 1.3.4
---
 bolt/test/X86/hashing-based-function-matching.test | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bolt/test/X86/hashing-based-function-matching.test b/bolt/test/X86/hashing-based-function-matching.test
index 83819f4c539d3..a6f96ed79bbad 100644
--- a/bolt/test/X86/hashing-based-function-matching.test
+++ b/bolt/test/X86/hashing-based-function-matching.test
@@ -45,7 +45,7 @@ main:
 ---
 header:
   profile-version: 1
-  binary-name:     'profile-passthrough-block.s.tmp.exe'
+  binary-name:     'hashing-based-function-matching.s.tmp.exe'
   binary-build-id: '<unknown>'
   profile-flags:   [ lbr ]
   profile-origin:  branch profile reader

>From 0c18b86a6c5a19364d36b28cedf583082f94c6f7 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Fri, 21 Jun 2024 15:18:22 -0700
Subject: [PATCH 16/21] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index fd1a02ff63a3b..93705ca7767b7 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -369,20 +369,12 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
   uint64_t MatchedWithLTOCommonName = 0;
 
   // Computes hash for binary functions.
-  if (opts::MatchingFunctionsWithHash) {
+ if (opts::MatchingFunctionsWithHash)
     for (auto &[_, BF] : BC.getBinaryFunctions())
       BF.computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction);
-  } else {
-    for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) {
-      if (!BF)
-        continue;
-      BinaryFunction &Function = *BF;
-
-      if (!opts::IgnoreHash)
-        Function.computeHash(YamlBP.Header.IsDFSOrder,
-                             YamlBP.Header.HashFunction);
-    }
-  }
+  else if (!opts::IgnoreHash)
+    for (BinaryFunction *BF : ProfileBFs)
+      BF->computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction);
 
   // This first pass assigns profiles that match 100% by name and by hash.
   for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) {

>From eb6cb88c46c3118308f08ec59e41d4f4d0c51e39 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Fri, 21 Jun 2024 15:30:00 -0700
Subject: [PATCH 17/21] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index 93705ca7767b7..4a70d923a23b0 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -480,6 +480,10 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
   }
 
   BC.setNumUnusedProfiledObjects(NumUnused);
+  
+  for (BinaryFunction* BF : BC.getAllBinaryFunctions()) 
+    if (ProfiledFunctions.find(BF) == ProfiledFunctions.end())
+      BF->setIgnored();
 
   return Error::success();
 }

>From a2c9c826cea2414d17bf4096c26d60ffd814ad3b Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Fri, 21 Jun 2024 15:32:54 -0700
Subject: [PATCH 18/21] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index 4a70d923a23b0..2e0180818a9e3 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -23,6 +23,7 @@ extern cl::opt<unsigned> Verbosity;
 extern cl::OptionCategory BoltOptCategory;
 extern cl::opt<bool> InferStaleProfile;
 extern cl::opt<bool> MatchingFunctionsWithHash;
+extern cl::opt<bool> Lite;
 
 static llvm::cl::opt<bool>
     IgnoreHash("profile-ignore-hash",
@@ -481,9 +482,11 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
 
   BC.setNumUnusedProfiledObjects(NumUnused);
   
-  for (BinaryFunction* BF : BC.getAllBinaryFunctions()) 
-    if (ProfiledFunctions.find(BF) == ProfiledFunctions.end())
-      BF->setIgnored();
+  if (opts::Lite)  
+    for (BinaryFunction* BF : BC.getAllBinaryFunctions()) 
+      if (ProfiledFunctions.find(BF) == ProfiledFunctions.end())
+        BF->setIgnored();
+  
 
   return Error::success();
 }

>From 4e1b758a7c5cf99f8509d708bcdea51f83e0090f Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Fri, 21 Jun 2024 15:40:29 -0700
Subject: [PATCH 19/21] spr amend

Created using spr 1.3.4
---
 bolt/docs/CommandLineArgumentReference.md          |  2 +-
 bolt/lib/Profile/YAMLProfileReader.cpp             | 13 ++++++-------
 bolt/lib/Rewrite/RewriteInstance.cpp               |  4 ++--
 bolt/lib/Utils/CommandLineOpts.cpp                 |  6 +++---
 bolt/test/X86/hashing-based-function-matching.test |  2 +-
 5 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/bolt/docs/CommandLineArgumentReference.md b/bolt/docs/CommandLineArgumentReference.md
index 0a5af3933b453..994651d76342d 100644
--- a/bolt/docs/CommandLineArgumentReference.md
+++ b/bolt/docs/CommandLineArgumentReference.md
@@ -798,7 +798,7 @@
   bytes. Default value: 0, i.e. split iff the size is reduced. Note that on some
   architectures the size can increase after splitting.
 
-- `--stale-matching-matching-functions-with-hash`
+- `--match-profile-with-function-hash`
 
   Turns on matching functions with exact hash
 
diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index 2e0180818a9e3..94aaac532d5ca 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -22,7 +22,7 @@ namespace opts {
 extern cl::opt<unsigned> Verbosity;
 extern cl::OptionCategory BoltOptCategory;
 extern cl::opt<bool> InferStaleProfile;
-extern cl::opt<bool> MatchingFunctionsWithHash;
+extern cl::opt<bool> MatchProfileWithFunctionHash;
 extern cl::opt<bool> Lite;
 
 static llvm::cl::opt<bool>
@@ -370,7 +370,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
   uint64_t MatchedWithLTOCommonName = 0;
 
   // Computes hash for binary functions.
- if (opts::MatchingFunctionsWithHash)
+  if (opts::MatchProfileWithFunctionHash)
     for (auto &[_, BF] : BC.getBinaryFunctions())
       BF.computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction);
   else if (!opts::IgnoreHash)
@@ -394,7 +394,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
 
   // Uses the strict hash of profiled and binary functions to match functions
   // that are not matched by name or common name.
-  if (opts::MatchingFunctionsWithHash) {
+  if (opts::MatchProfileWithFunctionHash) {
     std::unordered_map<size_t, BinaryFunction *> StrictHashToBF;
     StrictHashToBF.reserve(BC.getBinaryFunctions().size());
 
@@ -481,12 +481,11 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
   }
 
   BC.setNumUnusedProfiledObjects(NumUnused);
-  
-  if (opts::Lite)  
-    for (BinaryFunction* BF : BC.getAllBinaryFunctions()) 
+
+  if (opts::Lite)
+    for (BinaryFunction *BF : BC.getAllBinaryFunctions())
       if (ProfiledFunctions.find(BF) == ProfiledFunctions.end())
         BF->setIgnored();
-  
 
   return Error::success();
 }
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 7afc7c920318c..ec234add8a6f5 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -82,7 +82,7 @@ extern cl::opt<bool> Hugify;
 extern cl::opt<bool> Instrument;
 extern cl::opt<JumpTableSupportLevel> JumpTables;
 extern cl::opt<bool> KeepNops;
-extern cl::opt<bool> MatchingFunctionsWithHash;
+extern cl::opt<bool> MatchProfileWithFunctionHash;
 extern cl::list<std::string> ReorderData;
 extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions;
 extern cl::opt<bool> TerminalTrap;
@@ -2983,7 +2983,7 @@ void RewriteInstance::selectFunctionsToProcess() {
     if (mustSkip(Function))
       return false;
 
-    if (opts::MatchingFunctionsWithHash)
+    if (opts::MatchProfileWithFunctionHash)
       return true;
 
     // If the list is not empty, only process functions from the list.
diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp
index 8144af29cba4e..0724e627dfd19 100644
--- a/bolt/lib/Utils/CommandLineOpts.cpp
+++ b/bolt/lib/Utils/CommandLineOpts.cpp
@@ -129,9 +129,9 @@ cl::opt<bool>
                cl::cat(BoltOptCategory));
 
 cl::opt<bool>
-    MatchingFunctionsWithHash("stale-matching-matching-functions-with-hash",
-                              cl::desc("Matching functions using hash"),
-                              cl::Hidden, cl::cat(BoltCategory));
+    MatchProfileWithFunctionHash("match-profile-with-function-hash",
+                                 cl::desc("Match profile with function hash"),
+                                 cl::Hidden, cl::cat(BoltCategory));
 
 cl::opt<std::string>
 OutputFilename("o",
diff --git a/bolt/test/X86/hashing-based-function-matching.test b/bolt/test/X86/hashing-based-function-matching.test
index a6f96ed79bbad..60980b639b7e8 100644
--- a/bolt/test/X86/hashing-based-function-matching.test
+++ b/bolt/test/X86/hashing-based-function-matching.test
@@ -6,7 +6,7 @@
 # RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/main.s -o %t.o
 # RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib
 # RUN: llvm-bolt %t.exe -o %t.out --data %t/yaml -v=2 \
-# RUN:   --print-cfg --stale-matching-matching-functions-with-hash 2>&1 --profile-ignore-hash=0 | FileCheck %s
+# RUN:   --print-cfg --match-profile-with-function-hash 2>&1 --profile-ignore-hash=0 | FileCheck %s
 
 # CHECK: BOLT-INFO: matched 1 functions with hash
 

>From f2ba96ab823bafcb45416a3f78900a4ead4b67b7 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Sat, 22 Jun 2024 22:09:07 -0700
Subject: [PATCH 20/21] spr amend

Created using spr 1.3.4
---
 bolt/lib/Profile/YAMLProfileReader.cpp             | 4 ++--
 bolt/test/X86/hashing-based-function-matching.test | 3 +--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index 94aaac532d5ca..6c4eece4ddb1b 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -406,7 +406,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
         continue;
       auto It = StrictHashToBF.find(YamlBF.Hash);
       if (It != StrictHashToBF.end() && !ProfiledFunctions.count(It->second)) {
-        auto *BF = It->second;
+        BinaryFunction *BF = It->second;
         matchProfileToFunction(YamlBF, *BF);
         ++MatchedWithHash;
       }
@@ -484,7 +484,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
 
   if (opts::Lite)
     for (BinaryFunction *BF : BC.getAllBinaryFunctions())
-      if (ProfiledFunctions.find(BF) == ProfiledFunctions.end())
+      if (!BF->hasProfile())
         BF->setIgnored();
 
   return Error::success();
diff --git a/bolt/test/X86/hashing-based-function-matching.test b/bolt/test/X86/hashing-based-function-matching.test
index 60980b639b7e8..4426da085bbd9 100644
--- a/bolt/test/X86/hashing-based-function-matching.test
+++ b/bolt/test/X86/hashing-based-function-matching.test
@@ -1,5 +1,4 @@
-## Test YAMLProfileReader support for pass-through blocks in non-matching edges:
-## match the profile edge A -> C to the CFG with blocks A -> B -> C.
+## Tests function matching in YAMLProfileReader by function hash.
 
 # REQUIRES: system-linux
 # RUN: split-file %s %t

>From 6a5618851186cb88180e92ad0d9efe3aa62bfe43 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Mon, 24 Jun 2024 13:16:58 -0700
Subject: [PATCH 21/21] Drop end of line

Created using spr 1.3.4
---
 bolt/docs/CommandLineArgumentReference.md | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/bolt/docs/CommandLineArgumentReference.md b/bolt/docs/CommandLineArgumentReference.md
index 994651d76342d..7a77e8eda18b2 100644
--- a/bolt/docs/CommandLineArgumentReference.md
+++ b/bolt/docs/CommandLineArgumentReference.md
@@ -798,14 +798,15 @@
   bytes. Default value: 0, i.e. split iff the size is reduced. Note that on some
   architectures the size can increase after splitting.
 
-- `--match-profile-with-function-hash`
-
-  Turns on matching functions with exact hash
-
 - `--stale-matching-max-func-size=<uint>`
 
   The maximum size of a function to consider for inference.
 
+- `--stale-matching-min-matched-block=<uint>`
+
+  Percentage threshold of matched basic blocks at which stale profile inference
+  is executed.
+
 - `--stale-threshold=<uint>`
 
   Maximum percentage of stale functions to tolerate (default: 100)
@@ -1165,4 +1166,4 @@
 
 - `--print-options`
 
-  Print non-default options after command line parsing
+  Print non-default options after command line parsing
\ No newline at end of file



More information about the lldb-commits mailing list