[clang] [lldb] [llvm] [BOLT] Match functions with name similarity (PR #95884)
Shaw Young via cfe-commits
cfe-commits at lists.llvm.org
Wed Jul 3 11:37:29 PDT 2024
https://github.com/shawbyoung updated https://github.com/llvm/llvm-project/pull/95884
>From fab60ab1f26be1799f05d1e2b02cc6b89093b670 Mon Sep 17 00:00:00 2001
From: Sayhaan Siddiqui <sayhaan at meta.com>
Date: Mon, 17 Jun 2024 23:14:07 -0700
Subject: [PATCH 1/9] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20ch?=
=?UTF-8?q?anges=20to=20main=20this=20commit=20is=20based=20on?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.4
[skip ci]
---
bolt/include/bolt/Rewrite/DWARFRewriter.h | 4 +-
bolt/lib/Core/BinaryEmitter.cpp | 1 +
bolt/lib/Rewrite/DWARFRewriter.cpp | 61 ++---
clang/include/clang/Driver/Options.td | 4 +
clang/lib/Driver/ToolChains/Gnu.cpp | 29 +++
cross-project-tests/lit.cfg.py | 14 +-
cross-project-tests/lit.site.cfg.py.in | 4 +
lldb/test/API/lit.cfg.py | 5 +
lldb/test/API/lit.site.cfg.py.in | 8 +
lldb/test/Shell/helper/toolchain.py | 5 +
lldb/test/Shell/lit.site.cfg.py.in | 9 +
llvm/CMakeLists.txt | 4 +
llvm/include/llvm/MC/MCFragment.h | 22 ++
llvm/include/llvm/MC/MCObjectStreamer.h | 2 +
llvm/include/llvm/MC/MCStreamer.h | 6 +
llvm/lib/MC/MCAssembler.cpp | 118 ++++++----
llvm/lib/MC/MCExpr.cpp | 10 +-
llvm/lib/MC/MCFragment.cpp | 12 +
llvm/lib/MC/MCObjectStreamer.cpp | 5 +
llvm/lib/MC/MCStreamer.cpp | 2 +
.../lib/Target/X86/AsmParser/X86AsmParser.cpp | 24 ++
llvm/test/MC/X86/directive-avoid_end_align.s | 208 ++++++++++++++++++
22 files changed, 483 insertions(+), 74 deletions(-)
create mode 100644 llvm/test/MC/X86/directive-avoid_end_align.s
diff --git a/bolt/include/bolt/Rewrite/DWARFRewriter.h b/bolt/include/bolt/Rewrite/DWARFRewriter.h
index 8dec32de9008e..3cc9d823c815b 100644
--- a/bolt/include/bolt/Rewrite/DWARFRewriter.h
+++ b/bolt/include/bolt/Rewrite/DWARFRewriter.h
@@ -12,6 +12,7 @@
#include "bolt/Core/DIEBuilder.h"
#include "bolt/Core/DebugData.h"
#include "bolt/Core/DebugNames.h"
+#include "bolt/Core/GDBIndex.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/DIE.h"
#include "llvm/DWP/DWP.h"
@@ -131,7 +132,8 @@ class DWARFRewriter {
makeFinalLocListsSection(DWARFVersion Version);
/// Finalize type sections in the main binary.
- CUOffsetMap finalizeTypeSections(DIEBuilder &DIEBlder, DIEStreamer &Streamer);
+ CUOffsetMap finalizeTypeSections(DIEBuilder &DIEBlder, DIEStreamer &Streamer,
+ GDBIndex &GDBIndexSection);
/// Process and write out CUs that are passsed in.
void finalizeCompileUnits(DIEBuilder &DIEBlder, DIEStreamer &Streamer,
diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp
index 5793963f9b80d..c231fffa0d5ff 100644
--- a/bolt/lib/Core/BinaryEmitter.cpp
+++ b/bolt/lib/Core/BinaryEmitter.cpp
@@ -487,6 +487,7 @@ void BinaryEmitter::emitFunctionBody(BinaryFunction &BF, FunctionFragment &FF,
// This assumes the second instruction in the macro-op pair will get
// assigned to its own MCRelaxableFragment. Since all JCC instructions
// are relaxable, we should be safe.
+ Streamer.emitNeverAlignCodeAtEnd(/*Alignment to avoid=*/64, *BC.STI);
}
if (!EmitCodeOnly) {
diff --git a/bolt/lib/Rewrite/DWARFRewriter.cpp b/bolt/lib/Rewrite/DWARFRewriter.cpp
index 8814ebbd10aa5..7b62999dfb2b6 100644
--- a/bolt/lib/Rewrite/DWARFRewriter.cpp
+++ b/bolt/lib/Rewrite/DWARFRewriter.cpp
@@ -185,6 +185,7 @@ namespace bolt {
class DIEStreamer : public DwarfStreamer {
DIEBuilder *DIEBldr;
DWARFRewriter &Rewriter;
+ GDBIndex &GDBIndexSection;
private:
/// Emit the compilation unit header for \p Unit in the debug_info
@@ -247,7 +248,7 @@ class DIEStreamer : public DwarfStreamer {
const uint64_t TypeSignature = cast<DWARFTypeUnit>(Unit).getTypeHash();
DIE *TypeDIE = DIEBldr->getTypeDIE(Unit);
const DIEBuilder::DWARFUnitInfo &UI = DIEBldr->getUnitInfoByDwarfUnit(Unit);
- Rewriter.addGDBTypeUnitEntry(
+ GDBIndexSection.addGDBTypeUnitEntry(
{UI.UnitOffset, TypeSignature, TypeDIE->getOffset()});
if (Unit.getVersion() < 5) {
// Switch the section to .debug_types section.
@@ -279,11 +280,12 @@ class DIEStreamer : public DwarfStreamer {
public:
DIEStreamer(DIEBuilder *DIEBldr, DWARFRewriter &Rewriter,
+ GDBIndex &GDBIndexSection,
DWARFLinkerBase::OutputFileType OutFileType,
raw_pwrite_stream &OutFile,
DWARFLinkerBase::MessageHandlerTy Warning)
: DwarfStreamer(OutFileType, OutFile, Warning), DIEBldr(DIEBldr),
- Rewriter(Rewriter){};
+ Rewriter(Rewriter), GDBIndexSection(GDBIndexSection) {};
using DwarfStreamer::emitCompileUnitHeader;
@@ -326,12 +328,11 @@ static cl::opt<bool> KeepARanges(
"keep or generate .debug_aranges section if .gdb_index is written"),
cl::Hidden, cl::cat(BoltCategory));
-static cl::opt<bool>
-DeterministicDebugInfo("deterministic-debuginfo",
- cl::desc("disables parallel execution of tasks that may produce "
- "nondeterministic debug info"),
- cl::init(true),
- cl::cat(BoltCategory));
+static cl::opt<bool> DeterministicDebugInfo(
+ "deterministic-debuginfo",
+ cl::desc("disables parallel execution of tasks that may produce "
+ "nondeterministic debug info"),
+ cl::init(true), cl::cat(BoltCategory));
static cl::opt<std::string> DwarfOutputPath(
"dwarf-output-path",
@@ -460,10 +461,11 @@ static std::optional<uint64_t> getAsAddress(const DWARFUnit &DU,
static std::unique_ptr<DIEStreamer>
createDIEStreamer(const Triple &TheTriple, raw_pwrite_stream &OutFile,
StringRef Swift5ReflectionSegmentName, DIEBuilder &DIEBldr,
- DWARFRewriter &Rewriter) {
+ DWARFRewriter &Rewriter, GDBIndex &GDBIndexSection) {
std::unique_ptr<DIEStreamer> Streamer = std::make_unique<DIEStreamer>(
- &DIEBldr, Rewriter, DWARFLinkerBase::OutputFileType::Object, OutFile,
+ &DIEBldr, Rewriter, GDBIndexSection,
+ DWARFLinkerBase::OutputFileType::Object, OutFile,
[&](const Twine &Warning, StringRef Context, const DWARFDie *) {});
Error Err = Streamer->init(TheTriple, Swift5ReflectionSegmentName);
if (Err)
@@ -484,13 +486,12 @@ emitUnit(DIEBuilder &DIEBldr, DIEStreamer &Streamer, DWARFUnit &Unit) {
return {U.UnitOffset, U.UnitLength, TypeHash};
}
-static void emitDWOBuilder(const std::string &DWOName,
- DIEBuilder &DWODIEBuilder, DWARFRewriter &Rewriter,
- DWARFUnit &SplitCU, DWARFUnit &CU,
- DWARFRewriter::DWPState &State,
- DebugLocWriter &LocWriter,
- DebugStrOffsetsWriter &StrOffstsWriter,
- DebugStrWriter &StrWriter) {
+static void
+emitDWOBuilder(const std::string &DWOName, DIEBuilder &DWODIEBuilder,
+ DWARFRewriter &Rewriter, DWARFUnit &SplitCU, DWARFUnit &CU,
+ DWARFRewriter::DWPState &State, DebugLocWriter &LocWriter,
+ DebugStrOffsetsWriter &StrOffstsWriter,
+ DebugStrWriter &StrWriter, GDBIndex &GDBIndexSection) {
// Populate debug_info and debug_abbrev for current dwo into StringRef.
DWODIEBuilder.generateAbbrevs();
DWODIEBuilder.finish();
@@ -500,8 +501,9 @@ static void emitDWOBuilder(const std::string &DWOName,
std::make_shared<raw_svector_ostream>(OutBuffer);
const object::ObjectFile *File = SplitCU.getContext().getDWARFObj().getFile();
auto TheTriple = std::make_unique<Triple>(File->makeTriple());
- std::unique_ptr<DIEStreamer> Streamer = createDIEStreamer(
- *TheTriple, *ObjOS, "DwoStreamerInitAug2", DWODIEBuilder, Rewriter);
+ std::unique_ptr<DIEStreamer> Streamer =
+ createDIEStreamer(*TheTriple, *ObjOS, "DwoStreamerInitAug2",
+ DWODIEBuilder, Rewriter, GDBIndexSection);
DWARFRewriter::UnitMetaVectorType TUMetaVector;
DWARFRewriter::UnitMeta CUMI = {0, 0, 0};
if (SplitCU.getContext().getMaxDWOVersion() >= 5) {
@@ -652,6 +654,7 @@ void DWARFRewriter::updateDebugInfo() {
DWARF5AcceleratorTable DebugNamesTable(opts::CreateDebugNames, BC,
*StrWriter);
+ GDBIndex GDBIndexSection(BC);
DWPState State;
if (opts::WriteDWP)
initDWPState(State);
@@ -704,7 +707,8 @@ void DWARFRewriter::updateDebugInfo() {
TempRangesSectionWriter->finalizeSection();
emitDWOBuilder(DWOName, DWODIEBuilder, *this, **SplitCU, *Unit, State,
- DebugLocDWoWriter, DWOStrOffstsWriter, DWOStrWriter);
+ DebugLocDWoWriter, DWOStrOffstsWriter, DWOStrWriter,
+ GDBIndexSection);
}
if (Unit->getVersion() >= 5) {
@@ -729,9 +733,10 @@ void DWARFRewriter::updateDebugInfo() {
std::make_unique<raw_svector_ostream>(OutBuffer);
const object::ObjectFile *File = BC.DwCtx->getDWARFObj().getFile();
auto TheTriple = std::make_unique<Triple>(File->makeTriple());
- std::unique_ptr<DIEStreamer> Streamer =
- createDIEStreamer(*TheTriple, *ObjOS, "TypeStreamer", DIEBlder, *this);
- CUOffsetMap OffsetMap = finalizeTypeSections(DIEBlder, *Streamer);
+ std::unique_ptr<DIEStreamer> Streamer = createDIEStreamer(
+ *TheTriple, *ObjOS, "TypeStreamer", DIEBlder, *this, GDBIndexSection);
+ CUOffsetMap OffsetMap =
+ finalizeTypeSections(DIEBlder, *Streamer, GDBIndexSection);
const bool SingleThreadedMode =
opts::NoThreads || opts::DeterministicDebugInfo;
@@ -761,7 +766,8 @@ void DWARFRewriter::updateDebugInfo() {
finalizeDebugSections(DIEBlder, DebugNamesTable, *Streamer, *ObjOS,
OffsetMap);
- updateGdbIndexSection(OffsetMap, CUIndex);
+ GDBIndexSection.updateGdbIndexSection(OffsetMap, CUIndex,
+ *ARangesSectionWriter);
}
void DWARFRewriter::updateUnitDebugInfo(
@@ -1429,7 +1435,8 @@ void DWARFRewriter::updateLineTableOffsets(const MCAsmLayout &Layout) {
}
CUOffsetMap DWARFRewriter::finalizeTypeSections(DIEBuilder &DIEBlder,
- DIEStreamer &Streamer) {
+ DIEStreamer &Streamer,
+ GDBIndex &GDBIndexSection) {
// update TypeUnit DW_AT_stmt_list with new .debug_line information.
auto updateLineTable = [&](const DWARFUnit &Unit) -> void {
DIE *UnitDIE = DIEBlder.getUnitDIEbyUnit(Unit);
@@ -1449,8 +1456,8 @@ CUOffsetMap DWARFRewriter::finalizeTypeSections(DIEBuilder &DIEBlder,
std::make_shared<raw_svector_ostream>(OutBuffer);
const object::ObjectFile *File = BC.DwCtx->getDWARFObj().getFile();
auto TheTriple = std::make_unique<Triple>(File->makeTriple());
- std::unique_ptr<DIEStreamer> TypeStreamer =
- createDIEStreamer(*TheTriple, *ObjOS, "TypeStreamer", DIEBlder, *this);
+ std::unique_ptr<DIEStreamer> TypeStreamer = createDIEStreamer(
+ *TheTriple, *ObjOS, "TypeStreamer", DIEBlder, *this, GDBIndexSection);
// generate debug_info and CUMap
CUOffsetMap CUMap;
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index d44faa55c456f..63bb86717bb14 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -5483,6 +5483,10 @@ def pg : Flag<["-"], "pg">, HelpText<"Enable mcount instrumentation">,
MarshallingInfoFlag<CodeGenOpts<"InstrumentForProfiling">>;
def pipe : Flag<["-", "--"], "pipe">,
HelpText<"Use pipes between commands, when possible">;
+// Facebook T92898286
+def post_link_optimize : Flag<["--"], "post-link-optimize">,
+ HelpText<"Apply post-link optimizations using BOLT">;
+// End Facebook T92898286
def prebind__all__twolevel__modules : Flag<["-"], "prebind_all_twolevel_modules">;
def prebind : Flag<["-"], "prebind">;
def preload : Flag<["-"], "preload">;
diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp
index b141e5f2adfab..f7611af5763ab 100644
--- a/clang/lib/Driver/ToolChains/Gnu.cpp
+++ b/clang/lib/Driver/ToolChains/Gnu.cpp
@@ -672,12 +672,41 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA,
}
}
+ // Facebook T92898286
+ if (Args.hasArg(options::OPT_post_link_optimize))
+ CmdArgs.push_back("-q");
+ // End Facebook T92898286
+
Args.AddAllArgs(CmdArgs, options::OPT_T);
const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath());
C.addCommand(std::make_unique<Command>(JA, *this,
ResponseFileSupport::AtFileCurCP(),
Exec, CmdArgs, Inputs, Output));
+ // Facebook T92898286
+ if (!Args.hasArg(options::OPT_post_link_optimize) || !Output.isFilename())
+ return;
+
+ const char *MvExec = Args.MakeArgString(ToolChain.GetProgramPath("mv"));
+ ArgStringList MoveCmdArgs;
+ MoveCmdArgs.push_back(Output.getFilename());
+ const char *PreBoltBin =
+ Args.MakeArgString(Twine(Output.getFilename()) + ".pre-bolt");
+ MoveCmdArgs.push_back(PreBoltBin);
+ C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
+ MvExec, MoveCmdArgs, std::nullopt));
+
+ ArgStringList BoltCmdArgs;
+ const char *BoltExec =
+ Args.MakeArgString(ToolChain.GetProgramPath("llvm-bolt"));
+ BoltCmdArgs.push_back(PreBoltBin);
+ BoltCmdArgs.push_back("-reorder-blocks=reverse");
+ BoltCmdArgs.push_back("-update-debug-sections");
+ BoltCmdArgs.push_back("-o");
+ BoltCmdArgs.push_back(Output.getFilename());
+ C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::None(),
+ BoltExec, BoltCmdArgs, std::nullopt));
+ // End Facebook T92898286
}
void tools::gnutools::Assembler::ConstructJob(Compilation &C,
diff --git a/cross-project-tests/lit.cfg.py b/cross-project-tests/lit.cfg.py
index 774c4eaf4d976..619634578dfe6 100644
--- a/cross-project-tests/lit.cfg.py
+++ b/cross-project-tests/lit.cfg.py
@@ -84,7 +84,13 @@ def get_required_attr(config, attr_name):
# use_clang() and use_lld() respectively, so set them to "", if needed.
if not hasattr(config, "clang_src_dir"):
config.clang_src_dir = ""
-llvm_config.use_clang(required=("clang" in config.llvm_enabled_projects))
+# Facebook T92898286
+should_test_bolt = get_required_attr(config, "llvm_test_bolt")
+if should_test_bolt:
+ llvm_config.use_clang(required=("clang" in config.llvm_enabled_projects), additional_flags=["--post-link-optimize"])
+else:
+ llvm_config.use_clang(required=("clang" in config.llvm_enabled_projects))
+# End Facebook T92898286
if not hasattr(config, "lld_src_dir"):
config.lld_src_dir = ""
@@ -293,3 +299,9 @@ def get_clang_default_dwarf_version_string(triple):
# Allow 'REQUIRES: XXX-registered-target' in tests.
for arch in config.targets_to_build:
config.available_features.add(arch.lower() + "-registered-target")
+
+# Facebook T92898286
+# Ensure the user's PYTHONPATH is included.
+if "PYTHONPATH" in os.environ:
+ config.environment["PYTHONPATH"] = os.environ["PYTHONPATH"]
+# End Facebook T92898286
diff --git a/cross-project-tests/lit.site.cfg.py.in b/cross-project-tests/lit.site.cfg.py.in
index 39458dfc79afd..2d53cd377f033 100644
--- a/cross-project-tests/lit.site.cfg.py.in
+++ b/cross-project-tests/lit.site.cfg.py.in
@@ -21,6 +21,10 @@ config.mlir_src_root = "@MLIR_SOURCE_DIR@"
config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@"
+# Facebook T92898286
+config.llvm_test_bolt = lit.util.pythonize_bool("@LLVM_TEST_BOLT@")
+# End Facebook T92898286
+
import lit.llvm
lit.llvm.initialize(lit_config, config)
diff --git a/lldb/test/API/lit.cfg.py b/lldb/test/API/lit.cfg.py
index d934349fe3ca3..d4a62c51458cc 100644
--- a/lldb/test/API/lit.cfg.py
+++ b/lldb/test/API/lit.cfg.py
@@ -248,6 +248,11 @@ def delete_module_cache(path):
if is_configured("lldb_framework_dir"):
dotest_cmd += ["--framework", config.lldb_framework_dir]
+# Facebook T92898286
+if is_configured("llvm_test_bolt"):
+ dotest_cmd += ["-E", '"--post-link-optimize"']
+# End Facebook T92898286
+
if (
"lldb-repro-capture" in config.available_features
or "lldb-repro-replay" in config.available_features
diff --git a/lldb/test/API/lit.site.cfg.py.in b/lldb/test/API/lit.site.cfg.py.in
index 8b2d09ae41cd2..602f45759e48f 100644
--- a/lldb/test/API/lit.site.cfg.py.in
+++ b/lldb/test/API/lit.site.cfg.py.in
@@ -1,5 +1,9 @@
@LIT_SITE_CFG_IN_HEADER@
+#Facebook T92898286
+import lit.util
+#End Facebook T92898286
+
config.llvm_src_root = "@LLVM_SOURCE_DIR@"
config.llvm_obj_root = "@LLVM_BINARY_DIR@"
config.llvm_tools_dir = lit_config.substitute("@LLVM_TOOLS_DIR@")
@@ -39,6 +43,10 @@ config.libcxx_include_target_dir = "@LIBCXX_GENERATED_INCLUDE_TARGET_DIR@"
config.lldb_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_LLDB@", "lldb-api")
config.clang_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_CLANG@", "lldb-api")
+# Facebook T92898286
+config.llvm_test_bolt = lit.util.pythonize_bool("@LLVM_TEST_BOLT@")
+# End Facebook T92898286
+
# Plugins
lldb_build_intel_pt = '@LLDB_BUILD_INTEL_PT@'
if lldb_build_intel_pt == '1':
diff --git a/lldb/test/Shell/helper/toolchain.py b/lldb/test/Shell/helper/toolchain.py
index 255955fc70d8c..7b7be06643166 100644
--- a/lldb/test/Shell/helper/toolchain.py
+++ b/lldb/test/Shell/helper/toolchain.py
@@ -165,6 +165,11 @@ def use_support_substitutions(config):
if config.cmake_sysroot:
host_flags += ["--sysroot={}".format(config.cmake_sysroot)]
+ # Facebook T92898286
+ if config.llvm_test_bolt:
+ host_flags += ["--post-link-optimize"]
+ # End Facebook T92898286
+
host_flags = " ".join(host_flags)
config.substitutions.append(("%clang_host", "%clang " + host_flags))
config.substitutions.append(("%clangxx_host", "%clangxx " + host_flags))
diff --git a/lldb/test/Shell/lit.site.cfg.py.in b/lldb/test/Shell/lit.site.cfg.py.in
index b69e7bce1bc0b..fe8323734b7db 100644
--- a/lldb/test/Shell/lit.site.cfg.py.in
+++ b/lldb/test/Shell/lit.site.cfg.py.in
@@ -1,5 +1,10 @@
@LIT_SITE_CFG_IN_HEADER@
+#Facebook T92898286
+import lit.util
+#End Facebook T92898286
+
+
config.llvm_src_root = "@LLVM_SOURCE_DIR@"
config.llvm_obj_root = "@LLVM_BINARY_DIR@"
config.llvm_tools_dir = lit_config.substitute("@LLVM_TOOLS_DIR@")
@@ -31,6 +36,10 @@ config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@"
config.lldb_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_LLDB@", "lldb-shell")
config.clang_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_CLANG@", "lldb-shell")
+# Facebook T92898286
+config.llvm_test_bolt = lit.util.pythonize_bool("@LLVM_TEST_BOLT@")
+# End Facebook T92898286
+
import lit.llvm
lit.llvm.initialize(lit_config, config)
diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index 3208147101c0d..ecd36d2564e4f 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -709,6 +709,10 @@ set(LLVM_LIB_FUZZING_ENGINE "" CACHE PATH
option(LLVM_USE_SPLIT_DWARF
"Use -gsplit-dwarf when compiling llvm and --gdb-index when linking." OFF)
+# Facebook T92898286
+option(LLVM_TEST_BOLT "Enable BOLT testing in non-BOLT tests that use clang" OFF)
+# End Facebook T92898286
+
# Define an option controlling whether we should build for 32-bit on 64-bit
# platforms, where supported.
if( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT (WIN32 OR ${CMAKE_SYSTEM_NAME} MATCHES "AIX"))
diff --git a/llvm/include/llvm/MC/MCFragment.h b/llvm/include/llvm/MC/MCFragment.h
index a9b19dc56f16a..256d98423e030 100644
--- a/llvm/include/llvm/MC/MCFragment.h
+++ b/llvm/include/llvm/MC/MCFragment.h
@@ -33,6 +33,7 @@ class MCFragment : public ilist_node_with_parent<MCFragment, MCSection> {
public:
enum FragmentType : uint8_t {
FT_Align,
+ FT_NeverAlign,
FT_Data,
FT_CompactEncodedInst,
FT_Fill,
@@ -344,6 +345,27 @@ class MCAlignFragment : public MCFragment {
}
};
+class MCNeverAlignFragment : public MCFragment {
+ /// The alignment the end of the next fragment should avoid.
+ unsigned Alignment;
+
+ /// When emitting Nops some subtargets have specific nop encodings.
+ const MCSubtargetInfo &STI;
+
+public:
+ MCNeverAlignFragment(unsigned Alignment, const MCSubtargetInfo &STI,
+ MCSection *Sec = nullptr)
+ : MCFragment(FT_NeverAlign, false, Sec), Alignment(Alignment), STI(STI) {}
+
+ unsigned getAlignment() const { return Alignment; }
+
+ const MCSubtargetInfo &getSubtargetInfo() const { return STI; }
+
+ static bool classof(const MCFragment *F) {
+ return F->getKind() == MCFragment::FT_NeverAlign;
+ }
+};
+
class MCFillFragment : public MCFragment {
uint8_t ValueSize;
/// Value to use for filling bytes.
diff --git a/llvm/include/llvm/MC/MCObjectStreamer.h b/llvm/include/llvm/MC/MCObjectStreamer.h
index e212d54613980..c7d760721e369 100644
--- a/llvm/include/llvm/MC/MCObjectStreamer.h
+++ b/llvm/include/llvm/MC/MCObjectStreamer.h
@@ -157,6 +157,8 @@ class MCObjectStreamer : public MCStreamer {
unsigned MaxBytesToEmit = 0) override;
void emitCodeAlignment(Align ByteAlignment, const MCSubtargetInfo *STI,
unsigned MaxBytesToEmit = 0) override;
+ void emitNeverAlignCodeAtEnd(unsigned ByteAlignment,
+ const MCSubtargetInfo &STI) override;
void emitValueToOffset(const MCExpr *Offset, unsigned char Value,
SMLoc Loc) override;
void emitDwarfLocDirective(unsigned FileNo, unsigned Line, unsigned Column,
diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h
index b7468cf70a664..dd813192d9ca0 100644
--- a/llvm/include/llvm/MC/MCStreamer.h
+++ b/llvm/include/llvm/MC/MCStreamer.h
@@ -887,6 +887,12 @@ class MCStreamer {
virtual void emitCodeAlignment(Align Alignment, const MCSubtargetInfo *STI,
unsigned MaxBytesToEmit = 0);
+ /// If the end of the fragment following this NeverAlign fragment ever gets
+ /// aligned to \p ByteAlignment, this fragment emits a single nop before the
+ /// following fragment to break this end-alignment.
+ virtual void emitNeverAlignCodeAtEnd(unsigned ByteAlignment,
+ const MCSubtargetInfo &STI);
+
/// Emit some number of copies of \p Value until the byte offset \p
/// Offset is reached.
///
diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp
index ad30b5ce9e631..62baeb93ea7d0 100644
--- a/llvm/lib/MC/MCAssembler.cpp
+++ b/llvm/lib/MC/MCAssembler.cpp
@@ -298,6 +298,43 @@ bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout, const MCFixup &Fixup,
return IsResolved;
}
+/// Check if the branch crosses the boundary.
+///
+/// \param StartAddr start address of the fused/unfused branch.
+/// \param Size size of the fused/unfused branch.
+/// \param BoundaryAlignment alignment requirement of the branch.
+/// \returns true if the branch cross the boundary.
+static bool mayCrossBoundary(uint64_t StartAddr, uint64_t Size,
+ Align BoundaryAlignment) {
+ uint64_t EndAddr = StartAddr + Size;
+ return (StartAddr >> Log2(BoundaryAlignment)) !=
+ ((EndAddr - 1) >> Log2(BoundaryAlignment));
+}
+
+/// Check if the branch is against the boundary.
+///
+/// \param StartAddr start address of the fused/unfused branch.
+/// \param Size size of the fused/unfused branch.
+/// \param BoundaryAlignment alignment requirement of the branch.
+/// \returns true if the branch is against the boundary.
+static bool isAgainstBoundary(uint64_t StartAddr, uint64_t Size,
+ Align BoundaryAlignment) {
+ uint64_t EndAddr = StartAddr + Size;
+ return (EndAddr & (BoundaryAlignment.value() - 1)) == 0;
+}
+
+/// Check if the branch needs padding.
+///
+/// \param StartAddr start address of the fused/unfused branch.
+/// \param Size size of the fused/unfused branch.
+/// \param BoundaryAlignment alignment requirement of the branch.
+/// \returns true if the branch needs padding.
+static bool needPadding(uint64_t StartAddr, uint64_t Size,
+ Align BoundaryAlignment) {
+ return mayCrossBoundary(StartAddr, Size, BoundaryAlignment) ||
+ isAgainstBoundary(StartAddr, Size, BoundaryAlignment);
+}
+
uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout,
const MCFragment &F) const {
assert(getBackendPtr() && "Requires assembler backend");
@@ -358,6 +395,41 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout,
return Size;
}
+ case MCFragment::FT_NeverAlign: {
+ // Disclaimer: NeverAlign fragment size depends on the size of its immediate
+ // successor, but NeverAlign need not be a MCRelaxableFragment.
+ // NeverAlign fragment size is recomputed if the successor is relaxed:
+ // - If RelaxableFragment is relaxed, it gets invalidated by marking its
+ // predecessor as LastValidFragment.
+ // - This forces the assembler to call MCAsmLayout::layoutFragment on that
+ // relaxable fragment, which in turn will always ask the predecessor to
+ // compute its size (see "computeFragmentSize(prev)" in layoutFragment).
+ //
+ // In short, the simplest way to ensure that computeFragmentSize() is sane
+ // is to establish the following rule: it should never examine fragments
+ // after the current fragment in the section. If we logically need to
+ // examine any fragment after the current fragment, we need to do that using
+ // relaxation, inside MCAssembler::layoutSectionOnce.
+ const MCNeverAlignFragment &NAF = cast<MCNeverAlignFragment>(F);
+ const MCFragment *NF = F.getNextNode();
+ uint64_t Offset = Layout.getFragmentOffset(&NAF);
+ size_t NextFragSize = 0;
+ if (const auto *NextFrag = dyn_cast<MCRelaxableFragment>(NF)) {
+ NextFragSize = NextFrag->getContents().size();
+ } else if (const auto *NextFrag = dyn_cast<MCDataFragment>(NF)) {
+ NextFragSize = NextFrag->getContents().size();
+ } else {
+ llvm_unreachable("Didn't find the expected fragment after NeverAlign");
+ }
+ // Check if the next fragment ends at the alignment we want to avoid.
+ if (isAgainstBoundary(Offset, NextFragSize, Align(NAF.getAlignment()))) {
+ // Avoid this alignment by introducing minimum nop.
+ assert(getBackend().getMinimumNopSize() != NAF.getAlignment());
+ return getBackend().getMinimumNopSize();
+ }
+ return 0;
+ }
+
case MCFragment::FT_Org: {
const MCOrgFragment &OF = cast<MCOrgFragment>(F);
MCValue Value;
@@ -581,6 +653,15 @@ static void writeFragment(raw_ostream &OS, const MCAssembler &Asm,
break;
}
+ case MCFragment::FT_NeverAlign: {
+ const MCNeverAlignFragment &NAF = cast<MCNeverAlignFragment>(F);
+ if (!Asm.getBackend().writeNopData(OS, FragmentSize,
+ &NAF.getSubtargetInfo()))
+ report_fatal_error("unable to write nop sequence of " +
+ Twine(FragmentSize) + " bytes");
+ break;
+ }
+
case MCFragment::FT_Data:
++stats::EmittedDataFragments;
OS << cast<MCDataFragment>(F).getContents();
@@ -1052,43 +1133,6 @@ bool MCAssembler::relaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) {
return OldSize != LF.getContents().size();
}
-/// Check if the branch crosses the boundary.
-///
-/// \param StartAddr start address of the fused/unfused branch.
-/// \param Size size of the fused/unfused branch.
-/// \param BoundaryAlignment alignment requirement of the branch.
-/// \returns true if the branch cross the boundary.
-static bool mayCrossBoundary(uint64_t StartAddr, uint64_t Size,
- Align BoundaryAlignment) {
- uint64_t EndAddr = StartAddr + Size;
- return (StartAddr >> Log2(BoundaryAlignment)) !=
- ((EndAddr - 1) >> Log2(BoundaryAlignment));
-}
-
-/// Check if the branch is against the boundary.
-///
-/// \param StartAddr start address of the fused/unfused branch.
-/// \param Size size of the fused/unfused branch.
-/// \param BoundaryAlignment alignment requirement of the branch.
-/// \returns true if the branch is against the boundary.
-static bool isAgainstBoundary(uint64_t StartAddr, uint64_t Size,
- Align BoundaryAlignment) {
- uint64_t EndAddr = StartAddr + Size;
- return (EndAddr & (BoundaryAlignment.value() - 1)) == 0;
-}
-
-/// Check if the branch needs padding.
-///
-/// \param StartAddr start address of the fused/unfused branch.
-/// \param Size size of the fused/unfused branch.
-/// \param BoundaryAlignment alignment requirement of the branch.
-/// \returns true if the branch needs padding.
-static bool needPadding(uint64_t StartAddr, uint64_t Size,
- Align BoundaryAlignment) {
- return mayCrossBoundary(StartAddr, Size, BoundaryAlignment) ||
- isAgainstBoundary(StartAddr, Size, BoundaryAlignment);
-}
-
bool MCAssembler::relaxBoundaryAlign(MCAsmLayout &Layout,
MCBoundaryAlignFragment &BF) {
// BoundaryAlignFragment that doesn't need to align any fragment should not be
diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp
index b065d03651c45..9add574b0e5b8 100644
--- a/llvm/lib/MC/MCExpr.cpp
+++ b/llvm/lib/MC/MCExpr.cpp
@@ -675,14 +675,8 @@ static void AttemptToFoldSymbolOffsetDifference(
if (FA == FB) {
Reverse = SA.getOffset() < SB.getOffset();
} else if (!isa<MCDummyFragment>(FA)) {
- // Testing FA < FB is slow. Use setLayoutOrder to speed up computation.
- // The formal layout order will be finalized in MCAssembler::layout.
- if (FA->getLayoutOrder() == 0 || FB->getLayoutOrder()== 0) {
- unsigned LayoutOrder = 0;
- for (MCFragment &F : *FA->getParent())
- F.setLayoutOrder(++LayoutOrder);
- }
- Reverse = FA->getLayoutOrder() < FB->getLayoutOrder();
+ Reverse = std::find_if(std::next(FA->getIterator()), SecA.end(),
+ [&](auto &I) { return &I == FB; }) != SecA.end();
}
uint64_t SAOffset = SA.getOffset(), SBOffset = SB.getOffset();
diff --git a/llvm/lib/MC/MCFragment.cpp b/llvm/lib/MC/MCFragment.cpp
index 7d0826802d0af..6e09caaab0957 100644
--- a/llvm/lib/MC/MCFragment.cpp
+++ b/llvm/lib/MC/MCFragment.cpp
@@ -274,6 +274,9 @@ void MCFragment::destroy() {
case FT_Align:
delete cast<MCAlignFragment>(this);
return;
+ case FT_NeverAlign:
+ delete cast<MCNeverAlignFragment>(this);
+ return;
case FT_Data:
delete cast<MCDataFragment>(this);
return;
@@ -342,6 +345,9 @@ LLVM_DUMP_METHOD void MCFragment::dump() const {
OS << "<";
switch (getKind()) {
case MCFragment::FT_Align: OS << "MCAlignFragment"; break;
+ case MCFragment::FT_NeverAlign:
+ OS << "MCNeverAlignFragment";
+ break;
case MCFragment::FT_Data: OS << "MCDataFragment"; break;
case MCFragment::FT_CompactEncodedInst:
OS << "MCCompactEncodedInstFragment"; break;
@@ -381,6 +387,12 @@ LLVM_DUMP_METHOD void MCFragment::dump() const {
<< " MaxBytesToEmit:" << AF->getMaxBytesToEmit() << ">";
break;
}
+ case MCFragment::FT_NeverAlign: {
+ const MCNeverAlignFragment *NAF = cast<MCNeverAlignFragment>(this);
+ OS << "\n ";
+ OS << " Alignment:" << NAF->getAlignment() << ">";
+ break;
+ }
case MCFragment::FT_Data: {
const auto *DF = cast<MCDataFragment>(this);
OS << "\n ";
diff --git a/llvm/lib/MC/MCObjectStreamer.cpp b/llvm/lib/MC/MCObjectStreamer.cpp
index 0ccade91677a4..117475b7dd90b 100644
--- a/llvm/lib/MC/MCObjectStreamer.cpp
+++ b/llvm/lib/MC/MCObjectStreamer.cpp
@@ -658,6 +658,11 @@ void MCObjectStreamer::emitCodeAlignment(Align Alignment,
cast<MCAlignFragment>(getCurrentFragment())->setEmitNops(true, STI);
}
+void MCObjectStreamer::emitNeverAlignCodeAtEnd(unsigned ByteAlignment,
+ const MCSubtargetInfo &STI) {
+ insert(new MCNeverAlignFragment(ByteAlignment, STI));
+}
+
void MCObjectStreamer::emitValueToOffset(const MCExpr *Offset,
unsigned char Value,
SMLoc Loc) {
diff --git a/llvm/lib/MC/MCStreamer.cpp b/llvm/lib/MC/MCStreamer.cpp
index 199d865ea3496..a97cba6c89972 100644
--- a/llvm/lib/MC/MCStreamer.cpp
+++ b/llvm/lib/MC/MCStreamer.cpp
@@ -1235,6 +1235,8 @@ void MCStreamer::emitValueToAlignment(Align Alignment, int64_t Value,
unsigned MaxBytesToEmit) {}
void MCStreamer::emitCodeAlignment(Align Alignment, const MCSubtargetInfo *STI,
unsigned MaxBytesToEmit) {}
+void MCStreamer::emitNeverAlignCodeAtEnd(unsigned ByteAlignment,
+ const MCSubtargetInfo &STI) {}
void MCStreamer::emitValueToOffset(const MCExpr *Offset, unsigned char Value,
SMLoc Loc) {}
void MCStreamer::emitBundleAlignMode(Align Alignment) {}
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 6623106109316..6c6bd2cf31e86 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -1153,6 +1153,7 @@ class X86AsmParser : public MCTargetAsmParser {
bool parseDirectiveArch();
bool parseDirectiveNops(SMLoc L);
bool parseDirectiveEven(SMLoc L);
+ bool parseDirectiveAvoidEndAlign(SMLoc L);
bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
/// CodeView FPO data directives.
@@ -4601,6 +4602,8 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
return false;
} else if (IDVal == ".nops")
return parseDirectiveNops(DirectiveID.getLoc());
+ else if (IDVal == ".avoid_end_align")
+ return parseDirectiveAvoidEndAlign(DirectiveID.getLoc());
else if (IDVal == ".even")
return parseDirectiveEven(DirectiveID.getLoc());
else if (IDVal == ".cv_fpo_proc")
@@ -4695,6 +4698,27 @@ bool X86AsmParser::parseDirectiveEven(SMLoc L) {
return false;
}
+/// Directive for NeverAlign fragment testing, not for general usage!
+/// parseDirectiveAvoidEndAlign
+/// ::= .avoid_end_align alignment
+bool X86AsmParser::parseDirectiveAvoidEndAlign(SMLoc L) {
+ int64_t Alignment = 0;
+ SMLoc AlignmentLoc;
+ AlignmentLoc = getTok().getLoc();
+ if (getParser().checkForValidSection() ||
+ getParser().parseAbsoluteExpression(Alignment))
+ return true;
+
+ if (getParser().parseEOL("unexpected token in directive"))
+ return true;
+
+ if (Alignment <= 0)
+ return Error(AlignmentLoc, "expected a positive alignment");
+
+ getParser().getStreamer().emitNeverAlignCodeAtEnd(Alignment, getSTI());
+ return false;
+}
+
/// ParseDirectiveCode
/// ::= .code16 | .code32 | .code64
bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
diff --git a/llvm/test/MC/X86/directive-avoid_end_align.s b/llvm/test/MC/X86/directive-avoid_end_align.s
new file mode 100644
index 0000000000000..1d748401edc12
--- /dev/null
+++ b/llvm/test/MC/X86/directive-avoid_end_align.s
@@ -0,0 +1,208 @@
+# RUN: llvm-mc -triple=x86_64 -filetype=obj %s | llvm-objdump --no-show-raw-insn -d - | FileCheck %s
+# RUN: not llvm-mc -triple=x86_64 --defsym ERR=1 %s -o /dev/null 2>&1 | FileCheck %s --check-prefix=ERR
+
+# avoid_end_align has no effect since test doesn't end at alignment boundary:
+.avoid_end_align 64
+# CHECK-NOT: nop
+ testl %eax, %eax
+# CHECK: testl %eax, %eax
+ je .LBB0
+
+.fill 58, 1, 0x00
+# NeverAlign followed by MCDataFragment:
+# avoid_end_align inserts nop because `test` would end at alignment boundary:
+.avoid_end_align 64
+# CHECK: 3e: nop
+ testl %eax, %eax
+# CHECK-NEXT: 3f: testl %eax, %eax
+ je .LBB0
+# CHECK-NEXT: 41: je
+.LBB0:
+ retq
+
+.p2align 6
+.L0:
+.nops 57
+ int3
+# NeverAlign followed by RelaxableFragment:
+.avoid_end_align 64
+# CHECK: ba: nop
+ cmpl $(.L1-.L0), %eax
+# CHECK-NEXT: bb: cmpl
+ je .L0
+# CHECK-NEXT: c1: je
+.nops 65
+.L1:
+
+###############################################################################
+# Experiment A:
+# Check that NeverAlign doesn't introduce infinite loops in layout.
+# Control:
+# 1. NeverAlign fragment is not added,
+# 2. Short formats of cmp and jcc are used (3 and 2 bytes respectively),
+# 3. cmp and jcc are placed such that to be split by 64B alignment boundary.
+# 4. jcc would be relaxed to a longer format if at least one byte is added
+# between .L10 and je itself, e.g. by adding a NeverAlign padding byte,
+# or relaxing cmp instruction.
+# 5. cmp would be relaxed to a longer format if at least one byte is added
+# between .L11 and .L12, e.g. due to relaxing jcc instruction.
+.p2align 6
+# CHECK: 140: int3
+.fill 2, 1, 0xcc
+.L10:
+.nops 122
+ int3
+# CHECK: 1bc: int3
+# no avoid_end_align here
+# CHECK-NOT: nop
+ cmp $(.L12-.L11), %eax
+# CHECK: 1bd: cmpl
+.L11:
+ je .L10
+# CHECK-NEXT: 1c0: je
+.nops 125
+.L12:
+
+# Experiment:
+# Same setup as control, except NeverAlign fragment is added before cmp.
+# Expected effect:
+# 1. NeverAlign pads cmp+jcc by one byte since cmp and jcc are split by a 64B
+# alignment boundary,
+# 2. This extra byte forces jcc relaxation to a longer format (Control rule #4),
+# 3. This results in an cmp relaxation (Control rule #5),
+# 4. Which in turn makes NeverAlign fragment unnecessary as cmp and jcc
+# are no longer split by an alignment boundary (cmp crosses the boundary).
+# 5. NeverAlign padding is removed.
+# 6. cmp and jcc instruction remain in relaxed form.
+# 7. Relaxation converges, layout succeeds.
+.p2align 6
+# CHECK: 240: int3
+.fill 2, 1, 0xcc
+.L20:
+.nops 122
+ int3
+# CHECK: 2bc: int3
+.avoid_end_align 64
+# CHECK-NOT: nop
+ cmp $(.L22-.L21), %eax
+# CHECK-NEXT: 2bd: cmpl
+.L21:
+ je .L20
+# CHECK-NEXT: 2c3: je
+.nops 125
+.L22:
+
+###############################################################################
+# Experiment B: similar to exp A, but we check that once NeverAlign padding is
+# removed from the layout (exp A, experiment step 5), the increased distance
+# between the symbols L33 and L34 triggers the relaxation of instruction at
+# label L32.
+#
+# Control 1: using a one-byte instruction at L33 (site of NeverAlign) leads to
+# steps 2-3 of exp A, experiment:
+# 2. This extra byte forces jcc relaxation to a longer format (Control rule #4),
+# 3. This results in an cmp relaxation (Control rule #5),
+# => short cmp under L32
+.p2align 6
+# CHECK: 380: int3
+.fill 2, 1, 0xcc
+.L30:
+.nops 122
+ int3
+# CHECK: 3fc: int3
+ hlt
+#.avoid_end_align 64
+.L33:
+ cmp $(.L32-.L31), %eax
+# CHECK: 3fe: cmpl
+.L31:
+ je .L30
+# CHECK-NEXT: 404: je
+.nops 114
+.p2align 1
+ int3
+ int3
+# CHECK: 47c: int3
+.L34:
+.nops 9
+.L32:
+ cmp $(.L33-.L34), %eax
+# CHECK: 487: cmp
+# note that the size of cmp is 48a-487 == 3 bytes (distance is exactly -128)
+ int3
+# CHECK-NEXT: 48a: int3
+
+# Control 2: leaving out a byte at L43 (site of NeverAlign), plus
+# relaxed jcc and cmp leads to a relaxed cmp under L42 (-129 as cmp's immediate)
+.p2align 6
+# CHECK: 4c0: int3
+.fill 2, 1, 0xcc
+.L40:
+.nops 122
+ int3
+# CHECK: 53c: int3
+# int3
+#.avoid_end_align 64
+.L43:
+ cmp $(.L42-.L41+0x100), %eax
+# CHECK: 53d: cmpl
+.L41:
+ je .L40+0x100
+# CHECK-NEXT: 543: je
+.nops 114
+.p2align 1
+ int3
+ int3
+# CHECK: 5bc: int3
+.L44:
+.nops 9
+.L42:
+ cmp $(.L43-.L44), %eax
+# CHECK: 5c7: cmp
+# note that the size of cmp is 5cd-5c7 == 6 bytes (distance is exactly -129)
+ int3
+# CHECK-NEXT: 5cd: int3
+
+# Experiment
+# Checking if removing NeverAlign padding at L53 as a result of alignment and
+# relaxation of cmp and jcc following it (see exp A), thus reproducing the case
+# in Control 2 (getting a relaxed cmp under L52), is handled correctly.
+.p2align 6
+# CHECK: 600: int3
+.fill 2, 1, 0xcc
+.L50:
+.nops 122
+ int3
+# CHECK: 67c: int3
+.avoid_end_align 64
+.L53:
+# CHECK-NOT: nop
+ cmp $(.L52-.L51), %eax
+# CHECK-NEXT: 67d: cmpl
+.L51:
+ je .L50
+# CHECK-NEXT: 683: je
+.nops 114
+.p2align 1
+ int3
+ int3
+# CHECK: 6fc: int3
+.L54:
+.nops 9
+.L52:
+ cmp $(.L53-.L54), %eax
+# CHECK: 707: cmp
+# note that the size of cmp is 70d-707 == 6 bytes (distance is exactly -129)
+ int3
+# CHECK-NEXT: 70d: int3
+
+.ifdef ERR
+# ERR: {{.*}}.s:[[#@LINE+1]]:17: error: unknown token in expression
+.avoid_end_align
+# ERR: {{.*}}.s:[[#@LINE+1]]:18: error: expected absolute expression
+.avoid_end_align x
+# ERR: {{.*}}.s:[[#@LINE+1]]:18: error: expected a positive alignment
+.avoid_end_align 0
+# ERR: {{.*}}.s:[[#@LINE+1]]:20: error: unexpected token in directive
+.avoid_end_align 64, 0
+.endif
>From 34652b2eebc62218c50a23509ce99937385c30e6 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Thu, 20 Jun 2024 23:42:00 -0700
Subject: [PATCH 2/9] spr amend
Created using spr 1.3.4
---
bolt/lib/Profile/YAMLProfileReader.cpp | 73 ++++++++++++++++++++------
1 file changed, 56 insertions(+), 17 deletions(-)
diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index 66cabc236f4b2..c9f6d88f0b13a 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -424,36 +424,75 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
// Uses name similarity to match functions that were not matched by name.
uint64_t MatchedWithDemangledName = 0;
- if (opts::NameSimilarityFunctionMatchingThreshold > 0) {
-
- std::unordered_map<std::string, BinaryFunction *> NameToBinaryFunction;
- NameToBinaryFunction.reserve(BC.getBinaryFunctions().size());
- for (auto &[_, BF] : BC.getBinaryFunctions()) {
+ if (opts::NameSimilarityFunctionMatchingThreshold > 0) {
+ auto DemangleName = [&](const char* String) {
int Status = 0;
- char *DemangledName = abi::__cxa_demangle(BF.getOneName().str().c_str(),
+ char *DemangledName = abi::__cxa_demangle(String,
nullptr, nullptr, &Status);
- if (Status == 0)
- NameToBinaryFunction[std::string(DemangledName)] = &BF;
+ return Status == 0 ? new std::string(DemangledName) : nullptr;
+ };
+
+ auto DeriveNameSpace = [&](std::string DemangledName) {
+ size_t LParen = std::string(DemangledName).find("(");
+ std::string FunctionName = std::string(DemangledName).substr(0, LParen);
+ size_t ScopeResolutionOperator = std::string(FunctionName).rfind("::");
+ return ScopeResolutionOperator == std::string::npos ? std::string("") : std::string(DemangledName).substr(0, ScopeResolutionOperator);
+ };
+
+ std::unordered_map<std::string, std::vector<BinaryFunction *>> NamespaceToBFs;
+ NamespaceToBFs.reserve(BC.getBinaryFunctions().size());
+
+ for (BinaryFunction *BF : BC.getAllBinaryFunctions()) {
+ std::string* DemangledName = DemangleName(BF->getOneName().str().c_str());
+ if (!DemangledName)
+ continue;
+ std::string Namespace = DeriveNameSpace(*DemangledName);
+ auto It = NamespaceToBFs.find(Namespace);
+ if (It == NamespaceToBFs.end())
+ NamespaceToBFs[Namespace] = {BF};
+ else
+ It->second.push_back(BF);
}
for (auto YamlBF : YamlBP.Functions) {
if (YamlBF.Used)
continue;
- int Status = 0;
- char *DemangledName =
- abi::__cxa_demangle(YamlBF.Name.c_str(), nullptr, nullptr, &Status);
- if (Status != 0)
+ std::string* YamlBFDemangledName = DemangleName(YamlBF.Name.c_str());
+ if (!YamlBFDemangledName)
continue;
- auto It = NameToBinaryFunction.find(DemangledName);
- if (It == NameToBinaryFunction.end())
+ std::string Namespace = DeriveNameSpace(*YamlBFDemangledName);
+ auto It = NamespaceToBFs.find(Namespace);
+ if (It == NamespaceToBFs.end())
continue;
- BinaryFunction *BF = It->second;
- matchProfileToFunction(YamlBF, *BF);
- ++MatchedWithDemangledName;
+ std::vector<BinaryFunction *> BFs = It->second;
+
+ unsigned MinEditDistance = UINT_MAX;
+ BinaryFunction *ClosestNameBF = nullptr;
+
+ for (BinaryFunction *BF : BFs) {
+ if (ProfiledFunctions.count(BF))
+ continue;
+ std::string *BFDemangledName = DemangleName(BF->getOneName().str().c_str());
+ if (!BFDemangledName)
+ continue;
+ unsigned BFEditDistance = StringRef(*BFDemangledName).edit_distance(*YamlBFDemangledName);
+ if (BFEditDistance < MinEditDistance) {
+ MinEditDistance = BFEditDistance;
+ ClosestNameBF = BF;
+ }
+ }
+
+ if (ClosestNameBF &&
+ MinEditDistance < opts::NameSimilarityFunctionMatchingThreshold) {
+ matchProfileToFunction(YamlBF, *ClosestNameBF);
+ ++MatchedWithDemangledName;
+ }
}
}
+ outs() << MatchedWithDemangledName << ": functions matched by name similarity\n";
+
for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions)
if (!YamlBF.Used && opts::Verbosity >= 1)
errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name
>From 2d23bbd6b9ce4f0786ae8ceb39b1b008b4ca9c4d Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Thu, 20 Jun 2024 23:45:27 -0700
Subject: [PATCH 3/9] spr amend
Created using spr 1.3.4
---
bolt/lib/Profile/YAMLProfileReader.cpp | 2 --
1 file changed, 2 deletions(-)
diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index c9f6d88f0b13a..cf4a5393df8f4 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -491,8 +491,6 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
}
}
- outs() << MatchedWithDemangledName << ": functions matched by name similarity\n";
-
for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions)
if (!YamlBF.Used && opts::Verbosity >= 1)
errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name
>From 9e6bb260197ca22219887c9158d1d19529301064 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Fri, 21 Jun 2024 11:14:15 -0700
Subject: [PATCH 4/9] spr amend
Created using spr 1.3.4
---
bolt/lib/Profile/YAMLProfileReader.cpp | 50 +++++++++++++-------------
1 file changed, 24 insertions(+), 26 deletions(-)
diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index cf4a5393df8f4..6aef9ea566858 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -11,13 +11,13 @@
#include "bolt/Core/BinaryFunction.h"
#include "bolt/Passes/MCF.h"
#include "bolt/Profile/ProfileYAMLMapping.h"
+#include "bolt/Utils/NameResolver.h"
#include "bolt/Utils/Utils.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/edit_distance.h"
+#include "llvm/Demangle/Demangle.h"
#include "llvm/Support/CommandLine.h"
-#include <cxxabi.h>
-
using namespace llvm;
namespace opts {
@@ -426,43 +426,40 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
uint64_t MatchedWithDemangledName = 0;
if (opts::NameSimilarityFunctionMatchingThreshold > 0) {
- auto DemangleName = [&](const char* String) {
- int Status = 0;
- char *DemangledName = abi::__cxa_demangle(String,
- nullptr, nullptr, &Status);
- return Status == 0 ? new std::string(DemangledName) : nullptr;
+ auto DemangleName = [&](std::string &FunctionName) {
+ StringRef RestoredName = NameResolver::restore(FunctionName);
+ return demangle(RestoredName);
};
+ ItaniumPartialDemangler ItaniumPartialDemangler;
auto DeriveNameSpace = [&](std::string DemangledName) {
- size_t LParen = std::string(DemangledName).find("(");
- std::string FunctionName = std::string(DemangledName).substr(0, LParen);
- size_t ScopeResolutionOperator = std::string(FunctionName).rfind("::");
- return ScopeResolutionOperator == std::string::npos ? std::string("") : std::string(DemangledName).substr(0, ScopeResolutionOperator);
+ std::vector<char> Buffer(DemangledName.begin(), DemangledName.end());
+ size_t BufferSize = Buffer.size();
+ char *NameSpace = ItaniumPartialDemangler.getFunctionDeclContextName(
+ &Buffer[0], &BufferSize);
+ return NameSpace ? std::string(NameSpace) : std::string("");
};
- std::unordered_map<std::string, std::vector<BinaryFunction *>> NamespaceToBFs;
+ std::unordered_map<std::string, std::vector<BinaryFunction *>>
+ NamespaceToBFs;
+
NamespaceToBFs.reserve(BC.getBinaryFunctions().size());
for (BinaryFunction *BF : BC.getAllBinaryFunctions()) {
- std::string* DemangledName = DemangleName(BF->getOneName().str().c_str());
- if (!DemangledName)
- continue;
- std::string Namespace = DeriveNameSpace(*DemangledName);
+ std::string DemangledName = BF->getDemangledName();
+ std::string Namespace = DeriveNameSpace(DemangledName);
auto It = NamespaceToBFs.find(Namespace);
if (It == NamespaceToBFs.end())
NamespaceToBFs[Namespace] = {BF};
else
It->second.push_back(BF);
}
-
for (auto YamlBF : YamlBP.Functions) {
if (YamlBF.Used)
continue;
- std::string* YamlBFDemangledName = DemangleName(YamlBF.Name.c_str());
- if (!YamlBFDemangledName)
- continue;
- std::string Namespace = DeriveNameSpace(*YamlBFDemangledName);
- auto It = NamespaceToBFs.find(Namespace);
+ std::string YamlBFDemangledName = DemangleName(YamlBF.Name);
+ std::string YamlBFNamespace = DeriveNameSpace(YamlBFDemangledName);
+ auto It = NamespaceToBFs.find(YamlBFNamespace);
if (It == NamespaceToBFs.end())
continue;
std::vector<BinaryFunction *> BFs = It->second;
@@ -473,10 +470,11 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
for (BinaryFunction *BF : BFs) {
if (ProfiledFunctions.count(BF))
continue;
- std::string *BFDemangledName = DemangleName(BF->getOneName().str().c_str());
- if (!BFDemangledName)
+ if (BF->size() != YamlBF.NumBasicBlocks)
continue;
- unsigned BFEditDistance = StringRef(*BFDemangledName).edit_distance(*YamlBFDemangledName);
+ std::string BFDemangledName = BF->getDemangledName();
+ unsigned BFEditDistance =
+ StringRef(BFDemangledName).edit_distance(YamlBFDemangledName);
if (BFEditDistance < MinEditDistance) {
MinEditDistance = BFEditDistance;
ClosestNameBF = BF;
@@ -484,7 +482,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
}
if (ClosestNameBF &&
- MinEditDistance < opts::NameSimilarityFunctionMatchingThreshold) {
+ MinEditDistance < opts::NameSimilarityFunctionMatchingThreshold) {
matchProfileToFunction(YamlBF, *ClosestNameBF);
++MatchedWithDemangledName;
}
>From 669afca2beb4b1f70cd3f8aabeaf8e227161751b Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Fri, 21 Jun 2024 14:13:13 -0700
Subject: [PATCH 5/9] spr amend
Created using spr 1.3.4
---
bolt/lib/Profile/YAMLProfileReader.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index 82139af2bd34a..0cf12c9765959 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -426,7 +426,6 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
uint64_t MatchedWithDemangledName = 0;
if (opts::NameSimilarityFunctionMatchingThreshold > 0) {
- outs() << "starting name similarity matching\n";
auto DemangleName = [&](std::string &FunctionName) {
StringRef RestoredName = NameResolver::restore(FunctionName);
return demangle(RestoredName);
>From 9c021f883374e82012d6aa7228c2d99bd368e7d8 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Fri, 21 Jun 2024 14:17:22 -0700
Subject: [PATCH 6/9] spr amend
Created using spr 1.3.4
---
bolt/lib/Profile/YAMLProfileReader.cpp | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index 0cf12c9765959..c0a17a475bae0 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -423,7 +423,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
matchProfileToFunction(YamlBF, *BF);
// Uses name similarity to match functions that were not matched by name.
- uint64_t MatchedWithDemangledName = 0;
+ uint64_t MatchedWithNameSimilarity = 0;
if (opts::NameSimilarityFunctionMatchingThreshold > 0) {
auto DemangleName = [&](std::string &FunctionName) {
@@ -490,7 +490,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
if (ClosestNameBF &&
MinEditDistance < opts::NameSimilarityFunctionMatchingThreshold) {
matchProfileToFunction(YamlBF, *ClosestNameBF);
- ++MatchedWithDemangledName;
+ ++MatchedWithNameSimilarity;
}
}
}
@@ -500,6 +500,11 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name
<< '\n';
+ if (opts::Verbosity >= 2) {
+ outs() << "BOLT-INFO: matched " << MatchedWithNameSimilarity
+ << " functions with similar names\n";
+ }
+
// Set for parseFunctionProfile().
NormalizeByInsnCount = usesEvent("cycles") || usesEvent("instructions");
NormalizeByCalls = usesEvent("branches");
>From 9fc1899e6f84a6c133e941e69f112717d15eefad Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Fri, 21 Jun 2024 15:11:05 -0700
Subject: [PATCH 7/9] spr amend
Created using spr 1.3.4
---
bolt/lib/Profile/YAMLProfileReader.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index c0a17a475bae0..60899ead52f85 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -486,7 +486,6 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
}
}
-
if (ClosestNameBF &&
MinEditDistance < opts::NameSimilarityFunctionMatchingThreshold) {
matchProfileToFunction(YamlBF, *ClosestNameBF);
>From d687bc035e5f279a8af526381a8af7acb2fc67bf Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Mon, 24 Jun 2024 16:33:14 -0700
Subject: [PATCH 8/9] Added test
Created using spr 1.3.4
---
bolt/lib/Profile/YAMLProfileReader.cpp | 3 +-
.../name-similarity-function-matching.test | 64 +++++++++++++++++++
2 files changed, 65 insertions(+), 2 deletions(-)
create mode 100644 bolt/test/X86/name-similarity-function-matching.test
diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index 60899ead52f85..cbbb0f96f358b 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -458,7 +458,6 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
It->second.push_back(BF);
}
- size_t I = 0; size_t N = YamlBP.Functions.size();
for (auto YamlBF : YamlBP.Functions) {
if (YamlBF.Used)
continue;
@@ -487,7 +486,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
}
if (ClosestNameBF &&
- MinEditDistance < opts::NameSimilarityFunctionMatchingThreshold) {
+ MinEditDistance <= opts::NameSimilarityFunctionMatchingThreshold) {
matchProfileToFunction(YamlBF, *ClosestNameBF);
++MatchedWithNameSimilarity;
}
diff --git a/bolt/test/X86/name-similarity-function-matching.test b/bolt/test/X86/name-similarity-function-matching.test
new file mode 100644
index 0000000000000..1480a2165c389
--- /dev/null
+++ b/bolt/test/X86/name-similarity-function-matching.test
@@ -0,0 +1,64 @@
+## Tests function matching in YAMLProfileReader by name similarity.
+
+# REQUIRES: system-linux
+# RUN: split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/main.s -o %t.o
+# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib
+# RUN: llvm-bolt %t.exe -o %t.out --data %t/yaml -v=2 \
+# RUN: --print-cfg --name-similarity-function-matching-threshold=1 2>&1 --funcs=main --profile-ignore-hash=0 | FileCheck %s
+
+# CHECK: BOLT-INFO: matched 1 functions with similar names
+
+#--- main.s
+.globl main
+.type main, @function
+main:
+ .cfi_startproc
+.LBB00:
+ pushq %rbp
+ movq %rsp, %rbp
+ subq $16, %rsp
+ testq %rax, %rax
+ js .LBB03
+.LBB01:
+ jne .LBB04
+.LBB02:
+ nop
+.LBB03:
+ xorl %eax, %eax
+ addq $16, %rsp
+ popq %rbp
+ retq
+.LBB04:
+ xorl %eax, %eax
+ addq $16, %rsp
+ popq %rbp
+ retq
+## For relocations against .text
+.LBB05:
+ call exit
+ .cfi_endproc
+ .size main, .-main
+
+#--- yaml
+---
+header:
+ profile-version: 1
+ binary-name: 'hashing-based-function-matching.s.tmp.exe'
+ binary-build-id: '<unknown>'
+ profile-flags: [ lbr ]
+ profile-origin: branch profile reader
+ profile-events: ''
+ dfs-order: false
+ hash-func: xxh3
+functions:
+ - name: main2
+ fid: 0
+ hash: 0x0000000000000001
+ exec: 1
+ nblocks: 6
+ blocks:
+ - bid: 1
+ insns: 1
+ succ: [ { bid: 3, cnt: 1} ]
+...
>From 60047ab888b944b4b7d9fde7176e18ff4886d163 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung at gmail.com>
Date: Wed, 3 Jul 2024 09:58:00 -0700
Subject: [PATCH 9/9] Changing buffer initialization in DeriveNamespace
Created using spr 1.3.4
---
bolt/lib/Profile/YAMLProfileReader.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index 4c3dacd781da9..63222147bedd6 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -371,7 +371,7 @@ uint64_t YAMLProfileReader::matchWithNameSimilarity(BinaryContext &BC) {
if (Demangler.partialDemangle(DemangledName.c_str()))
return std::string("");
std::vector<char> Buffer(DemangledName.begin(), DemangledName.end());
- size_t BufferSize = Buffer.size();
+ size_t BufferSize;
char *NameSpace =
Demangler.getFunctionDeclContextName(&Buffer[0], &BufferSize);
return std::string(NameSpace, BufferSize);
More information about the cfe-commits
mailing list