[llvm] b4a8c0e - [LTO][MC] Discard non-prevailing defined symbols in module-level assembly
Yuanfang Chen via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 18 15:34:11 PDT 2021
Author: Yuanfang Chen
Date: 2021-03-18T15:33:42-07:00
New Revision: b4a8c0ebb6d49f757c687833d85f843aaeb19133
URL: https://github.com/llvm/llvm-project/commit/b4a8c0ebb6d49f757c687833d85f843aaeb19133
DIFF: https://github.com/llvm/llvm-project/commit/b4a8c0ebb6d49f757c687833d85f843aaeb19133.diff
LOG: [LTO][MC] Discard non-prevailing defined symbols in module-level assembly
This is the alternative approach to D96931.
In LTO, for each module with inlineasm block, prepend directive ".lto_discard <sym>, <sym>*" to the beginning of the inline
asm. ".lto_discard" is both a module inlineasm block marker and (optionally) provides a list of symbols to be discarded.
In MC while emitting for inlineasm, discard symbol binding & symbol
definitions according to ".lto_disard".
Reviewed By: MaskRay
Differential Revision: https://reviews.llvm.org/D98762
Added:
llvm/test/LTO/X86/inline-asm-lto-discard.ll
llvm/test/LTO/X86/inline-asm-lto-discard2.ll
llvm/test/MC/ELF/lto-discard.s
Modified:
llvm/include/llvm/MC/MCContext.h
llvm/include/llvm/MC/MCParser/MCAsmParser.h
llvm/lib/LTO/LTO.cpp
llvm/lib/MC/MCParser/AsmParser.cpp
llvm/lib/MC/MCParser/ELFAsmParser.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h
index 106763c5d7c2..f07e5a89b101 100644
--- a/llvm/include/llvm/MC/MCContext.h
+++ b/llvm/include/llvm/MC/MCContext.h
@@ -396,7 +396,6 @@ namespace llvm {
void initInlineSourceManager();
SourceMgr *getInlineSourceManager() {
- assert(InlineSrcMgr);
return InlineSrcMgr.get();
}
std::vector<const MDNode *> &getLocInfos() { return LocInfos; }
diff --git a/llvm/include/llvm/MC/MCParser/MCAsmParser.h b/llvm/include/llvm/MC/MCParser/MCAsmParser.h
index 02cc22009196..24d4ada5fa0b 100644
--- a/llvm/include/llvm/MC/MCParser/MCAsmParser.h
+++ b/llvm/include/llvm/MC/MCParser/MCAsmParser.h
@@ -182,6 +182,8 @@ class MCAsmParser {
virtual void setParsingMSInlineAsm(bool V) = 0;
virtual bool isParsingMSInlineAsm() = 0;
+ virtual bool discardLTOSymbol(StringRef) const { return false; }
+
virtual bool isParsingMasm() const { return false; }
virtual bool defineMacro(StringRef Name, StringRef Value) { return true; }
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 8bcb1600925d..3cd8c78c42e6 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -11,7 +11,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/LTO/LTO.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/StackSafetyAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -752,6 +754,7 @@ LTO::addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
Skip();
std::set<const Comdat *> NonPrevailingComdats;
+ SmallSet<StringRef, 2> NonPrevailingAsmSymbols;
for (const InputFile::Symbol &Sym : Syms) {
assert(ResI != ResE);
SymbolResolution Res = *ResI++;
@@ -798,7 +801,14 @@ LTO::addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
GV->setDLLStorageClass(GlobalValue::DLLStorageClassTypes::
DefaultStorageClass);
}
+ } else if (auto *AS = Msym.dyn_cast<ModuleSymbolTable::AsmSymbol *>()) {
+ // Collect non-prevailing symbols.
+ if (!Res.Prevailing)
+ NonPrevailingAsmSymbols.insert(AS->first);
+ } else {
+ llvm_unreachable("unknown symbol type");
}
+
// Common resolution: collect the maximum size/alignment over all commons.
// We also record if we see an instance of a common as prevailing, so that
// if none is prevailing we can ignore it later.
@@ -812,11 +822,29 @@ LTO::addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
CommonRes.Align = max(*SymAlign, CommonRes.Align);
CommonRes.Prevailing |= Res.Prevailing;
}
-
}
+
if (!M.getComdatSymbolTable().empty())
for (GlobalValue &GV : M.global_values())
handleNonPrevailingComdat(GV, NonPrevailingComdats);
+
+ // Prepend ".lto_discard <sym>, <sym>*" directive to each module inline asm
+ // block.
+ if (!M.getModuleInlineAsm().empty()) {
+ std::string NewIA = ".lto_discard";
+ if (!NonPrevailingAsmSymbols.empty()) {
+ // Don't dicard a symbol if there is a live .symver for it.
+ ModuleSymbolTable::CollectAsmSymvers(
+ M, [&](StringRef Name, StringRef Alias) {
+ if (!NonPrevailingAsmSymbols.count(Alias))
+ NonPrevailingAsmSymbols.erase(Name);
+ });
+ NewIA += " " + llvm::join(NonPrevailingAsmSymbols, ", ");
+ }
+ NewIA += "\n";
+ M.setModuleInlineAsm(NewIA + M.getModuleInlineAsm());
+ }
+
assert(MsymI == MsymE);
return std::move(Mod);
}
diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp
index 3ef51e69ab7e..261d1e9394eb 100644
--- a/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
@@ -168,6 +169,8 @@ class AsmParser : public MCAsmParser {
/// List of forward directional labels for diagnosis at the end.
SmallVector<std::tuple<SMLoc, CppHashInfoTy, MCSymbol *>, 4> DirLabels;
+ SmallSet<StringRef, 2> LTODiscardSymbols;
+
/// AssemblerDialect. ~OU means unset value and use value provided by MAI.
unsigned AssemblerDialect = ~0U;
@@ -235,6 +238,10 @@ class AsmParser : public MCAsmParser {
}
bool isParsingMSInlineAsm() override { return ParsingMSInlineAsm; }
+ bool discardLTOSymbol(StringRef Name) const override {
+ return LTODiscardSymbols.contains(Name);
+ }
+
bool parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
unsigned &NumOutputs, unsigned &NumInputs,
SmallVectorImpl<std::pair<void *,bool>> &OpDecls,
@@ -516,6 +523,7 @@ class AsmParser : public MCAsmParser {
DK_ADDRSIG,
DK_ADDRSIG_SYM,
DK_PSEUDO_PROBE,
+ DK_LTO_DISCARD,
DK_END
};
@@ -682,6 +690,9 @@ class AsmParser : public MCAsmParser {
// .pseudoprobe
bool parseDirectivePseudoProbe();
+ // ".lto_discard"
+ bool parseDirectiveLTODiscard();
+
// Directives to support address-significance tables.
bool parseDirectiveAddrsig();
bool parseDirectiveAddrsigSym();
@@ -892,6 +903,8 @@ bool AsmParser::enabledGenDwarfForAssembly() {
}
bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
+ LTODiscardSymbols.clear();
+
// Create the initial section, if requested.
if (!NoInitialTextSection)
Out.InitSections(false);
@@ -1770,7 +1783,6 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
StringMap<DirectiveKind>::const_iterator DirKindIt =
DirectiveKindMap.find(IDVal.lower());
DirectiveKind DirKind = (DirKindIt == DirectiveKindMap.end())
-
? DK_NO_DIRECTIVE
: DirKindIt->getValue();
switch (DirKind) {
@@ -1868,6 +1880,9 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
Lex();
}
+ if (discardLTOSymbol(IDVal))
+ return false;
+
getTargetParser().doBeforeLabelEmit(Sym);
// Emit the label.
@@ -2208,6 +2223,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
return parseDirectiveAddrsigSym();
case DK_PSEUDO_PROBE:
return parseDirectivePseudoProbe();
+ case DK_LTO_DISCARD:
+ return parseDirectiveLTODiscard();
}
return Error(IDLoc, "unknown directive");
@@ -2852,6 +2869,9 @@ bool AsmParser::parseAssignment(StringRef Name, bool allow_redef,
return false;
}
+ if (discardLTOSymbol(Name))
+ return false;
+
// Do the assignment.
Out.emitAssignment(Sym, Value);
if (NoDeadStrip)
@@ -4870,6 +4890,10 @@ bool AsmParser::parseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
SMLoc Loc = getTok().getLoc();
if (parseIdentifier(Name))
return Error(Loc, "expected identifier");
+
+ if (discardLTOSymbol(Name))
+ return false;
+
MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
// Assembler local symbols don't make any sense here. Complain loudly.
@@ -5493,6 +5517,7 @@ void AsmParser::initializeDirectiveKindMap() {
DirectiveKindMap[".addrsig"] = DK_ADDRSIG;
DirectiveKindMap[".addrsig_sym"] = DK_ADDRSIG_SYM;
DirectiveKindMap[".pseudoprobe"] = DK_PSEUDO_PROBE;
+ DirectiveKindMap[".lto_discard"] = DK_LTO_DISCARD;
}
MCAsmMacro *AsmParser::parseMacroLikeBody(SMLoc DirectiveLoc) {
@@ -5806,6 +5831,27 @@ bool AsmParser::parseDirectivePseudoProbe() {
return false;
}
+/// parseDirectiveLTODiscard
+/// ::= ".lto_discard" [ identifier ( , identifier )* ]
+/// The LTO library emits this directive to discard non-prevailing symbols.
+/// We ignore symbol assignments and attribute changes for the specified
+/// symbols.
+bool AsmParser::parseDirectiveLTODiscard() {
+ auto ParseOp = [&]() -> bool {
+ StringRef Name;
+ SMLoc Loc = getTok().getLoc();
+ if (parseIdentifier(Name))
+ return Error(Loc, "expected identifier");
+ LTODiscardSymbols.insert(Name);
+ return false;
+ };
+
+ LTODiscardSymbols.clear();
+ if (parseMany(ParseOp))
+ return addErrorSuffix(" in directive");
+ return false;
+}
+
// We are comparing pointers, but the pointers are relative to a single string.
// Thus, this should always be deterministic.
static int rewritesSort(const AsmRewrite *AsmRewriteA,
diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index 5b3f0225bba9..70d69fc8dd32 100644
--- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -182,6 +182,12 @@ bool ELFAsmParser::ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) {
if (getParser().parseIdentifier(Name))
return TokError("expected identifier in directive");
+ if (getParser().discardLTOSymbol(Name)) {
+ if (getLexer().is(AsmToken::EndOfStatement))
+ break;
+ continue;
+ }
+
MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
getStreamer().emitSymbolAttribute(Sym, Attr);
diff --git a/llvm/test/LTO/X86/inline-asm-lto-discard.ll b/llvm/test/LTO/X86/inline-asm-lto-discard.ll
new file mode 100644
index 000000000000..4893eb186cfb
--- /dev/null
+++ b/llvm/test/LTO/X86/inline-asm-lto-discard.ll
@@ -0,0 +1,87 @@
+; Check that non-prevailing symbols in module inline assembly are discarded
+; during regular LTO otherwise the final symbol binding could be wrong.
+
+; RUN: split-file %s %t
+; RUN: opt %t/t1.ll -o %t1
+; RUN: opt %t/t2.ll -o %t2
+; RUN: opt %t/t3.ll -o %t3
+; RUN: opt %t/t4.ll -o %t4
+
+; RUN: llvm-lto2 run -o %to1 -save-temps %t1 %t2 \
+; RUN: -r %t1,foo,px \
+; RUN: -r %t2,foo, \
+; RUN: -r %t2,bar,pl
+; RUN: llvm-dis < %to1.0.0.preopt.bc | FileCheck %s --check-prefix=ASM1
+; RUN: llvm-nm %to1.0 | FileCheck %s --check-prefix=SYM
+; RUN: llvm-objdump -d --disassemble-symbols=foo %to1.0 \
+; RUN: | FileCheck %s --check-prefix=DEF
+
+; RUN: llvm-lto2 run -o %to2 -save-temps %t2 %t3 \
+; RUN: -r %t2,foo, \
+; RUN: -r %t2,bar,pl \
+; RUN: -r %t3,foo,px
+; RUN: llvm-dis < %to2.0.0.preopt.bc | FileCheck %s --check-prefix=ASM2
+; RUN: llvm-nm %to2.0 | FileCheck %s --check-prefix=SYM
+; RUN: llvm-objdump -d --disassemble-symbols=foo %to2.0 \
+; RUN: | FileCheck %s --check-prefix=DEF
+
+; Check that ".symver" is properly handled.
+; RUN: llvm-lto2 run -o %to3 -save-temps %t4 \
+; RUN: -r %t4,bar, \
+; RUN: -r %t4,foo, \
+; RUN: -r %t4,foo@@VER1,px
+; RUN: llvm-dis < %to3.0.0.preopt.bc | FileCheck %s --check-prefix=ASM3
+
+; ASM1: module asm ".lto_discard foo"
+; ASM1-NEXT: module asm ".weak foo"
+; ASM1-NEXT: module asm ".equ foo,bar"
+
+; ASM2: module asm ".lto_discard foo"
+; ASM2-NEXT: module asm ".weak foo"
+; ASM2-NEXT: module asm ".equ foo,bar"
+; ASM2-NEXT: module asm ".lto_discard"
+; ASM2-NEXT: module asm " .global foo ; foo: leal 2(%rdi), %eax"
+
+; ASM3-NOT: module asm ".lto_discard foo"
+
+; SYM: T foo
+
+; DEF: leal 2(%rdi), %eax
+
+;--- t1.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define dso_local i32 @foo(i32 %0) {
+ %2 = add nsw i32 %0, 2
+ ret i32 %2
+}
+
+;--- t2.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+module asm ".weak foo"
+module asm ".equ foo,bar"
+
+ at llvm.compiler.used = appending global [1 x i8*] [i8* bitcast (i32 (i32)* @bar to i8*)], section "llvm.metadata"
+
+define internal i32 @bar(i32 %0) {
+ %2 = add nsw i32 %0, 1
+ ret i32 %2
+}
+
+;--- t3.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+module asm " .global foo ; foo: leal 2(%rdi), %eax"
+
+;--- t4.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+module asm ".global foo"
+module asm "foo: call bar"
+module asm ".symver foo,foo@@@VER1"
+module asm ".symver bar,bar@@@VER1"
diff --git a/llvm/test/LTO/X86/inline-asm-lto-discard2.ll b/llvm/test/LTO/X86/inline-asm-lto-discard2.ll
new file mode 100644
index 000000000000..5d111d0a52e3
--- /dev/null
+++ b/llvm/test/LTO/X86/inline-asm-lto-discard2.ll
@@ -0,0 +1,29 @@
+; Check that
+; 1. ".lto_discard" works as module inlineasm marker and its argument symbols
+; are discarded.
+; 2. there is no reassignment error in the presence of ".lto_discard"
+; RUN: llc < %s | FileCheck %s
+
+; CHECK: .data
+; CHECK-NOT: .weak foo
+; CHECK-NOT: .set foo, bar
+; CHECK: .globl foo
+; CHECK: foo:
+; CHECK: .byte 1
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+module asm ".lto_discard foo"
+module asm " .text"
+module asm "bar:"
+module asm " .data"
+module asm ".weak foo"
+module asm ".set foo, bar"
+module asm ".weak foo"
+module asm ".set foo, bar"
+
+module asm ".lto_discard"
+module asm ".globl foo"
+module asm "foo:"
+module asm " .byte 1"
diff --git a/llvm/test/MC/ELF/lto-discard.s b/llvm/test/MC/ELF/lto-discard.s
new file mode 100644
index 000000000000..75a7d7ea5e91
--- /dev/null
+++ b/llvm/test/MC/ELF/lto-discard.s
@@ -0,0 +1,31 @@
+// Check that ".lto_discard" ignores symbol assignments and attribute changes
+// for the specified symbols.
+// RUN: llvm-mc -triple x86_64 < %s | FileCheck %s
+
+// Check that ".lto_discard" only accepts identifiers.
+// RUN: not llvm-mc -filetype=obj -triple x86_64 --defsym ERR=1 %s 2>&1 |\
+// RUN: FileCheck %s --check-prefix=ERR
+
+// CHECK: .weak foo
+// CHECK: foo:
+// CHECK: .byte 1
+// CHECK: .weak bar
+// CHECK: bar:
+// CHECK: .byte 2
+
+.lto_discard foo
+.weak foo
+foo:
+ .byte 1
+
+.lto_discard
+.weak bar
+bar:
+ .byte 2
+
+
+.ifdef ERR
+.text
+# ERR: {{.*}}.s:[[#@LINE+1]]:14: error: expected identifier in directive
+.lto_discard 1
+.endif
More information about the llvm-commits
mailing list