[llvm] b4a8c0e - [LTO][MC] Discard non-prevailing defined symbols in module-level assembly

Yuanfang Chen via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 18 15:34:11 PDT 2021


Author: Yuanfang Chen
Date: 2021-03-18T15:33:42-07:00
New Revision: b4a8c0ebb6d49f757c687833d85f843aaeb19133

URL: https://github.com/llvm/llvm-project/commit/b4a8c0ebb6d49f757c687833d85f843aaeb19133
DIFF: https://github.com/llvm/llvm-project/commit/b4a8c0ebb6d49f757c687833d85f843aaeb19133.diff

LOG: [LTO][MC] Discard non-prevailing defined symbols in module-level assembly

This is the alternative approach to D96931.

In LTO, for each module with inlineasm block, prepend directive ".lto_discard <sym>, <sym>*" to the beginning of the inline
asm.  ".lto_discard" is both a module inlineasm block marker and (optionally) provides a list of symbols to be discarded.

In MC while emitting for inlineasm, discard symbol binding & symbol
definitions according to ".lto_disard".

Reviewed By: MaskRay

Differential Revision: https://reviews.llvm.org/D98762

Added: 
    llvm/test/LTO/X86/inline-asm-lto-discard.ll
    llvm/test/LTO/X86/inline-asm-lto-discard2.ll
    llvm/test/MC/ELF/lto-discard.s

Modified: 
    llvm/include/llvm/MC/MCContext.h
    llvm/include/llvm/MC/MCParser/MCAsmParser.h
    llvm/lib/LTO/LTO.cpp
    llvm/lib/MC/MCParser/AsmParser.cpp
    llvm/lib/MC/MCParser/ELFAsmParser.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h
index 106763c5d7c2..f07e5a89b101 100644
--- a/llvm/include/llvm/MC/MCContext.h
+++ b/llvm/include/llvm/MC/MCContext.h
@@ -396,7 +396,6 @@ namespace llvm {
 
     void initInlineSourceManager();
     SourceMgr *getInlineSourceManager() {
-      assert(InlineSrcMgr);
       return InlineSrcMgr.get();
     }
     std::vector<const MDNode *> &getLocInfos() { return LocInfos; }

diff  --git a/llvm/include/llvm/MC/MCParser/MCAsmParser.h b/llvm/include/llvm/MC/MCParser/MCAsmParser.h
index 02cc22009196..24d4ada5fa0b 100644
--- a/llvm/include/llvm/MC/MCParser/MCAsmParser.h
+++ b/llvm/include/llvm/MC/MCParser/MCAsmParser.h
@@ -182,6 +182,8 @@ class MCAsmParser {
   virtual void setParsingMSInlineAsm(bool V) = 0;
   virtual bool isParsingMSInlineAsm() = 0;
 
+  virtual bool discardLTOSymbol(StringRef) const { return false; }
+
   virtual bool isParsingMasm() const { return false; }
 
   virtual bool defineMacro(StringRef Name, StringRef Value) { return true; }

diff  --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index 8bcb1600925d..3cd8c78c42e6 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -11,7 +11,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/LTO/LTO.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/StackSafetyAnalysis.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
@@ -752,6 +754,7 @@ LTO::addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
   Skip();
 
   std::set<const Comdat *> NonPrevailingComdats;
+  SmallSet<StringRef, 2> NonPrevailingAsmSymbols;
   for (const InputFile::Symbol &Sym : Syms) {
     assert(ResI != ResE);
     SymbolResolution Res = *ResI++;
@@ -798,7 +801,14 @@ LTO::addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
           GV->setDLLStorageClass(GlobalValue::DLLStorageClassTypes::
                                  DefaultStorageClass);
       }
+    } else if (auto *AS = Msym.dyn_cast<ModuleSymbolTable::AsmSymbol *>()) {
+      // Collect non-prevailing symbols.
+      if (!Res.Prevailing)
+        NonPrevailingAsmSymbols.insert(AS->first);
+    } else {
+      llvm_unreachable("unknown symbol type");
     }
+
     // Common resolution: collect the maximum size/alignment over all commons.
     // We also record if we see an instance of a common as prevailing, so that
     // if none is prevailing we can ignore it later.
@@ -812,11 +822,29 @@ LTO::addRegularLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
         CommonRes.Align = max(*SymAlign, CommonRes.Align);
       CommonRes.Prevailing |= Res.Prevailing;
     }
-
   }
+
   if (!M.getComdatSymbolTable().empty())
     for (GlobalValue &GV : M.global_values())
       handleNonPrevailingComdat(GV, NonPrevailingComdats);
+
+  // Prepend ".lto_discard <sym>, <sym>*" directive to each module inline asm
+  // block.
+  if (!M.getModuleInlineAsm().empty()) {
+    std::string NewIA = ".lto_discard";
+    if (!NonPrevailingAsmSymbols.empty()) {
+      // Don't dicard a symbol if there is a live .symver for it.
+      ModuleSymbolTable::CollectAsmSymvers(
+          M, [&](StringRef Name, StringRef Alias) {
+            if (!NonPrevailingAsmSymbols.count(Alias))
+              NonPrevailingAsmSymbols.erase(Name);
+          });
+      NewIA += " " + llvm::join(NonPrevailingAsmSymbols, ", ");
+    }
+    NewIA += "\n";
+    M.setModuleInlineAsm(NewIA + M.getModuleInlineAsm());
+  }
+
   assert(MsymI == MsymE);
   return std::move(Mod);
 }

diff  --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp
index 3ef51e69ab7e..261d1e9394eb 100644
--- a/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -15,6 +15,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
@@ -168,6 +169,8 @@ class AsmParser : public MCAsmParser {
   /// List of forward directional labels for diagnosis at the end.
   SmallVector<std::tuple<SMLoc, CppHashInfoTy, MCSymbol *>, 4> DirLabels;
 
+  SmallSet<StringRef, 2> LTODiscardSymbols;
+
   /// AssemblerDialect. ~OU means unset value and use value provided by MAI.
   unsigned AssemblerDialect = ~0U;
 
@@ -235,6 +238,10 @@ class AsmParser : public MCAsmParser {
   }
   bool isParsingMSInlineAsm() override { return ParsingMSInlineAsm; }
 
+  bool discardLTOSymbol(StringRef Name) const override {
+    return LTODiscardSymbols.contains(Name);
+  }
+
   bool parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
                         unsigned &NumOutputs, unsigned &NumInputs,
                         SmallVectorImpl<std::pair<void *,bool>> &OpDecls,
@@ -516,6 +523,7 @@ class AsmParser : public MCAsmParser {
     DK_ADDRSIG,
     DK_ADDRSIG_SYM,
     DK_PSEUDO_PROBE,
+    DK_LTO_DISCARD,
     DK_END
   };
 
@@ -682,6 +690,9 @@ class AsmParser : public MCAsmParser {
   // .pseudoprobe
   bool parseDirectivePseudoProbe();
 
+  // ".lto_discard"
+  bool parseDirectiveLTODiscard();
+
   // Directives to support address-significance tables.
   bool parseDirectiveAddrsig();
   bool parseDirectiveAddrsigSym();
@@ -892,6 +903,8 @@ bool AsmParser::enabledGenDwarfForAssembly() {
 }
 
 bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
+  LTODiscardSymbols.clear();
+
   // Create the initial section, if requested.
   if (!NoInitialTextSection)
     Out.InitSections(false);
@@ -1770,7 +1783,6 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
   StringMap<DirectiveKind>::const_iterator DirKindIt =
       DirectiveKindMap.find(IDVal.lower());
   DirectiveKind DirKind = (DirKindIt == DirectiveKindMap.end())
-
                               ? DK_NO_DIRECTIVE
                               : DirKindIt->getValue();
   switch (DirKind) {
@@ -1868,6 +1880,9 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
       Lex();
     }
 
+    if (discardLTOSymbol(IDVal))
+      return false;
+
     getTargetParser().doBeforeLabelEmit(Sym);
 
     // Emit the label.
@@ -2208,6 +2223,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
       return parseDirectiveAddrsigSym();
     case DK_PSEUDO_PROBE:
       return parseDirectivePseudoProbe();
+    case DK_LTO_DISCARD:
+      return parseDirectiveLTODiscard();
     }
 
     return Error(IDLoc, "unknown directive");
@@ -2852,6 +2869,9 @@ bool AsmParser::parseAssignment(StringRef Name, bool allow_redef,
     return false;
   }
 
+  if (discardLTOSymbol(Name))
+    return false;
+
   // Do the assignment.
   Out.emitAssignment(Sym, Value);
   if (NoDeadStrip)
@@ -4870,6 +4890,10 @@ bool AsmParser::parseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
     SMLoc Loc = getTok().getLoc();
     if (parseIdentifier(Name))
       return Error(Loc, "expected identifier");
+
+    if (discardLTOSymbol(Name))
+      return false;
+
     MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
 
     // Assembler local symbols don't make any sense here. Complain loudly.
@@ -5493,6 +5517,7 @@ void AsmParser::initializeDirectiveKindMap() {
   DirectiveKindMap[".addrsig"] = DK_ADDRSIG;
   DirectiveKindMap[".addrsig_sym"] = DK_ADDRSIG_SYM;
   DirectiveKindMap[".pseudoprobe"] = DK_PSEUDO_PROBE;
+  DirectiveKindMap[".lto_discard"] = DK_LTO_DISCARD;
 }
 
 MCAsmMacro *AsmParser::parseMacroLikeBody(SMLoc DirectiveLoc) {
@@ -5806,6 +5831,27 @@ bool AsmParser::parseDirectivePseudoProbe() {
   return false;
 }
 
+/// parseDirectiveLTODiscard
+///  ::= ".lto_discard" [ identifier ( , identifier )* ]
+/// The LTO library emits this directive to discard non-prevailing symbols.
+/// We ignore symbol assignments and attribute changes for the specified
+/// symbols.
+bool AsmParser::parseDirectiveLTODiscard() {
+  auto ParseOp = [&]() -> bool {
+    StringRef Name;
+    SMLoc Loc = getTok().getLoc();
+    if (parseIdentifier(Name))
+      return Error(Loc, "expected identifier");
+    LTODiscardSymbols.insert(Name);
+    return false;
+  };
+
+  LTODiscardSymbols.clear();
+  if (parseMany(ParseOp))
+    return addErrorSuffix(" in directive");
+  return false;
+}
+
 // We are comparing pointers, but the pointers are relative to a single string.
 // Thus, this should always be deterministic.
 static int rewritesSort(const AsmRewrite *AsmRewriteA,

diff  --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index 5b3f0225bba9..70d69fc8dd32 100644
--- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -182,6 +182,12 @@ bool ELFAsmParser::ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) {
       if (getParser().parseIdentifier(Name))
         return TokError("expected identifier in directive");
 
+      if (getParser().discardLTOSymbol(Name)) {
+        if (getLexer().is(AsmToken::EndOfStatement))
+          break;
+        continue;
+      }
+
       MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
 
       getStreamer().emitSymbolAttribute(Sym, Attr);

diff  --git a/llvm/test/LTO/X86/inline-asm-lto-discard.ll b/llvm/test/LTO/X86/inline-asm-lto-discard.ll
new file mode 100644
index 000000000000..4893eb186cfb
--- /dev/null
+++ b/llvm/test/LTO/X86/inline-asm-lto-discard.ll
@@ -0,0 +1,87 @@
+; Check that non-prevailing symbols in module inline assembly are discarded
+; during regular LTO otherwise the final symbol binding could be wrong.
+
+; RUN: split-file %s %t
+; RUN: opt %t/t1.ll -o %t1
+; RUN: opt %t/t2.ll -o %t2
+; RUN: opt %t/t3.ll -o %t3
+; RUN: opt %t/t4.ll -o %t4
+
+; RUN: llvm-lto2 run -o %to1 -save-temps %t1 %t2 \
+; RUN:  -r %t1,foo,px \
+; RUN:  -r %t2,foo, \
+; RUN:  -r %t2,bar,pl
+; RUN: llvm-dis < %to1.0.0.preopt.bc | FileCheck %s --check-prefix=ASM1
+; RUN: llvm-nm %to1.0 | FileCheck %s --check-prefix=SYM
+; RUN: llvm-objdump -d --disassemble-symbols=foo %to1.0 \
+; RUN:   | FileCheck %s --check-prefix=DEF
+
+; RUN: llvm-lto2 run -o %to2 -save-temps %t2 %t3 \
+; RUN:  -r %t2,foo, \
+; RUN:  -r %t2,bar,pl \
+; RUN:  -r %t3,foo,px
+; RUN: llvm-dis < %to2.0.0.preopt.bc | FileCheck %s --check-prefix=ASM2
+; RUN: llvm-nm %to2.0 | FileCheck %s --check-prefix=SYM
+; RUN: llvm-objdump -d --disassemble-symbols=foo %to2.0 \
+; RUN:   | FileCheck %s --check-prefix=DEF
+
+; Check that ".symver" is properly handled.
+; RUN: llvm-lto2 run -o %to3 -save-temps %t4 \
+; RUN:  -r %t4,bar, \
+; RUN:  -r %t4,foo, \
+; RUN:  -r %t4,foo@@VER1,px
+; RUN: llvm-dis < %to3.0.0.preopt.bc | FileCheck %s --check-prefix=ASM3
+
+; ASM1:      module asm ".lto_discard foo"
+; ASM1-NEXT: module asm ".weak foo"
+; ASM1-NEXT: module asm ".equ foo,bar"
+
+; ASM2:      module asm ".lto_discard foo"
+; ASM2-NEXT: module asm ".weak foo"
+; ASM2-NEXT: module asm ".equ foo,bar"
+; ASM2-NEXT: module asm ".lto_discard"
+; ASM2-NEXT: module asm " .global foo ; foo: leal    2(%rdi), %eax"
+
+; ASM3-NOT:  module asm ".lto_discard foo"
+
+; SYM: T foo
+
+; DEF: leal    2(%rdi), %eax
+
+;--- t1.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define dso_local i32 @foo(i32 %0) {
+  %2 = add nsw i32 %0, 2
+  ret i32 %2
+}
+
+;--- t2.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+module asm ".weak foo"
+module asm ".equ foo,bar"
+
+ at llvm.compiler.used = appending global [1 x i8*] [i8* bitcast (i32 (i32)* @bar to i8*)], section "llvm.metadata"
+
+define internal i32 @bar(i32 %0) {
+  %2 = add nsw i32 %0, 1
+  ret i32 %2
+}
+
+;--- t3.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+module asm " .global foo ; foo: leal    2(%rdi), %eax"
+
+;--- t4.ll
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+module asm ".global foo"
+module asm "foo: call bar"
+module asm ".symver foo,foo@@@VER1"
+module asm ".symver bar,bar@@@VER1"

diff  --git a/llvm/test/LTO/X86/inline-asm-lto-discard2.ll b/llvm/test/LTO/X86/inline-asm-lto-discard2.ll
new file mode 100644
index 000000000000..5d111d0a52e3
--- /dev/null
+++ b/llvm/test/LTO/X86/inline-asm-lto-discard2.ll
@@ -0,0 +1,29 @@
+; Check that
+; 1. ".lto_discard" works as module inlineasm marker and its argument symbols
+;    are discarded.
+; 2. there is no reassignment error in the presence of ".lto_discard"
+; RUN: llc < %s | FileCheck %s
+
+; CHECK:    .data
+; CHECK-NOT:  .weak  foo
+; CHECK-NOT:  .set   foo, bar
+; CHECK:      .globl foo
+; CHECK:      foo:
+; CHECK:        .byte 1
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+module asm ".lto_discard foo"
+module asm "	.text"
+module asm "bar:"
+module asm "	.data"
+module asm ".weak foo"
+module asm ".set   foo, bar"
+module asm ".weak foo"
+module asm ".set   foo, bar"
+
+module asm ".lto_discard"
+module asm ".globl foo"
+module asm "foo:"
+module asm "   .byte 1"

diff  --git a/llvm/test/MC/ELF/lto-discard.s b/llvm/test/MC/ELF/lto-discard.s
new file mode 100644
index 000000000000..75a7d7ea5e91
--- /dev/null
+++ b/llvm/test/MC/ELF/lto-discard.s
@@ -0,0 +1,31 @@
+// Check that ".lto_discard" ignores symbol assignments and attribute changes
+// for the specified symbols.
+// RUN: llvm-mc -triple x86_64 < %s | FileCheck %s
+
+// Check that ".lto_discard" only accepts identifiers.
+// RUN: not llvm-mc -filetype=obj -triple x86_64 --defsym ERR=1 %s 2>&1 |\
+// RUN:         FileCheck %s --check-prefix=ERR
+
+// CHECK: .weak foo
+// CHECK: foo:
+// CHECK:    .byte 1
+// CHECK: .weak bar
+// CHECK: bar:
+// CHECK:    .byte 2
+
+.lto_discard foo
+.weak foo
+foo:
+    .byte 1
+
+.lto_discard
+.weak bar
+bar:
+    .byte 2
+
+
+.ifdef ERR
+.text
+# ERR: {{.*}}.s:[[#@LINE+1]]:14: error: expected identifier in directive
+.lto_discard 1
+.endif


        


More information about the llvm-commits mailing list