[LLVMdev] [PATCH 1/2] Implement symbol offsets

Ben Gamari bgamari.foss at gmail.com
Sun May 25 17:41:10 PDT 2014


This implements Option #2 of the global symbol offset proposal[1].

[1] http://lists.cs.uiuc.edu/pipermail/llvmdev/2013-April/061511.html
---
 docs/BitCodeFormat.rst                |  5 ++++-
 docs/LangRef.rst                      | 23 +++++++++++++++++++----
 include/llvm/IR/Function.h            |  4 ++++
 lib/AsmParser/LLLexer.cpp             |  1 +
 lib/AsmParser/LLParser.cpp            | 21 +++++++++++++++++++--
 lib/AsmParser/LLParser.h              |  1 +
 lib/AsmParser/LLToken.h               |  1 +
 lib/Bitcode/Reader/BitcodeReader.cpp  |  8 +++++---
 lib/Bitcode/Writer/BitcodeWriter.cpp  |  3 ++-
 lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 18 ++++++++++++++++--
 lib/IR/AsmWriter.cpp                  |  2 ++
 lib/IR/Function.cpp                   |  9 +++++++++
 test/Feature/symbol_offset.ll         | 10 ++++++++++
 13 files changed, 93 insertions(+), 13 deletions(-)
 create mode 100644 test/Feature/symbol_offset.ll

diff --git a/docs/BitCodeFormat.rst b/docs/BitCodeFormat.rst
index fce1e37..0da0b6e 100644
--- a/docs/BitCodeFormat.rst
+++ b/docs/BitCodeFormat.rst
@@ -727,7 +727,7 @@ global variable. The operand fields are:
 MODULE_CODE_FUNCTION Record
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-``[FUNCTION, type, callingconv, isproto, linkage, paramattr, alignment, section, visibility, gc, prefix, dllstorageclass]``
+``[FUNCTION, type, callingconv, isproto, linkage, paramattr, alignment, section, visibility, gc, prefix, offset, dllstorageclass]``
 
 The ``FUNCTION`` record (code 8) marks the declaration or definition of a
 function. The operand fields are:
@@ -773,6 +773,9 @@ function. The operand fields are:
 * *prefix*: If non-zero, the value index of the prefix data for this function,
   plus 1.
 
+* *offset*: The offset of the symbol value relative to the beginning
+  of the function.
+
 * *dllstorageclass*: An encoding of the `dllstorageclass`_ of this function
 
 MODULE_CODE_ALIAS Record
diff --git a/docs/LangRef.rst b/docs/LangRef.rst
index fa8d3c0..3fb0fb8 100644
--- a/docs/LangRef.rst
+++ b/docs/LangRef.rst
@@ -611,8 +611,9 @@ an optional ``unnamed_addr`` attribute, a return type, an optional
 name, a (possibly empty) argument list (each with optional :ref:`parameter
 attributes <paramattrs>`), optional :ref:`function attributes <fnattrs>`,
 an optional section, an optional alignment, an optional :ref:`garbage
-collector name <gc>`, an optional :ref:`prefix <prefixdata>`, an opening
-curly brace, a list of basic blocks, and a closing curly brace.
+collector name <gc>`, an optional :ref:`prefix <prefixdata>`, an optional
+:ref:`symbol_offset <symboloffset>`, an opening curly brace, a list of
+basic blocks, and a closing curly brace.
 
 LLVM function declarations consist of the "``declare``" keyword, an
 optional :ref:`linkage type <linkage>`, an optional :ref:`visibility
@@ -621,7 +622,8 @@ an optional :ref:`calling convention <callingconv>`,
 an optional ``unnamed_addr`` attribute, a return type, an optional
 :ref:`parameter attribute <paramattrs>` for the return type, a function
 name, a possibly empty list of arguments, an optional alignment, an optional
-:ref:`garbage collector name <gc>` and an optional :ref:`prefix <prefixdata>`.
+:ref:`garbage collector name <gc>`, an optional :ref:`prefix <prefixdata>`, and
+an optional :ref:`symbol_offset <symboloffset>`.
 
 A function definition contains a list of basic blocks, forming the CFG (Control
 Flow Graph) for the function. Each basic block may optionally start with a label
@@ -657,7 +659,7 @@ Syntax::
            [cconv] [ret attrs]
            <ResultType> @<FunctionName> ([argument list])
            [unnamed_addr] [fn Attrs] [section "name"] [align N]
-           [gc] [prefix Constant] { ... }
+           [gc] [prefix Constant] [symbol_offset N] { ... }
 
 .. _langref_aliases:
 
@@ -893,6 +895,9 @@ the inliner and other passes to reason about the semantics of the function
 definition without needing to reason about the prefix data.  Obviously this
 makes the format of the prefix data highly target dependent.
 
+Alternatively, the :ref:`symbol_offset <symboloffset>` attribute can be used
+to move the function entry point to after the prefix data.
+
 Prefix data is laid out as if it were an initializer for a global variable
 of the prefix data's type.  No padding is automatically placed between the
 prefix data and the function body.  If padding is required, it must be part
@@ -919,6 +924,16 @@ A function may have prefix data but no body.  This has similar semantics
 to the ``available_externally`` linkage in that the data may be used by the
 optimizers but will not be emitted in the object file.
 
+.. _symboloffset:
+
+Symbol Offset
+-------------
+
+The ``symbol_offset`` attribute allows the value of the symbol in the produced
+object file to be offset from the beginning of the function definition. This
+can be used in conjunction with the :ref:`prefix <prefixdata>` attribute to
+insert prefix data before the definition of a function.
+
 .. _attrgrp:
 
 Attribute Groups
diff --git a/include/llvm/IR/Function.h b/include/llvm/IR/Function.h
index 22444bd..0332e74 100644
--- a/include/llvm/IR/Function.h
+++ b/include/llvm/IR/Function.h
@@ -86,6 +86,7 @@ private:
   mutable ArgumentListType ArgumentList;  ///< The formal arguments
   ValueSymbolTable *SymTab;               ///< Symbol table of args/instructions
   AttributeSet AttributeSets;             ///< Parameter attributes
+  signed SymbolOffset;                    ///< Symbol offset
 
   // HasLazyArguments is stored in Value::SubclassData.
   /*bool HasLazyArguments;*/
@@ -445,6 +446,9 @@ public:
   Constant *getPrefixData() const;
   void setPrefixData(Constant *PrefixData);
 
+  signed getSymbolOffset() const;
+  void setSymbolOffset(signed Offset);
+
   /// viewCFG - This function is meant for use from the debugger.  You can just
   /// say 'call F->viewCFG()' and a ghostview window should pop up from the
   /// program, displaying the CFG of the current function with the code for each
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index 44a3412..d5d9c13 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -548,6 +548,7 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(inteldialect);
   KEYWORD(gc);
   KEYWORD(prefix);
+  KEYWORD(symbol_offset);
 
   KEYWORD(ccc);
   KEYWORD(fastcc);
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 3282e8a..8988ffd 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -1128,6 +1128,19 @@ bool LLParser::ParseStringConstant(std::string &Result) {
   return false;
 }
 
+/// ParseInt32 - Parse an integer token that fits in a signed 32-bit value.
+///   ::= int32
+bool LLParser::ParseInt32(signed &Val) {
+  if (Lex.getKind() != lltok::APSInt)
+    return TokError("expected integer");
+  const APSInt &Tok = Lex.getAPSIntVal(); // checked per the token's signedness
+  if (Tok.isSigned() ? !Tok.isSignedIntN(32) : !Tok.isIntN(31))
+    return TokError("expected 32-bit integer (too large)");
+  Val = Tok.isSigned() ? Tok.getSExtValue() : int64_t(Tok.getZExtValue());
+  Lex.Lex();
+  return false;
+}
+
 /// ParseUInt32
 ///   ::= uint32
 bool LLParser::ParseUInt32(unsigned &Val) {
@@ -3016,7 +3029,7 @@ bool LLParser::ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc,
 /// FunctionHeader
 ///   ::= OptionalLinkage OptionalVisibility OptionalCallingConv OptRetAttrs
 ///       OptUnnamedAddr Type GlobalName '(' ArgList ')' OptFuncAttrs OptSection
-///       OptionalAlign OptGC OptionalPrefix
+///       OptionalAlign OptGC OptionalPrefix OptionalSymbolOffset
 bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
   // Parse the linkage.
   LocTy LinkageLoc = Lex.getLoc();
@@ -3097,6 +3110,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
   bool UnnamedAddr;
   LocTy UnnamedAddrLoc;
   Constant *Prefix = nullptr;
+  signed Offset = 0;
 
   if (ParseArgumentList(ArgList, isVarArg) ||
       ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr,
@@ -3109,7 +3123,9 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
       (EatIfPresent(lltok::kw_gc) &&
        ParseStringConstant(GC)) ||
       (EatIfPresent(lltok::kw_prefix) &&
-       ParseGlobalTypeAndValue(Prefix)))
+       ParseGlobalTypeAndValue(Prefix)) ||
+      (EatIfPresent(lltok::kw_symbol_offset) &&
+       ParseInt32(Offset)))
     return true;
 
   if (FuncAttrs.contains(Attribute::Builtin))
@@ -3209,6 +3225,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
   Fn->setSection(Section);
   if (!GC.empty()) Fn->setGC(GC.c_str());
   Fn->setPrefixData(Prefix);
+  Fn->setSymbolOffset(Offset);
   ForwardRefAttrGroups[Fn] = FwdRefAttrGrps;
 
   // Add all of the arguments we parsed to the function.
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index e2bf462..f5bcab3 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -189,6 +189,7 @@ namespace llvm {
       return false;
     }
     bool ParseStringConstant(std::string &Result);
+    bool ParseInt32(signed &Val);
     bool ParseUInt32(unsigned &Val);
     bool ParseUInt32(unsigned &Val, LocTy &Loc) {
       Loc = Lex.getLoc();
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index b6b7d82..b18857c 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -84,6 +84,7 @@ namespace lltok {
     kw_inteldialect,
     kw_gc,
     kw_prefix,
+    kw_symbol_offset,
     kw_c,
 
     kw_cc, kw_ccc, kw_fastcc, kw_coldcc,
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 4170f98..904468d 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -1934,7 +1934,7 @@ error_code BitcodeReader::ParseModule(bool Resume) {
     }
     // FUNCTION:  [type, callingconv, isproto, linkage, paramattr,
     //             alignment, section, visibility, gc, unnamed_addr,
-    //             dllstorageclass]
+    //             prefix, offset, dllstorageclass]
     case bitc::MODULE_CODE_FUNCTION: {
       if (Record.size() < 8)
         return Error(InvalidRecord);
@@ -1977,9 +1977,11 @@ error_code BitcodeReader::ParseModule(bool Resume) {
       Func->setUnnamedAddr(UnnamedAddr);
       if (Record.size() > 10 && Record[10] != 0)
         FunctionPrefixes.push_back(std::make_pair(Func, Record[10]-1));
-
       if (Record.size() > 11)
-        Func->setDLLStorageClass(GetDecodedDLLStorageClass(Record[11]));
+        Func->setSymbolOffset(Record[11]);
+
+      if (Record.size() > 12)
+        Func->setDLLStorageClass(GetDecodedDLLStorageClass(Record[12]));
       else
         UpgradeDLLImportExportLinkage(Func, Record[3]);
 
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index db254e6..151a4fa 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -642,7 +642,7 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
   // Emit the function proto information.
   for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) {
     // FUNCTION:  [type, callingconv, isproto, linkage, paramattrs, alignment,
-    //             section, visibility, gc, unnamed_addr, prefix]
+    //             section, visibility, gc, unnamed_addr, prefix, offset]
     Vals.push_back(VE.getTypeID(F->getType()));
     Vals.push_back(F->getCallingConv());
     Vals.push_back(F->isDeclaration());
@@ -655,6 +655,7 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE,
     Vals.push_back(F->hasUnnamedAddr());
     Vals.push_back(F->hasPrefixData() ? (VE.getValueID(F->getPrefixData()) + 1)
                                       : 0);
+    Vals.push_back(F->getSymbolOffset());
     Vals.push_back(getEncodedDLLStorageClass(F));
 
     unsigned AbbrevToUse = 0;
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 7de9c6d..eed1b3b 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -556,10 +556,24 @@ void AsmPrinter::EmitFunctionHeader() {
 /// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the
 /// function.  This can be overridden by targets as required to do custom stuff.
 void AsmPrinter::EmitFunctionEntryLabel() {
+  const Function *F = MF->getFunction();
+
   // The function label could have already been emitted if two symbols end up
   // conflicting due to asm renaming.  Detect this and emit an error.
-  if (CurrentFnSym->isUndefined())
-    return OutStreamer.EmitLabel(CurrentFnSym);
+  if (CurrentFnSym->isUndefined()) {
+    if (F->getSymbolOffset() != 0) {
+      MCSymbol *dummySym = OutContext.CreateTempSymbol();
+      OutStreamer.EmitLabel(dummySym);
+
+      const MCExpr *symRefExpr = MCSymbolRefExpr::Create(dummySym, OutContext);
+      const MCExpr *constExpr = MCConstantExpr::Create(F->getSymbolOffset(), OutContext);
+      const MCExpr *addExpr = MCBinaryExpr::CreateAdd(symRefExpr, constExpr, OutContext);
+      OutStreamer.EmitAssignment(CurrentFnSym, addExpr);
+      return;
+    } else {
+      return OutStreamer.EmitLabel(CurrentFnSym);
+    }
+  }
 
   report_fatal_error("'" + Twine(CurrentFnSym->getName()) +
                      "' label emitted multiple times to assembly file");
diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp
index 0fef0d0..9d39e4c 100644
--- a/lib/IR/AsmWriter.cpp
+++ b/lib/IR/AsmWriter.cpp
@@ -1657,6 +1657,8 @@ void AssemblyWriter::printFunction(const Function *F) {
     Out << " prefix ";
     writeOperand(F->getPrefixData(), true);
   }
+  if (F->getSymbolOffset() != 0)
+    Out << " symbol_offset " << F->getSymbolOffset();
   if (F->isDeclaration()) {
     Out << '\n';
   } else {
diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp
index fe32c46..d158f9c 100644
--- a/lib/IR/Function.cpp
+++ b/lib/IR/Function.cpp
@@ -381,6 +381,7 @@ void Function::copyAttributesFrom(const GlobalValue *Src) {
     setPrefixData(SrcF->getPrefixData());
   else
     setPrefixData(nullptr);
+  setSymbolOffset(SrcF->getSymbolOffset());
 }
 
 /// getIntrinsicID - This method returns the ID number of the specified
@@ -806,3 +807,11 @@ void Function::setPrefixData(Constant *PrefixData) {
   }
   setValueSubclassData(SCData);
 }
+
+/// getSymbolOffset - Return the symbol offset currently recorded for this
+/// function.
+signed Function::getSymbolOffset() const { return SymbolOffset; }
+
+/// setSymbolOffset - Record Offset as this function's symbol offset,
+/// replacing any previously stored value.
+void Function::setSymbolOffset(signed Offset) { SymbolOffset = Offset; }
diff --git a/test/Feature/symbol_offset.ll b/test/Feature/symbol_offset.ll
new file mode 100644
index 0000000..a9784f6
--- /dev/null
+++ b/test/Feature/symbol_offset.ll
@@ -0,0 +1,10 @@
+; RUN: llvm-as < %s | llvm-dis > %t1.ll
+; RUN: FileCheck %s < %t1.ll
+; RUN: llvm-as < %t1.ll | llvm-dis > %t2.ll
+; RUN: diff %t1.ll %t2.ll
+; RUN: opt -O3 -S < %t1.ll | FileCheck %s
+
+; CHECK: f(){{.*}}symbol_offset 1
+define void @f() symbol_offset 1 {
+  ret void
+}
-- 
1.9.2




More information about the llvm-dev mailing list