[llvm] 8db981d - Add sanitizer-specific GlobalValue attributes.

Mitch Phillips via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 10 12:45:17 PDT 2022


Author: Mitch Phillips
Date: 2022-06-10T12:28:18-07:00
New Revision: 8db981d463ee266919907f2554194d05f96f7191

URL: https://github.com/llvm/llvm-project/commit/8db981d463ee266919907f2554194d05f96f7191
DIFF: https://github.com/llvm/llvm-project/commit/8db981d463ee266919907f2554194d05f96f7191.diff

LOG: Add sanitizer-specific GlobalValue attributes.

Plan is the migrate the global variable metadata for sanitizers, that's
currently carried around generally in the 'llvm.asan.globals' section,
onto the global variable itself.

This patch adds the attribute and plumbs it through the LLVM IR and
bitcode formats, but is a no-op other than that so far.

Reviewed By: vitalybuka, kstoimenov

Differential Revision: https://reviews.llvm.org/D126100

Added: 
    

Modified: 
    llvm/include/llvm/AsmParser/LLParser.h
    llvm/include/llvm/AsmParser/LLToken.h
    llvm/include/llvm/IR/GlobalValue.h
    llvm/lib/AsmParser/LLLexer.cpp
    llvm/lib/AsmParser/LLParser.cpp
    llvm/lib/Bitcode/Reader/BitcodeReader.cpp
    llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
    llvm/lib/IR/AsmWriter.cpp
    llvm/lib/IR/Globals.cpp
    llvm/lib/IR/LLVMContextImpl.h
    llvm/test/Assembler/globalvariable-attributes.ll
    llvm/test/Bitcode/compatibility.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h
index dd8d4daee5133..3389475b2c9aa 100644
--- a/llvm/include/llvm/AsmParser/LLParser.h
+++ b/llvm/include/llvm/AsmParser/LLParser.h
@@ -515,6 +515,7 @@ namespace llvm {
     bool parseGlobalValueVector(SmallVectorImpl<Constant *> &Elts,
                                 Optional<unsigned> *InRangeOp = nullptr);
     bool parseOptionalComdat(StringRef GlobalName, Comdat *&C);
+    bool parseSanitizer(GlobalVariable *GV);
     bool parseMetadataAsValue(Value *&V, PerFunctionState &PFS);
     bool parseValueAsMetadata(Metadata *&MD, const Twine &TypeMsg,
                               PerFunctionState *PFS);

diff  --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h
index 11a7a6768e91c..230a1662cc049 100644
--- a/llvm/include/llvm/AsmParser/LLToken.h
+++ b/llvm/include/llvm/AsmParser/LLToken.h
@@ -391,6 +391,19 @@ enum Kind {
   kw_bit,
   kw_varFlags,
 
+  // GV's with __attribute__((no_sanitize("address"))), or things in
+  // -fsanitize-ignorelist when built with ASan.
+  kw_no_sanitize_address,
+  // GV's with __attribute__((no_sanitize("hwaddress"))), or things in
+  // -fsanitize-ignorelist when built with HWASan.
+  kw_no_sanitize_hwaddress,
+  // GV's with __attribute__((no_sanitize("memtag"))), or things in
+  // -fsanitize-ignorelist when built with memory tagging.
+  kw_no_sanitize_memtag,
+  // GV's where the clang++ frontend (when ASan is used) notes that this is
+  // dynamically initialized, and thus needs ODR detection.
+  kw_sanitize_address_dyninit,
+
   // Unsigned Valued tokens (UIntVal).
   LabelID,    // 42:
   GlobalID,   // @42

diff  --git a/llvm/include/llvm/IR/GlobalValue.h b/llvm/include/llvm/IR/GlobalValue.h
index f704b1c9e7954..0f901e0fca506 100644
--- a/llvm/include/llvm/IR/GlobalValue.h
+++ b/llvm/include/llvm/IR/GlobalValue.h
@@ -79,14 +79,15 @@ class GlobalValue : public Constant {
         ValueType(Ty), Visibility(DefaultVisibility),
         UnnamedAddrVal(unsigned(UnnamedAddr::None)),
         DllStorageClass(DefaultStorageClass), ThreadLocal(NotThreadLocal),
-        HasLLVMReservedName(false), IsDSOLocal(false), HasPartition(false) {
+        HasLLVMReservedName(false), IsDSOLocal(false), HasPartition(false),
+        HasSanitizerMetadata(false) {
     setLinkage(Linkage);
     setName(Name);
   }
 
   Type *ValueType;
 
-  static const unsigned GlobalValueSubClassDataBits = 16;
+  static const unsigned GlobalValueSubClassDataBits = 15;
 
   // All bitfields use unsigned as the underlying type so that MSVC will pack
   // them.
@@ -111,9 +112,14 @@ class GlobalValue : public Constant {
   /// https://lld.llvm.org/Partitions.html).
   unsigned HasPartition : 1;
 
+  /// True if this symbol has sanitizer metadata available. Should only happen
+  /// if sanitizers were enabled when building the translation unit which
+  /// contains this GV.
+  unsigned HasSanitizerMetadata : 1;
+
 private:
   // Give subclasses access to what otherwise would be wasted padding.
-  // (16 + 4 + 2 + 2 + 2 + 3 + 1 + 1 + 1) == 32.
+  // (15 + 4 + 2 + 2 + 2 + 3 + 1 + 1 + 1 + 1) == 32.
   unsigned SubClassData : GlobalValueSubClassDataBits;
 
   friend class Constant;
@@ -288,6 +294,39 @@ class GlobalValue : public Constant {
   StringRef getPartition() const;
   void setPartition(StringRef Part);
 
+  // ASan, HWASan and Memtag sanitizers have some instrumentation that applies
+  // specifically to global variables. This instrumentation is implicitly
+  // applied to all global variables when built with -fsanitize=*. What we need
+  // is a way to persist the information that a certain global variable should
+  // *not* have sanitizers applied, which occurs if:
+  //   1. The global variable is in the sanitizer ignore list, or
+  //   2. The global variable is created by the sanitizers itself for internal
+  //      usage, or
+  //   3. The global variable has __attribute__((no_sanitize("..."))) or
+  //      __attribute__((disable_sanitizer_instrumentation)).
+  //
+  // This is important, a some IR passes like GlobalMerge can delete global
+  // variables and replace them with new ones. If the old variables were marked
+  // to be unsanitized, then the new ones should also be.
+  struct SanitizerMetadata {
+    SanitizerMetadata()
+        : NoAddress(false), NoHWAddress(false), NoMemtag(false),
+          IsDynInit(false) {}
+    unsigned NoAddress : 1;
+    unsigned NoHWAddress : 1;
+    unsigned NoMemtag : 1;
+
+    // ASan-specific metadata. Is this global variable dynamically initialized
+    // (from a C++ language perspective), and should therefore be checked for
+    // ODR violations.
+    unsigned IsDynInit : 1;
+  };
+
+  bool hasSanitizerMetadata() const { return HasSanitizerMetadata; }
+  const SanitizerMetadata &getSanitizerMetadata() const;
+  void setSanitizerMetadata(const SanitizerMetadata &Meta);
+  void removeSanitizerMetadata();
+
   static LinkageTypes getLinkOnceLinkage(bool ODR) {
     return ODR ? LinkOnceODRLinkage : LinkOnceAnyLinkage;
   }

diff  --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index f9c6d555e9d1a..30e6f85992088 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -580,6 +580,11 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(prefix);
   KEYWORD(prologue);
 
+  KEYWORD(no_sanitize_address);
+  KEYWORD(no_sanitize_hwaddress);
+  KEYWORD(no_sanitize_memtag);
+  KEYWORD(sanitize_address_dyninit);
+
   KEYWORD(ccc);
   KEYWORD(fastcc);
   KEYWORD(coldcc);

diff  --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index aac1f01eeed5e..29ef40a59d145 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -1103,6 +1103,45 @@ bool LLParser::parseAliasOrIFunc(const std::string &Name, LocTy NameLoc,
   return false;
 }
 
+static bool isSanitizer(lltok::Kind Kind) {
+  switch (Kind) {
+  case lltok::kw_no_sanitize_address:
+  case lltok::kw_no_sanitize_hwaddress:
+  case lltok::kw_no_sanitize_memtag:
+  case lltok::kw_sanitize_address_dyninit:
+    return true;
+  default:
+    return false;
+  }
+}
+
+bool LLParser::parseSanitizer(GlobalVariable *GV) {
+  using SanitizerMetadata = GlobalValue::SanitizerMetadata;
+  SanitizerMetadata Meta;
+  if (GV->hasSanitizerMetadata())
+    Meta = GV->getSanitizerMetadata();
+
+  switch (Lex.getKind()) {
+  case lltok::kw_no_sanitize_address:
+    Meta.NoAddress = true;
+    break;
+  case lltok::kw_no_sanitize_hwaddress:
+    Meta.NoHWAddress = true;
+    break;
+  case lltok::kw_no_sanitize_memtag:
+    Meta.NoMemtag = true;
+    break;
+  case lltok::kw_sanitize_address_dyninit:
+    Meta.IsDynInit = true;
+    break;
+  default:
+    return tokError("non-sanitizer token passed to LLParser::parseSanitizer()");
+  }
+  GV->setSanitizerMetadata(Meta);
+  Lex.Lex();
+  return false;
+}
+
 /// parseGlobal
 ///   ::= GlobalVar '=' OptionalLinkage OptionalPreemptionSpecifier
 ///       OptionalVisibility OptionalDLLStorageClass
@@ -1221,6 +1260,9 @@ bool LLParser::parseGlobal(const std::string &Name, LocTy NameLoc,
     } else if (Lex.getKind() == lltok::MetadataVar) {
       if (parseGlobalObjectMetadataAttachment(*GV))
         return true;
+    } else if (isSanitizer(Lex.getKind())) {
+      if (parseSanitizer(GV))
+        return true;
     } else {
       Comdat *C;
       if (parseOptionalComdat(Name, C))

diff  --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 9dd2b1286552d..cd253c13e7b1e 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -3441,6 +3441,19 @@ static void inferDSOLocal(GlobalValue *GV) {
     GV->setDSOLocal(true);
 }
 
+GlobalValue::SanitizerMetadata deserializeSanitizerMetadata(unsigned V) {
+  GlobalValue::SanitizerMetadata Meta;
+  if (V & (1 << 0))
+    Meta.NoAddress = true;
+  if (V & (1 << 1))
+    Meta.NoHWAddress = true;
+  if (V & (1 << 2))
+    Meta.NoMemtag = true;
+  if (V & (1 << 3))
+    Meta.IsDynInit = true;
+  return Meta;
+}
+
 Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) {
   // v1: [pointer type, isconst, initid, linkage, alignment, section,
   // visibility, threadlocal, unnamed_addr, externally_initialized,
@@ -3544,6 +3557,12 @@ Error BitcodeReader::parseGlobalVarRecord(ArrayRef<uint64_t> Record) {
   if (Record.size() > 15)
     NewGV->setPartition(StringRef(Strtab.data() + Record[14], Record[15]));
 
+  if (Record.size() > 16 && Record[16] != UINT_MAX) {
+    llvm::GlobalValue::SanitizerMetadata Meta =
+        deserializeSanitizerMetadata(Record[16]);
+    NewGV->setSanitizerMetadata(Meta);
+  }
+
   return Error::success();
 }
 

diff  --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 4e1729febe57d..691b332a8be9a 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -1223,6 +1223,14 @@ static StringEncoding getStringEncoding(StringRef Str) {
   return SE_Fixed7;
 }
 
+static_assert(sizeof(GlobalValue::SanitizerMetadata) <= sizeof(unsigned),
+              "Sanitizer Metadata is too large for naive serialization.");
+static unsigned
+serializeSanitizerMetadata(const GlobalValue::SanitizerMetadata &Meta) {
+  return Meta.NoAddress | (Meta.NoHWAddress << 1) |
+         (Meta.NoMemtag << 2) | (Meta.IsDynInit << 3);
+}
+
 /// Emit top-level description of module, including target triple, inline asm,
 /// descriptors for global variables, and function prototype info.
 /// Returns the bit offset to backpatch with the location of the real VST.
@@ -1346,7 +1354,7 @@ void ModuleBitcodeWriter::writeModuleInfo() {
     // GLOBALVAR: [strtab offset, strtab size, type, isconst, initid,
     //             linkage, alignment, section, visibility, threadlocal,
     //             unnamed_addr, externally_initialized, dllstorageclass,
-    //             comdat, attributes, DSO_Local]
+    //             comdat, attributes, DSO_Local, GlobalSanitizer]
     Vals.push_back(addToStrtab(GV.getName()));
     Vals.push_back(GV.getName().size());
     Vals.push_back(VE.getTypeID(GV.getValueType()));
@@ -1363,7 +1371,7 @@ void ModuleBitcodeWriter::writeModuleInfo() {
         GV.isExternallyInitialized() ||
         GV.getDLLStorageClass() != GlobalValue::DefaultStorageClass ||
         GV.hasComdat() || GV.hasAttributes() || GV.isDSOLocal() ||
-        GV.hasPartition()) {
+        GV.hasPartition() || GV.hasSanitizerMetadata()) {
       Vals.push_back(getEncodedVisibility(GV));
       Vals.push_back(getEncodedThreadLocalMode(GV));
       Vals.push_back(getEncodedUnnamedAddr(GV));
@@ -1377,6 +1385,11 @@ void ModuleBitcodeWriter::writeModuleInfo() {
       Vals.push_back(GV.isDSOLocal());
       Vals.push_back(addToStrtab(GV.getPartition()));
       Vals.push_back(GV.getPartition().size());
+
+      if (GV.hasSanitizerMetadata())
+        Vals.push_back(serializeSanitizerMetadata(GV.getSanitizerMetadata()));
+      else
+        Vals.push_back(UINT_MAX);
     } else {
       AbbrevToUse = SimpleGVarAbbrev;
     }

diff  --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 44096eba86a22..e680927d34066 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -3535,6 +3535,19 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) {
     Out << '"';
   }
 
+  using SanitizerMetadata = llvm::GlobalValue::SanitizerMetadata;
+  if (GV->hasSanitizerMetadata()) {
+    SanitizerMetadata MD = GV->getSanitizerMetadata();
+    if (MD.NoAddress)
+      Out << ", no_sanitize_address";
+    if (MD.NoHWAddress)
+      Out << ", no_sanitize_hwaddress";
+    if (MD.NoMemtag)
+      Out << ", no_sanitize_memtag";
+    if (MD.IsDynInit)
+      Out << ", sanitize_address_dyninit";
+  }
+
   maybePrintComdat(Out, *GV);
   if (MaybeAlign A = GV->getAlign())
     Out << ", align " << A->value();

diff  --git a/llvm/lib/IR/Globals.cpp b/llvm/lib/IR/Globals.cpp
index 47e8bc0a916db..4f53f8ecf9df6 100644
--- a/llvm/lib/IR/Globals.cpp
+++ b/llvm/lib/IR/Globals.cpp
@@ -67,6 +67,10 @@ void GlobalValue::copyAttributesFrom(const GlobalValue *Src) {
   setDLLStorageClass(Src->getDLLStorageClass());
   setDSOLocal(Src->isDSOLocal());
   setPartition(Src->getPartition());
+  if (Src->hasSanitizerMetadata())
+    setSanitizerMetadata(Src->getSanitizerMetadata());
+  else
+    removeSanitizerMetadata();
 }
 
 void GlobalValue::removeFromParent() {
@@ -217,6 +221,27 @@ void GlobalValue::setPartition(StringRef S) {
   HasPartition = !S.empty();
 }
 
+using SanitizerMetadata = GlobalValue::SanitizerMetadata;
+const SanitizerMetadata &GlobalValue::getSanitizerMetadata() const {
+  assert(hasSanitizerMetadata());
+  assert(getContext().pImpl->GlobalValueSanitizerMetadata.count(this));
+  return getContext().pImpl->GlobalValueSanitizerMetadata[this];
+}
+
+void GlobalValue::setSanitizerMetadata(const SanitizerMetadata &Meta) {
+  getContext().pImpl->GlobalValueSanitizerMetadata[this] = Meta;
+  HasSanitizerMetadata = true;
+}
+
+void GlobalValue::removeSanitizerMetadata() {
+  DenseMap<const GlobalValue *, SanitizerMetadata> &MetadataMap =
+      getContext().pImpl->GlobalValueSanitizerMetadata;
+  auto It = MetadataMap.find(this);
+  if (It != MetadataMap.end())
+    MetadataMap.erase(It);
+  HasSanitizerMetadata = false;
+}
+
 StringRef GlobalObject::getSectionImpl() const {
   assert(hasSection());
   return getContext().pImpl->GlobalObjectSections[this];

diff  --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h
index 8db171527b6a0..04b0b11b18f3e 100644
--- a/llvm/lib/IR/LLVMContextImpl.h
+++ b/llvm/lib/IR/LLVMContextImpl.h
@@ -1503,6 +1503,9 @@ class LLVMContextImpl {
   /// Collection of per-GlobalValue partitions used in this context.
   DenseMap<const GlobalValue *, StringRef> GlobalValuePartitions;
 
+  DenseMap<const GlobalValue *, GlobalValue::SanitizerMetadata>
+      GlobalValueSanitizerMetadata;
+
   /// DiscriminatorTable - This table maps file:line locations to an
   /// integer representing the next DWARF path discriminator to assign to
   /// instructions in 
diff erent blocks at the same location.

diff  --git a/llvm/test/Assembler/globalvariable-attributes.ll b/llvm/test/Assembler/globalvariable-attributes.ll
index 64227a451c256..bdb880343247d 100644
--- a/llvm/test/Assembler/globalvariable-attributes.ll
+++ b/llvm/test/Assembler/globalvariable-attributes.ll
@@ -4,6 +4,11 @@
 @g2 = global i32 2, align 4 "key3" = "value3"
 @g3 = global i32 2 #0
 @g4 = global i32 2, align 4 "key5" = "value5" #0
+ at g5 = global i32 2, no_sanitize_address, align 4
+ at g6 = global i32 2, no_sanitize_hwaddress, align 4
+ at g7 = global i32 2, no_sanitize_memtag, align 4
+ at g8 = global i32 2, sanitize_address_dyninit, align 4
+ at g9 = global i32 2, no_sanitize_address, no_sanitize_hwaddress, no_sanitize_memtag, align 4
 
 attributes #0 = { "string" = "value" nobuiltin norecurse }
 
@@ -11,6 +16,11 @@ attributes #0 = { "string" = "value" nobuiltin norecurse }
 ; CHECK: @g2 = global i32 2, align 4 #1
 ; CHECK: @g3 = global i32 2 #2
 ; CHECK: @g4 = global i32 2, align 4 #3
+; CHECK: @g5 = global i32 2, no_sanitize_address, align 4
+; CHECK: @g6 = global i32 2, no_sanitize_hwaddress, align 4
+; CHECK: @g7 = global i32 2, no_sanitize_memtag, align 4
+; CHECK: @g8 = global i32 2, sanitize_address_dyninit, align 4
+; CHECK: @g9 = global i32 2, no_sanitize_address, no_sanitize_hwaddress, no_sanitize_memtag, align 4
 
 ; CHECK: attributes #0 = { "key"="value" "key2"="value2" }
 ; CHECK: attributes #1 = { "key3"="value3" }

diff  --git a/llvm/test/Bitcode/compatibility.ll b/llvm/test/Bitcode/compatibility.ll
index a7217ae18872e..2cc23f7714cbd 100644
--- a/llvm/test/Bitcode/compatibility.ll
+++ b/llvm/test/Bitcode/compatibility.ll
@@ -203,6 +203,18 @@ declare void @g.f1()
 @llvm.global_dtors = appending global [1 x %pri.func.data] [%pri.func.data { i32 0, void ()* @g.f1, i8* @g.used3 }], section "llvm.metadata"
 ; CHECK: @llvm.global_dtors = appending global [1 x %pri.func.data] [%pri.func.data { i32 0, void ()* @g.f1, i8* @g.used3 }], section "llvm.metadata"
 
+; Global Variables -- sanitizers
+ at g.no_sanitize_address = global i32 0, no_sanitize_address
+ at g.no_sanitize_hwaddress = global i32 0, no_sanitize_hwaddress
+ at g.no_sanitize_memtag = global i32 0, no_sanitize_memtag
+ at g.no_sanitize_multiple = global i32 0, no_sanitize_address, no_sanitize_hwaddress, no_sanitize_memtag
+ at g.sanitize_address_dyninit = global i32 0, sanitize_address_dyninit
+; CHECK: @g.no_sanitize_address = global i32 0, no_sanitize_address
+; CHECK: @g.no_sanitize_hwaddress = global i32 0, no_sanitize_hwaddress
+; CHECK: @g.no_sanitize_memtag = global i32 0, no_sanitize_memtag
+; CHECK: @g.no_sanitize_multiple = global i32 0, no_sanitize_address, no_sanitize_hwaddress, no_sanitize_memtag
+; CHECK: @g.sanitize_address_dyninit = global i32 0, sanitize_address_dyninit
+
 ;; Aliases
 ; Format: @<Name> = [Linkage] [Visibility] [DLLStorageClass] [ThreadLocal]
 ;                   [unnamed_addr] alias <AliaseeTy> @<Aliasee>


        


More information about the llvm-commits mailing list