[compiler-rt] bf9814b - [SanitizerBinaryMetadata] Emit constants as ULEB128

Marco Elver via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 8 04:12:53 PST 2023


Author: Marco Elver
Date: 2023-02-08T13:12:34+01:00
New Revision: bf9814b70560637e322056135fb9a8e3c650e828

URL: https://github.com/llvm/llvm-project/commit/bf9814b70560637e322056135fb9a8e3c650e828
DIFF: https://github.com/llvm/llvm-project/commit/bf9814b70560637e322056135fb9a8e3c650e828.diff

LOG: [SanitizerBinaryMetadata] Emit constants as ULEB128

Emit all constant integers produced by SanitizerBinaryMetadata as
ULEB128 to further reduce binary space used. Increasing the version is
not necessary given this change depends on (and will land) along with
the bump to v2.

To support this, the !pcsections metadata format is extended to allow
for per-section options, encoded in the first MD operator which must
always be a string and contain the section: "<section>!<options>".

Reviewed By: dvyukov

Differential Revision: https://reviews.llvm.org/D143484

Added: 
    

Modified: 
    clang/test/CodeGen/sanitize-metadata.c
    compiler-rt/test/metadata/common.h
    llvm/docs/PCSectionsMetadata.rst
    llvm/include/llvm/CodeGen/AsmPrinter.h
    llvm/include/llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h
    llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
    llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
    llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp
    llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp
    llvm/test/CodeGen/X86/pcsections.ll
    llvm/test/Instrumentation/SanitizerBinaryMetadata/atomics.ll

Removed: 
    


################################################################################
diff  --git a/clang/test/CodeGen/sanitize-metadata.c b/clang/test/CodeGen/sanitize-metadata.c
index 881d148b1aff..0dbcd886f262 100644
--- a/clang/test/CodeGen/sanitize-metadata.c
+++ b/clang/test/CodeGen/sanitize-metadata.c
@@ -31,6 +31,6 @@ int atomics() {
 // CHECK-LABEL: __sanitizer_metadata_covered.module_dtor
 // CHECK: call void @__sanitizer_metadata_covered_del(i32 2, ptr @__start_sanmd_covered, ptr @__stop_sanmd_covered)
 
-// ATOMICS: ![[ATOMICS_COVERED]] = !{!"sanmd_covered", ![[ATOMICS_COVERED_AUX:[0-9]+]]}
-// ATOMICS: ![[ATOMICS_COVERED_AUX]] = !{i8 1}
-// ATOMICS: ![[ATOMIC_OP]] = !{!"sanmd_atomics"}
+// ATOMICS: ![[ATOMICS_COVERED]] = !{!"sanmd_covered!C", ![[ATOMICS_COVERED_AUX:[0-9]+]]}
+// ATOMICS: ![[ATOMICS_COVERED_AUX]] = !{i64 1}
+// ATOMICS: ![[ATOMIC_OP]] = !{!"sanmd_atomics!C"}

diff  --git a/compiler-rt/test/metadata/common.h b/compiler-rt/test/metadata/common.h
index 9a7774f4b2f3..af0e00902628 100644
--- a/compiler-rt/test/metadata/common.h
+++ b/compiler-rt/test/metadata/common.h
@@ -25,6 +25,20 @@ template <typename T> T consume(const char *&pos, const char *end) {
   return v;
 }
 
+uint64_t consume_uleb128(const char *&pos, const char *end) {
+  uint64_t val = 0;
+  int shift = 0;
+  uint8_t cur;
+  do {
+    cur = *pos++;
+    val |= uint64_t{cur & 0x7fu} << shift;
+    shift += 7;
+  } while (cur & 0x80);
+  assert(shift < 64);
+  assert(pos <= end);
+  return val;
+}
+
 constexpr uint32_t kSanitizerBinaryMetadataUARHasSize = 1 << 2;
 
 uint32_t meta_version;
@@ -45,13 +59,13 @@ void __sanitizer_metadata_covered_add(uint32_t version, const char *start,
     const auto base = reinterpret_cast<uintptr_t>(pos);
     const intptr_t offset = offset_ptr_sized ? consume<intptr_t>(pos, end)
                                              : consume<int32_t>(pos, end);
-    [[maybe_unused]] const uint32_t size = consume<uint32_t>(pos, end);
-    const uint32_t features = consume<uint8_t>(pos, end);
-    uint32_t stack_args = 0;
+    [[maybe_unused]] const uint64_t size = consume_uleb128(pos, end);
+    const uint64_t features = consume_uleb128(pos, end);
+    uint64_t stack_args = 0;
     if (features & kSanitizerBinaryMetadataUARHasSize)
-      stack_args = consume<uint32_t>(pos, end);
+      stack_args = consume_uleb128(pos, end);
     if (const char *name = symbolize(base + offset))
-      printf("%s: features=%x stack_args=%u\n", name, features, stack_args);
+      printf("%s: features=%lx stack_args=%lu\n", name, features, stack_args);
   }
   meta_version = version;
   meta_start = start;

diff  --git a/llvm/docs/PCSectionsMetadata.rst b/llvm/docs/PCSectionsMetadata.rst
index 3be8af7b4576..4238ce687e3c 100644
--- a/llvm/docs/PCSectionsMetadata.rst
+++ b/llvm/docs/PCSectionsMetadata.rst
@@ -59,6 +59,18 @@ The size of each entry depends on the code model. With large and medium sized
 code models, the entry size matches pointer size. For any smaller code model
 the entry size is just 32 bits.
 
+Encoding Options
+----------------
+
+Optional encoding options can be passed in the first ``MDString`` operator:
+``<section>!<options>``. The following options are available:
+
+    * ``C`` -- Compress constant integers of size 2-8 bytes as ULEB128; this
+      includes the function size (but excludes the PC entry).
+
+For example, ``foo!C`` will emit into section ``foo`` with all constants
+encoded as ULEB128.
+
 Guarantees on Code Generation
 =============================
 

diff  --git a/llvm/include/llvm/CodeGen/AsmPrinter.h b/llvm/include/llvm/CodeGen/AsmPrinter.h
index 33fda248120b..3c20fc06abef 100644
--- a/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -643,6 +643,13 @@ class AsmPrinter : public MachineFunctionPass {
   /// Emit a long long directive and value.
   void emitInt64(uint64_t Value) const;
 
+  /// Emit the specified signed leb128 value.
+  void emitSLEB128(int64_t Value, const char *Desc = nullptr) const;
+
+  /// Emit the specified unsigned leb128 value.
+  void emitULEB128(uint64_t Value, const char *Desc = nullptr,
+                   unsigned PadTo = 0) const;
+
   /// Emit something like ".long Hi-Lo" where the size in bytes of the directive
   /// is specified by Size and Hi/Lo specify the labels.  This implicitly uses
   /// .set if it is available.
@@ -670,13 +677,6 @@ class AsmPrinter : public MachineFunctionPass {
   // Dwarf Emission Helper Routines
   //===------------------------------------------------------------------===//
 
-  /// Emit the specified signed leb128 value.
-  void emitSLEB128(int64_t Value, const char *Desc = nullptr) const;
-
-  /// Emit the specified unsigned leb128 value.
-  void emitULEB128(uint64_t Value, const char *Desc = nullptr,
-                   unsigned PadTo = 0) const;
-
   /// Emit a .byte 42 directive that corresponds to an encoding.  If verbose
   /// assembly output is enabled, we output comments describing the encoding.
   /// Desc is a string saying what the encoding is specifying (e.g. "LSDA").

diff  --git a/llvm/include/llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h b/llvm/include/llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h
index c41d5e3e58ef..6887dd46d624 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h
@@ -30,11 +30,11 @@ inline constexpr int kSanitizerBinaryMetadataAtomicsBit = 0;
 inline constexpr int kSanitizerBinaryMetadataUARBit = 1;
 inline constexpr int kSanitizerBinaryMetadataUARHasSizeBit = 2;
 
-inline constexpr uint32_t kSanitizerBinaryMetadataAtomics =
+inline constexpr uint64_t kSanitizerBinaryMetadataAtomics =
     1 << kSanitizerBinaryMetadataAtomicsBit;
-inline constexpr uint32_t kSanitizerBinaryMetadataUAR =
+inline constexpr uint64_t kSanitizerBinaryMetadataUAR =
     1 << kSanitizerBinaryMetadataUARBit;
-inline constexpr uint32_t kSanitizerBinaryMetadataUARHasSize =
+inline constexpr uint64_t kSanitizerBinaryMetadataUARHasSize =
     1 << kSanitizerBinaryMetadataUARHasSizeBit;
 
 inline constexpr char kSanitizerBinaryMetadataCoveredSection[] =

diff  --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 5d58d46a1048..39e6ffed9aad 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1496,9 +1496,22 @@ void AsmPrinter::emitPCSections(const MachineFunction &MF) {
     // constants may appear, which will simply be emitted into the current
     // section (the user of MD_pcsections decides the format of encoded data).
     assert(isa<MDString>(MD.getOperand(0)) && "first operand not a string");
+    bool ConstULEB128 = false;
     for (const MDOperand &MDO : MD.operands()) {
       if (auto *S = dyn_cast<MDString>(MDO)) {
-        SwitchSection(S->getString());
+        // Found string, start of new section!
+        // Find options for this section "<section>!<opts>" - supported options:
+        //   C = Compress constant integers of size 2-8 bytes as ULEB128.
+        const StringRef SecWithOpt = S->getString();
+        const size_t OptStart = SecWithOpt.find('!'); // likely npos
+        const StringRef Sec = SecWithOpt.substr(0, OptStart);
+        const StringRef Opts = SecWithOpt.substr(OptStart); // likely empty
+        ConstULEB128 = Opts.find('C') != StringRef::npos;
+#ifndef NDEBUG
+        for (char O : Opts)
+          assert((O == '!' || O == 'C') && "Invalid !pcsections options");
+#endif
+        SwitchSection(Sec);
         const MCSymbol *Prev = Syms.front();
         for (const MCSymbol *Sym : Syms) {
           if (Sym == Prev || !Deltas) {
@@ -1510,17 +1523,30 @@ void AsmPrinter::emitPCSections(const MachineFunction &MF) {
             // `base + addr`.
             emitLabelDifference(Sym, Base, RelativeRelocSize);
           } else {
-            emitLabelDifference(Sym, Prev, 4);
+            // Emit delta between symbol and previous symbol.
+            if (ConstULEB128)
+              emitLabelDifferenceAsULEB128(Sym, Prev);
+            else
+              emitLabelDifference(Sym, Prev, 4);
           }
           Prev = Sym;
         }
       } else {
+        // Emit auxiliary data after PC.
         assert(isa<MDNode>(MDO) && "expecting either string or tuple");
         const auto *AuxMDs = cast<MDNode>(MDO);
         for (const MDOperand &AuxMDO : AuxMDs->operands()) {
           assert(isa<ConstantAsMetadata>(AuxMDO) && "expecting a constant");
-          const auto *C = cast<ConstantAsMetadata>(AuxMDO);
-          emitGlobalConstant(F.getParent()->getDataLayout(), C->getValue());
+          const Constant *C = cast<ConstantAsMetadata>(AuxMDO)->getValue();
+          const DataLayout &DL = F.getParent()->getDataLayout();
+          const uint64_t Size = DL.getTypeStoreSize(C->getType());
+
+          if (auto *CI = dyn_cast<ConstantInt>(C);
+              CI && ConstULEB128 && Size > 1 && Size <= 8) {
+            emitULEB128(CI->getZExtValue());
+          } else {
+            emitGlobalConstant(DL, C);
+          }
         }
       }
     }
@@ -2788,6 +2814,22 @@ void AsmPrinter::emitInt16(int Value) const { OutStreamer->emitInt16(Value); }
 /// Emit a long directive and value.
 void AsmPrinter::emitInt32(int Value) const { OutStreamer->emitInt32(Value); }
 
+/// EmitSLEB128 - emit the specified signed leb128 value.
+void AsmPrinter::emitSLEB128(int64_t Value, const char *Desc) const {
+  if (isVerbose() && Desc)
+    OutStreamer->AddComment(Desc);
+
+  OutStreamer->emitSLEB128IntValue(Value);
+}
+
+void AsmPrinter::emitULEB128(uint64_t Value, const char *Desc,
+                             unsigned PadTo) const {
+  if (isVerbose() && Desc)
+    OutStreamer->AddComment(Desc);
+
+  OutStreamer->emitULEB128IntValue(Value, PadTo);
+}
+
 /// Emit a long long directive and value.
 void AsmPrinter::emitInt64(uint64_t Value) const {
   OutStreamer->emitInt64(Value);
@@ -2801,6 +2843,12 @@ void AsmPrinter::emitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
   OutStreamer->emitAbsoluteSymbolDiff(Hi, Lo, Size);
 }
 
+/// Emit something like ".uleb128 Hi-Lo".
+void AsmPrinter::emitLabelDifferenceAsULEB128(const MCSymbol *Hi,
+                                              const MCSymbol *Lo) const {
+  OutStreamer->emitAbsoluteSymbolDiffAsULEB128(Hi, Lo);
+}
+
 /// EmitLabelPlusOffset - Emit something like ".long Label+Offset"
 /// where the size in bytes of the directive is specified by Size and Label
 /// specifies the label.  This implicitly uses .set if it is available.

diff  --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index ecaa64afab4d..e330c57d341a 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -32,28 +32,6 @@ using namespace llvm;
 // Dwarf Emission Helper Routines
 //===----------------------------------------------------------------------===//
 
-/// EmitSLEB128 - emit the specified signed leb128 value.
-void AsmPrinter::emitSLEB128(int64_t Value, const char *Desc) const {
-  if (isVerbose() && Desc)
-    OutStreamer->AddComment(Desc);
-
-  OutStreamer->emitSLEB128IntValue(Value);
-}
-
-void AsmPrinter::emitULEB128(uint64_t Value, const char *Desc,
-                             unsigned PadTo) const {
-  if (isVerbose() && Desc)
-    OutStreamer->AddComment(Desc);
-
-  OutStreamer->emitULEB128IntValue(Value, PadTo);
-}
-
-/// Emit something like ".uleb128 Hi-Lo".
-void AsmPrinter::emitLabelDifferenceAsULEB128(const MCSymbol *Hi,
-                                              const MCSymbol *Lo) const {
-  OutStreamer->emitAbsoluteSymbolDiffAsULEB128(Hi, Lo);
-}
-
 static const char *DecodeDWARFEncoding(unsigned Encoding) {
   switch (Encoding) {
   case dwarf::DW_EH_PE_absptr:

diff  --git a/llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp b/llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp
index 2107e00ee2e5..cc29bdce1210 100644
--- a/llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp
+++ b/llvm/lib/CodeGen/SanitizerBinaryMetadata.cpp
@@ -52,7 +52,7 @@ bool MachineSanitizerBinaryMetadata::runOnMachineFunction(MachineFunction &MF) {
   if (!MD)
     return false;
   const auto &Section = *cast<MDString>(MD->getOperand(0));
-  if (!Section.getString().equals(kSanitizerBinaryMetadataCoveredSection))
+  if (!Section.getString().startswith(kSanitizerBinaryMetadataCoveredSection))
     return false;
   auto &AuxMDs = *cast<MDTuple>(MD->getOperand(1));
   // Assume it currently only has features.

diff  --git a/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp
index 0af5bce9574b..64ae7272cc2d 100644
--- a/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp
+++ b/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp
@@ -35,15 +35,16 @@
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/Allocator.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/StringSaver.h"
 #include "llvm/TargetParser/Triple.h"
 #include "llvm/Transforms/Instrumentation.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
 
 #include <array>
 #include <cstdint>
-#include <limits>
 
 using namespace llvm;
 
@@ -148,7 +149,7 @@ class SanitizerBinaryMetadata {
   // to determine if a memory operation is atomic or not in modules compiled
   // with SanitizerBinaryMetadata.
   bool runOn(Instruction &I, MetadataInfoSet &MIS, MDBuilder &MDB,
-             uint32_t &FeatureMask);
+             uint64_t &FeatureMask);
 
   // Get start/end section marker pointer.
   GlobalVariable *getSectionMarker(const Twine &MarkerName, Type *Ty);
@@ -169,6 +170,8 @@ class SanitizerBinaryMetadata {
   const SanitizerBinaryMetadataOptions Options;
   const Triple TargetTriple;
   IRBuilder<> IRB;
+  BumpPtrAllocator Alloc;
+  UniqueStringSaver StringPool{Alloc};
 };
 
 bool SanitizerBinaryMetadata::run() {
@@ -245,7 +248,7 @@ void SanitizerBinaryMetadata::runOn(Function &F, MetadataInfoSet &MIS) {
 
   // The metadata features enabled for this function, stored along covered
   // metadata (if enabled).
-  uint32_t FeatureMask = 0;
+  uint64_t FeatureMask = 0;
   // Don't emit unnecessary covered metadata for all functions to save space.
   bool RequiresCovered = false;
 
@@ -270,10 +273,8 @@ void SanitizerBinaryMetadata::runOn(Function &F, MetadataInfoSet &MIS) {
     const auto *MI = &MetadataInfo::Covered;
     MIS.insert(MI);
     const StringRef Section = getSectionName(MI->SectionSuffix);
-    // The feature mask will be placed after the size of the function.
-    assert(FeatureMask <= std::numeric_limits<uint8_t>::max() &&
-           "Increase feature mask bytes and bump version");
-    Constant *CFM = IRB.getInt8(FeatureMask);
+    // The feature mask will be placed after the function size.
+    Constant *CFM = IRB.getInt64(FeatureMask);
     F.setMetadata(LLVMContext::MD_pcsections,
                   MDB.createPCSections({{Section, {CFM}}}));
   }
@@ -380,7 +381,7 @@ bool maybeSharedMutable(const Value *Addr) {
 }
 
 bool SanitizerBinaryMetadata::runOn(Instruction &I, MetadataInfoSet &MIS,
-                                    MDBuilder &MDB, uint32_t &FeatureMask) {
+                                    MDBuilder &MDB, uint64_t &FeatureMask) {
   SmallVector<const MetadataInfo *, 1> InstMetadata;
   bool RequiresCovered = false;
 
@@ -435,8 +436,9 @@ SanitizerBinaryMetadata::getSectionMarker(const Twine &MarkerName, Type *Ty) {
 }
 
 StringRef SanitizerBinaryMetadata::getSectionName(StringRef SectionSuffix) {
-  // FIXME: Other TargetTriple (req. string pool)
-  return SectionSuffix;
+  // FIXME: Other TargetTriples.
+  // Request ULEB128 encoding for all integer constants.
+  return StringPool.save(SectionSuffix + "!C");
 }
 
 Twine SanitizerBinaryMetadata::getSectionStart(StringRef SectionSuffix) {

diff  --git a/llvm/test/CodeGen/X86/pcsections.ll b/llvm/test/CodeGen/X86/pcsections.ll
index 29be7f979f29..8457edfd4879 100644
--- a/llvm/test/CodeGen/X86/pcsections.ll
+++ b/llvm/test/CodeGen/X86/pcsections.ll
@@ -137,9 +137,32 @@ entry:
   ret void
 }
 
+define void @multiple_uleb128() !pcsections !6 {
+; CHECK-LABEL: multiple_uleb128:
+; CHECK:       .section	section_aux,"awo", at progbits,.text
+; CHECK-NEXT:  .Lpcsection_base8:
+; DEFCM-NEXT:  .long	.Lfunc_begin3-.Lpcsection_base8
+; LARGE-NEXT:  .quad	.Lfunc_begin3-.Lpcsection_base8
+; CHECK-NEXT:  .uleb128	.Lfunc_end6-.Lfunc_begin3
+; CHECK-NEXT:  .byte	42
+; CHECK-NEXT:  .ascii	"\345\216&"
+; CHECK-NEXT:  .byte	255
+; CHECK-NEXT:  .section	section_aux_21264,"awo", at progbits,.text
+; CHECK-NEXT:  .Lpcsection_base9:
+; DEFCM-NEXT:  .long	.Lfunc_begin3-.Lpcsection_base9
+; LARGE-NEXT:  .quad	.Lfunc_begin3-.Lpcsection_base9
+; CHECK-NEXT:  .long	.Lfunc_end6-.Lfunc_begin3
+; CHECK-NEXT:  .long	21264
+; CHECK-NEXT:  .text
+entry:
+  ret void
+}
+
 !0 = !{!"section_no_aux"}
 !1 = !{!"section_aux", !3}
 !2 = !{!"section_aux_42", !4, !"section_aux_21264", !5}
 !3 = !{i32 10, i32 20, i32 30}
 !4 = !{i32 42}
 !5 = !{i32 21264}
+!6 = !{!"section_aux!C", !7, !"section_aux_21264", !5}
+!7 = !{i64 42, i32 624485, i8 255}

diff  --git a/llvm/test/Instrumentation/SanitizerBinaryMetadata/atomics.ll b/llvm/test/Instrumentation/SanitizerBinaryMetadata/atomics.ll
index cef890f954a4..7d4086c49503 100644
--- a/llvm/test/Instrumentation/SanitizerBinaryMetadata/atomics.ll
+++ b/llvm/test/Instrumentation/SanitizerBinaryMetadata/atomics.ll
@@ -2071,6 +2071,6 @@ entry:
 ; CHECK-DAG: ret:
 ; CHECK-NEXT:  ret void
 
-; CHECK: !0 = !{!"sanmd_covered", !1}
-; CHECK: !1 = !{i8 1}
-; CHECK: !2 = !{!"sanmd_atomics"}
+; CHECK: !0 = !{!"sanmd_covered!C", !1}
+; CHECK: !1 = !{i64 1}
+; CHECK: !2 = !{!"sanmd_atomics!C"}


        


More information about the llvm-commits mailing list