[compiler-rt] 960b4c3 - [SanitizerBinaryMetadata] Treat constant globals and non-escaping addresses specially

Marco Elver via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 3 06:35:44 PST 2023


Author: Marco Elver
Date: 2023-02-03T15:35:24+01:00
New Revision: 960b4c3b5d2bd4bffacfd9382d7c10563faefb89

URL: https://github.com/llvm/llvm-project/commit/960b4c3b5d2bd4bffacfd9382d7c10563faefb89
DIFF: https://github.com/llvm/llvm-project/commit/960b4c3b5d2bd4bffacfd9382d7c10563faefb89.diff

LOG: [SanitizerBinaryMetadata] Treat constant globals and non-escaping addresses specially

For atomics metadata, we can make data race analysis more efficient by
entirely ignoring functions that include memory accesses but which only
access non-escaping (non-shared) and/or non-mutable memory. Such
functions will not be considered to be covered by "atomics" metadata,
resulting in the following benefits:

  1. reduces "covered" metadata; and
  2. allows data race analysis to skip such functions.

Reviewed By: dvyukov

Differential Revision: https://reviews.llvm.org/D143159

Added: 
    llvm/test/Instrumentation/SanitizerBinaryMetadata/shared-mutable.ll

Modified: 
    compiler-rt/test/metadata/covered.cpp
    llvm/include/llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h
    llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp
    llvm/test/Instrumentation/SanitizerBinaryMetadata/pretend-atomic-access.ll

Removed: 
    


################################################################################
diff --git a/compiler-rt/test/metadata/covered.cpp b/compiler-rt/test/metadata/covered.cpp
index 6fb4a91f10199..609a8c044c0d1 100644
--- a/compiler-rt/test/metadata/covered.cpp
+++ b/compiler-rt/test/metadata/covered.cpp
@@ -7,6 +7,8 @@
 // RUN: %clangxx %s -o %t -fexperimental-sanitize-metadata=atomics,uar && %t | FileCheck -check-prefix=CHECK-AU %s
 // RUN: %clangxx %s -o %t -fexperimental-sanitize-metadata=covered,atomics,uar && %t | FileCheck -check-prefix=CHECK-CAU %s
 
+const int const_global = 42;
+
 __attribute__((noinline, not_tail_called)) void escape(const volatile void *p) {
   static const volatile void *sink;
   sink = p;
@@ -19,10 +21,10 @@ __attribute__((noinline, not_tail_called)) void escape(const volatile void *p) {
 // CHECK-C:      empty: features=0
 // CHECK-A-NOT:  empty:
 // CHECK-U-NOT:  empty:
-// CHECK-CA:     empty: features=1
+// CHECK-CA:     empty: features=0
 // CHECK-CU:     empty: features=0
 // CHECK-AU-NOT: empty:
-// CHECK-CAU:    empty: features=1
+// CHECK-CAU:    empty: features=0
 void empty() {}
 
 // CHECK-C:  normal: features=0
@@ -37,6 +39,15 @@ void normal() {
   escape(&x);
 }
 
+// CHECK-C:      with_const_global: features=0
+// CHECK-A-NOT:  with_const_global:
+// CHECK-U-NOT:  with_const_global:
+// CHECK-CA:     with_const_global: features=0
+// CHECK-CU:     with_const_global: features=0
+// CHECK-AU-NOT: with_const_global:
+// CHECK-CAU:    with_const_global: features=0
+int with_const_global() { return *((const volatile int *)&const_global); }
+
 // CHECK-C:     with_atomic: features=0
 // CHECK-A:     with_atomic: features=1
 // CHECK-U-NOT: with_atomic:
@@ -86,6 +97,7 @@ int ellipsis_with_atomic(int *p, ...) {
 #define FUNCTIONS                                                              \
   FN(empty);                                                                   \
   FN(normal);                                                                  \
+  FN(with_const_global);                                                       \
   FN(with_atomic);                                                             \
   FN(with_atomic_escape);                                                      \
   FN(ellipsis);                                                                \

diff --git a/llvm/include/llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h b/llvm/include/llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h
index 67e22d1aa6818..beb67d51d55eb 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h
@@ -29,7 +29,6 @@ struct SanitizerBinaryMetadataOptions {
 inline constexpr int kSanitizerBinaryMetadataAtomicsBit = 0;
 inline constexpr int kSanitizerBinaryMetadataUARBit = 1;
 
-inline constexpr uint32_t kSanitizerBinaryMetadataNone = 0;
 inline constexpr uint32_t kSanitizerBinaryMetadataAtomics =
     1 << kSanitizerBinaryMetadataAtomicsBit;
 inline constexpr uint32_t kSanitizerBinaryMetadataUAR =

diff --git a/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp b/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp
index 26e4d53c34809..87d0de93d450c 100644
--- a/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp
+++ b/llvm/lib/Transforms/Instrumentation/SanitizerBinaryMetadata.cpp
@@ -17,6 +17,8 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
@@ -60,7 +62,6 @@ class MetadataInfo {
 public:
   const StringRef FunctionPrefix;
   const StringRef SectionSuffix;
-  const uint32_t FeatureMask;
 
   static const MetadataInfo Covered;
   static const MetadataInfo Atomics;
@@ -68,16 +69,13 @@ class MetadataInfo {
 private:
   // Forbid construction elsewhere.
   explicit constexpr MetadataInfo(StringRef FunctionPrefix,
-                                  StringRef SectionSuffix, uint32_t Feature)
-      : FunctionPrefix(FunctionPrefix), SectionSuffix(SectionSuffix),
-        FeatureMask(Feature) {}
+                                  StringRef SectionSuffix)
+      : FunctionPrefix(FunctionPrefix), SectionSuffix(SectionSuffix) {}
 };
-const MetadataInfo MetadataInfo::Covered{"__sanitizer_metadata_covered",
-                                         kSanitizerBinaryMetadataCoveredSection,
-                                         kSanitizerBinaryMetadataNone};
-const MetadataInfo MetadataInfo::Atomics{"__sanitizer_metadata_atomics",
-                                         kSanitizerBinaryMetadataAtomicsSection,
-                                         kSanitizerBinaryMetadataAtomics};
+const MetadataInfo MetadataInfo::Covered{
+    "__sanitizer_metadata_covered", kSanitizerBinaryMetadataCoveredSection};
+const MetadataInfo MetadataInfo::Atomics{
+    "__sanitizer_metadata_atomics", kSanitizerBinaryMetadataAtomicsSection};
 
 // The only instances of MetadataInfo are the constants above, so a set of
 // them may simply store pointers to them. To deterministically generate code,
@@ -131,14 +129,6 @@ class SanitizerBinaryMetadata {
   bool run();
 
 private:
-  // Return enabled feature mask of per-instruction metadata.
-  uint32_t getEnabledPerInstructionFeature() const {
-    uint32_t FeatureMask = 0;
-    if (Options.Atomics)
-      FeatureMask |= MetadataInfo::Atomics.FeatureMask;
-    return FeatureMask;
-  }
-
   uint32_t getVersion() const {
     uint32_t Version = kVersionBase;
     const auto CM = Mod.getCodeModel();
@@ -172,7 +162,7 @@ class SanitizerBinaryMetadata {
   Twine getSectionEnd(StringRef SectionSuffix);
 
   // Returns true if the access to the address should be considered "atomic".
-  bool pretendAtomicAccess(Value *Addr);
+  bool pretendAtomicAccess(const Value *Addr);
 
   Module &Mod;
   const SanitizerBinaryMetadataOptions Options;
@@ -251,13 +241,11 @@ void SanitizerBinaryMetadata::runOn(Function &F, MetadataInfoSet &MIS) {
 
   // The metadata features enabled for this function, stored along covered
   // metadata (if enabled).
-  uint32_t FeatureMask = getEnabledPerInstructionFeature();
+  uint32_t FeatureMask = 0;
   // Don't emit unnecessary covered metadata for all functions to save space.
   bool RequiresCovered = false;
-  // We can only understand if we need to set UAR feature after looking
-  // at the instructions. So we need to check instructions even if FeatureMask
-  // is empty.
-  if (FeatureMask || Options.UAR) {
+
+  if (Options.Atomics || Options.UAR) {
     for (BasicBlock &BB : F)
       for (Instruction &I : BB)
         RequiresCovered |= runOn(I, MIS, MDB, FeatureMask);
@@ -342,14 +330,17 @@ bool useAfterReturnUnsafe(Instruction &I) {
   return false;
 }
 
-bool SanitizerBinaryMetadata::pretendAtomicAccess(Value *Addr) {
-  assert(Addr && "Expected non-null Addr");
+bool SanitizerBinaryMetadata::pretendAtomicAccess(const Value *Addr) {
+  if (!Addr)
+    return false;
 
   Addr = Addr->stripInBoundsOffsets();
   auto *GV = dyn_cast<GlobalVariable>(Addr);
   if (!GV)
     return false;
 
+  // Some compiler-generated accesses are known racy, to avoid false positives
+  // in data-race analysis pretend they're atomic.
   if (GV->hasSection()) {
     const auto OF = Triple(Mod.getTargetTriple()).getObjectFormat();
     const auto ProfSec =
@@ -357,7 +348,6 @@ bool SanitizerBinaryMetadata::pretendAtomicAccess(Value *Addr) {
     if (GV->getSection().endswith(ProfSec))
       return true;
   }
-
   if (GV->getName().startswith("__llvm_gcov") ||
       GV->getName().startswith("__llvm_gcda"))
     return true;
@@ -365,37 +355,55 @@ bool SanitizerBinaryMetadata::pretendAtomicAccess(Value *Addr) {
   return false;
 }
 
+// Returns true if the memory at `Addr` may be shared with other threads.
+bool maybeSharedMutable(const Value *Addr) {
+  // By default assume memory may be shared.
+  if (!Addr)
+    return true;
+
+  if (isa<AllocaInst>(getUnderlyingObject(Addr)) &&
+      !PointerMayBeCaptured(Addr, true, true))
+    return false; // Object is on stack but does not escape.
+
+  Addr = Addr->stripInBoundsOffsets();
+  if (auto *GV = dyn_cast<GlobalVariable>(Addr)) {
+    if (GV->isConstant())
+      return false; // Shared, but not mutable.
+  }
+
+  return true;
+}
+
 bool SanitizerBinaryMetadata::runOn(Instruction &I, MetadataInfoSet &MIS,
                                     MDBuilder &MDB, uint32_t &FeatureMask) {
   SmallVector<const MetadataInfo *, 1> InstMetadata;
   bool RequiresCovered = false;
 
+  // Only call if at least 1 type of metadata is requested.
+  assert(Options.UAR || Options.Atomics);
+
   if (Options.UAR && !(FeatureMask & kSanitizerBinaryMetadataUAR)) {
     if (useAfterReturnUnsafe(I))
       FeatureMask |= kSanitizerBinaryMetadataUAR;
   }
 
-  if (Options.Atomics && I.mayReadOrWriteMemory()) {
-    auto SSID = getAtomicSyncScopeID(&I);
-    bool IsAtomic = SSID.has_value() && *SSID != SyncScope::SingleThread;
-
-    if (!IsAtomic) {
-      // Check to pretend some compiler-generated accesses are atomic, to avoid
-      // false positives in data-race analysis.
-      Value *Addr = nullptr;
-      if (auto *SI = dyn_cast<StoreInst>(&I))
-        Addr = SI->getPointerOperand();
-      else if (auto *LI = dyn_cast<LoadInst>(&I))
-        Addr = LI->getPointerOperand();
-      if (Addr)
-        IsAtomic = pretendAtomicAccess(Addr);
-    }
-
-    if (IsAtomic) {
-      NumMetadataAtomics++;
-      InstMetadata.push_back(&MetadataInfo::Atomics);
+  if (Options.Atomics) {
+    const Value *Addr = nullptr;
+    if (auto *SI = dyn_cast<StoreInst>(&I))
+      Addr = SI->getPointerOperand();
+    else if (auto *LI = dyn_cast<LoadInst>(&I))
+      Addr = LI->getPointerOperand();
+
+    if (I.mayReadOrWriteMemory() && maybeSharedMutable(Addr)) {
+      auto SSID = getAtomicSyncScopeID(&I);
+      if ((SSID.has_value() && *SSID != SyncScope::SingleThread) ||
+          pretendAtomicAccess(Addr)) {
+        NumMetadataAtomics++;
+        InstMetadata.push_back(&MetadataInfo::Atomics);
+      }
+      FeatureMask |= kSanitizerBinaryMetadataAtomics;
+      RequiresCovered = true;
     }
-    RequiresCovered = true;
   }
 
   // Attach MD_pcsections to instruction.

diff --git a/llvm/test/Instrumentation/SanitizerBinaryMetadata/pretend-atomic-access.ll b/llvm/test/Instrumentation/SanitizerBinaryMetadata/pretend-atomic-access.ll
index 92e8c543ed4f6..e54468156f31f 100644
--- a/llvm/test/Instrumentation/SanitizerBinaryMetadata/pretend-atomic-access.ll
+++ b/llvm/test/Instrumentation/SanitizerBinaryMetadata/pretend-atomic-access.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature
 ; RUN: opt < %s -passes='module(sanmd-module)' -sanitizer-metadata-atomics -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -13,8 +13,8 @@ target triple = "x86_64-unknown-linux-gnu"
 @__llvm_gcov_global_state_pred = internal global i32 0
 @__llvm_gcda_foo = internal global i32 0
 
-define i32 @test_gep() sanitize_thread {
-; CHECK-LABEL: @test_gep(
+define i32 @test_gep() {
+; CHECK-LABEL: define {{[^@]+}}@test_gep() !pcsections !0 {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[PGOCOUNT:%.*]] = load i64, ptr @__profc_test_gep, align 8, !pcsections !2
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[PGOCOUNT]], 1
@@ -43,8 +43,8 @@ entry:
   ret i32 1
 }
 
-define i32 @test_bitcast() sanitize_thread {
-; CHECK-LABEL: @test_bitcast(
+define i32 @test_bitcast() {
+; CHECK-LABEL: define {{[^@]+}}@test_bitcast() !pcsections !0 {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr @__profc_test_bitcast, align 8, !pcsections !2
 ; CHECK-NEXT:    [[DOTPROMOTED5:%.*]] = load i64, ptr @__profc_test_bitcast_foo, align 8, !pcsections !2
@@ -64,8 +64,8 @@ entry:
   ret i32 undef
 }
 
-define void @test_load() sanitize_thread {
-; CHECK-LABEL: @test_load(
+define void @test_load() {
+; CHECK-LABEL: define {{[^@]+}}@test_load() !pcsections !0 {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr @__llvm_gcov_global_state_pred, align 4, !pcsections !2
 ; CHECK-NEXT:    store i32 1, ptr @__llvm_gcov_global_state_pred, align 4, !pcsections !2

diff --git a/llvm/test/Instrumentation/SanitizerBinaryMetadata/shared-mutable.ll b/llvm/test/Instrumentation/SanitizerBinaryMetadata/shared-mutable.ll
new file mode 100644
index 0000000000000..b47b2e1055bfb
--- /dev/null
+++ b/llvm/test/Instrumentation/SanitizerBinaryMetadata/shared-mutable.ll
@@ -0,0 +1,153 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature
+; RUN: opt < %s -passes='module(sanmd-module)' -sanitizer-metadata-atomics -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @escape(ptr)
+
+ at sink = global ptr null, align 4
+ at const_global = external constant i32
+ at non_const_global = global i32 0, align 4
+ at const_global_array = external constant [10 x i32]
+
+define i32 @notcaptured() {
+; CHECK-LABEL: define {{[^@]+}}@notcaptured() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 42, ptr [[PTR]], align 4
+; CHECK-NEXT:    [[TMP:%.*]] = load i32, ptr [[PTR]], align 4
+; CHECK-NEXT:    ret i32 [[TMP]]
+;
+entry:
+  %ptr = alloca i32, align 4
+  store i32 42, ptr %ptr, align 4
+  %tmp = load i32, ptr %ptr, align 4
+  ret i32 %tmp
+}
+
+define void @captured0() {
+; CHECK-LABEL: define {{[^@]+}}@captured0() !pcsections !0 {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @escape(ptr [[PTR]])
+; CHECK-NEXT:    store i32 42, ptr [[PTR]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %ptr = alloca i32, align 4
+  ; escapes due to call
+  call void @escape(ptr %ptr)
+  store i32 42, ptr %ptr, align 4
+  ret void
+}
+
+define void @captured1() {
+; CHECK-LABEL: define {{[^@]+}}@captured1() !pcsections !0 {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store ptr [[PTR]], ptr @sink, align 8
+; CHECK-NEXT:    store i32 42, ptr [[PTR]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %ptr = alloca i32, align 4
+  ; escapes due to store into global
+  store ptr %ptr, ptr @sink, align 8
+  store i32 42, ptr %ptr, align 4
+  ret void
+}
+
+define void @captured2() {
+; CHECK-LABEL: define {{[^@]+}}@captured2() !pcsections !0 {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    store ptr [[PTR]], ptr [[TMP]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[TMP]], align 8
+; CHECK-NEXT:    store ptr [[TMP0]], ptr @sink, align 8
+; CHECK-NEXT:    store i32 42, ptr [[PTR]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %ptr = alloca i32, align 4
+  %tmp = alloca ptr, align 8
+  ; transitive escape
+  store ptr %ptr, ptr %tmp, align 8
+  %0 = load ptr, ptr %tmp, align 8
+  store ptr %0, ptr @sink, align 8
+  store i32 42, ptr %ptr, align 4
+  ret void
+}
+
+define i32 @read_from_const_global() {
+; CHECK-LABEL: define {{[^@]+}}@read_from_const_global() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr @const_global, align 4
+; CHECK-NEXT:    ret i32 [[TMP0]]
+;
+entry:
+  %0 = load i32, ptr @const_global, align 4
+  ret i32 %0
+}
+
+define i32 @read_from_non_const_global() {
+; CHECK-LABEL: define {{[^@]+}}@read_from_non_const_global() !pcsections !0 {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr @non_const_global, align 4
+; CHECK-NEXT:    ret i32 [[TMP0]]
+;
+entry:
+  %0 = load i32, ptr @non_const_global, align 4
+  ret i32 %0
+}
+
+define i32 @read_from_const_global_array(i32 %idx) {
+; CHECK-LABEL: define {{[^@]+}}@read_from_const_global_array
+; CHECK-SAME: (i32 [[IDX:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[IDX]] to i64
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], ptr @const_global_array, i64 0, i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    ret i32 [[TMP0]]
+;
+entry:
+  %idxprom = sext i32 %idx to i64
+  %arrayidx = getelementptr inbounds [10 x i32], ptr @const_global_array, i64 0, i64 %idxprom
+  %0 = load i32, ptr %arrayidx, align 4
+  ret i32 %0
+}
+
+define i32 @notcaptured_and_global_access() {
+; CHECK-LABEL: define {{[^@]+}}@notcaptured_and_global_access() !pcsections !0 {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[A:%.*]] = load i32, ptr @non_const_global, align 4
+; CHECK-NEXT:    store i32 [[A]], ptr [[PTR]], align 4
+; CHECK-NEXT:    [[B:%.*]] = load i32, ptr [[PTR]], align 4
+; CHECK-NEXT:    ret i32 [[B]]
+;
+entry:
+  %ptr = alloca i32, align 4
+  %a = load i32, ptr @non_const_global, align 4
+  store i32 %a, ptr %ptr, align 4
+  %b = load i32, ptr %ptr, align 4
+  ret i32 %b
+}
+
+define i32 @notcaptured_and_const_global_access() {
+; CHECK-LABEL: define {{[^@]+}}@notcaptured_and_const_global_access() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[A:%.*]] = load i32, ptr @const_global, align 4
+; CHECK-NEXT:    store i32 [[A]], ptr [[PTR]], align 4
+; CHECK-NEXT:    [[B:%.*]] = load i32, ptr [[PTR]], align 4
+; CHECK-NEXT:    ret i32 [[B]]
+;
+entry:
+  %ptr = alloca i32, align 4
+  %a = load i32, ptr @const_global, align 4
+  store i32 %a, ptr %ptr, align 4
+  %b = load i32, ptr %ptr, align 4
+  ret i32 %b
+}


        


More information about the llvm-commits mailing list