[llvm] 3031a25 - [MSan] Fix determinism issue when using msan-track-origins.

Jordan Rupprecht via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 1 09:16:58 PDT 2022


Author: Jordan Rupprecht
Date: 2022-09-01T09:15:57-07:00
New Revision: 3031a250dea9a2e92de8ed0d10dd8d471c6a1cb7

URL: https://github.com/llvm/llvm-project/commit/3031a250dea9a2e92de8ed0d10dd8d471c6a1cb7
DIFF: https://github.com/llvm/llvm-project/commit/3031a250dea9a2e92de8ed0d10dd8d471c6a1cb7.diff

LOG: [MSan] Fix determinism issue when using msan-track-origins.

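When instrumenting `alloca`s, we track them in a `SmallSet` (which, for pointer element types, is backed by a `SmallPtrSet`). While the number of elements stays within the `SmallSet`'s inline size, it behaves like a vector and offers a stable iteration order. Once there are more `alloca`s to instrument than that, the iteration order becomes unstable. This manifests as non-deterministic builds, because the global constants we create while instrumenting each alloca are emitted in iteration order. The fix is to use a `SmallSetVector` instead, which always iterates in insertion order.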

The added test is a simple IR file, but the issue was discovered while building `libcxx/src/filesystem/operations.cpp` from libc++. A reduced C++ example from that file:

```
// clang++ -fsanitize=memory -fsanitize-memory-track-origins \
//   -fno-discard-value-names -S -emit-llvm \
//   -c op.cpp -o op.ll
struct Foo {
  ~Foo();
};
bool func1(Foo);
void func2(Foo);
void func3(int) {
  int f_st, t_st;
  Foo f, t;
  func1(f) || func1(f) || func1(t) || func1(f) && func1(t);
  func2(f);
}
```

Reviewed By: kda

Differential Revision: https://reviews.llvm.org/D133034

Added: 
    llvm/test/Instrumentation/MemorySanitizer/stable_set_alloca_origin.ll

Modified: 
    llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index d4864cb30fc9..ce7a80ccbf11 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -146,7 +146,7 @@
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
@@ -1078,7 +1078,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
   };
   SmallVector<ShadowOriginAndInsertPoint, 16> InstrumentationList;
   bool InstrumentLifetimeStart = ClHandleLifetimeIntrinsics;
-  SmallSet<AllocaInst *, 16> AllocaSet;
+  SmallSetVector<AllocaInst *, 16> AllocaSet;
   SmallVector<std::pair<IntrinsicInst *, AllocaInst *>, 16> LifetimeStartList;
   SmallVector<StoreInst *, 16> StoreList;
 
@@ -1353,7 +1353,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     if (InstrumentLifetimeStart) {
       for (auto Item : LifetimeStartList) {
         instrumentAlloca(*Item.second, Item.first);
-        AllocaSet.erase(Item.second);
+        AllocaSet.remove(Item.second);
       }
     }
     // Poison the allocas for which we didn't instrument the corresponding

diff  --git a/llvm/test/Instrumentation/MemorySanitizer/stable_set_alloca_origin.ll b/llvm/test/Instrumentation/MemorySanitizer/stable_set_alloca_origin.ll
new file mode 100644
index 000000000000..999085575f08
--- /dev/null
+++ b/llvm/test/Instrumentation/MemorySanitizer/stable_set_alloca_origin.ll
@@ -0,0 +1,55 @@
+; RUN: opt < %s -msan-check-access-address=0 -msan-track-origins=1 -S          \
+; RUN: -passes=msan 2>&1 | FileCheck %s
+; RUN: opt < %s -msan-check-access-address=0 -msan-track-origins=2 -S          \
+; RUN: -passes=msan 2>&1 | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @func() sanitize_memory {
+entry:
+  %0 = alloca i32, i32 0, align 4
+  %1 = alloca i32, i32 0, align 4
+  %2 = alloca i32, i32 0, align 4
+  %3 = alloca i32, i32 0, align 4
+  %4 = alloca i32, i32 0, align 4
+  %5 = alloca i32, i32 0, align 4
+  %6 = alloca i32, i32 0, align 4
+  %7 = alloca i32, i32 0, align 4
+  %8 = alloca i32, i32 0, align 4
+  %9 = alloca i32, i32 0, align 4
+  %10 = alloca i32, i32 0, align 4
+  %11 = alloca i32, i32 0, align 4
+  %12 = alloca i32, i32 0, align 4
+  %13 = alloca i32, i32 0, align 4
+  %14 = alloca i32, i32 0, align 4
+  %15 = alloca i32, i32 0, align 4
+  %16 = alloca i32, i32 0, align 4
+  %17 = alloca i32, i32 0, align 4
+  %18 = alloca i32, i32 0, align 4
+  %19 = alloca i32, i32 0, align 4
+  %20 = alloca i32, i32 0, align 4
+  ret void
+}
+
+; CHECK: call void @__msan_set_alloca_origin_with_descr(ptr %{{[0-9]+}}, i64 0, ptr @0, ptr @1)
+; CHECK: call void @__msan_set_alloca_origin_with_descr(ptr %{{[0-9]+}}, i64 0, ptr @2, ptr @3)
+; CHECK: call void @__msan_set_alloca_origin_with_descr(ptr %{{[0-9]+}}, i64 0, ptr @4, ptr @5)
+; CHECK: call void @__msan_set_alloca_origin_with_descr(ptr %{{[0-9]+}}, i64 0, ptr @6, ptr @7)
+; CHECK: call void @__msan_set_alloca_origin_with_descr(ptr %{{[0-9]+}}, i64 0, ptr @8, ptr @9)
+; CHECK: call void @__msan_set_alloca_origin_with_descr(ptr %{{[0-9]+}}, i64 0, ptr @10, ptr @11)
+; CHECK: call void @__msan_set_alloca_origin_with_descr(ptr %{{[0-9]+}}, i64 0, ptr @12, ptr @13)
+; CHECK: call void @__msan_set_alloca_origin_with_descr(ptr %{{[0-9]+}}, i64 0, ptr @14, ptr @15)
+; CHECK: call void @__msan_set_alloca_origin_with_descr(ptr %{{[0-9]+}}, i64 0, ptr @16, ptr @17)
+; CHECK: call void @__msan_set_alloca_origin_with_descr(ptr %{{[0-9]+}}, i64 0, ptr @18, ptr @19)
+; CHECK: call void @__msan_set_alloca_origin_with_descr(ptr %{{[0-9]+}}, i64 0, ptr @20, ptr @21)
+; CHECK: call void @__msan_set_alloca_origin_with_descr(ptr %{{[0-9]+}}, i64 0, ptr @22, ptr @23)
+; CHECK: call void @__msan_set_alloca_origin_with_descr(ptr %{{[0-9]+}}, i64 0, ptr @24, ptr @25)
+; CHECK: call void @__msan_set_alloca_origin_with_descr(ptr %{{[0-9]+}}, i64 0, ptr @26, ptr @27)
+; CHECK: call void @__msan_set_alloca_origin_with_descr(ptr %{{[0-9]+}}, i64 0, ptr @28, ptr @29)
+; CHECK: call void @__msan_set_alloca_origin_with_descr(ptr %{{[0-9]+}}, i64 0, ptr @30, ptr @31)
+; CHECK: call void @__msan_set_alloca_origin_with_descr(ptr %{{[0-9]+}}, i64 0, ptr @32, ptr @33)
+; CHECK: call void @__msan_set_alloca_origin_with_descr(ptr %{{[0-9]+}}, i64 0, ptr @34, ptr @35)
+; CHECK: call void @__msan_set_alloca_origin_with_descr(ptr %{{[0-9]+}}, i64 0, ptr @36, ptr @37)
+; CHECK: call void @__msan_set_alloca_origin_with_descr(ptr %{{[0-9]+}}, i64 0, ptr @38, ptr @39)
+; CHECK: call void @__msan_set_alloca_origin_with_descr(ptr %{{[0-9]+}}, i64 0, ptr @40, ptr @41)


        


More information about the llvm-commits mailing list