[compiler-rt] ec277b6 - [MSAN] Separate id ptr from constant string for variable names used in track origins.

Kevin Athey via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 12 08:47:40 PDT 2022


Author: Kevin Athey
Date: 2022-08-12T08:47:36-07:00
New Revision: ec277b67eb36332c2f3c48e1967b2540af681716

URL: https://github.com/llvm/llvm-project/commit/ec277b67eb36332c2f3c48e1967b2540af681716
DIFF: https://github.com/llvm/llvm-project/commit/ec277b67eb36332c2f3c48e1967b2540af681716.diff

LOG: [MSAN] Separate id ptr from constant string for variable names used in track origins.

The goal is to reduce the size of the MSAN with track origins binary, by making
the variable name locations constant which will allow the linker to compress
them.

Follows: https://reviews.llvm.org/D131415

Reviewed By: vitalybuka

Differential Revision: https://reviews.llvm.org/D131631

Added: 
    

Modified: 
    compiler-rt/lib/msan/msan.cpp
    compiler-rt/lib/msan/msan_interface_internal.h
    llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
    llvm/test/Instrumentation/MemorySanitizer/alloca.ll

Removed: 
    


################################################################################
diff  --git a/compiler-rt/lib/msan/msan.cpp b/compiler-rt/lib/msan/msan.cpp
index 3bc48c06646a4..6f92d8590c72a 100644
--- a/compiler-rt/lib/msan/msan.cpp
+++ b/compiler-rt/lib/msan/msan.cpp
@@ -303,19 +303,21 @@ u32 ChainOrigin(u32 id, StackTrace *stack) {
   return chained.raw_id();
 }
 
-// 'descr' is created at compile time and contains '----' in the beginning.
-// When we see descr for the first time we replace '----' with a uniq id
-// and set the origin to (id | (31-th bit)).
-static inline void SetAllocaOrigin(void *a, uptr size, char *descr, uptr pc) {
+// Current implementation separates the 'id_ptr' from the 'descr' and makes
+// 'descr' constant.
+// Previous implementation 'descr' is created at compile time and contains
+// '----' in the beginning.  When we see descr for the first time we replace
+// '----' with a uniq id and set the origin to (id | (31-th bit)).
+static inline void SetAllocaOrigin(void *a, uptr size, u32 *id_ptr, char *descr,
+                                   uptr pc) {
   static const u32 dash = '-';
   static const u32 first_timer =
       dash + (dash << 8) + (dash << 16) + (dash << 24);
-  u32 *id_ptr = (u32 *)descr;
   u32 id = *id_ptr;
-  if (id == first_timer) {
+  if (id == 0 || id == first_timer) {
     u32 idx = atomic_fetch_add(&NumStackOriginDescrs, 1, memory_order_relaxed);
     CHECK_LT(idx, kNumStackOriginDescrs);
-    StackOriginDescr[idx] = descr + 4;
+    StackOriginDescr[idx] = descr;
     StackOriginPC[idx] = pc;
     id = Origin::CreateStackOrigin(idx).raw_id();
     *id_ptr = id;
@@ -602,14 +604,21 @@ void __msan_set_origin(const void *a, uptr size, u32 origin) {
 }
 
 void __msan_set_alloca_origin(void *a, uptr size, char *descr) {
-  SetAllocaOrigin(a, size, descr, GET_CALLER_PC());
+  SetAllocaOrigin(a, size, reinterpret_cast<u32 *>(descr), descr + 4,
+                  GET_CALLER_PC());
 }
 
 void __msan_set_alloca_origin4(void *a, uptr size, char *descr, uptr pc) {
   // Intentionally ignore pc and use return address. This function is here for
   // compatibility, in case program is linked with library instrumented by
   // older clang.
-  SetAllocaOrigin(a, size, descr, GET_CALLER_PC());
+  SetAllocaOrigin(a, size, reinterpret_cast<u32 *>(descr), descr + 4,
+                  GET_CALLER_PC());
+}
+
+void __msan_set_alloca_origin_with_descr(void *a, uptr size, u32 *id_ptr,
+                                         char *descr) {
+  SetAllocaOrigin(a, size, id_ptr, descr, GET_CALLER_PC());
 }
 
 u32 __msan_chain_origin(u32 id) {

diff  --git a/compiler-rt/lib/msan/msan_interface_internal.h b/compiler-rt/lib/msan/msan_interface_internal.h
index c72c91c3c160c..009d5b408ac90 100644
--- a/compiler-rt/lib/msan/msan_interface_internal.h
+++ b/compiler-rt/lib/msan/msan_interface_internal.h
@@ -109,6 +109,9 @@ void __msan_set_alloca_origin(void *a, uptr size, char *descr);
 SANITIZER_INTERFACE_ATTRIBUTE
 void __msan_set_alloca_origin4(void *a, uptr size, char *descr, uptr pc);
 SANITIZER_INTERFACE_ATTRIBUTE
+void __msan_set_alloca_origin_with_descr(void *a, uptr size, u32 *id_ptr,
+                                         char *descr);
+SANITIZER_INTERFACE_ATTRIBUTE
 u32 __msan_chain_origin(u32 id);
 SANITIZER_INTERFACE_ATTRIBUTE
 u32 __msan_get_origin(const void *a);

diff  --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index a3c6059b1dd47..bb22608e0bd87 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -579,7 +579,7 @@ class MemorySanitizer {
 
   /// Run-time helper that generates a new origin value for a stack
   /// allocation.
-  FunctionCallee MsanSetAllocaOriginFn;
+  FunctionCallee MsanSetAllocaOriginWithDescriptionFn;
 
   /// Run-time helper that poisons stack on function entry.
   FunctionCallee MsanPoisonStackFn;
@@ -691,10 +691,10 @@ void MemorySanitizerPass::printPipeline(
 /// Creates a writable global for Str so that we can pass it to the
 /// run-time lib. Runtime uses first 4 bytes of the string to store the
 /// frame ID, so the string needs to be mutable.
-static GlobalVariable *createPrivateNonConstGlobalForString(Module &M,
-                                                            StringRef Str) {
+static GlobalVariable *createPrivateConstGlobalForString(Module &M,
+                                                         StringRef Str) {
   Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
-  return new GlobalVariable(M, StrConst->getType(), /*isConstant=*/false,
+  return new GlobalVariable(M, StrConst->getType(), /*isConstant=*/true,
                             GlobalValue::PrivateLinkage, StrConst, "");
 }
 
@@ -825,9 +825,9 @@ void MemorySanitizer::createUserspaceApi(Module &M) {
         IRB.getInt32Ty());
   }
 
-  MsanSetAllocaOriginFn = M.getOrInsertFunction(
-    "__msan_set_alloca_origin", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy,
-    IRB.getInt8PtrTy());
+  MsanSetAllocaOriginWithDescriptionFn = M.getOrInsertFunction(
+      "__msan_set_alloca_origin_with_descr", IRB.getVoidTy(),
+      IRB.getInt8PtrTy(), IntptrTy, IRB.getInt8PtrTy(), IRB.getInt8PtrTy());
   MsanPoisonStackFn =
       M.getOrInsertFunction("__msan_poison_stack", IRB.getVoidTy(),
                             IRB.getInt8PtrTy(), IntptrTy);
@@ -3877,17 +3877,16 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
                                   "_msphi_o"));
   }
 
+  Value *getLocalVarIdptr(AllocaInst &I) {
+    ConstantInt *IntConst =
+        ConstantInt::get(Type::getInt32Ty((*F.getParent()).getContext()), 0);
+    return new GlobalVariable(*F.getParent(), IntConst->getType(),
+                              /*isConstant=*/false, GlobalValue::PrivateLinkage,
+                              IntConst);
+  }
+
   Value *getLocalVarDescription(AllocaInst &I) {
-    SmallString<2048> StackDescriptionStorage;
-    raw_svector_ostream StackDescription(StackDescriptionStorage);
-    // We create a string with a description of the stack allocation and
-    // pass it into __msan_set_alloca_origin.
-    // It will be printed by the run-time if stack-originated UMR is found.
-    // The first 4 bytes of the string are set to '----' and will be replaced
-    // by __msan_va_arg_overflow_size_tls at the first call.
-    StackDescription << "----" << I.getName();
-    return createPrivateNonConstGlobalForString(*F.getParent(),
-                                                StackDescription.str());
+    return createPrivateConstGlobalForString(*F.getParent(), I.getName());
   }
 
   void poisonAllocaUserspace(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
@@ -3904,9 +3903,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     }
 
     if (PoisonStack && MS.TrackOrigins) {
+      Value *Idptr = getLocalVarIdptr(I);
       Value *Descr = getLocalVarDescription(I);
-      IRB.CreateCall(MS.MsanSetAllocaOriginFn,
+      IRB.CreateCall(MS.MsanSetAllocaOriginWithDescriptionFn,
                      {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len,
+                      IRB.CreatePointerCast(Idptr, IRB.getInt8PtrTy()),
                       IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy())});
     }
   }

diff  --git a/llvm/test/Instrumentation/MemorySanitizer/alloca.ll b/llvm/test/Instrumentation/MemorySanitizer/alloca.ll
index 2fc50acbeae37..13fed182310ed 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/alloca.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/alloca.ll
@@ -14,7 +14,8 @@
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-; ORIGIN: [[D:@[0-9]+]] = private global [13 x i8] c"----unique_x\00"
+; ORIGIN: [[IDPTR:@[0-9]+]] = private global i32 0
+; ORIGIN: [[DESCR:@[0-9]+]] = private constant [9 x i8] c"unique_x\00"
 
 define void @static() sanitize_memory {
 entry:
@@ -25,7 +26,7 @@ entry:
 ; CHECK-LABEL: define void @static(
 ; INLINE: call void @llvm.memset.p0i8.i64(i8* align 4 {{.*}}, i8 -1, i64 4, i1 false)
 ; CALL: call void @__msan_poison_stack(i8* {{.*}}, i64 4)
-; ORIGIN: call void @__msan_set_alloca_origin(i8* {{.*}}, i64 4, i8* {{.*}} [[D]],
+; ORIGIN: call void @__msan_set_alloca_origin_with_descr(i8* {{.*}}, i64 4, i8* {{.*}} [[IDPTR]] {{.*}}, i8* {{.*}} [[DESCR]],
 ; KMSAN: call void @__msan_poison_alloca(i8* {{.*}}, i64 4,
 ; CHECK: ret void
 
@@ -41,7 +42,7 @@ l:
 ; CHECK-LABEL: define void @dynamic(
 ; INLINE: call void @llvm.memset.p0i8.i64(i8* align 4 {{.*}}, i8 -1, i64 4, i1 false)
 ; CALL: call void @__msan_poison_stack(i8* {{.*}}, i64 4)
-; ORIGIN: call void @__msan_set_alloca_origin(i8* {{.*}}, i64 4,
+; ORIGIN: call void @__msan_set_alloca_origin_with_descr(i8* {{.*}}, i64 4,
 ; KMSAN: call void @__msan_poison_alloca(i8* {{.*}}, i64 4,
 ; CHECK: ret void
 
@@ -54,7 +55,7 @@ entry:
 ; CHECK-LABEL: define void @array(
 ; INLINE: call void @llvm.memset.p0i8.i64(i8* align 4 {{.*}}, i8 -1, i64 20, i1 false)
 ; CALL: call void @__msan_poison_stack(i8* {{.*}}, i64 20)
-; ORIGIN: call void @__msan_set_alloca_origin(i8* {{.*}}, i64 20,
+; ORIGIN: call void @__msan_set_alloca_origin_with_descr(i8* {{.*}}, i64 20,
 ; KMSAN: call void @__msan_poison_alloca(i8* {{.*}}, i64 20,
 ; CHECK: ret void
 
@@ -68,7 +69,7 @@ entry:
 ; CHECK: %[[A:.*]] = mul i64 4, %cnt
 ; INLINE: call void @llvm.memset.p0i8.i64(i8* align 4 {{.*}}, i8 -1, i64 %[[A]], i1 false)
 ; CALL: call void @__msan_poison_stack(i8* {{.*}}, i64 %[[A]])
-; ORIGIN: call void @__msan_set_alloca_origin(i8* {{.*}}, i64 %[[A]],
+; ORIGIN: call void @__msan_set_alloca_origin_with_descr(i8* {{.*}}, i64 %[[A]],
 ; KMSAN: call void @__msan_poison_alloca(i8* {{.*}}, i64 %[[A]],
 ; CHECK: ret void
 
@@ -82,7 +83,7 @@ entry:
 ; CHECK-LABEL: define void @unpoison_local(
 ; INLINE: call void @llvm.memset.p0i8.i64(i8* align 4 {{.*}}, i8 0, i64 20, i1 false)
 ; CALL: call void @llvm.memset.p0i8.i64(i8* align 4 {{.*}}, i8 0, i64 20, i1 false)
-; ORIGIN-NOT: call void @__msan_set_alloca_origin(i8* {{.*}}, i64 20,
+; ORIGIN-NOT: call void @__msan_set_alloca_origin_with_descr(i8* {{.*}}, i64 20,
 ; KMSAN: call void @__msan_unpoison_alloca(i8* {{.*}}, i64 20)
 ; CHECK: ret void
 
@@ -111,13 +112,13 @@ another_bb:
 ; CHECK: call void @llvm.lifetime.start
 ; INLINE: call void @llvm.memset.p0i8.i64(i8* align 4 {{.*}}, i8 -1, i64 4, i1 false)
 ; CALL: call void @__msan_poison_stack(i8* {{.*}}, i64 4)
-; ORIGIN: call void @__msan_set_alloca_origin(i8* {{.*}}, i64 4,
+; ORIGIN: call void @__msan_set_alloca_origin_with_descr(i8* {{.*}}, i64 4,
 ; KMSAN: call void @__msan_poison_alloca(i8* {{.*}}, i64 4,
 
 ; CHECK: call void @llvm.lifetime.start
 ; INLINE: call void @llvm.memset.p0i8.i64(i8* align 4 {{.*}}, i8 -1, i64 4, i1 false)
 ; CALL: call void @__msan_poison_stack(i8* {{.*}}, i64 4)
-; ORIGIN: call void @__msan_set_alloca_origin(i8* {{.*}}, i64 4,
+; ORIGIN: call void @__msan_set_alloca_origin_with_descr(i8* {{.*}}, i64 4,
 ; KMSAN: call void @__msan_poison_alloca(i8* {{.*}}, i64 4,
 ; CHECK: ret void
 
@@ -138,7 +139,7 @@ entry:
 ; CHECK: %[[A:.*]] = mul i64 4, %cnt
 ; INLINE: call void @llvm.memset.p0i8.i64(i8* align 4 {{.*}}, i8 -1, i64 %[[A]], i1 false)
 ; CALL: call void @__msan_poison_stack(i8* {{.*}}, i64 %[[A]])
-; ORIGIN: call void @__msan_set_alloca_origin(i8* {{.*}}, i64 %[[A]],
+; ORIGIN: call void @__msan_set_alloca_origin_with_descr(i8* {{.*}}, i64 %[[A]],
 ; KMSAN: call void @__msan_poison_alloca(i8* {{.*}}, i64 %[[A]],
 ; CHECK: call void @llvm.lifetime.end
 ; CHECK: ret void
@@ -178,36 +179,36 @@ another_bb:
 ; CHECK: %x = alloca i32
 ; INLINE: call void @llvm.memset.p0i8.i64(i8* align 4 {{.*}}, i8 -1, i64 4, i1 false)
 ; CALL: call void @__msan_poison_stack(i8* {{.*}}, i64 4)
-; ORIGIN: call void @__msan_set_alloca_origin(i8* {{.*}}, i64 4,
+; ORIGIN: call void @__msan_set_alloca_origin_with_descr(i8* {{.*}}, i64 4,
 ; KMSAN: call void @__msan_poison_alloca(i8* {{.*}}, i64 4,
 ; CHECK: %y = alloca i32
 ; INLINE: call void @llvm.memset.p0i8.i64(i8* align 4 {{.*}}, i8 -1, i64 4, i1 false)
 ; CALL: call void @__msan_poison_stack(i8* {{.*}}, i64 4)
-; ORIGIN: call void @__msan_set_alloca_origin(i8* {{.*}}, i64 4,
+; ORIGIN: call void @__msan_set_alloca_origin_with_descr(i8* {{.*}}, i64 4,
 ; KMSAN: call void @__msan_poison_alloca(i8* {{.*}}, i64 4,
 ; CHECK: %z = alloca i32
 ; INLINE: call void @llvm.memset.p0i8.i64(i8* align 4 {{.*}}, i8 -1, i64 4, i1 false)
 ; CALL: call void @__msan_poison_stack(i8* {{.*}}, i64 4)
-; ORIGIN: call void @__msan_set_alloca_origin(i8* {{.*}}, i64 4,
+; ORIGIN: call void @__msan_set_alloca_origin_with_descr(i8* {{.*}}, i64 4,
 ; KMSAN: call void @__msan_poison_alloca(i8* {{.*}}, i64 4,
 
 ; There're two lifetime intrinsics for %z, but we must instrument it only once.
 ; INLINE-NOT: call void @llvm.memset.p0i8.i64(i8* align 4 {{.*}}, i8 -1, i64 4, i1 false)
 ; CALL-NOT: call void @__msan_poison_stack(i8* {{.*}}, i64 4)
-; ORIGIN-NOT: call void @__msan_set_alloca_origin(i8* {{.*}}, i64 4,
+; ORIGIN-NOT: call void @__msan_set_alloca_origin_with_descr(i8* {{.*}}, i64 4,
 ; KMSAN-NOT: call void @__msan_poison_alloca(i8* {{.*}}, i64 4,
 ; CHECK-LABEL: another_bb:
 
 ; CHECK: call void @llvm.lifetime.start
 ; INLINE-NOT: call void @llvm.memset.p0i8.i64(i8* align 4 {{.*}}, i8 -1, i64 4, i1 false)
 ; CALL-NOT: call void @__msan_poison_stack(i8* {{.*}}, i64 4)
-; ORIGIN-NOT: call void @__msan_set_alloca_origin(i8* {{.*}}, i64 4,
+; ORIGIN-NOT: call void @__msan_set_alloca_origin_with_descr(i8* {{.*}}, i64 4,
 ; KMSAN-NOT: call void @__msan_poison_alloca(i8* {{.*}}, i64 4,
 ; CHECK: call void @llvm.lifetime.end
 ; CHECK: call void @llvm.lifetime.start
 ; INLINE-NOT: call void @llvm.memset.p0i8.i64(i8* align 4 {{.*}}, i8 -1, i64 4, i1 false)
 ; CALL-NOT: call void @__msan_poison_stack(i8* {{.*}}, i64 4)
-; ORIGIN-NOT: call void @__msan_set_alloca_origin(i8* {{.*}}, i64 4,
+; ORIGIN-NOT: call void @__msan_set_alloca_origin_with_descr(i8* {{.*}}, i64 4,
 ; KMSAN-NOT: call void @__msan_poison_alloca(i8* {{.*}}, i64 4,
 ; CHECK: call void @llvm.lifetime.end
 


        


More information about the llvm-commits mailing list