[llvm] d1c7f51 - MemorySanitizer: If a field is marked noundef, check init at call site

Gui Andrade via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 13 16:32:40 PDT 2020


Author: Gui Andrade
Date: 2020-07-13T23:32:26Z
New Revision: d1c7f51a9e8d6ea623349337d28da1df8b5194e2

URL: https://github.com/llvm/llvm-project/commit/d1c7f51a9e8d6ea623349337d28da1df8b5194e2
DIFF: https://github.com/llvm/llvm-project/commit/d1c7f51a9e8d6ea623349337d28da1df8b5194e2.diff

LOG: MemorySanitizer: If a field is marked noundef, check init at call site

Adds an LLVM option, -msan-eager-checks, to control eager checking.
This change relies on the noundef attribute to determine the cases where
it is sound to check these shadows eagerly, and falls back to passing
shadow values through TLS otherwise.

Checking at call boundaries enforces the undefined-behavior rules for
passing uninitialized arguments by value.

Differential Revision: https://reviews.llvm.org/D81699
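
A minimal sketch of the caller-side behavior (illustrative IR; the
function names here are made up and are not part of the patch): when the
callee parameter carries noundef and -msan-eager-checks is enabled, the
argument's shadow is checked immediately at the call site instead of
being stored to __msan_param_tls.

    ; Illustrative only. RUN line mirrors the test added by this patch:
    ; RUN: opt < %s -msan-eager-checks -S -passes='module(msan-module),function(msan)'
    define void @callee(i32 noundef %x) sanitize_memory {
      ret void
    }

    define void @caller() nounwind sanitize_memory {
      %p = alloca i32            ; never initialized, so its shadow is poisoned
      %v = load i32, i32* %p
      call void @callee(i32 %v)  ; eager check fires here instead of a TLS store
      ret void
    }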

Added: 
    llvm/test/Instrumentation/MemorySanitizer/msan_eager.ll

Modified: 
    llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index f825cf99205b..07caac4bc874 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -284,6 +284,11 @@ static cl::opt<bool> ClCheckAccessAddress("msan-check-access-address",
        cl::desc("report accesses through a pointer which has poisoned shadow"),
        cl::Hidden, cl::init(true));
 
+static cl::opt<bool> ClEagerChecks(
+    "msan-eager-checks",
+    cl::desc("check arguments and return values at function call boundaries"),
+    cl::Hidden, cl::init(false));
+
 static cl::opt<bool> ClDumpStrictInstructions("msan-dump-strict-instructions",
        cl::desc("print out instructions with default strict semantics"),
        cl::Hidden, cl::init(false));
@@ -1052,7 +1057,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
   bool PropagateShadow;
   bool PoisonStack;
   bool PoisonUndef;
-  bool CheckReturnValue;
 
   struct ShadowOriginAndInsertPoint {
     Value *Shadow;
@@ -1076,9 +1080,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     PropagateShadow = SanitizeFunction;
     PoisonStack = SanitizeFunction && ClPoisonStack;
     PoisonUndef = SanitizeFunction && ClPoisonUndef;
-    // FIXME: Consider using SpecialCaseList to specify a list of functions that
-    // must always return fully initialized values. For now, we hardcode "main".
-    CheckReturnValue = SanitizeFunction && (F.getName() == "main");
 
     MS.initializeCallbacks(*F.getParent());
     if (MS.CompileKernel)
@@ -1618,14 +1619,23 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
           LLVM_DEBUG(dbgs() << "Arg is not sized\n");
           continue;
         }
+
+        bool FArgByVal = FArg.hasByValAttr();
+        bool FArgNoUndef = FArg.hasAttribute(Attribute::NoUndef);
+        bool FArgEagerCheck = ClEagerChecks && !FArgByVal && FArgNoUndef;
         unsigned Size =
             FArg.hasByValAttr()
                 ? DL.getTypeAllocSize(FArg.getParamByValType())
                 : DL.getTypeAllocSize(FArg.getType());
+
         if (A == &FArg) {
           bool Overflow = ArgOffset + Size > kParamTLSSize;
-          Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset);
-          if (FArg.hasByValAttr()) {
+          if (FArgEagerCheck) {
+            *ShadowPtr = getCleanShadow(V);
+            setOrigin(A, getCleanOrigin());
+            continue;
+          } else if (FArgByVal) {
+            Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset);
             // ByVal pointer itself has clean shadow. We copy the actual
             // argument shadow to the underlying memory.
             // Figure out maximal valid memcpy alignment.
@@ -1650,6 +1660,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
             }
             *ShadowPtr = getCleanShadow(V);
           } else {
+            // Shadow over TLS
+            Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset);
             if (Overflow) {
               // ParamTLS overflow.
               *ShadowPtr = getCleanShadow(V);
@@ -1668,7 +1680,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
             setOrigin(A, getCleanOrigin());
           }
         }
-        ArgOffset += alignTo(Size, kShadowTLSAlignment);
+
+        if (!FArgEagerCheck)
+          ArgOffset += alignTo(Size, kShadowTLSAlignment);
       }
       assert(*ShadowPtr && "Could not find shadow for an argument");
       return *ShadowPtr;
@@ -3391,7 +3405,18 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
                         << " Shadow: " << *ArgShadow << "\n");
       bool ArgIsInitialized = false;
       const DataLayout &DL = F.getParent()->getDataLayout();
-      if (CB.paramHasAttr(i, Attribute::ByVal)) {
+
+      bool ByVal = CB.paramHasAttr(i, Attribute::ByVal);
+      bool NoUndef = CB.paramHasAttr(i, Attribute::NoUndef);
+      bool EagerCheck = ClEagerChecks && !ByVal && NoUndef;
+
+      if (EagerCheck) {
+        insertShadowCheck(A, &CB);
+        continue;
+      }
+      if (ByVal) {
+        // ByVal requires some special handling as it's too big for a single
+        // load
         assert(A->getType()->isPointerTy() &&
                "ByVal argument is not a pointer!");
         Size = DL.getTypeAllocSize(CB.getParamByValType(i));
@@ -3409,6 +3434,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
                                  Alignment, Size);
         // TODO(glider): need to copy origins.
       } else {
+        // Any other parameters mean we need bit-grained tracking of uninit data
         Size = DL.getTypeAllocSize(A->getType());
         if (ArgOffset + Size > kParamTLSSize) break;
         Store = IRB.CreateAlignedStore(ArgShadow, ArgShadowBase,
@@ -3437,6 +3463,13 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     // Don't emit the epilogue for musttail call returns.
     if (isa<CallInst>(CB) && cast<CallInst>(CB).isMustTailCall())
       return;
+
+    if (ClEagerChecks && CB.hasRetAttr(Attribute::NoUndef)) {
+      setShadow(&CB, getCleanShadow(&CB));
+      setOrigin(&CB, getCleanOrigin());
+      return;
+    }
+
     IRBuilder<> IRBBefore(&CB);
     // Until we have full dynamic coverage, make sure the retval shadow is 0.
     Value *Base = getShadowPtrForRetval(&CB, IRBBefore);
@@ -3489,14 +3522,26 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     // Don't emit the epilogue for musttail call returns.
     if (isAMustTailRetVal(RetVal)) return;
     Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB);
-    if (CheckReturnValue) {
+    bool HasNoUndef =
+        F.hasAttribute(AttributeList::ReturnIndex, Attribute::NoUndef);
+    bool StoreShadow = !(ClEagerChecks && HasNoUndef);
+    // FIXME: Consider using SpecialCaseList to specify a list of functions that
+    // must always return fully initialized values. For now, we hardcode "main".
+    bool EagerCheck = (ClEagerChecks && HasNoUndef) || (F.getName() == "main");
+
+    Value *Shadow = getShadow(RetVal);
+    bool StoreOrigin = true;
+    if (EagerCheck) {
       insertShadowCheck(RetVal, &I);
-      Value *Shadow = getCleanShadow(RetVal);
-      IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment);
-    } else {
-      Value *Shadow = getShadow(RetVal);
+      Shadow = getCleanShadow(RetVal);
+      StoreOrigin = false;
+    }
+
+    // The caller may still expect information passed over TLS if we pass our
+    // check
+    if (StoreShadow) {
       IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment);
-      if (MS.TrackOrigins)
+      if (MS.TrackOrigins && StoreOrigin)
         IRB.CreateStore(getOrigin(RetVal), getOriginPtrForRetval(IRB));
     }
   }

diff  --git a/llvm/test/Instrumentation/MemorySanitizer/msan_eager.ll b/llvm/test/Instrumentation/MemorySanitizer/msan_eager.ll
new file mode 100644
index 000000000000..1c203177796e
--- /dev/null
+++ b/llvm/test/Instrumentation/MemorySanitizer/msan_eager.ll
@@ -0,0 +1,142 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -msan-check-access-address=0 -msan-track-origins=1 -msan-eager-checks -S -passes='module(msan-module),function(msan)' 2>&1 | \
+; RUN:   FileCheck -allow-deprecated-dag-overlap -check-prefixes=CHECK,CHECK-ORIGINS %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define noundef i32 @NormalRet() nounwind uwtable sanitize_memory {
+; CHECK-LABEL: @NormalRet(
+; CHECK-NEXT:    ret i32 123
+;
+  ret i32 123
+}
+
+define i32 @PartialRet() nounwind uwtable sanitize_memory {
+; CHECK-LABEL: @PartialRet(
+; CHECK-NEXT:    store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8
+; CHECK-NEXT:    store i32 0, i32* @__msan_retval_origin_tls, align 4
+; CHECK-NEXT:    ret i32 123
+;
+  ret i32 123
+}
+
+define noundef i32 @LoadedRet() nounwind uwtable sanitize_memory {
+; CHECK-LABEL: @LoadedRet(
+; CHECK-NEXT:    [[P:%.*]] = inttoptr i64 0 to i32*
+; CHECK-NEXT:    [[O:%.*]] = load i32, i32* [[P]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint i32* [[P]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080
+; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to i32*
+; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[TMP2]], 17592186044416
+; CHECK-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to i32*
+; CHECK-NEXT:    [[_MSLD:%.*]] = load i32, i32* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[_MSLD]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof !0
+; CHECK:       7:
+; CHECK-NEXT:    call void @__msan_warning_with_origin_noreturn(i32 [[TMP6]]) #1
+; CHECK-NEXT:    unreachable
+; CHECK:       8:
+; CHECK-NEXT:    ret i32 [[O]]
+;
+  %p = inttoptr i64 0 to i32 *
+  %o = load i32, i32 *%p
+  ret i32 %o
+}
+
+
+define void @NormalArg(i32 noundef %a) nounwind uwtable sanitize_memory {
+; CHECK-LABEL: @NormalArg(
+; CHECK-NEXT:    [[P:%.*]] = inttoptr i64 0 to i32*
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint i32* [[P]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080
+; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to i32*
+; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[TMP2]], 17592186044416
+; CHECK-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to i32*
+; CHECK-NEXT:    store i32 0, i32* [[TMP3]], align 4
+; CHECK-NEXT:    store i32 [[A:%.*]], i32* [[P]], align 4
+; CHECK-NEXT:    ret void
+;
+  %p = inttoptr i64 0 to i32 *
+  store i32 %a, i32 *%p
+  ret void
+}
+
+define void @PartialArg(i32 %a) nounwind uwtable sanitize_memory {
+; CHECK-LABEL: @PartialArg(
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* bitcast ([100 x i64]* @__msan_param_tls to i32*), align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__msan_param_origin_tls, i32 0, i32 0), align 4
+; CHECK-NEXT:    [[P:%.*]] = inttoptr i64 0 to i32*
+; CHECK-NEXT:    [[TMP3:%.*]] = ptrtoint i32* [[P]] to i64
+; CHECK-NEXT:    [[TMP4:%.*]] = xor i64 [[TMP3]], 87960930222080
+; CHECK-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to i32*
+; CHECK-NEXT:    [[TMP6:%.*]] = add i64 [[TMP4]], 17592186044416
+; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to i32*
+; CHECK-NEXT:    store i32 [[TMP1]], i32* [[TMP5]], align 4
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof !0
+; CHECK:       8:
+; CHECK-NEXT:    store i32 [[TMP2]], i32* [[TMP7]], align 4
+; CHECK-NEXT:    br label [[TMP9]]
+; CHECK:       9:
+; CHECK-NEXT:    store i32 [[A:%.*]], i32* [[P]], align 4
+; CHECK-NEXT:    ret void
+;
+  %p = inttoptr i64 0 to i32 *
+  store i32 %a, i32 *%p
+  ret void
+}
+
+define void @CallNormal() nounwind uwtable sanitize_memory {
+; CHECK-LABEL: @CallNormal(
+; CHECK-NEXT:    [[R:%.*]] = call i32 @NormalRet() #0
+; CHECK-NEXT:    call void @NormalArg(i32 [[R]]) #0
+; CHECK-NEXT:    ret void
+;
+  %r = call i32 @NormalRet() nounwind uwtable sanitize_memory
+  call void @NormalArg(i32 %r) nounwind uwtable sanitize_memory
+  ret void
+}
+
+define void @CallWithLoaded() nounwind uwtable sanitize_memory {
+; CHECK-LABEL: @CallWithLoaded(
+; CHECK-NEXT:    [[P:%.*]] = inttoptr i64 0 to i32*
+; CHECK-NEXT:    [[O:%.*]] = load i32, i32* [[P]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint i32* [[P]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080
+; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to i32*
+; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[TMP2]], 17592186044416
+; CHECK-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to i32*
+; CHECK-NEXT:    [[_MSLD:%.*]] = load i32, i32* [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[_MSLD]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof !0
+; CHECK:       7:
+; CHECK-NEXT:    call void @__msan_warning_with_origin_noreturn(i32 [[TMP6]]) #1
+; CHECK-NEXT:    unreachable
+; CHECK:       8:
+; CHECK-NEXT:    call void @NormalArg(i32 [[O]]) #0
+; CHECK-NEXT:    ret void
+;
+  %p = inttoptr i64 0 to i32 *
+  %o = load i32, i32 *%p
+  call void @NormalArg(i32 %o) nounwind uwtable sanitize_memory
+  ret void
+}
+
+define void @CallPartial() nounwind uwtable sanitize_memory {
+; CHECK-LABEL: @CallPartial(
+; CHECK-NEXT:    store i32 0, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8
+; CHECK-NEXT:    [[R:%.*]] = call i32 @PartialRet() #0
+; CHECK-NEXT:    [[_MSRET:%.*]] = load i32, i32* bitcast ([100 x i64]* @__msan_retval_tls to i32*), align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* @__msan_retval_origin_tls, align 4
+; CHECK-NEXT:    store i32 [[_MSRET]], i32* bitcast ([100 x i64]* @__msan_param_tls to i32*), align 8
+; CHECK-NEXT:    store i32 [[TMP1]], i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__msan_param_origin_tls, i32 0, i32 0), align 4
+; CHECK-NEXT:    call void @PartialArg(i32 [[R]]) #0
+; CHECK-NEXT:    ret void
+;
+  %r = call i32 @PartialRet() nounwind uwtable sanitize_memory
+  call void @PartialArg(i32 %r) nounwind uwtable sanitize_memory
+  ret void
+}
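
On the return side (mirroring LoadedRet above): with -msan-eager-checks,
a noundef return value is checked eagerly at the ret instruction and the
store to __msan_retval_tls is omitted; without the flag, or without
noundef, the shadow still flows through TLS as before. The "main" special
case is preserved: main's return value is always checked, but a clean
shadow is still stored for callers that read TLS. A minimal standalone
sketch (illustrative, not part of the patch):

    ; With -msan-eager-checks, instrumentation inserts a shadow check on %v
    ; before the ret and skips the __msan_retval_tls store entirely.
    define noundef i32 @ret_maybe_uninit() sanitize_memory {
      %p = alloca i32
      %v = load i32, i32* %p   ; possibly uninitialized
      ret i32 %v
    }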
