[llvm] db7fe6c - [dfsan] Propagate origin tracking at store

Jianzhou Zhao via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 4 15:35:12 PST 2021


Author: Jianzhou Zhao
Date: 2021-03-04T23:34:44Z
New Revision: db7fe6cd4b336e84fd47106ac05b7265635fa333

URL: https://github.com/llvm/llvm-project/commit/db7fe6cd4b336e84fd47106ac05b7265635fa333
DIFF: https://github.com/llvm/llvm-project/commit/db7fe6cd4b336e84fd47106ac05b7265635fa333.diff

LOG: [dfsan] Propagate origin tracking at store

This is a part of https://reviews.llvm.org/D95835.

Reviewed By: morehouse, gbalats

Differential Revision: https://reviews.llvm.org/D97789
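
The gist of the change: when origin tracking is enabled, every
instrumented store now also records where the stored (tainted) value
came from. As a rough, self-contained C++ model of the per-store logic
added below in DFSanFunction::storeOrigin and paintOrigin (the names
and the flat memory layout are illustrative assumptions; only the
control flow mirrors the patch):

    #include <cstdint>
    #include <cstring>

    // Stand-in for __dfsan_chain_origin, which appends the current stack
    // to the origin history and returns a new origin id.
    uint32_t chainOrigin(uint32_t Origin) { return Origin; }

    // Model of paintOrigin: cover Size bytes of origin shadow with the
    // 4-byte origin id, preferring 8-byte stores of (O | O << 32). The
    // tail rounds up because origin shadow is 4-byte granular.
    void paintOrigin(uint32_t O, unsigned char *OriginAddr, uint64_t Size) {
      uint64_t Wide = (uint64_t)O | ((uint64_t)O << 32);
      uint64_t I = 0;
      for (; I + 8 <= Size; I += 8)
        std::memcpy(OriginAddr + I, &Wide, 8);
      for (; I < Size; I += 4)
        std::memcpy(OriginAddr + I, &O, 4);
    }

    // Model of storeOrigin: zero shadows never write origins, because
    // origins are not traced for untainted sinks.
    void storeOrigin(uint16_t Shadow, unsigned char *OriginAddr,
                     uint64_t Size, uint32_t Origin) {
      if (Shadow == 0)
        return;
      paintOrigin(chainOrigin(Origin), OriginAddr, Size);
    }

    int main() {
      unsigned char OriginShadow[8] = {};
      storeOrigin(/*Shadow=*/1, OriginShadow, sizeof(OriginShadow),
                  /*Origin=*/7);
      return 0;
    }

In the generated code the zero test becomes an inline branch (folded
away for constant shadows), and once a function accumulates too many of
these inline sequences they are replaced by calls to the
__dfsan_maybe_store_origin runtime function (see the new threshold
option below).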

Added: 
    llvm/test/Instrumentation/DataFlowSanitizer/origin_store_threshold.ll

Modified: 
    llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
    llvm/test/Instrumentation/DataFlowSanitizer/basic.ll
    llvm/test/Instrumentation/DataFlowSanitizer/origin_ldst.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
index ab057bb9b85a..a537865dc45b 100644
--- a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
@@ -207,6 +207,14 @@ static cl::opt<bool> ClTrackSelectControlFlow(
              "to results."),
     cl::Hidden, cl::init(true));
 
+// TODO: This default value follows MSan. DFSan may use a different value.
+static cl::opt<int> ClInstrumentWithCallThreshold(
+    "dfsan-instrument-with-call-threshold",
+    cl::desc("If the function being instrumented requires more than "
+             "this number of origin stores, use callbacks instead of "
+             "inline checks (-1 means never use callbacks)."),
+    cl::Hidden, cl::init(3500));
+
 // Controls how to track origins.
 // * 0: do not track origins.
 // * 1: track origins at memory store operations.
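
The threshold can be exercised from the opt command line; the new
origin_store_threshold.ll test below pins it to the extreme value:

    opt < input.ll -dfsan -dfsan-track-origins=1 -dfsan-fast-16-labels=true \
        -dfsan-instrument-with-call-threshold=0 -S

(input.ll stands for any module here). With a threshold of 0, every
origin store is lowered to a __dfsan_maybe_store_origin call instead of
an inline shadow check and branch.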
@@ -426,6 +434,7 @@ class DataFlowSanitizer {
   FunctionCallee DFSanMaybeStoreOriginFn;
   SmallPtrSet<Value *, 16> DFSanRuntimeFunctions;
   MDNode *ColdCallWeights;
+  MDNode *OriginStoreWeights;
   DFSanABIList ABIList;
   DenseMap<Value *, Function *> UnwrappedFnMap;
   AttrBuilder ReadOnlyNoneAttrs;
@@ -578,8 +587,9 @@ struct DFSanFunction {
   std::pair<Value *, Value *> loadShadowOrigin(Value *ShadowAddr, uint64_t Size,
                                                Align InstAlignment,
                                                Instruction *Pos);
-  void storePrimitiveShadow(Value *Addr, uint64_t Size, Align Alignment,
-                            Value *PrimitiveShadow, Instruction *Pos);
+  void storePrimitiveShadowOrigin(Value *Addr, uint64_t Size,
+                                  Align InstAlignment, Value *PrimitiveShadow,
+                                  Value *Origin, Instruction *Pos);
   /// Applies PrimitiveShadow to all primitive subtypes of T, returning
   /// the expanded shadow value.
   ///
@@ -633,6 +643,33 @@ struct DFSanFunction {
   /// checks if it is possible to load labels and origins without using the
   /// callback function.
   bool useCallbackLoadLabelAndOrigin(uint64_t Size, Align InstAlignment);
+
+  /// Returns a chain at the current stack with previous origin V.
+  Value *updateOrigin(Value *V, IRBuilder<> &IRB);
+
+  /// Creates an Intptr = Origin | Origin << 32 if Intptr's size is 64. Returns
+  /// Origin otherwise.
+  Value *originToIntptr(IRBuilder<> &IRB, Value *Origin);
+
+  /// Stores Origin into the address range [StoreOriginAddr, StoreOriginAddr +
+  /// Size).
+  void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *StoreOriginAddr,
+                   uint64_t StoreOriginSize, Align Alignment);
+
+  /// Stores Origin in terms of its Shadow value.
+  /// * Do not write origins for zero shadows because we do not trace origins
+  ///   for untainted sinks.
+  /// * Use __dfsan_maybe_store_origin if there are too many origin store
+  ///   instrumentations.
+  void storeOrigin(Instruction *Pos, Value *Addr, uint64_t Size, Value *Shadow,
+                   Value *Origin, Value *StoreOriginAddr, Align InstAlignment);
+
+  /// Convert a scalar value to an i1 by comparing with 0.
+  Value *convertToBool(Value *V, IRBuilder<> &IRB, const Twine &Name = "");
+
+  bool shouldInstrumentWithCall();
+
+  int NumOriginStores = 0;
 };
 
 class DFSanVisitor : public InstVisitor<DFSanVisitor> {
@@ -837,6 +874,11 @@ static Value *expandFromPrimitiveShadowRecursive(
   llvm_unreachable("Unexpected shadow type");
 }
 
+bool DFSanFunction::shouldInstrumentWithCall() {
+  return ClInstrumentWithCallThreshold >= 0 &&
+         NumOriginStores >= ClInstrumentWithCallThreshold;
+}
+
 Value *DFSanFunction::expandFromPrimitiveShadow(Type *T, Value *PrimitiveShadow,
                                                 Instruction *Pos) {
   Type *ShadowTy = DFS.getShadowTy(T);
@@ -1009,6 +1051,7 @@ bool DataFlowSanitizer::init(Module &M) {
                         /*isVarArg=*/false);
 
   ColdCallWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000);
+  OriginStoreWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000);
   return true;
 }
 
@@ -2212,6 +2255,99 @@ void DFSanVisitor::visitLoadInst(LoadInst &LI) {
   }
 }
 
+Value *DFSanFunction::updateOrigin(Value *V, IRBuilder<> &IRB) {
+  if (!DFS.shouldTrackOrigins())
+    return V;
+  return IRB.CreateCall(DFS.DFSanChainOriginFn, V);
+}
+
+Value *DFSanFunction::originToIntptr(IRBuilder<> &IRB, Value *Origin) {
+  const unsigned OriginSize = DataFlowSanitizer::OriginWidthBytes;
+  const DataLayout &DL = F->getParent()->getDataLayout();
+  unsigned IntptrSize = DL.getTypeStoreSize(DFS.IntptrTy);
+  if (IntptrSize == OriginSize)
+    return Origin;
+  assert(IntptrSize == OriginSize * 2);
+  Origin = IRB.CreateIntCast(Origin, DFS.IntptrTy, /* isSigned */ false);
+  return IRB.CreateOr(Origin, IRB.CreateShl(Origin, OriginSize * 8));
+}
+
+void DFSanFunction::paintOrigin(IRBuilder<> &IRB, Value *Origin,
+                                Value *StoreOriginAddr,
+                                uint64_t StoreOriginSize, Align Alignment) {
+  const unsigned OriginSize = DataFlowSanitizer::OriginWidthBytes;
+  const DataLayout &DL = F->getParent()->getDataLayout();
+  const Align IntptrAlignment = DL.getABITypeAlign(DFS.IntptrTy);
+  unsigned IntptrSize = DL.getTypeStoreSize(DFS.IntptrTy);
+  assert(IntptrAlignment >= MinOriginAlignment);
+  assert(IntptrSize >= OriginSize);
+
+  unsigned Ofs = 0;
+  Align CurrentAlignment = Alignment;
+  if (Alignment >= IntptrAlignment && IntptrSize > OriginSize) {
+    Value *IntptrOrigin = originToIntptr(IRB, Origin);
+    Value *IntptrStoreOriginPtr = IRB.CreatePointerCast(
+        StoreOriginAddr, PointerType::get(DFS.IntptrTy, 0));
+    for (unsigned I = 0; I < StoreOriginSize / IntptrSize; ++I) {
+      Value *Ptr =
+          I ? IRB.CreateConstGEP1_32(DFS.IntptrTy, IntptrStoreOriginPtr, I)
+            : IntptrStoreOriginPtr;
+      IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment);
+      Ofs += IntptrSize / OriginSize;
+      CurrentAlignment = IntptrAlignment;
+    }
+  }
+
+  for (unsigned I = Ofs; I < (StoreOriginSize + OriginSize - 1) / OriginSize;
+       ++I) {
+    Value *GEP = I ? IRB.CreateConstGEP1_32(DFS.OriginTy, StoreOriginAddr, I)
+                   : StoreOriginAddr;
+    IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment);
+    CurrentAlignment = MinOriginAlignment;
+  }
+}
+
+Value *DFSanFunction::convertToBool(Value *V, IRBuilder<> &IRB,
+                                    const Twine &Name) {
+  Type *VTy = V->getType();
+  assert(VTy->isIntegerTy());
+  if (VTy->getIntegerBitWidth() == 1)
+    // Just converting a bool to a bool, so do nothing.
+    return V;
+  return IRB.CreateICmpNE(V, ConstantInt::get(VTy, 0), Name);
+}
+
+void DFSanFunction::storeOrigin(Instruction *Pos, Value *Addr, uint64_t Size,
+                                Value *Shadow, Value *Origin,
+                                Value *StoreOriginAddr, Align InstAlignment) {
+  // Do not write origins for zero shadows because we do not trace origins for
+  // untainted sinks.
+  const Align OriginAlignment = getOriginAlign(InstAlignment);
+  Value *CollapsedShadow = collapseToPrimitiveShadow(Shadow, Pos);
+  IRBuilder<> IRB(Pos);
+  if (auto *ConstantShadow = dyn_cast<Constant>(CollapsedShadow)) {
+    if (!ConstantShadow->isZeroValue())
+      paintOrigin(IRB, updateOrigin(Origin, IRB), StoreOriginAddr, Size,
+                  OriginAlignment);
+    return;
+  }
+
+  if (shouldInstrumentWithCall()) {
+    IRB.CreateCall(DFS.DFSanMaybeStoreOriginFn,
+                   {CollapsedShadow,
+                    IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
+                    ConstantInt::get(DFS.IntptrTy, Size), Origin});
+  } else {
+    Value *Cmp = convertToBool(CollapsedShadow, IRB, "_dfscmp");
+    Instruction *CheckTerm = SplitBlockAndInsertIfThen(
+        Cmp, &*IRB.GetInsertPoint(), false, DFS.OriginStoreWeights, &DT);
+    IRBuilder<> IRBNew(CheckTerm);
+    paintOrigin(IRBNew, updateOrigin(Origin, IRBNew), StoreOriginAddr, Size,
+                OriginAlignment);
+    ++NumOriginStores;
+  }
+}
+
 void DFSanFunction::storeZeroPrimitiveShadow(Value *Addr, uint64_t Size,
                                              Align ShadowAlign,
                                              Instruction *Pos) {
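
A worked example of the packing originToIntptr and paintOrigin above
perform, assuming a 64-bit intptr, 4-byte origins, and an 8-byte-aligned
origin address (a sketch checked by assertions, not the in-tree code):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t Origin = 0xABCD;
      // originToIntptr: duplicate the 32-bit origin id into both halves
      // so one 8-byte store paints two origin slots at once.
      uint64_t Packed = (uint64_t)Origin | ((uint64_t)Origin << 32);
      assert(Packed == 0x0000ABCD0000ABCDull);

      // For a 12-byte store (e.g. the i96 in store96_align8 below):
      // one 8-byte store of Packed covers two slots, then a 4-byte
      // tail store of Origin covers the third.
      uint64_t Size = 12;
      uint64_t IntptrStores = Size / 8;                    // 1
      uint64_t SlotsCovered = IntptrStores * 2;            // 2
      uint64_t TailStores = (Size + 3) / 4 - SlotsCovered; // 1
      assert(IntptrStores == 1 && TailStores == 1);
      return 0;
    }

This is exactly the shape the store96_align8 test below checks: one
store i64 of the packed id followed by one store i32 of the raw id.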
@@ -2226,30 +2362,46 @@ void DFSanFunction::storeZeroPrimitiveShadow(Value *Addr, uint64_t Size,
   // Do not write origins for 0 shadows because we do not trace origins for
   // untainted sinks.
 }
-void DFSanFunction::storePrimitiveShadow(Value *Addr, uint64_t Size,
-                                         Align Alignment,
-                                         Value *PrimitiveShadow,
-                                         Instruction *Pos) {
+
+void DFSanFunction::storePrimitiveShadowOrigin(Value *Addr, uint64_t Size,
+                                               Align InstAlignment,
+                                               Value *PrimitiveShadow,
+                                               Value *Origin,
+                                               Instruction *Pos) {
+  const bool ShouldTrackOrigins = DFS.shouldTrackOrigins() && Origin;
+
   if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
-    const auto I = AllocaShadowMap.find(AI);
-    if (I != AllocaShadowMap.end()) {
+    const auto SI = AllocaShadowMap.find(AI);
+    if (SI != AllocaShadowMap.end()) {
       IRBuilder<> IRB(Pos);
-      IRB.CreateStore(PrimitiveShadow, I->second);
+      IRB.CreateStore(PrimitiveShadow, SI->second);
+
+      // Do not write origins for 0 shadows because we do not trace origins for
+      // untainted sinks.
+      if (ShouldTrackOrigins && !DFS.isZeroShadow(PrimitiveShadow)) {
+        const auto OI = AllocaOriginMap.find(AI);
+        assert(OI != AllocaOriginMap.end() && Origin);
+        IRB.CreateStore(Origin, OI->second);
+      }
       return;
     }
   }
 
-  const Align ShadowAlign(Alignment.value() * DFS.ShadowWidthBytes);
+  const Align ShadowAlign = getShadowAlign(InstAlignment);
   if (DFS.isZeroShadow(PrimitiveShadow)) {
     storeZeroPrimitiveShadow(Addr, Size, ShadowAlign, Pos);
     return;
   }
 
   IRBuilder<> IRB(Pos);
-  Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos);
+  Value *ShadowAddr, *OriginAddr;
+  std::tie(ShadowAddr, OriginAddr) =
+      DFS.getShadowOriginAddress(Addr, InstAlignment, Pos);
+
   const unsigned ShadowVecSize = 128 / DFS.ShadowWidthBits;
   uint64_t Offset = 0;
-  if (Size >= ShadowVecSize) {
+  uint64_t LeftSize = Size;
+  if (LeftSize >= ShadowVecSize) {
     auto *ShadowVecTy =
         FixedVectorType::get(DFS.PrimitiveShadowTy, ShadowVecSize);
     Value *ShadowVec = UndefValue::get(ShadowVecTy);
@@ -2264,18 +2416,23 @@ void DFSanFunction::storePrimitiveShadow(Value *Addr, uint64_t Size,
       Value *CurShadowVecAddr =
           IRB.CreateConstGEP1_32(ShadowVecTy, ShadowVecAddr, Offset);
       IRB.CreateAlignedStore(ShadowVec, CurShadowVecAddr, ShadowAlign);
-      Size -= ShadowVecSize;
+      LeftSize -= ShadowVecSize;
       ++Offset;
-    } while (Size >= ShadowVecSize);
+    } while (LeftSize >= ShadowVecSize);
     Offset *= ShadowVecSize;
   }
-  while (Size > 0) {
+  while (LeftSize > 0) {
     Value *CurShadowAddr =
         IRB.CreateConstGEP1_32(DFS.PrimitiveShadowTy, ShadowAddr, Offset);
     IRB.CreateAlignedStore(PrimitiveShadow, CurShadowAddr, ShadowAlign);
-    --Size;
+    --LeftSize;
     ++Offset;
   }
+
+  if (ShouldTrackOrigins) {
+    storeOrigin(Pos, Addr, Size, PrimitiveShadow, Origin, OriginAddr,
+                InstAlignment);
+  }
 }
 
 static AtomicOrdering addReleaseOrdering(AtomicOrdering AO) {
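
For the shadow-write loop above: with 16-bit shadows a 128-bit vector
covers 8 application bytes, so the store is split into vector chunks
plus a per-byte scalar tail. A small model of the chunking arithmetic
(the widths are assumed fast16 values; the pass reads them from
DFS.ShadowWidthBits):

    #include <cassert>

    int main() {
      const unsigned ShadowWidthBits = 16;
      const unsigned ShadowVecSize = 128 / ShadowWidthBits; // 8 shadows
      unsigned LeftSize = 10, VecStores = 0, ScalarStores = 0;
      while (LeftSize >= ShadowVecSize) {
        LeftSize -= ShadowVecSize; // one <8 x i16> vector store
        ++VecStores;
      }
      while (LeftSize > 0) {
        --LeftSize;                // one scalar shadow store
        ++ScalarStores;
      }
      assert(VecStores == 1 && ScalarStores == 2); // 10 bytes = 8 + 2
      return 0;
    }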
@@ -2310,19 +2467,36 @@ void DFSanVisitor::visitStoreInst(StoreInst &SI) {
   if (SI.isAtomic())
     SI.setOrdering(addReleaseOrdering(SI.getOrdering()));
 
-  const Align Alignment = ClPreserveAlignment ? SI.getAlign() : Align(1);
+  const bool ShouldTrackOrigins =
+      DFSF.DFS.shouldTrackOrigins() && !SI.isAtomic();
+  std::vector<Value *> Shadows;
+  std::vector<Value *> Origins;
 
   Value *Shadow =
       SI.isAtomic() ? DFSF.DFS.getZeroShadow(Val) : DFSF.getShadow(Val);
+
+  if (ShouldTrackOrigins) {
+    Shadows.push_back(Shadow);
+    Origins.push_back(DFSF.getOrigin(Val));
+  }
+
   Value *PrimitiveShadow;
   if (ClCombinePointerLabelsOnStore) {
     Value *PtrShadow = DFSF.getShadow(SI.getPointerOperand());
+    if (ShouldTrackOrigins) {
+      Shadows.push_back(PtrShadow);
+      Origins.push_back(DFSF.getOrigin(SI.getPointerOperand()));
+    }
     PrimitiveShadow = DFSF.combineShadows(Shadow, PtrShadow, &SI);
   } else {
     PrimitiveShadow = DFSF.collapseToPrimitiveShadow(Shadow, &SI);
   }
-  DFSF.storePrimitiveShadow(SI.getPointerOperand(), Size, Alignment,
-                            PrimitiveShadow, &SI);
+  Value *Origin = nullptr;
+  if (ShouldTrackOrigins) {
+    Origin = DFSF.combineOrigins(Shadows, Origins, &SI);
+  }
+  DFSF.storePrimitiveShadowOrigin(SI.getPointerOperand(), Size, SI.getAlign(),
+                                  PrimitiveShadow, Origin, &SI);
   if (ClEventCallbacks) {
     IRBuilder<> IRB(&SI);
     Value *Addr8 = IRB.CreateBitCast(SI.getPointerOperand(), DFSF.DFS.Int8Ptr);
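
When pointer labels are combined on store (the
ClCombinePointerLabelsOnStore path above), the shadows are OR'ed and
the origin follows whichever operand is tainted, with the pointer
taking priority. A one-function model of the combination the
COMBINE_STORE_PTR checks below expect:

    #include <cstdint>

    // Hedged model: the shadow is the OR of the labels; the origin is
    // the pointer's when the pointer shadow is nonzero, else the
    // value's (cf. the select on "icmp ne" in the tests below).
    void combineOnStore(uint16_t ValShadow, uint32_t ValOrigin,
                        uint16_t PtrShadow, uint32_t PtrOrigin,
                        uint16_t &Shadow, uint32_t &Origin) {
      Shadow = ValShadow | PtrShadow;
      Origin = PtrShadow != 0 ? PtrOrigin : ValOrigin;
    }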

diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/basic.ll b/llvm/test/Instrumentation/DataFlowSanitizer/basic.ll
index f7c49d06dee9..98fb755992bf 100644
--- a/llvm/test/Instrumentation/DataFlowSanitizer/basic.ll
+++ b/llvm/test/Instrumentation/DataFlowSanitizer/basic.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -dfsan -S | FileCheck %s --check-prefixes=CHECK,CHECK_NO_ORIGIN
-; RUN: opt < %s -dfsan -dfsan-track-origins=1 -dfsan-fast-16-labels=true -S | FileCheck %s --check-prefixes=CHECK,CHECK_ORIGIN
+; RUN: opt < %s -dfsan -S | FileCheck %s --check-prefixes=CHECK,CHECK_NO_ORIGIN -DSHADOW_MASK=-123145302310913
+; RUN: opt < %s -dfsan -dfsan-track-origins=1 -dfsan-fast-16-labels=true -S | FileCheck %s --check-prefixes=CHECK,CHECK_ORIGIN -DSHADOW_MASK=-123145302310913
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
@@ -12,6 +12,23 @@ target triple = "x86_64-unknown-linux-gnu"
 ; CHECK: @__dfsan_shadow_width_bits = weak_odr constant i32 [[#SBITS:]]
 ; CHECK: @__dfsan_shadow_width_bytes = weak_odr constant i32 [[#SBYTES:]]
 ; CHECK: @__dfsan_shadow_ptr_mask = external global i64
+
+define i8 @load(i8* %p) {
+  ; CHECK-LABEL: define i8 @"dfs$load"
+  ; CHECK: and i64 {{.*}}, [[SHADOW_MASK]]
+  ; CHECK: ret i8 %a
+  %a = load i8, i8* %p
+  ret i8 %a
+}
+
+define void @store(i8* %p) {
+  ; CHECK-LABEL: define void @"dfs$store"
+  ; CHECK: and i64 {{.*}}, [[SHADOW_MASK]]
+  ; CHECK: ret void
+  store i8 0, i8* %p
+  ret void
+}
+
 ; CHECK: declare void @__dfsan_load_callback(i[[#SBITS]], i8*)
 ; CHECK: declare void @__dfsan_store_callback(i[[#SBITS]], i8*)
 ; CHECK: declare void @__dfsan_mem_transfer_callback(i[[#SBITS]]*, i64)
@@ -39,7 +56,3 @@ target triple = "x86_64-unknown-linux-gnu"
 ; CHECK: declare zeroext i32 @__dfsan_chain_origin(i32 zeroext)
 ; CHECK: declare void @__dfsan_mem_origin_transfer(i8*, i8*, i64)
 ; CHECK: declare void @__dfsan_maybe_store_origin(i[[#SBITS]] zeroext, i8*, i64, i32 zeroext)
-
-define void @foo() {
-  ret void
-}
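
For reference, the -DSHADOW_MASK value threaded through the RUN lines
above, and the origin offset checked throughout these tests, are plain
address-space constants:

    -123145302310913 == ~0x700000000000   (0xFFFF8FFFFFFFFFFF as i64)
     35184372088832  ==  0x200000000000   (origin-space displacement)

so the "and" the tests match clears bits 44-46 of the application
address, and the origin pointer adds 0x200000000000 and rounds down to
4-byte alignment via the "and ..., -4".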

diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/origin_ldst.ll b/llvm/test/Instrumentation/DataFlowSanitizer/origin_ldst.ll
index e80bff25d592..7834bb008bc8 100644
--- a/llvm/test/Instrumentation/DataFlowSanitizer/origin_ldst.ll
+++ b/llvm/test/Instrumentation/DataFlowSanitizer/origin_ldst.ll
@@ -1,5 +1,6 @@
 ; RUN: opt < %s -dfsan -dfsan-track-origins=1 -dfsan-fast-16-labels=true -S | FileCheck %s --check-prefixes=CHECK_META,CHECK
 ; RUN: opt < %s -dfsan -dfsan-track-origins=1 -dfsan-fast-16-labels=true -dfsan-combine-pointer-labels-on-load=false -S | FileCheck %s --check-prefixes=CHECK_META,NO_COMBINE_LOAD_PTR
+; RUN: opt < %s -dfsan -dfsan-track-origins=1 -dfsan-fast-16-labels=true -dfsan-combine-pointer-labels-on-store=true -S | FileCheck %s --check-prefixes=CHECK_META,COMBINE_STORE_PTR
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
@@ -7,7 +8,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; CHECK_META: @__dfsan_shadow_width_bytes = weak_odr constant i32 [[#SBYTES:]]
 
 define {} @load0({}* %p) {
-  ; CHECK: @"dfs$load0"
+  ; CHECK-LABEL: @"dfs$load0"
   ; CHECK-NEXT: %a = load {}, {}* %p, align 1
   ; CHECK-NEXT: store {} zeroinitializer, {}* bitcast ([100 x i64]* @__dfsan_retval_tls to {}*), align [[#SBYTES]]
   ; CHECK-NEXT: store i32 0, i32* @__dfsan_retval_origin_tls, align 4
@@ -18,14 +19,14 @@ define {} @load0({}* %p) {
 }
 
 define i16 @load_non_escaped_alloca() {
-  ; CHECK: @"dfs$load_non_escaped_alloca"
-  ; CHECK: [[S_ALLOCA:%.*]] = alloca i[[#SBITS]], align [[#SBYTES]]
-  ; CHECK: [[O_ALLOCA:%.*]] = alloca i32, align 4
+  ; CHECK-LABEL: @"dfs$load_non_escaped_alloca"
+  ; CHECK-NEXT: [[S_ALLOCA:%.*]] = alloca i[[#SBITS]], align [[#SBYTES]]
+  ; CHECK-NEXT: [[O_ALLOCA:%.*]] = alloca i32, align 4
   ; CHECK: [[SHADOW:%.*]] = load i[[#SBITS]], i[[#SBITS]]* [[S_ALLOCA]], align [[#SBYTES]]
-  ; CHECK: [[ORIGIN:%.*]] = load i32, i32* [[O_ALLOCA]], align 4
-  ; CHECK: %a = load i16, i16* %p, align 2
-  ; CHECK: store i[[#SBITS]] [[SHADOW]], i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_retval_tls to i[[#SBITS]]*), align [[#SBYTES]]
-  ; CHECK: store i32 [[ORIGIN]], i32* @__dfsan_retval_origin_tls, align 4
+  ; CHECK-NEXT: [[ORIGIN:%.*]] = load i32, i32* [[O_ALLOCA]], align 4
+  ; CHECK-NEXT: %a = load i16, i16* %p, align 2
+  ; CHECK-NEXT: store i[[#SBITS]] [[SHADOW]], i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_retval_tls to i[[#SBITS]]*), align [[#SBYTES]]
+  ; CHECK-NEXT: store i32 [[ORIGIN]], i32* @__dfsan_retval_origin_tls, align 4
   
   %p = alloca i16
   %a = load i16, i16* %p
@@ -33,22 +34,22 @@ define i16 @load_non_escaped_alloca() {
 }
 
 define i16* @load_escaped_alloca() {
-  ; CHECK: @"dfs$load_escaped_alloca"
+  ; CHECK-LABEL: @"dfs$load_escaped_alloca"
   ; CHECK: [[INTP:%.*]] = ptrtoint i[[#SBITS]]* %p to i64
-  ; CHECK: [[OFFSET:%.*]] = and i64 [[INTP]], -123145302310913
-  ; CHECK: [[SHADOW_ADDR:%.*]] = mul i64 [[OFFSET]], 2
-  ; CHECK: [[SHADOW_PTR0:%.*]] = inttoptr i64 [[SHADOW_ADDR]] to i[[#SBITS]]*
-  ; CHECK: [[ORIGIN_OFFSET:%.*]] = add i64 [[OFFSET]], 35184372088832
-  ; CHECK: [[ORIGIN_ADDR:%.*]] = and i64 [[ORIGIN_OFFSET]], -4
-  ; CHECK: [[ORIGIN_PTR:%.*]] = inttoptr i64 [[ORIGIN_ADDR]] to i32*
-  ; CHECK: {{%.*}} = load i32, i32* [[ORIGIN_PTR]], align 4
-  ; CHECK: [[SHADOW_PTR1:%.*]] = getelementptr i[[#SBITS]], i[[#SBITS]]* [[SHADOW_PTR0]], i64 1
-  ; CHECK: [[SHADOW0:%.*]] = load i[[#SBITS]], i[[#SBITS]]* [[SHADOW_PTR0]], align [[#SBYTES]]
-  ; CHECK: [[SHADOW1:%.*]] = load i[[#SBITS]], i[[#SBITS]]* [[SHADOW_PTR1]], align [[#SBYTES]]
-  ; CHECK: {{%.*}} = or i[[#SBITS]] [[SHADOW0]], [[SHADOW1]]
-  ; CHECK: %a = load i16, i16* %p, align 2
-  ; CHECK: store i[[#SBITS]] 0, i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_retval_tls to i[[#SBITS]]*), align [[#SBYTES]]
-  ; CHECK: store i32 0, i32* @__dfsan_retval_origin_tls, align 4
+  ; CHECK-NEXT: [[OFFSET:%.*]] = and i64 [[INTP]]
+  ; CHECK-NEXT: [[SHADOW_ADDR:%.*]] = mul i64 [[OFFSET]], 2
+  ; CHECK-NEXT: [[SHADOW_PTR0:%.*]] = inttoptr i64 [[SHADOW_ADDR]] to i[[#SBITS]]*
+  ; CHECK-NEXT: [[ORIGIN_OFFSET:%.*]] = add i64 [[OFFSET]], 35184372088832
+  ; CHECK-NEXT: [[ORIGIN_ADDR:%.*]] = and i64 [[ORIGIN_OFFSET]], -4
+  ; CHECK-NEXT: [[ORIGIN_PTR:%.*]] = inttoptr i64 [[ORIGIN_ADDR]] to i32*
+  ; CHECK-NEXT: {{%.*}} = load i32, i32* [[ORIGIN_PTR]], align 4
+  ; CHECK-NEXT: [[SHADOW_PTR1:%.*]] = getelementptr i[[#SBITS]], i[[#SBITS]]* [[SHADOW_PTR0]], i64 1
+  ; CHECK-NEXT: [[SHADOW0:%.*]] = load i[[#SBITS]], i[[#SBITS]]* [[SHADOW_PTR0]], align [[#SBYTES]]
+  ; CHECK-NEXT: [[SHADOW1:%.*]] = load i[[#SBITS]], i[[#SBITS]]* [[SHADOW_PTR1]], align [[#SBYTES]]
+  ; CHECK-NEXT: {{%.*}} = or i[[#SBITS]] [[SHADOW0]], [[SHADOW1]]
+  ; CHECK-NEXT: %a = load i16, i16* %p, align 2
+  ; CHECK-NEXT: store i[[#SBITS]] 0, i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_retval_tls to i[[#SBITS]]*), align [[#SBYTES]]
+  ; CHECK-NEXT: store i32 0, i32* @__dfsan_retval_origin_tls, align 4
   
   %p = alloca i16
   %a = load i16, i16* %p
@@ -57,130 +58,130 @@ define i16* @load_escaped_alloca() {
 
 @X = constant i1 1
 define i1 @load_global() {
-  ; CHECK: @"dfs$load_global"
+  ; CHECK-LABEL: @"dfs$load_global"
   ; CHECK: %a = load i1, i1* @X, align 1
-  ; CHECK: store i[[#SBITS]] 0, i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_retval_tls to i[[#SBITS]]*), align [[#SBYTES]]
-  ; CHECK: store i32 0, i32* @__dfsan_retval_origin_tls, align 4
+  ; CHECK-NEXT: store i[[#SBITS]] 0, i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_retval_tls to i[[#SBITS]]*), align [[#SBYTES]]
+  ; CHECK-NEXT: store i32 0, i32* @__dfsan_retval_origin_tls, align 4
 
   %a = load i1, i1* @X
   ret i1 %a
 }
 
 define i1 @load1(i1* %p) {
-  ; CHECK: @"dfs$load1"
-  ; CHECK: [[PO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4
-  ; CHECK: [[PS:%.*]] = load i[[#SBITS]], i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_arg_tls to i[[#SBITS]]*), align [[#SBYTES]]
-  ; CHECK: [[INTP:%.*]] = ptrtoint {{.*}} %p to i64
-  ; CHECK: [[OFFSET:%.*]] = and i64 [[INTP]], -123145302310913
-  ; CHECK: [[SHADOW_ADDR:%.*]] = mul i64 [[OFFSET]], 2
-  ; CHECK: [[SHADOW_PTR:%.*]] = inttoptr i64 [[SHADOW_ADDR]] to i[[#SBITS]]*
-  ; CHECK: [[ORIGIN_OFFSET:%.*]] = add i64 [[OFFSET]], 35184372088832
-  ; CHECK: [[ORIGIN_ADDR:%.*]] = and i64 [[ORIGIN_OFFSET]], -4
-  ; CHECK: [[ORIGIN_PTR:%.*]] = inttoptr i64 [[ORIGIN_ADDR]] to i32*
-  ; CHECK: [[AO:%.*]] = load i32, i32* [[ORIGIN_PTR]], align 4
-  ; CHECK: [[AS:%.*]] = load i[[#SBITS]], i[[#SBITS]]* [[SHADOW_PTR]], align [[#SBYTES]]
-  ; CHECK: [[RS:%.*]] = or i[[#SBITS]] [[AS]], [[PS]]
-  ; CHECK: [[PS_NZ:%.*]] = icmp ne i[[#SBITS]] [[PS]], 0
-  ; CHECK: [[RO:%.*]] = select i1 [[PS_NZ]], i32 [[PO]], i32 [[AO]]
-  ; CHECK: %a = load i1, i1* %p, align 1
-  ; CHECK: store i[[#SBITS]] [[RS]], i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_retval_tls to i[[#SBITS]]*), align [[#SBYTES]]
-  ; CHECK: store i32 [[RO]], i32* @__dfsan_retval_origin_tls, align 4
+  ; CHECK-LABEL: @"dfs$load1"
+  ; CHECK-NEXT: [[PO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4
+  ; CHECK-NEXT: [[PS:%.*]] = load i[[#SBITS]], i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_arg_tls to i[[#SBITS]]*), align [[#SBYTES]]
+  ; CHECK-NEXT: [[INTP:%.*]] = ptrtoint {{.*}} %p to i64
+  ; CHECK-NEXT: [[OFFSET:%.*]] = and i64 [[INTP]]
+  ; CHECK-NEXT: [[SHADOW_ADDR:%.*]] = mul i64 [[OFFSET]], 2
+  ; CHECK-NEXT: [[SHADOW_PTR:%.*]] = inttoptr i64 [[SHADOW_ADDR]] to i[[#SBITS]]*
+  ; CHECK-NEXT: [[ORIGIN_OFFSET:%.*]] = add i64 [[OFFSET]], 35184372088832
+  ; CHECK-NEXT: [[ORIGIN_ADDR:%.*]] = and i64 [[ORIGIN_OFFSET]], -4
+  ; CHECK-NEXT: [[ORIGIN_PTR:%.*]] = inttoptr i64 [[ORIGIN_ADDR]] to i32*
+  ; CHECK-NEXT: [[AO:%.*]] = load i32, i32* [[ORIGIN_PTR]], align 4
+  ; CHECK-NEXT: [[AS:%.*]] = load i[[#SBITS]], i[[#SBITS]]* [[SHADOW_PTR]], align [[#SBYTES]]
+  ; CHECK-NEXT: [[RS:%.*]] = or i[[#SBITS]] [[AS]], [[PS]]
+  ; CHECK-NEXT: [[PS_NZ:%.*]] = icmp ne i[[#SBITS]] [[PS]], 0
+  ; CHECK-NEXT: [[RO:%.*]] = select i1 [[PS_NZ]], i32 [[PO]], i32 [[AO]]
+  ; CHECK-NEXT: %a = load i1, i1* %p, align 1
+  ; CHECK-NEXT: store i[[#SBITS]] [[RS]], i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_retval_tls to i[[#SBITS]]*), align [[#SBYTES]]
+  ; CHECK-NEXT: store i32 [[RO]], i32* @__dfsan_retval_origin_tls, align 4
 
   %a = load i1, i1* %p
   ret i1 %a
 }
 
 define i16 @load16(i1 %i, i16* %p) {
-  ; CHECK: @"dfs$load16"
-  ; CHECK: [[PO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
-  ; CHECK: [[PS:%.*]] = load i[[#SBITS]], i[[#SBITS]]* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 2) to i[[#SBITS]]*), align [[#SBYTES]]
-  ; CHECK: [[INTP:%.*]] = ptrtoint {{.*}} %p to i64
-  ; CHECK: [[OFFSET:%.*]] = and i64 [[INTP]], -123145302310913
-  ; CHECK: [[SHADOW_ADDR:%.*]] = mul i64 [[OFFSET]], 2
-  ; CHECK: [[SHADOW_PTR0:%.*]] = inttoptr i64 [[SHADOW_ADDR]] to i[[#SBITS]]*
-  ; CHECK: [[ORIGIN_OFFSET:%.*]] = add i64 [[OFFSET]], 35184372088832
-  ; CHECK: [[ORIGIN_ADDR:%.*]] = and i64 [[ORIGIN_OFFSET]], -4
-  ; CHECK: [[ORIGIN_PTR:%.*]] = inttoptr i64 [[ORIGIN_ADDR]] to i32*
-  ; CHECK: [[AO:%.*]] = load i32, i32* [[ORIGIN_PTR]], align 4
-  ; CHECK: [[SHADOW_PTR1:%.*]] = getelementptr i[[#SBITS]], i[[#SBITS]]* [[SHADOW_PTR0]], i64 1
-  ; CHECK: [[SHADOW0:%.*]] = load i[[#SBITS]], i[[#SBITS]]* [[SHADOW_PTR0]], align [[#SBYTES]]
-  ; CHECK: [[SHADOW1:%.*]] = load i[[#SBITS]], i[[#SBITS]]* [[SHADOW_PTR1]], align [[#SBYTES]]
-  ; CHECK: [[AS:%.*]] = or i[[#SBITS]] [[SHADOW0]], [[SHADOW1]]
-  ; CHECK: [[RS:%.*]] = or i[[#SBITS]] [[AS]], [[PS]]
-  ; CHECK: [[PS_NZ:%.*]] = icmp ne i[[#SBITS]] [[PS]], 0
-  ; CHECK: [[RO:%.*]] = select i1 [[PS_NZ]], i32 [[PO]], i32 [[AO]]
-  ; CHECK: %a = load i16, i16* %p, align 2
-  ; CHECK: store i[[#SBITS]] [[RS]], i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_retval_tls to i[[#SBITS]]*), align [[#SBYTES]]
-  ; CHECK: store i32 [[RO]], i32* @__dfsan_retval_origin_tls, align 4
+  ; CHECK-LABEL: @"dfs$load16"
+  ; CHECK-NEXT: [[PO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
+  ; CHECK-NEXT: [[PS:%.*]] = load i[[#SBITS]], i[[#SBITS]]* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 2) to i[[#SBITS]]*), align [[#SBYTES]]
+  ; CHECK-NEXT: [[INTP:%.*]] = ptrtoint {{.*}} %p to i64
+  ; CHECK-NEXT: [[OFFSET:%.*]] = and i64 [[INTP]]
+  ; CHECK-NEXT: [[SHADOW_ADDR:%.*]] = mul i64 [[OFFSET]], 2
+  ; CHECK-NEXT: [[SHADOW_PTR0:%.*]] = inttoptr i64 [[SHADOW_ADDR]] to i[[#SBITS]]*
+  ; CHECK-NEXT: [[ORIGIN_OFFSET:%.*]] = add i64 [[OFFSET]], 35184372088832
+  ; CHECK-NEXT: [[ORIGIN_ADDR:%.*]] = and i64 [[ORIGIN_OFFSET]], -4
+  ; CHECK-NEXT: [[ORIGIN_PTR:%.*]] = inttoptr i64 [[ORIGIN_ADDR]] to i32*
+  ; CHECK-NEXT: [[AO:%.*]] = load i32, i32* [[ORIGIN_PTR]], align 4
+  ; CHECK-NEXT: [[SHADOW_PTR1:%.*]] = getelementptr i[[#SBITS]], i[[#SBITS]]* [[SHADOW_PTR0]], i64 1
+  ; CHECK-NEXT: [[SHADOW0:%.*]] = load i[[#SBITS]], i[[#SBITS]]* [[SHADOW_PTR0]], align [[#SBYTES]]
+  ; CHECK-NEXT: [[SHADOW1:%.*]] = load i[[#SBITS]], i[[#SBITS]]* [[SHADOW_PTR1]], align [[#SBYTES]]
+  ; CHECK-NEXT: [[AS:%.*]] = or i[[#SBITS]] [[SHADOW0]], [[SHADOW1]]
+  ; CHECK-NEXT: [[RS:%.*]] = or i[[#SBITS]] [[AS]], [[PS]]
+  ; CHECK-NEXT: [[PS_NZ:%.*]] = icmp ne i[[#SBITS]] [[PS]], 0
+  ; CHECK-NEXT: [[RO:%.*]] = select i1 [[PS_NZ]], i32 [[PO]], i32 [[AO]]
+  ; CHECK-NEXT: %a = load i16, i16* %p, align 2
+  ; CHECK-NEXT: store i[[#SBITS]] [[RS]], i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_retval_tls to i[[#SBITS]]*), align [[#SBYTES]]
+  ; CHECK-NEXT: store i32 [[RO]], i32* @__dfsan_retval_origin_tls, align 4
 
   %a = load i16, i16* %p
   ret i16 %a
 }
 
 define i32 @load32(i32* %p) {
-  ; CHECK: @"dfs$load32"
+  ; CHECK-LABEL: @"dfs$load32"
 
-  ; NO_COMBINE_LOAD_PTR: @"dfs$load32"
+  ; NO_COMBINE_LOAD_PTR-LABEL: @"dfs$load32"
   ; NO_COMBINE_LOAD_PTR: [[INTP:%.*]] = ptrtoint i32* %p to i64
-  ; NO_COMBINE_LOAD_PTR: [[OFFSET:%.*]] = and i64 [[INTP]], -123145302310913
-  ; NO_COMBINE_LOAD_PTR: [[SHADOW_ADDR:%.*]] = mul i64 [[OFFSET]], 2
-  ; NO_COMBINE_LOAD_PTR: [[SHADOW_PTR:%.*]] = inttoptr i64 [[SHADOW_ADDR]] to i[[#SBITS]]*
-  ; NO_COMBINE_LOAD_PTR: [[ORIGIN_ADDR:%.*]] = add i64 [[OFFSET]], 35184372088832
-  ; NO_COMBINE_LOAD_PTR: [[ORIGIN_PTR:%.*]] = inttoptr i64 [[ORIGIN_ADDR]] to i32*
-  ; NO_COMBINE_LOAD_PTR: [[AO:%.*]] = load i32, i32* [[ORIGIN_PTR]], align 4
-  ; NO_COMBINE_LOAD_PTR: [[SHADOW_PTR64:%.*]] = bitcast i[[#SBITS]]* [[SHADOW_PTR]] to i64*
-  ; NO_COMBINE_LOAD_PTR: [[SHADOW64:%.*]] = load i64, i64* [[SHADOW_PTR64]], align [[#SBYTES]]
-  ; NO_COMBINE_LOAD_PTR: [[SHADOW64_H32:%.*]] = lshr i64 [[SHADOW64]], 32
-  ; NO_COMBINE_LOAD_PTR: [[SHADOW64_HL32:%.*]] = or i64 [[SHADOW64]], [[SHADOW64_H32]]
-  ; NO_COMBINE_LOAD_PTR: [[SHADOW64_HL32_H16:%.*]] = lshr i64 [[SHADOW64_HL32]], 16
-  ; NO_COMBINE_LOAD_PTR: [[SHADOW64_HL32_HL16:%.*]] = or i64 [[SHADOW64_HL32]], [[SHADOW64_HL32_H16]]
-  ; NO_COMBINE_LOAD_PTR: [[SHADOW:%.*]] = trunc i64 [[SHADOW64_HL32_HL16]] to i[[#SBITS]]
-  ; NO_COMBINE_LOAD_PTR: %a = load i32, i32* %p, align 4
-  ; NO_COMBINE_LOAD_PTR: store i[[#SBITS]] [[SHADOW]], i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_retval_tls to i[[#SBITS]]*), align [[#SBYTES]]
-  ; NO_COMBINE_LOAD_PTR: store i32 [[AO]], i32* @__dfsan_retval_origin_tls, align 4
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[OFFSET:%.*]] = and i64 [[INTP]]
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[SHADOW_ADDR:%.*]] = mul i64 [[OFFSET]], 2
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[SHADOW_PTR:%.*]] = inttoptr i64 [[SHADOW_ADDR]] to i[[#SBITS]]*
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[ORIGIN_ADDR:%.*]] = add i64 [[OFFSET]], 35184372088832
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[ORIGIN_PTR:%.*]] = inttoptr i64 [[ORIGIN_ADDR]] to i32*
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[AO:%.*]] = load i32, i32* [[ORIGIN_PTR]], align 4
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[SHADOW_PTR64:%.*]] = bitcast i[[#SBITS]]* [[SHADOW_PTR]] to i64*
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[SHADOW64:%.*]] = load i64, i64* [[SHADOW_PTR64]], align [[#SBYTES]]
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[SHADOW64_H32:%.*]] = lshr i64 [[SHADOW64]], 32
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[SHADOW64_HL32:%.*]] = or i64 [[SHADOW64]], [[SHADOW64_H32]]
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[SHADOW64_HL32_H16:%.*]] = lshr i64 [[SHADOW64_HL32]], 16
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[SHADOW64_HL32_HL16:%.*]] = or i64 [[SHADOW64_HL32]], [[SHADOW64_HL32_H16]]
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[SHADOW:%.*]] = trunc i64 [[SHADOW64_HL32_HL16]] to i[[#SBITS]]
+  ; NO_COMBINE_LOAD_PTR-NEXT: %a = load i32, i32* %p, align 4
+  ; NO_COMBINE_LOAD_PTR-NEXT: store i[[#SBITS]] [[SHADOW]], i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_retval_tls to i[[#SBITS]]*), align [[#SBYTES]]
+  ; NO_COMBINE_LOAD_PTR-NEXT: store i32 [[AO]], i32* @__dfsan_retval_origin_tls, align 4
 
   %a = load i32, i32* %p
   ret i32 %a
 }
 
 define i64 @load64(i64* %p) {
-  ; CHECK: @"dfs$load64"
+  ; CHECK-LABEL: @"dfs$load64"
   
-  ; NO_COMBINE_LOAD_PTR: @"dfs$load64"
+  ; NO_COMBINE_LOAD_PTR-LABEL: @"dfs$load64"
   ; NO_COMBINE_LOAD_PTR: [[INTP:%.*]] = ptrtoint i64* %p to i64
-  ; NO_COMBINE_LOAD_PTR: [[OFFSET:%.*]] = and i64 [[INTP]], -123145302310913
-  ; NO_COMBINE_LOAD_PTR: [[SHADOW_ADDR:%.*]] = mul i64 [[OFFSET]], 2
-  ; NO_COMBINE_LOAD_PTR: [[SHADOW_PTR:%.*]] = inttoptr i64 [[SHADOW_ADDR]] to i[[#SBITS]]*
-  ; NO_COMBINE_LOAD_PTR: [[ORIGIN_ADDR:%.*]] = add i64 [[OFFSET]], 35184372088832
-  ; NO_COMBINE_LOAD_PTR: [[ORIGIN_PTR_0:%.*]] = inttoptr i64 [[ORIGIN_ADDR]] to i32*
-  ; NO_COMBINE_LOAD_PTR: [[ORIGIN_0:%.*]] = load i32, i32* [[ORIGIN_PTR_0]], align 8
-  ; NO_COMBINE_LOAD_PTR: [[SHADOW_PTR_0:%.*]] = bitcast i[[#SBITS]]* [[SHADOW_PTR]] to i64*
-  ; NO_COMBINE_LOAD_PTR: [[SHADOW_0:%.*]] = load i64, i64* [[SHADOW_PTR_0]], align [[#SBYTES]]
-  ; NO_COMBINE_LOAD_PTR: [[SHADOW_PTR_1:%.*]] = getelementptr i64, i64* [[SHADOW_PTR_0]], i64 1
-  ; NO_COMBINE_LOAD_PTR: [[SHADOW_1:%.*]] = load i64, i64* [[SHADOW_PTR_1]], align [[#SBYTES]]
-  ; NO_COMBINE_LOAD_PTR: [[SHADOW64:%.*]] = or i64 [[SHADOW_0]], [[SHADOW_1]]
-  ; NO_COMBINE_LOAD_PTR: [[ORIGIN_PTR_1:%.*]] = getelementptr i32, i32* [[ORIGIN_PTR_0]], i64 1
-  ; NO_COMBINE_LOAD_PTR: [[ORIGIN_1:%.*]] = load i32, i32* [[ORIGIN_PTR_1]], align 8
-  ; NO_COMBINE_LOAD_PTR: [[SHADOW64_H32:%.*]] = lshr i64 [[SHADOW64]], 32
-  ; NO_COMBINE_LOAD_PTR: [[SHADOW64_HL32:%.*]] = or i64 [[SHADOW64]], [[SHADOW64_H32]]
-  ; NO_COMBINE_LOAD_PTR: [[SHADOW64_HL32_H16:%.*]] = lshr i64 [[SHADOW64_HL32]], 16
-  ; NO_COMBINE_LOAD_PTR: [[SHADOW64_HL32_HL16:%.*]] = or i64 [[SHADOW64_HL32]], [[SHADOW64_HL32_H16]]
-  ; NO_COMBINE_LOAD_PTR: [[SHADOW:%.*]] = trunc i64 [[SHADOW64_HL32_HL16]] to i[[#SBITS]]
-  ; NO_COMBINE_LOAD_PTR: [[SHADOW_1_NZ:%.*]] = icmp ne i64 [[SHADOW_1]], 0
-  ; NO_COMBINE_LOAD_PTR: [[ORIGIN:%.*]] = select i1 [[SHADOW_1_NZ]], i32 [[ORIGIN_1]], i32 [[ORIGIN_0]]
-  ; NO_COMBINE_LOAD_PTR: %a = load i64, i64* %p, align 8
-  ; NO_COMBINE_LOAD_PTR: store i[[#SBITS]] [[SHADOW]], i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_retval_tls to i[[#SBITS]]*), align [[#SBYTES]]
-  ; NO_COMBINE_LOAD_PTR: store i32 [[ORIGIN]], i32* @__dfsan_retval_origin_tls, align 4
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[OFFSET:%.*]] = and i64 [[INTP]]
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[SHADOW_ADDR:%.*]] = mul i64 [[OFFSET]], 2
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[SHADOW_PTR:%.*]] = inttoptr i64 [[SHADOW_ADDR]] to i[[#SBITS]]*
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[ORIGIN_ADDR:%.*]] = add i64 [[OFFSET]], 35184372088832
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[ORIGIN_PTR_0:%.*]] = inttoptr i64 [[ORIGIN_ADDR]] to i32*
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[ORIGIN_0:%.*]] = load i32, i32* [[ORIGIN_PTR_0]], align 8
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[SHADOW_PTR_0:%.*]] = bitcast i[[#SBITS]]* [[SHADOW_PTR]] to i64*
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[SHADOW_0:%.*]] = load i64, i64* [[SHADOW_PTR_0]], align [[#SBYTES]]
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[SHADOW_PTR_1:%.*]] = getelementptr i64, i64* [[SHADOW_PTR_0]], i64 1
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[SHADOW_1:%.*]] = load i64, i64* [[SHADOW_PTR_1]], align [[#SBYTES]]
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[SHADOW64:%.*]] = or i64 [[SHADOW_0]], [[SHADOW_1]]
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[ORIGIN_PTR_1:%.*]] = getelementptr i32, i32* [[ORIGIN_PTR_0]], i64 1
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[ORIGIN_1:%.*]] = load i32, i32* [[ORIGIN_PTR_1]], align 8
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[SHADOW64_H32:%.*]] = lshr i64 [[SHADOW64]], 32
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[SHADOW64_HL32:%.*]] = or i64 [[SHADOW64]], [[SHADOW64_H32]]
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[SHADOW64_HL32_H16:%.*]] = lshr i64 [[SHADOW64_HL32]], 16
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[SHADOW64_HL32_HL16:%.*]] = or i64 [[SHADOW64_HL32]], [[SHADOW64_HL32_H16]]
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[SHADOW:%.*]] = trunc i64 [[SHADOW64_HL32_HL16]] to i[[#SBITS]]
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[SHADOW_1_NZ:%.*]] = icmp ne i64 [[SHADOW_1]], 0
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[ORIGIN:%.*]] = select i1 [[SHADOW_1_NZ]], i32 [[ORIGIN_1]], i32 [[ORIGIN_0]]
+  ; NO_COMBINE_LOAD_PTR-NEXT: %a = load i64, i64* %p, align 8
+  ; NO_COMBINE_LOAD_PTR-NEXT: store i[[#SBITS]] [[SHADOW]], i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_retval_tls to i[[#SBITS]]*), align [[#SBYTES]]
+  ; NO_COMBINE_LOAD_PTR-NEXT: store i32 [[ORIGIN]], i32* @__dfsan_retval_origin_tls, align 4
 
   %a = load i64, i64* %p
   ret i64 %a
 }
 
 define i64 @load64_align2(i64* %p) {
-  ; CHECK: @"dfs$load64_align2"
+  ; CHECK-LABEL: @"dfs$load64_align2"
 
-  ; NO_COMBINE_LOAD_PTR: @"dfs$load64_align2"
-  ; NO_COMBINE_LOAD_PTR-NEXT: [[INTP:%.*]] = bitcast i64* %p to i8*
+  ; NO_COMBINE_LOAD_PTR-LABEL: @"dfs$load64_align2"
+  ; NO_COMBINE_LOAD_PTR: [[INTP:%.*]] = bitcast i64* %p to i8*
   ; NO_COMBINE_LOAD_PTR-NEXT: [[LABEL_ORIGIN:%.*]] = call zeroext i64 @__dfsan_load_label_and_origin(i8* [[INTP]], i64 8)
   ; NO_COMBINE_LOAD_PTR-NEXT: [[LABEL_ORIGIN_H32:%.*]] = lshr i64 [[LABEL_ORIGIN]], 32
   ; NO_COMBINE_LOAD_PTR-NEXT: [[LABEL:%.*]] = trunc i64 [[LABEL_ORIGIN_H32]] to i[[#SBITS]]
@@ -194,49 +195,49 @@ define i64 @load64_align2(i64* %p) {
 }
 
 define i92 @load92(i92* %p) {
-  ; CHECK: @"dfs$load92"
+  ; CHECK-LABEL: @"dfs$load92"
 
-  ; NO_COMBINE_LOAD_PTR: @"dfs$load92"
+  ; NO_COMBINE_LOAD_PTR-LABEL: @"dfs$load92"
   ; NO_COMBINE_LOAD_PTR: [[INTP:%.*]] = ptrtoint i92* %p to i64
-  ; NO_COMBINE_LOAD_PTR: [[OFFSET:%.*]] = and i64 [[INTP]], -123145302310913
-  ; NO_COMBINE_LOAD_PTR: [[SHADOW_ADDR:%.*]] = mul i64 [[OFFSET]], 2
-  ; NO_COMBINE_LOAD_PTR: [[SHADOW_PTR:%.*]] = inttoptr i64 [[SHADOW_ADDR]] to i[[#SBITS]]*
-  ; NO_COMBINE_LOAD_PTR: [[ORIGIN_ADDR:%.*]] = add i64 [[OFFSET]], 35184372088832
-  ; NO_COMBINE_LOAD_PTR: [[ORIGIN_PTR_0:%.*]] = inttoptr i64 [[ORIGIN_ADDR]] to i32*
-  ; NO_COMBINE_LOAD_PTR: [[ORIGIN_0:%.*]] = load i32, i32* [[ORIGIN_PTR_0]], align 8
-  ; NO_COMBINE_LOAD_PTR: [[SHADOW_PTR_0:%.*]] = bitcast i[[#SBITS]]* [[SHADOW_PTR]] to i64*
-  ; NO_COMBINE_LOAD_PTR: [[SHADOW_0:%.*]] = load i64, i64* [[SHADOW_PTR_0]], align [[#SBYTES]]
-  ; NO_COMBINE_LOAD_PTR: [[SHADOW_PTR_1:%.*]] = getelementptr i64, i64* [[SHADOW_PTR_0]], i64 1
-  ; NO_COMBINE_LOAD_PTR: [[SHADOW_1:%.*]] = load i64, i64* [[SHADOW_PTR_1]], align [[#SBYTES]]
-  ; NO_COMBINE_LOAD_PTR: [[SHADOW_01:%.*]] = or i64 [[SHADOW_0]], [[SHADOW_1]]
-  ; NO_COMBINE_LOAD_PTR: [[ORIGIN_PTR_1:%.*]] = getelementptr i32, i32* [[ORIGIN_PTR_0]], i64 1
-  ; NO_COMBINE_LOAD_PTR: [[ORIGIN_1:%.*]] = load i32, i32* [[ORIGIN_PTR_1]], align 8
-  ; NO_COMBINE_LOAD_PTR: [[SHADOW_PTR_2:%.*]] = getelementptr i64, i64* [[SHADOW_PTR_1]], i64 1
-  ; NO_COMBINE_LOAD_PTR: [[SHADOW_2:%.*]] = load i64, i64* [[SHADOW_PTR_2]], align [[#SBYTES]]
-  ; NO_COMBINE_LOAD_PTR: [[SHADOW64:%.*]] = or i64 [[SHADOW_01]], [[SHADOW_2]]
-  ; NO_COMBINE_LOAD_PTR: [[ORIGIN_PTR_2:%.*]] = getelementptr i32, i32* [[ORIGIN_PTR_1]], i64 1
-  ; NO_COMBINE_LOAD_PTR: [[ORIGIN_2:%.*]] = load i32, i32* [[ORIGIN_PTR_2]], align 8
-  ; NO_COMBINE_LOAD_PTR: [[SHADOW64_H32:%.*]] = lshr i64 [[SHADOW64]], 32
-  ; NO_COMBINE_LOAD_PTR: [[SHADOW64_HL32:%.*]] = or i64 [[SHADOW64]], [[SHADOW64_H32]]
-  ; NO_COMBINE_LOAD_PTR: [[SHADOW64_HL32_H16:%.*]] = lshr i64 [[SHADOW64_HL32]], 16
-  ; NO_COMBINE_LOAD_PTR: [[SHADOW64_HL32_HL16:%.*]] = or i64 [[SHADOW64_HL32]], [[SHADOW64_HL32_H16]]
-  ; NO_COMBINE_LOAD_PTR: [[SHADOW:%.*]] = trunc i64 [[SHADOW64_HL32_HL16]] to i[[#SBITS]]
-  ; NO_COMBINE_LOAD_PTR: [[SHADOW_1_NZ:%.*]] = icmp ne i64 [[SHADOW_1]], 0
-  ; NO_COMBINE_LOAD_PTR: [[ORIGIN_10:%.*]] = select i1 [[SHADOW_1_NZ]], i32 [[ORIGIN_1]], i32 [[ORIGIN_0]]
-  ; NO_COMBINE_LOAD_PTR: [[SHADOW_2_NZ:%.*]] = icmp ne i64 [[SHADOW_2]], 0
-  ; NO_COMBINE_LOAD_PTR: [[ORIGIN:%.*]] = select i1 [[SHADOW_2_NZ]], i32 [[ORIGIN_2]], i32 [[ORIGIN_10]]
-  ; NO_COMBINE_LOAD_PTR: %a = load i92, i92* %p, align 8
-  ; NO_COMBINE_LOAD_PTR: store i[[#SBITS]] [[SHADOW]], i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_retval_tls to i[[#SBITS]]*), align [[#SBYTES]]
-  ; NO_COMBINE_LOAD_PTR: store i32 [[ORIGIN]], i32* @__dfsan_retval_origin_tls, align 4
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[OFFSET:%.*]] = and i64 [[INTP]]
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[SHADOW_ADDR:%.*]] = mul i64 [[OFFSET]], 2
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[SHADOW_PTR:%.*]] = inttoptr i64 [[SHADOW_ADDR]] to i[[#SBITS]]*
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[ORIGIN_ADDR:%.*]] = add i64 [[OFFSET]], 35184372088832
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[ORIGIN_PTR_0:%.*]] = inttoptr i64 [[ORIGIN_ADDR]] to i32*
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[ORIGIN_0:%.*]] = load i32, i32* [[ORIGIN_PTR_0]], align 8
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[SHADOW_PTR_0:%.*]] = bitcast i[[#SBITS]]* [[SHADOW_PTR]] to i64*
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[SHADOW_0:%.*]] = load i64, i64* [[SHADOW_PTR_0]], align [[#SBYTES]]
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[SHADOW_PTR_1:%.*]] = getelementptr i64, i64* [[SHADOW_PTR_0]], i64 1
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[SHADOW_1:%.*]] = load i64, i64* [[SHADOW_PTR_1]], align [[#SBYTES]]
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[SHADOW_01:%.*]] = or i64 [[SHADOW_0]], [[SHADOW_1]]
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[ORIGIN_PTR_1:%.*]] = getelementptr i32, i32* [[ORIGIN_PTR_0]], i64 1
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[ORIGIN_1:%.*]] = load i32, i32* [[ORIGIN_PTR_1]], align 8
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[SHADOW_PTR_2:%.*]] = getelementptr i64, i64* [[SHADOW_PTR_1]], i64 1
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[SHADOW_2:%.*]] = load i64, i64* [[SHADOW_PTR_2]], align [[#SBYTES]]
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[SHADOW64:%.*]] = or i64 [[SHADOW_01]], [[SHADOW_2]]
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[ORIGIN_PTR_2:%.*]] = getelementptr i32, i32* [[ORIGIN_PTR_1]], i64 1
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[ORIGIN_2:%.*]] = load i32, i32* [[ORIGIN_PTR_2]], align 8
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[SHADOW64_H32:%.*]] = lshr i64 [[SHADOW64]], 32
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[SHADOW64_HL32:%.*]] = or i64 [[SHADOW64]], [[SHADOW64_H32]]
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[SHADOW64_HL32_H16:%.*]] = lshr i64 [[SHADOW64_HL32]], 16
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[SHADOW64_HL32_HL16:%.*]] = or i64 [[SHADOW64_HL32]], [[SHADOW64_HL32_H16]]
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[SHADOW:%.*]] = trunc i64 [[SHADOW64_HL32_HL16]] to i[[#SBITS]]
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[SHADOW_1_NZ:%.*]] = icmp ne i64 [[SHADOW_1]], 0
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[ORIGIN_10:%.*]] = select i1 [[SHADOW_1_NZ]], i32 [[ORIGIN_1]], i32 [[ORIGIN_0]]
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[SHADOW_2_NZ:%.*]] = icmp ne i64 [[SHADOW_2]], 0
+  ; NO_COMBINE_LOAD_PTR-NEXT: [[ORIGIN:%.*]] = select i1 [[SHADOW_2_NZ]], i32 [[ORIGIN_2]], i32 [[ORIGIN_10]]
+  ; NO_COMBINE_LOAD_PTR-NEXT: %a = load i92, i92* %p, align 8
+  ; NO_COMBINE_LOAD_PTR-NEXT: store i[[#SBITS]] [[SHADOW]], i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_retval_tls to i[[#SBITS]]*), align [[#SBYTES]]
+  ; NO_COMBINE_LOAD_PTR-NEXT: store i32 [[ORIGIN]], i32* @__dfsan_retval_origin_tls, align 4
   
   %a = load i92, i92* %p
   ret i92 %a
 }
 
 define i17 @load17(i17* %p) {
-  ; CHECK: @"dfs$load17"
+  ; CHECK-LABEL: @"dfs$load17"
 
-  ; NO_COMBINE_LOAD_PTR: @"dfs$load17"
+  ; NO_COMBINE_LOAD_PTR-LABEL: @"dfs$load17"
   ; NO_COMBINE_LOAD_PTR-NEXT: [[INTP:%.*]] = bitcast i17* %p to i8*
   ; NO_COMBINE_LOAD_PTR-NEXT: [[LABEL_ORIGIN:%.*]] = call zeroext i64 @__dfsan_load_label_and_origin(i8* [[INTP]], i64 3)
   ; NO_COMBINE_LOAD_PTR-NEXT: [[LABEL_ORIGIN_H32:%.*]] = lshr i64 [[LABEL_ORIGIN]], 32
@@ -249,3 +250,173 @@ define i17 @load17(i17* %p) {
   %a = load i17, i17* %p, align 4
   ret i17 %a
 }
+
+define void @store_zero_to_non_escaped_alloca() {
+  ; CHECK-LABEL: @"dfs$store_zero_to_non_escaped_alloca"
+  ; CHECK-NEXT: [[A:%.*]] = alloca i[[#SBITS]], align [[#SBYTES]]
+  ; CHECK-NEXT: %_dfsa = alloca i32, align 4
+  ; CHECK-NEXT: %p = alloca i[[#SBITS]], align [[#SBYTES]]
+  ; CHECK-NEXT: store i[[#SBITS]] 0, i[[#SBITS]]* [[A]], align [[#SBYTES]]
+  ; CHECK-NEXT: store i16 1, i16* %p, align 2
+  ; CHECK-NEXT: ret void
+  
+  %p = alloca i16
+  store i16 1, i16* %p
+  ret void
+}
+
+define void @store_nonzero_to_non_escaped_alloca(i16 %a) {
+  ; CHECK-LABEL: @"dfs$store_nonzero_to_non_escaped_alloca"
+  ; CHECK: [[AO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4
+  ; CHECK: %_dfsa = alloca i32, align 4
+  ; CHECK: store i32 [[AO]], i32* %_dfsa, align 4
+  
+  %p = alloca i16
+  store i16 %a, i16* %p
+  ret void
+}
+
+declare void @foo(i16* %p)
+
+define void @store_zero_to_escaped_alloca() {
+  ; CHECK-LABEL: @"dfs$store_zero_to_escaped_alloca"
+  ; CHECK: [[SA:%.*]] = bitcast i[[#SBITS]]* {{.*}} to i32*
+  ; CHECK-NEXT: store i32 0, i32* [[SA]], align 2
+  ; CHECK-NEXT: store i[[#SBITS]] 1, i[[#SBITS]]* %p, align 2
+  ; CHECK-NEXT: store i[[#SBITS]] 0, i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_arg_tls to i[[#SBITS]]*), align [[#SBYTES]]
+
+  ; COMBINE_STORE_PTR-LABEL: @"dfs$store_zero_to_escaped_alloca"
+  ; COMBINE_STORE_PTR: [[SA:%.*]] = bitcast i[[#SBITS]]* {{.*}} to i32*
+  ; COMBINE_STORE_PTR-NEXT: store i32 0, i32* [[SA]], align 2
+  ; COMBINE_STORE_PTR-NEXT: store i16 1, i16* %p, align 2
+  ; COMBINE_STORE_PTR-NEXT: store i[[#SBITS]] 0, i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_arg_tls to i[[#SBITS]]*), align [[#SBYTES]]
+  ; COMBINE_STORE_PTR: call void @foo(i16* %p)
+
+  %p = alloca i16
+  store i16 1, i16* %p
+  call void @foo(i16* %p)
+  ret void
+}
+
+define void @store_nonzero_to_escaped_alloca(i16 %a) {
+  ; CHECK-LABEL: @"dfs$store_nonzero_to_escaped_alloca"
+  ; CHECK-NEXT: [[AO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4
+  ; CHECK-NEXT: [[AS:%.*]] = load i[[#SBITS]], i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_arg_tls to i[[#SBITS]]*), align [[#SBYTES]]
+  ; CHECK: [[INTP:%.*]] = ptrtoint {{.*}} %p to i64
+  ; CHECK-NEXT: [[OFFSET:%.*]] = and i64 [[INTP]]
+  ; CHECK: [[ORIGIN_OFFSET:%.*]] = add i64 [[OFFSET]], 35184372088832
+  ; CHECK-NEXT: [[ORIGIN_ADDR:%.*]] = and i64 [[ORIGIN_OFFSET]], -4
+  ; CHECK-NEXT: [[ORIGIN_PTR:%.*]] = inttoptr i64 [[ORIGIN_ADDR]] to i32*
+  ; CHECK: %_dfscmp = icmp ne i[[#SBITS]] [[AS]], 0
+  ; CHECK-NEXT: br i1 %_dfscmp, label %[[L1:.*]], label %[[L2:.*]],
+  ; CHECK: [[L1]]:
+  ; CHECK-NEXT: [[NO:%.*]] = call i32 @__dfsan_chain_origin(i32 [[AO]])
+  ; CHECK-NEXT: store i32 [[NO]], i32* [[ORIGIN_PTR]], align 4
+  ; CHECK-NEXT: br label %[[L2]]
+  ; CHECK: [[L2]]:
+  ; CHECK-NEXT: store i16 %a, i16* %p, align 2
+  
+  ; COMBINE_STORE_PTR-LABEL: @"dfs$store_nonzero_to_escaped_alloca"
+  ; COMBINE_STORE_PTR-NEXT: [[AO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4
+  ; COMBINE_STORE_PTR-NEXT: [[AS:%.*]] = load i[[#SBITS]], i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_arg_tls to i[[#SBITS]]*), align [[#SBYTES]]
+  ; COMBINE_STORE_PTR: [[INTP:%.*]] = ptrtoint {{.*}} %p to i64
+  ; COMBINE_STORE_PTR-NEXT: [[OFFSET:%.*]] = and i64 [[INTP]]
+  ; COMBINE_STORE_PTR: [[ORIGIN_OFFSET:%.*]] = add i64 [[OFFSET]], 35184372088832
+  ; COMBINE_STORE_PTR-NEXT: [[ORIGIN_ADDR:%.*]] = and i64 [[ORIGIN_OFFSET]], -4
+  ; COMBINE_STORE_PTR-NEXT: [[ORIGIN_PTR:%.*]] = inttoptr i64 [[ORIGIN_ADDR]] to i32*
+  ; COMBINE_STORE_PTR: %_dfscmp = icmp ne i[[#SBITS]] [[AS]], 0
+  ; COMBINE_STORE_PTR-NEXT: br i1 %_dfscmp, label %[[L1:.*]], label %[[L2:.*]],
+  ; COMBINE_STORE_PTR: [[L1]]:
+  ; COMBINE_STORE_PTR-NEXT: [[NO:%.*]] = call i32 @__dfsan_chain_origin(i32 [[AO]])
+  ; COMBINE_STORE_PTR-NEXT: store i32 [[NO]], i32* [[ORIGIN_PTR]], align 4
+  ; COMBINE_STORE_PTR-NEXT: br label %[[L2]]
+  ; COMBINE_STORE_PTR: [[L2]]:
+  ; COMBINE_STORE_PTR-NEXT: store i16 %a, i16* %p, align 2
+  
+  %p = alloca i16
+  store i16 %a, i16* %p
+  call void @foo(i16* %p)
+  ret void
+}
+
+define void @store64_align8(i64* %p, i64 %a) {
+  ; CHECK-LABEL: @"dfs$store64_align8"
+  ; CHECK-NEXT: [[AO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
+  ; CHECK-NEXT: [[AS:%.*]] = load i[[#SBITS]], i[[#SBITS]]* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 2) to i[[#SBITS]]*), align [[#SBYTES]]
+  ; CHECK: %_dfscmp = icmp ne i[[#SBITS]] [[AS]], 0
+  ; CHECK-NEXT: br i1 %_dfscmp, label %[[L1:.*]], label %[[L2:.*]],
+  ; CHECK: [[L1]]:
+  ; CHECK-NEXT: [[NO:%.*]] = call i32 @__dfsan_chain_origin(i32 [[AO]])
+  ; CHECK-NEXT: [[NO_ZEXT:%.*]] = zext i32 [[NO]] to i64
+  ; CHECK-NEXT: [[NO_SHL:%.*]] = shl i64 [[NO_ZEXT]], 32
+  ; CHECK-NEXT: [[NO2:%.*]] = or i64 [[NO_ZEXT]], [[NO_SHL]]
+  ; CHECK-NEXT: [[O_PTR:%.*]] = bitcast i32* {{.*}} to i64*
+  ; CHECK-NEXT: store i64 [[NO2]], i64* [[O_PTR]], align 8
+  ; CHECK-NEXT: br label %[[L2]]
+  ; CHECK: [[L2]]:
+  ; CHECK-NEXT: store i64 %a, i64* %p, align 8
+  
+  ; COMBINE_STORE_PTR-LABEL: @"dfs$store64_align8"
+  ; COMBINE_STORE_PTR-NEXT: [[PO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 0), align 4
+  ; COMBINE_STORE_PTR-NEXT: [[PS:%.*]] = load i[[#SBITS]], i[[#SBITS]]* bitcast ([100 x i64]* @__dfsan_arg_tls to i[[#SBITS]]*), align [[#SBYTES]]
+  ; COMBINE_STORE_PTR-NEXT: [[AO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
+  ; COMBINE_STORE_PTR-NEXT: [[AS:%.*]] = load i[[#SBITS]], i[[#SBITS]]* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 2) to i[[#SBITS]]*), align [[#SBYTES]]
+  ; COMBINE_STORE_PTR-NEXT: [[MS:%.*]] = or i[[#SBITS]] [[AS]], [[PS]]
+  ; COMBINE_STORE_PTR-NEXT: [[NE:%.*]] = icmp ne i[[#SBITS]] [[PS]], 0
+  ; COMBINE_STORE_PTR-NEXT: [[MO:%.*]] = select i1 [[NE]], i32 [[PO]], i32 [[AO]]
+  ; COMBINE_STORE_PTR: %_dfscmp = icmp ne i[[#SBITS]] [[MS]], 0
+  ; COMBINE_STORE_PTR-NEXT: br i1 %_dfscmp, label %[[L1:.*]], label %[[L2:.*]],
+  ; COMBINE_STORE_PTR: [[L1]]:
+  ; COMBINE_STORE_PTR-NEXT: [[NO:%.*]] = call i32 @__dfsan_chain_origin(i32 [[MO]])
+  ; COMBINE_STORE_PTR-NEXT: [[NO_ZEXT:%.*]] = zext i32 [[NO]] to i64
+  ; COMBINE_STORE_PTR-NEXT: [[NO_SHL:%.*]] = shl i64 [[NO_ZEXT]], 32
+  ; COMBINE_STORE_PTR-NEXT: [[NO2:%.*]] = or i64 [[NO_ZEXT]], [[NO_SHL]]
+  ; COMBINE_STORE_PTR-NEXT: [[O_PTR:%.*]] = bitcast i32* {{.*}} to i64*
+  ; COMBINE_STORE_PTR-NEXT: store i64 [[NO2]], i64* [[O_PTR]], align 8
+  ; COMBINE_STORE_PTR-NEXT: br label %[[L2]]
+  ; COMBINE_STORE_PTR: [[L2]]:
+  ; COMBINE_STORE_PTR-NEXT: store i64 %a, i64* %p, align 8
+  
+  store i64 %a, i64* %p
+  ret void
+}
+
+define void @store64_align2(i64* %p, i64 %a) {
+  ; CHECK-LABEL: @"dfs$store64_align2"
+  ; CHECK-NEXT: [[AO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
+  ; CHECK-NEXT: [[AS:%.*]] = load i[[#SBITS]], i[[#SBITS]]* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 2) to i[[#SBITS]]*), align [[#SBYTES]]
+  ; CHECK: %_dfscmp = icmp ne i[[#SBITS]] [[AS]], 0
+  ; CHECK-NEXT: br i1 %_dfscmp, label %[[L1:.*]], label %[[L2:.*]],
+  ; CHECK: [[L1]]:
+  ; CHECK-NEXT: [[NO:%.*]] = call i32 @__dfsan_chain_origin(i32 [[AO]])
+  ; CHECK-NEXT: store i32 [[NO]], i32* [[O_PTR0:%.*]], align 4
+  ; CHECK-NEXT: [[O_PTR1:%.*]] = getelementptr i32, i32* [[O_PTR0]], i32 1
+  ; CHECK-NEXT: store i32 [[NO]], i32* [[O_PTR1]], align 4
+  ; CHECK: [[L2]]:
+  ; CHECK-NEXT: store i64 %a, i64* %p, align [[#SBYTES]]
+  
+  store i64 %a, i64* %p, align 2
+  ret void
+}
+
+define void @store96_align8(i96* %p, i96 %a) {
+  ; CHECK-LABEL: @"dfs$store96_align8"
+  ; CHECK-NEXT: [[AO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
+  ; CHECK-NEXT: [[AS:%.*]] = load i[[#SBITS]], i[[#SBITS]]* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 2) to i[[#SBITS]]*), align [[#SBYTES]]
+  ; CHECK: %_dfscmp = icmp ne i[[#SBITS]] [[AS]], 0
+  ; CHECK-NEXT: br i1 %_dfscmp, label %[[L1:.*]], label %[[L2:.*]],
+  ; CHECK: [[L1]]:
+  ; CHECK-NEXT: [[NO:%.*]] = call i32 @__dfsan_chain_origin(i32 [[AO]])
+  ; CHECK-NEXT: [[NO_ZEXT:%.*]] = zext i32 [[NO]] to i64
+  ; CHECK-NEXT: [[NO_SHL:%.*]] = shl i64 [[NO_ZEXT]], 32
+  ; CHECK-NEXT: [[NO2:%.*]] = or i64 [[NO_ZEXT]], [[NO_SHL]]
+  ; CHECK-NEXT: [[O_PTR64:%.*]] = bitcast i32* [[O_PTR0:%.*]] to i64*
+  ; CHECK-NEXT: store i64 [[NO2]], i64* [[O_PTR64]], align 8
+  ; CHECK-NEXT: [[O_PTR1:%.*]] = getelementptr i32, i32* [[O_PTR0]], i32 2
+  ; CHECK-NEXT: store i32 [[NO]], i32* [[O_PTR1]], align 8
+  ; CHECK: [[L2]]:
+  ; CHECK-NEXT: store i96 %a, i96* %p, align 8
+  
+  store i96 %a, i96* %p, align 8
+  ret void
+}

diff --git a/llvm/test/Instrumentation/DataFlowSanitizer/origin_store_threshold.ll b/llvm/test/Instrumentation/DataFlowSanitizer/origin_store_threshold.ll
new file mode 100644
index 000000000000..3b3be8392cfe
--- /dev/null
+++ b/llvm/test/Instrumentation/DataFlowSanitizer/origin_store_threshold.ll
@@ -0,0 +1,21 @@
+; RUN: opt < %s -dfsan -dfsan-track-origins=1 -dfsan-fast-16-labels=true -dfsan-instrument-with-call-threshold=0 -S | FileCheck %s --check-prefix=CHECK
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK: @__dfsan_shadow_width_bits = weak_odr constant i32 [[#SBITS:]]
+; CHECK: @__dfsan_shadow_width_bytes = weak_odr constant i32 [[#SBYTES:]]
+
+define void @store_threshold([2 x i64]* %p, [2 x i64] %a) {
+  ; CHECK: @"dfs$store_threshold"
+  ; CHECK: [[AO:%.*]] = load i32, i32* getelementptr inbounds ([200 x i32], [200 x i32]* @__dfsan_arg_origin_tls, i64 0, i64 1), align 4
+  ; CHECK: [[AS:%.*]] = load [2 x i[[#SBITS]]], [2 x i[[#SBITS]]]* inttoptr (i64 add (i64 ptrtoint ([100 x i64]* @__dfsan_arg_tls to i64), i64 2) to [2 x i[[#SBITS]]]*), align 2
+  ; CHECK: [[AS0:%.*]] = extractvalue [2 x i[[#SBITS]]] [[AS]], 0
+  ; CHECK: [[AS1:%.*]] = extractvalue [2 x i[[#SBITS]]] [[AS]], 1
+  ; CHECK: [[AS01:%.*]] = or i[[#SBITS]] [[AS0]], [[AS1]]
+  ; CHECK: [[ADDR:%.*]] = bitcast [2 x i64]* %p to i8*
+  ; CHECK: call void @__dfsan_maybe_store_origin(i[[#SBITS]] [[AS01]], i8* [[ADDR]], i64 16, i32 [[AO]])
+  ; CHECK: store [2 x i64] %a, [2 x i64]* %p, align 8
+
+  store [2 x i64] %a, [2 x i64]* %p
+  ret void
+}
