[llvm] [msan] Reduces overhead of #113200 by 10% (PR #113201)

Vitaly Buka via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 22 14:13:40 PDT 2024


https://github.com/vitalybuka updated https://github.com/llvm/llvm-project/pull/113201

From ced59e0c065cfcb21dca5e02fc48f4ef6fe66b7e Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Mon, 21 Oct 2024 10:51:40 -0700
Subject: [PATCH 1/3] [𝘀𝗽𝗿] changes to main this commit is based on
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.4

[skip ci]
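
This first patch in the spr stack only captures the state the PR builds on:
a one-line flip of the -msan-handle-icmp-exact default to true (presumably
the #113200 change whose overhead the PR then trims) plus the regenerated
test expectations. The sketch below is a plain-C++ paraphrase of the
interval rule that option enables for relational compares, modeled on the
comment in handleRelationalComparisonExact() for the unsigned case; it is
illustrative only, not the LLVM implementation, and names like
icmpUltShadow are made up here:

#include <cstdint>
#include <cstdio>

// Shadow convention: a 1 bit in the shadow mask means that bit of the
// value is uninitialized and may hold anything.
struct ShadowedU32 {
  uint32_t val;    // the concrete bits
  uint32_t shadow; // 1 = poisoned bit
};

// Shadow of the i1 result of an unsigned (a < b): poisoned (1) iff the
// outcome can change depending on how the poisoned bits are filled in.
uint32_t icmpUltShadow(ShadowedU32 a, ShadowedU32 b) {
  uint32_t aMin = a.val & ~a.shadow; // lowest value a can take
  uint32_t aMax = a.val | a.shadow;  // highest value a can take
  uint32_t bMin = b.val & ~b.shadow;
  uint32_t bMax = b.val | b.shadow;
  // (a < b) is fully defined iff (aMin < bMax) == (aMax < bMin):
  // either aMax < bMin (always true) or aMin >= bMax (always false).
  return (aMin < bMax) != (aMax < bMin);
}

int main() {
  ShadowedU32 a{0x10, 0x0F}; // a is somewhere in [0x10, 0x1F]
  ShadowedU32 b{0x100, 0x0}; // b is exactly 0x100
  std::printf("shadow(a<b) = %u\n", icmpUltShadow(a, b)); // prints 0
}

Equality compares (like the icmp eq in LoadAndCmp below) go through a
separate, already-exact rule, so much of the test churn in this base patch
comes from regenerating the assertions rather than from new instrumentation.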
---
 .../Instrumentation/MemorySanitizer.cpp       |    2 +-
 .../MemorySanitizer/msan_basic.ll             | 4060 +++++++++++++++--
 .../MemorySanitizer/pr32842.ll                |   52 +-
 3 files changed, 3686 insertions(+), 428 deletions(-)
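
Most of the diff below is mechanical: msan_basic.ll was regenerated with
update_test_checks.py, which expands the old grep-style patterns into full
function bodies and splits the prefixes into CHECK / ORIGIN / CALLS. For
reading those bodies, the recurring magic numbers are the x86-64 Linux
shadow mapping; here is a hedged C++ sketch of the arithmetic the CHECK
lines spell out, mirroring the test output rather than the pass internals:

#include <cstdint>
#include <cstdio>

constexpr uint64_t kShadowXorMask = 0x500000000000ULL; // 87960930222080
constexpr uint64_t kOriginOffset  = 0x100000000000ULL; // 17592186044416

// Shadow of an application address: the same address XORed into the
// shadow region ("xor i64 %p, 87960930222080" in the CHECK lines).
uint64_t shadowAddr(uint64_t app) { return app ^ kShadowXorMask; }

// Origin slot: shadow address plus a fixed offset. Origin slots are 4
// bytes wide, so narrow accesses round down ("and i64 ..., -4" in the
// SExt test); the mask is a no-op for 4-byte-aligned addresses.
uint64_t originAddr(uint64_t app) {
  return ((app ^ kShadowXorMask) + kOriginOffset) & ~3ULL;
}

int main() {
  uint64_t p = 0x7ffd12345678ULL;
  std::printf("shadow = 0x%llx, origin = 0x%llx\n",
              (unsigned long long)shadowAddr(p),
              (unsigned long long)originAddr(p));
}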

diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 9e174e2415e719..440413b8c2684f 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -276,7 +276,7 @@ static cl::opt<bool>
 static cl::opt<bool>
     ClHandleICmpExact("msan-handle-icmp-exact",
                       cl::desc("exact handling of relational integer ICmp"),
-                      cl::Hidden, cl::init(false));
+                      cl::Hidden, cl::init(true));
 
 static cl::opt<bool> ClHandleLifetimeIntrinsics(
     "msan-handle-lifetime-intrinsics",
diff --git a/llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll b/llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll
index fe7637918524d1..7aeb763b309048 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/msan_basic.ll
@@ -1,76 +1,240 @@
-; RUN: opt < %s -msan-check-access-address=0 -S -passes='module(msan)' 2>&1 | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=CHECK,NOORIGINS --implicit-check-not="call void @__msan_warning"
-; RUN: opt < %s --passes='module(msan)' -msan-check-access-address=0 -S | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=CHECK,NOORIGINS --implicit-check-not="call void @__msan_warning"
-; RUN: opt < %s -msan-check-access-address=0 -msan-track-origins=1 -S -passes='module(msan)' 2>&1 | FileCheck -allow-deprecated-dag-overlap -check-prefixes=CHECK,ORIGINS %s --implicit-check-not="call void @__msan_warning"
-; RUN: opt < %s -passes='module(msan)' -msan-check-access-address=0 -msan-track-origins=1 -S | FileCheck -allow-deprecated-dag-overlap -check-prefixes=CHECK,ORIGINS %s --implicit-check-not="call void @__msan_warning"
-; RUN: opt < %s -passes='module(msan)' -msan-instrumentation-with-call-threshold=0 -msan-track-origins=1 -S | FileCheck -allow-deprecated-dag-overlap -check-prefixes=CHECK-CALLS %s --implicit-check-not="call void @__msan_warning"
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -S -passes='module(msan)' -msan-check-access-address=0                                      | FileCheck %s --check-prefixes=CHECK
+; RUN: opt < %s -S -passes='module(msan)' -msan-check-access-address=0                -msan-track-origins=1 | FileCheck %s --check-prefixes=ORIGIN
+; RUN: opt < %s -S -passes='module(msan)' -msan-instrumentation-with-call-threshold=0 -msan-track-origins=1 | FileCheck %s --check-prefixes=CALLS
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-; CHECK: @llvm.used = appending global [1 x ptr] [ptr @msan.module_ctor]
-; CHECK: @llvm.global_ctors {{.*}} { i32 0, ptr @msan.module_ctor, ptr null }
 
 ; Check the presence and the linkage type of __msan_track_origins and
 ; other interface symbols.
-; CHECK-NOT: @__msan_track_origins
 ; ORIGINS: @__msan_track_origins = weak_odr constant i32 1
-; CHECK-NOT: @__msan_keep_going = weak_odr constant i32 0
-; CHECK: @__msan_retval_tls = external thread_local(initialexec) global [{{.*}}]
-; CHECK: @__msan_retval_origin_tls = external thread_local(initialexec) global i32
-; CHECK: @__msan_param_tls = external thread_local(initialexec) global [{{.*}}]
-; CHECK: @__msan_param_origin_tls = external thread_local(initialexec) global [{{.*}}]
-; CHECK: @__msan_va_arg_tls = external thread_local(initialexec) global [{{.*}}]
-; CHECK: @__msan_va_arg_overflow_size_tls = external thread_local(initialexec) global i64
 
 
 ; Check instrumentation of stores
 
 define void @Store(ptr nocapture %p, i32 %x) nounwind uwtable sanitize_memory {
+; CHECK-LABEL: define void @Store(
+; CHECK-SAME: ptr nocapture [[P:%.*]], i32 [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[P]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080
+; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TMP3]], align 4
+; CHECK-NEXT:    store i32 [[X]], ptr [[P]], align 4
+; CHECK-NEXT:    ret void
+;
+; ORIGIN-LABEL: define void @Store(
+; ORIGIN-SAME: ptr nocapture [[P:%.*]], i32 [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+; ORIGIN-NEXT:  [[ENTRY:.*:]]
+; ORIGIN-NEXT:    [[TMP0:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; ORIGIN-NEXT:    [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 8) to ptr), align 4
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64
+; ORIGIN-NEXT:    [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080
+; ORIGIN-NEXT:    [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; ORIGIN-NEXT:    [[TMP5:%.*]] = add i64 [[TMP3]], 17592186044416
+; ORIGIN-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; ORIGIN-NEXT:    store i32 [[TMP0]], ptr [[TMP4]], align 4
+; ORIGIN-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP0]], 0
+; ORIGIN-NEXT:    br i1 [[_MSCMP]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1:![0-9]+]]
+; ORIGIN:       [[BB7]]:
+; ORIGIN-NEXT:    store i32 [[TMP1]], ptr [[TMP6]], align 4
+; ORIGIN-NEXT:    br label %[[BB8]]
+; ORIGIN:       [[BB8]]:
+; ORIGIN-NEXT:    store i32 [[X]], ptr [[P]], align 4
+; ORIGIN-NEXT:    ret void
+;
+; CALLS-LABEL: define void @Store(
+; CALLS-SAME: ptr nocapture [[P:%.*]], i32 [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+; CALLS-NEXT:  [[ENTRY:.*:]]
+; CALLS-NEXT:    [[TMP0:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CALLS-NEXT:    [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 8) to ptr), align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    call void @__msan_maybe_warning_8(i64 zeroext [[TMP0]], i32 zeroext [[TMP1]])
+; CALLS-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64
+; CALLS-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080
+; CALLS-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CALLS-NEXT:    [[TMP7:%.*]] = add i64 [[TMP5]], 17592186044416
+; CALLS-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
+; CALLS-NEXT:    store i32 [[TMP2]], ptr [[TMP6]], align 4
+; CALLS-NEXT:    call void @__msan_maybe_store_origin_4(i32 zeroext [[TMP2]], ptr [[P]], i32 zeroext [[TMP3]])
+; CALLS-NEXT:    store i32 [[X]], ptr [[P]], align 4
+; CALLS-NEXT:    ret void
+;
 entry:
   store i32 %x, ptr %p, align 4
   ret void
 }
 
-; CHECK-LABEL: @Store
-; CHECK: load {{.*}} @__msan_param_tls
-; ORIGINS: load {{.*}} @__msan_param_origin_tls
-; CHECK: store
-; ORIGINS: icmp
-; ORIGINS: br i1
-; ORIGINS: {{^[0-9]+}}:
-; ORIGINS: store
-; ORIGINS: br label
-; ORIGINS: {{^[0-9]+}}:
-; CHECK: store
-; CHECK: ret void
-
-
 ; Check instrumentation of aligned stores
 ; Shadow store has the same alignment as the original store; origin store
 ; does not specify explicit alignment.
 
 define void @AlignedStore(ptr nocapture %p, i32 %x) nounwind uwtable sanitize_memory {
+; CHECK-LABEL: define void @AlignedStore(
+; CHECK-SAME: ptr nocapture [[P:%.*]], i32 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[P]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080
+; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TMP3]], align 32
+; CHECK-NEXT:    store i32 [[X]], ptr [[P]], align 32
+; CHECK-NEXT:    ret void
+;
+; ORIGIN-LABEL: define void @AlignedStore(
+; ORIGIN-SAME: ptr nocapture [[P:%.*]], i32 [[X:%.*]]) #[[ATTR0]] {
+; ORIGIN-NEXT:  [[ENTRY:.*:]]
+; ORIGIN-NEXT:    [[TMP0:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; ORIGIN-NEXT:    [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 8) to ptr), align 4
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64
+; ORIGIN-NEXT:    [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080
+; ORIGIN-NEXT:    [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; ORIGIN-NEXT:    [[TMP5:%.*]] = add i64 [[TMP3]], 17592186044416
+; ORIGIN-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; ORIGIN-NEXT:    store i32 [[TMP0]], ptr [[TMP4]], align 32
+; ORIGIN-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP0]], 0
+; ORIGIN-NEXT:    br i1 [[_MSCMP]], label %[[BB7:.*]], label %[[BB11:.*]], !prof [[PROF1]]
+; ORIGIN:       [[BB7]]:
+; ORIGIN-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP1]] to i64
+; ORIGIN-NEXT:    [[TMP9:%.*]] = shl i64 [[TMP8]], 32
+; ORIGIN-NEXT:    [[TMP10:%.*]] = or i64 [[TMP8]], [[TMP9]]
+; ORIGIN-NEXT:    store i32 [[TMP1]], ptr [[TMP6]], align 32
+; ORIGIN-NEXT:    br label %[[BB11]]
+; ORIGIN:       [[BB11]]:
+; ORIGIN-NEXT:    store i32 [[X]], ptr [[P]], align 32
+; ORIGIN-NEXT:    ret void
+;
+; CALLS-LABEL: define void @AlignedStore(
+; CALLS-SAME: ptr nocapture [[P:%.*]], i32 [[X:%.*]]) #[[ATTR0]] {
+; CALLS-NEXT:  [[ENTRY:.*:]]
+; CALLS-NEXT:    [[TMP0:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CALLS-NEXT:    [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 8) to ptr), align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    call void @__msan_maybe_warning_8(i64 zeroext [[TMP0]], i32 zeroext [[TMP1]])
+; CALLS-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64
+; CALLS-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080
+; CALLS-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CALLS-NEXT:    [[TMP7:%.*]] = add i64 [[TMP5]], 17592186044416
+; CALLS-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
+; CALLS-NEXT:    store i32 [[TMP2]], ptr [[TMP6]], align 32
+; CALLS-NEXT:    call void @__msan_maybe_store_origin_4(i32 zeroext [[TMP2]], ptr [[P]], i32 zeroext [[TMP3]])
+; CALLS-NEXT:    store i32 [[X]], ptr [[P]], align 32
+; CALLS-NEXT:    ret void
+;
 entry:
   store i32 %x, ptr %p, align 32
   ret void
 }
 
-; CHECK-LABEL: @AlignedStore
-; CHECK: load {{.*}} @__msan_param_tls
-; ORIGINS: load {{.*}} @__msan_param_origin_tls
-; CHECK: store {{.*}} align 32
-; ORIGINS: icmp
-; ORIGINS: br i1
-; ORIGINS: {{^[0-9]+}}:
-; ORIGINS: store {{.*}} align 32
-; ORIGINS: br label
-; ORIGINS: {{^[0-9]+}}:
-; CHECK: store {{.*}} align 32
-; CHECK: ret void
-
-
 ; load followed by cmp: check that we load the shadow and call __msan_warning_with_origin.
 define void @LoadAndCmp(ptr nocapture %a) nounwind uwtable sanitize_memory {
+; CHECK-LABEL: define void @LoadAndCmp(
+; CHECK-SAME: ptr nocapture [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080
+; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT:    [[_MSLD:%.*]] = load i32, ptr [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = xor i32 [[TMP0]], 0
+; CHECK-NEXT:    [[TMP5:%.*]] = or i32 [[_MSLD]], 0
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp ne i32 [[TMP5]], 0
+; CHECK-NEXT:    [[TMP7:%.*]] = xor i32 [[TMP5]], -1
+; CHECK-NEXT:    [[TMP8:%.*]] = and i32 [[TMP7]], [[TMP4]]
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
+; CHECK-NEXT:    [[_MSPROP_ICMP:%.*]] = and i1 [[TMP6]], [[TMP9]]
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT:    br i1 [[_MSPROP_ICMP]], label %[[BB10:.*]], label %[[BB11:.*]], !prof [[PROF1:![0-9]+]]
+; CHECK:       [[BB10]]:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR12:[0-9]+]]
+; CHECK-NEXT:    unreachable
+; CHECK:       [[BB11]]:
+; CHECK-NEXT:    br i1 [[TOBOOL]], label %[[IF_END:.*]], label %[[IF_THEN:.*]]
+; CHECK:       [[IF_THEN]]:
+; CHECK-NEXT:    store i64 0, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    tail call void (...) @foo() #[[ATTR5:[0-9]+]]
+; CHECK-NEXT:    br label %[[IF_END]]
+; CHECK:       [[IF_END]]:
+; CHECK-NEXT:    ret void
+;
+; ORIGIN-LABEL: define void @LoadAndCmp(
+; ORIGIN-SAME: ptr nocapture [[A:%.*]]) #[[ATTR0]] {
+; ORIGIN-NEXT:  [[ENTRY:.*:]]
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
+; ORIGIN-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; ORIGIN-NEXT:    [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080
+; ORIGIN-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; ORIGIN-NEXT:    [[TMP4:%.*]] = add i64 [[TMP2]], 17592186044416
+; ORIGIN-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr
+; ORIGIN-NEXT:    [[_MSLD:%.*]] = load i32, ptr [[TMP3]], align 4
+; ORIGIN-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
+; ORIGIN-NEXT:    [[TMP7:%.*]] = xor i32 [[TMP0]], 0
+; ORIGIN-NEXT:    [[TMP8:%.*]] = or i32 [[_MSLD]], 0
+; ORIGIN-NEXT:    [[TMP9:%.*]] = icmp ne i32 [[TMP8]], 0
+; ORIGIN-NEXT:    [[TMP10:%.*]] = xor i32 [[TMP8]], -1
+; ORIGIN-NEXT:    [[TMP11:%.*]] = and i32 [[TMP10]], [[TMP7]]
+; ORIGIN-NEXT:    [[TMP12:%.*]] = icmp eq i32 [[TMP11]], 0
+; ORIGIN-NEXT:    [[_MSPROP_ICMP:%.*]] = and i1 [[TMP9]], [[TMP12]]
+; ORIGIN-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[TMP0]], 0
+; ORIGIN-NEXT:    br i1 [[_MSPROP_ICMP]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]]
+; ORIGIN:       [[BB13]]:
+; ORIGIN-NEXT:    call void @__msan_warning_with_origin_noreturn(i32 [[TMP6]]) #[[ATTR12:[0-9]+]]
+; ORIGIN-NEXT:    unreachable
+; ORIGIN:       [[BB14]]:
+; ORIGIN-NEXT:    br i1 [[TOBOOL]], label %[[IF_END:.*]], label %[[IF_THEN:.*]]
+; ORIGIN:       [[IF_THEN]]:
+; ORIGIN-NEXT:    store i64 0, ptr @__msan_va_arg_overflow_size_tls, align 8
+; ORIGIN-NEXT:    tail call void (...) @foo() #[[ATTR5:[0-9]+]]
+; ORIGIN-NEXT:    br label %[[IF_END]]
+; ORIGIN:       [[IF_END]]:
+; ORIGIN-NEXT:    ret void
+;
+; CALLS-LABEL: define void @LoadAndCmp(
+; CALLS-SAME: ptr nocapture [[A:%.*]]) #[[ATTR0]] {
+; CALLS-NEXT:  [[ENTRY:.*:]]
+; CALLS-NEXT:    [[TMP0:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    call void @__msan_maybe_warning_8(i64 zeroext [[TMP0]], i32 zeroext [[TMP1]])
+; CALLS-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A]], align 4
+; CALLS-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[A]] to i64
+; CALLS-NEXT:    [[TMP4:%.*]] = xor i64 [[TMP3]], 87960930222080
+; CALLS-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr
+; CALLS-NEXT:    [[TMP6:%.*]] = add i64 [[TMP4]], 17592186044416
+; CALLS-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CALLS-NEXT:    [[_MSLD:%.*]] = load i32, ptr [[TMP5]], align 4
+; CALLS-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+; CALLS-NEXT:    [[TMP9:%.*]] = xor i32 [[TMP2]], 0
+; CALLS-NEXT:    [[TMP10:%.*]] = or i32 [[_MSLD]], 0
+; CALLS-NEXT:    [[TMP11:%.*]] = icmp ne i32 [[TMP10]], 0
+; CALLS-NEXT:    [[TMP12:%.*]] = xor i32 [[TMP10]], -1
+; CALLS-NEXT:    [[TMP13:%.*]] = and i32 [[TMP12]], [[TMP9]]
+; CALLS-NEXT:    [[TMP14:%.*]] = icmp eq i32 [[TMP13]], 0
+; CALLS-NEXT:    [[_MSPROP_ICMP:%.*]] = and i1 [[TMP11]], [[TMP14]]
+; CALLS-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[TMP2]], 0
+; CALLS-NEXT:    [[TMP15:%.*]] = zext i1 [[_MSPROP_ICMP]] to i8
+; CALLS-NEXT:    call void @__msan_maybe_warning_1(i8 zeroext [[TMP15]], i32 zeroext [[TMP8]])
+; CALLS-NEXT:    br i1 [[TOBOOL]], label %[[IF_END:.*]], label %[[IF_THEN:.*]]
+; CALLS:       [[IF_THEN]]:
+; CALLS-NEXT:    store i64 0, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CALLS-NEXT:    tail call void (...) @foo() #[[ATTR5:[0-9]+]]
+; CALLS-NEXT:    br label %[[IF_END]]
+; CALLS:       [[IF_END]]:
+; CALLS-NEXT:    ret void
+;
 entry:
   %0 = load i32, ptr %a, align 4
   %tobool = icmp eq i32 %0, 0
@@ -86,44 +250,269 @@ if.end:                                           ; preds = %entry, %if.then
 
 declare void @foo(...)
 
-; CHECK-LABEL: @LoadAndCmp
-; CHECK: %0 = load i32,
-; CHECK: = load
-; ORIGINS: %[[ORIGIN:.*]] = load
-; NOORIGINS: call void @__msan_warning_noreturn()
-; ORIGINS: call void @__msan_warning_with_origin_noreturn(i32 %[[ORIGIN]])
-; CHECK-CONT:
-; CHECK-NEXT: unreachable
-; CHECK: br i1 %tobool
-; CHECK: ret void
-
 ; Check that we store the shadow for the retval.
 define i32 @ReturnInt() nounwind uwtable readnone sanitize_memory {
+; CHECK-LABEL: define i32 @ReturnInt(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i32 123
+;
+; ORIGIN-LABEL: define i32 @ReturnInt(
+; ORIGIN-SAME: ) #[[ATTR0]] {
+; ORIGIN-NEXT:  [[ENTRY:.*:]]
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    store i32 0, ptr @__msan_retval_origin_tls, align 4
+; ORIGIN-NEXT:    ret i32 123
+;
+; CALLS-LABEL: define i32 @ReturnInt(
+; CALLS-SAME: ) #[[ATTR0]] {
+; CALLS-NEXT:  [[ENTRY:.*:]]
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    store i32 0, ptr @__msan_retval_origin_tls, align 4
+; CALLS-NEXT:    ret i32 123
+;
 entry:
   ret i32 123
 }
 
-; CHECK-LABEL: @ReturnInt
-; CHECK: store i32 0,{{.*}}__msan_retval_tls
-; CHECK: ret i32
 
 ; Check that we get the shadow for the retval.
 define void @CopyRetVal(ptr nocapture %a) nounwind uwtable sanitize_memory {
+; CHECK-LABEL: define void @CopyRetVal(
+; CHECK-SAME: ptr nocapture [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 @ReturnInt() #[[ATTR5]]
+; CHECK-NEXT:    [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i64 [[TMP0]], 87960930222080
+; CHECK-NEXT:    [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr
+; CHECK-NEXT:    store i32 [[_MSRET]], ptr [[TMP2]], align 4
+; CHECK-NEXT:    store i32 [[CALL]], ptr [[A]], align 4
+; CHECK-NEXT:    ret void
+;
+; ORIGIN-LABEL: define void @CopyRetVal(
+; ORIGIN-SAME: ptr nocapture [[A:%.*]]) #[[ATTR0]] {
+; ORIGIN-NEXT:  [[ENTRY:.*:]]
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    [[CALL:%.*]] = tail call i32 @ReturnInt() #[[ATTR5]]
+; ORIGIN-NEXT:    [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    [[TMP0:%.*]] = load i32, ptr @__msan_retval_origin_tls, align 4
+; ORIGIN-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; ORIGIN-NEXT:    [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080
+; ORIGIN-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; ORIGIN-NEXT:    [[TMP4:%.*]] = add i64 [[TMP2]], 17592186044416
+; ORIGIN-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr
+; ORIGIN-NEXT:    store i32 [[_MSRET]], ptr [[TMP3]], align 4
+; ORIGIN-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[_MSRET]], 0
+; ORIGIN-NEXT:    br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
+; ORIGIN:       [[BB6]]:
+; ORIGIN-NEXT:    store i32 [[TMP0]], ptr [[TMP5]], align 4
+; ORIGIN-NEXT:    br label %[[BB7]]
+; ORIGIN:       [[BB7]]:
+; ORIGIN-NEXT:    store i32 [[CALL]], ptr [[A]], align 4
+; ORIGIN-NEXT:    ret void
+;
+; CALLS-LABEL: define void @CopyRetVal(
+; CALLS-SAME: ptr nocapture [[A:%.*]]) #[[ATTR0]] {
+; CALLS-NEXT:  [[ENTRY:.*:]]
+; CALLS-NEXT:    [[TMP0:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    [[CALL:%.*]] = tail call i32 @ReturnInt() #[[ATTR5]]
+; CALLS-NEXT:    [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    [[TMP2:%.*]] = load i32, ptr @__msan_retval_origin_tls, align 4
+; CALLS-NEXT:    call void @__msan_maybe_warning_8(i64 zeroext [[TMP0]], i32 zeroext [[TMP1]])
+; CALLS-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[A]] to i64
+; CALLS-NEXT:    [[TMP4:%.*]] = xor i64 [[TMP3]], 87960930222080
+; CALLS-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr
+; CALLS-NEXT:    [[TMP6:%.*]] = add i64 [[TMP4]], 17592186044416
+; CALLS-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CALLS-NEXT:    store i32 [[_MSRET]], ptr [[TMP5]], align 4
+; CALLS-NEXT:    call void @__msan_maybe_store_origin_4(i32 zeroext [[_MSRET]], ptr [[A]], i32 zeroext [[TMP2]])
+; CALLS-NEXT:    store i32 [[CALL]], ptr [[A]], align 4
+; CALLS-NEXT:    ret void
+;
 entry:
   %call = tail call i32 @ReturnInt() nounwind
   store i32 %call, ptr %a, align 4
   ret void
 }
 
-; CHECK-LABEL: @CopyRetVal
-; CHECK: load{{.*}}__msan_retval_tls
-; CHECK: store
-; CHECK: store
-; CHECK: ret void
 
 
 ; Check that we generate PHIs for shadow.
 define void @FuncWithPhi(ptr nocapture %a, ptr %b, ptr nocapture %c) nounwind uwtable sanitize_memory {
+; CHECK-LABEL: define void @FuncWithPhi(
+; CHECK-SAME: ptr nocapture [[A:%.*]], ptr [[B:%.*]], ptr nocapture [[C:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i64 [[TMP1]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = or i64 [[TMP0]], 0
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne i64 [[TMP3]], 0
+; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP3]], -1
+; CHECK-NEXT:    [[TMP6:%.*]] = and i64 [[TMP5]], [[TMP2]]
+; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[TMP6]], 0
+; CHECK-NEXT:    [[_MSPROP_ICMP:%.*]] = and i1 [[TMP4]], [[TMP7]]
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq ptr [[B]], null
+; CHECK-NEXT:    br i1 [[_MSPROP_ICMP]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]]
+; CHECK:       [[BB8]]:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR12]]
+; CHECK-NEXT:    unreachable
+; CHECK:       [[BB9]]:
+; CHECK-NEXT:    br i1 [[TOBOOL]], label %[[IF_ELSE:.*]], label %[[IF_THEN:.*]]
+; CHECK:       [[IF_THEN]]:
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[B]], align 4
+; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT:    [[TMP12:%.*]] = xor i64 [[TMP11]], 87960930222080
+; CHECK-NEXT:    [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr
+; CHECK-NEXT:    [[_MSLD1:%.*]] = load i32, ptr [[TMP13]], align 4
+; CHECK-NEXT:    br label %[[IF_END:.*]]
+; CHECK:       [[IF_ELSE]]:
+; CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr [[C]], align 4
+; CHECK-NEXT:    [[TMP15:%.*]] = ptrtoint ptr [[C]] to i64
+; CHECK-NEXT:    [[TMP16:%.*]] = xor i64 [[TMP15]], 87960930222080
+; CHECK-NEXT:    [[TMP17:%.*]] = inttoptr i64 [[TMP16]] to ptr
+; CHECK-NEXT:    [[_MSLD:%.*]] = load i32, ptr [[TMP17]], align 4
+; CHECK-NEXT:    br label %[[IF_END]]
+; CHECK:       [[IF_END]]:
+; CHECK-NEXT:    [[_MSPHI_S:%.*]] = phi i32 [ [[_MSLD1]], %[[IF_THEN]] ], [ [[_MSLD]], %[[IF_ELSE]] ]
+; CHECK-NEXT:    [[T_0:%.*]] = phi i32 [ [[TMP10]], %[[IF_THEN]] ], [ [[TMP14]], %[[IF_ELSE]] ]
+; CHECK-NEXT:    [[TMP18:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT:    [[TMP19:%.*]] = xor i64 [[TMP18]], 87960930222080
+; CHECK-NEXT:    [[TMP20:%.*]] = inttoptr i64 [[TMP19]] to ptr
+; CHECK-NEXT:    store i32 [[_MSPHI_S]], ptr [[TMP20]], align 4
+; CHECK-NEXT:    store i32 [[T_0]], ptr [[A]], align 4
+; CHECK-NEXT:    ret void
+;
+; ORIGIN-LABEL: define void @FuncWithPhi(
+; ORIGIN-SAME: ptr nocapture [[A:%.*]], ptr [[B:%.*]], ptr nocapture [[C:%.*]]) #[[ATTR0]] {
+; ORIGIN-NEXT:  [[ENTRY:.*:]]
+; ORIGIN-NEXT:    [[TMP0:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; ORIGIN-NEXT:    [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 8) to ptr), align 4
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[B]] to i64
+; ORIGIN-NEXT:    [[TMP3:%.*]] = xor i64 [[TMP2]], 0
+; ORIGIN-NEXT:    [[TMP4:%.*]] = or i64 [[TMP0]], 0
+; ORIGIN-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
+; ORIGIN-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP4]], -1
+; ORIGIN-NEXT:    [[TMP7:%.*]] = and i64 [[TMP6]], [[TMP3]]
+; ORIGIN-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[TMP7]], 0
+; ORIGIN-NEXT:    [[_MSPROP_ICMP:%.*]] = and i1 [[TMP5]], [[TMP8]]
+; ORIGIN-NEXT:    [[TOBOOL:%.*]] = icmp eq ptr [[B]], null
+; ORIGIN-NEXT:    br i1 [[_MSPROP_ICMP]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1]]
+; ORIGIN:       [[BB9]]:
+; ORIGIN-NEXT:    call void @__msan_warning_with_origin_noreturn(i32 [[TMP1]]) #[[ATTR12]]
+; ORIGIN-NEXT:    unreachable
+; ORIGIN:       [[BB10]]:
+; ORIGIN-NEXT:    br i1 [[TOBOOL]], label %[[IF_ELSE:.*]], label %[[IF_THEN:.*]]
+; ORIGIN:       [[IF_THEN]]:
+; ORIGIN-NEXT:    [[TMP11:%.*]] = load i32, ptr [[B]], align 4
+; ORIGIN-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
+; ORIGIN-NEXT:    [[TMP13:%.*]] = xor i64 [[TMP12]], 87960930222080
+; ORIGIN-NEXT:    [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
+; ORIGIN-NEXT:    [[TMP15:%.*]] = add i64 [[TMP13]], 17592186044416
+; ORIGIN-NEXT:    [[TMP16:%.*]] = inttoptr i64 [[TMP15]] to ptr
+; ORIGIN-NEXT:    [[_MSLD1:%.*]] = load i32, ptr [[TMP14]], align 4
+; ORIGIN-NEXT:    [[TMP17:%.*]] = load i32, ptr [[TMP16]], align 4
+; ORIGIN-NEXT:    br label %[[IF_END:.*]]
+; ORIGIN:       [[IF_ELSE]]:
+; ORIGIN-NEXT:    [[TMP18:%.*]] = load i32, ptr [[C]], align 4
+; ORIGIN-NEXT:    [[TMP19:%.*]] = ptrtoint ptr [[C]] to i64
+; ORIGIN-NEXT:    [[TMP20:%.*]] = xor i64 [[TMP19]], 87960930222080
+; ORIGIN-NEXT:    [[TMP21:%.*]] = inttoptr i64 [[TMP20]] to ptr
+; ORIGIN-NEXT:    [[TMP22:%.*]] = add i64 [[TMP20]], 17592186044416
+; ORIGIN-NEXT:    [[TMP23:%.*]] = inttoptr i64 [[TMP22]] to ptr
+; ORIGIN-NEXT:    [[_MSLD:%.*]] = load i32, ptr [[TMP21]], align 4
+; ORIGIN-NEXT:    [[TMP24:%.*]] = load i32, ptr [[TMP23]], align 4
+; ORIGIN-NEXT:    br label %[[IF_END]]
+; ORIGIN:       [[IF_END]]:
+; ORIGIN-NEXT:    [[_MSPHI_S:%.*]] = phi i32 [ [[_MSLD1]], %[[IF_THEN]] ], [ [[_MSLD]], %[[IF_ELSE]] ]
+; ORIGIN-NEXT:    [[_MSPHI_O:%.*]] = phi i32 [ [[TMP17]], %[[IF_THEN]] ], [ [[TMP24]], %[[IF_ELSE]] ]
+; ORIGIN-NEXT:    [[T_0:%.*]] = phi i32 [ [[TMP11]], %[[IF_THEN]] ], [ [[TMP18]], %[[IF_ELSE]] ]
+; ORIGIN-NEXT:    [[TMP25:%.*]] = ptrtoint ptr [[A]] to i64
+; ORIGIN-NEXT:    [[TMP26:%.*]] = xor i64 [[TMP25]], 87960930222080
+; ORIGIN-NEXT:    [[TMP27:%.*]] = inttoptr i64 [[TMP26]] to ptr
+; ORIGIN-NEXT:    [[TMP28:%.*]] = add i64 [[TMP26]], 17592186044416
+; ORIGIN-NEXT:    [[TMP29:%.*]] = inttoptr i64 [[TMP28]] to ptr
+; ORIGIN-NEXT:    store i32 [[_MSPHI_S]], ptr [[TMP27]], align 4
+; ORIGIN-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[_MSPHI_S]], 0
+; ORIGIN-NEXT:    br i1 [[_MSCMP]], label %[[BB30:.*]], label %[[BB31:.*]], !prof [[PROF1]]
+; ORIGIN:       [[BB30]]:
+; ORIGIN-NEXT:    store i32 [[_MSPHI_O]], ptr [[TMP29]], align 4
+; ORIGIN-NEXT:    br label %[[BB31]]
+; ORIGIN:       [[BB31]]:
+; ORIGIN-NEXT:    store i32 [[T_0]], ptr [[A]], align 4
+; ORIGIN-NEXT:    ret void
+;
+; CALLS-LABEL: define void @FuncWithPhi(
+; CALLS-SAME: ptr nocapture [[A:%.*]], ptr [[B:%.*]], ptr nocapture [[C:%.*]]) #[[ATTR0]] {
+; CALLS-NEXT:  [[ENTRY:.*:]]
+; CALLS-NEXT:    [[TMP0:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 8) to ptr), align 4
+; CALLS-NEXT:    [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CALLS-NEXT:    [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4
+; CALLS-NEXT:    [[TMP4:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP5:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[B]] to i64
+; CALLS-NEXT:    [[TMP7:%.*]] = xor i64 [[TMP6]], 0
+; CALLS-NEXT:    [[TMP8:%.*]] = or i64 [[TMP0]], 0
+; CALLS-NEXT:    [[TMP9:%.*]] = icmp ne i64 [[TMP8]], 0
+; CALLS-NEXT:    [[TMP10:%.*]] = xor i64 [[TMP8]], -1
+; CALLS-NEXT:    [[TMP11:%.*]] = and i64 [[TMP10]], [[TMP7]]
+; CALLS-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[TMP11]], 0
+; CALLS-NEXT:    [[_MSPROP_ICMP:%.*]] = and i1 [[TMP9]], [[TMP12]]
+; CALLS-NEXT:    [[TOBOOL:%.*]] = icmp eq ptr [[B]], null
+; CALLS-NEXT:    [[TMP13:%.*]] = zext i1 [[_MSPROP_ICMP]] to i8
+; CALLS-NEXT:    call void @__msan_maybe_warning_1(i8 zeroext [[TMP13]], i32 zeroext [[TMP1]])
+; CALLS-NEXT:    br i1 [[TOBOOL]], label %[[IF_ELSE:.*]], label %[[IF_THEN:.*]]
+; CALLS:       [[IF_THEN]]:
+; CALLS-NEXT:    call void @__msan_maybe_warning_8(i64 zeroext [[TMP0]], i32 zeroext [[TMP1]])
+; CALLS-NEXT:    [[TMP14:%.*]] = load i32, ptr [[B]], align 4
+; CALLS-NEXT:    [[TMP15:%.*]] = ptrtoint ptr [[B]] to i64
+; CALLS-NEXT:    [[TMP16:%.*]] = xor i64 [[TMP15]], 87960930222080
+; CALLS-NEXT:    [[TMP17:%.*]] = inttoptr i64 [[TMP16]] to ptr
+; CALLS-NEXT:    [[TMP18:%.*]] = add i64 [[TMP16]], 17592186044416
+; CALLS-NEXT:    [[TMP19:%.*]] = inttoptr i64 [[TMP18]] to ptr
+; CALLS-NEXT:    [[_MSLD1:%.*]] = load i32, ptr [[TMP17]], align 4
+; CALLS-NEXT:    [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4
+; CALLS-NEXT:    br label %[[IF_END:.*]]
+; CALLS:       [[IF_ELSE]]:
+; CALLS-NEXT:    call void @__msan_maybe_warning_8(i64 zeroext [[TMP2]], i32 zeroext [[TMP3]])
+; CALLS-NEXT:    [[TMP21:%.*]] = load i32, ptr [[C]], align 4
+; CALLS-NEXT:    [[TMP22:%.*]] = ptrtoint ptr [[C]] to i64
+; CALLS-NEXT:    [[TMP23:%.*]] = xor i64 [[TMP22]], 87960930222080
+; CALLS-NEXT:    [[TMP24:%.*]] = inttoptr i64 [[TMP23]] to ptr
+; CALLS-NEXT:    [[TMP25:%.*]] = add i64 [[TMP23]], 17592186044416
+; CALLS-NEXT:    [[TMP26:%.*]] = inttoptr i64 [[TMP25]] to ptr
+; CALLS-NEXT:    [[_MSLD:%.*]] = load i32, ptr [[TMP24]], align 4
+; CALLS-NEXT:    [[TMP27:%.*]] = load i32, ptr [[TMP26]], align 4
+; CALLS-NEXT:    br label %[[IF_END]]
+; CALLS:       [[IF_END]]:
+; CALLS-NEXT:    [[_MSPHI_S:%.*]] = phi i32 [ [[_MSLD1]], %[[IF_THEN]] ], [ [[_MSLD]], %[[IF_ELSE]] ]
+; CALLS-NEXT:    [[_MSPHI_O:%.*]] = phi i32 [ [[TMP20]], %[[IF_THEN]] ], [ [[TMP27]], %[[IF_ELSE]] ]
+; CALLS-NEXT:    [[T_0:%.*]] = phi i32 [ [[TMP14]], %[[IF_THEN]] ], [ [[TMP21]], %[[IF_ELSE]] ]
+; CALLS-NEXT:    call void @__msan_maybe_warning_8(i64 zeroext [[TMP4]], i32 zeroext [[TMP5]])
+; CALLS-NEXT:    [[TMP28:%.*]] = ptrtoint ptr [[A]] to i64
+; CALLS-NEXT:    [[TMP29:%.*]] = xor i64 [[TMP28]], 87960930222080
+; CALLS-NEXT:    [[TMP30:%.*]] = inttoptr i64 [[TMP29]] to ptr
+; CALLS-NEXT:    [[TMP31:%.*]] = add i64 [[TMP29]], 17592186044416
+; CALLS-NEXT:    [[TMP32:%.*]] = inttoptr i64 [[TMP31]] to ptr
+; CALLS-NEXT:    store i32 [[_MSPHI_S]], ptr [[TMP30]], align 4
+; CALLS-NEXT:    call void @__msan_maybe_store_origin_4(i32 zeroext [[_MSPHI_S]], ptr [[A]], i32 zeroext [[_MSPHI_O]])
+; CALLS-NEXT:    store i32 [[T_0]], ptr [[A]], align 4
+; CALLS-NEXT:    ret void
+;
 entry:
   %tobool = icmp eq ptr %b, null
   br i1 %tobool, label %if.else, label %if.then
@@ -142,17 +531,86 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: @FuncWithPhi
-; NOORIGINS: call void @__msan_warning_noreturn()
-; ORIGINS: call void @__msan_warning_with_origin_noreturn(i32
-; CHECK: = phi
-; CHECK-NEXT: = phi
-; CHECK: store
-; CHECK: store
-; CHECK: ret void
-
 ; Compute shadow for "x << 10"
 define void @ShlConst(ptr nocapture %x) nounwind uwtable sanitize_memory {
+; CHECK-LABEL: define void @ShlConst(
+; CHECK-SAME: ptr nocapture [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[X]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080
+; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT:    [[_MSLD:%.*]] = load i32, ptr [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = shl i32 [[_MSLD]], 10
+; CHECK-NEXT:    [[TMP5:%.*]] = or i32 [[TMP4]], 0
+; CHECK-NEXT:    [[TMP6:%.*]] = shl i32 [[TMP0]], 10
+; CHECK-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[X]] to i64
+; CHECK-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
+; CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
+; CHECK-NEXT:    store i32 [[TMP5]], ptr [[TMP9]], align 4
+; CHECK-NEXT:    store i32 [[TMP6]], ptr [[X]], align 4
+; CHECK-NEXT:    ret void
+;
+; ORIGIN-LABEL: define void @ShlConst(
+; ORIGIN-SAME: ptr nocapture [[X:%.*]]) #[[ATTR0]] {
+; ORIGIN-NEXT:  [[ENTRY:.*:]]
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X]], align 4
+; ORIGIN-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[X]] to i64
+; ORIGIN-NEXT:    [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080
+; ORIGIN-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; ORIGIN-NEXT:    [[TMP4:%.*]] = add i64 [[TMP2]], 17592186044416
+; ORIGIN-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr
+; ORIGIN-NEXT:    [[_MSLD:%.*]] = load i32, ptr [[TMP3]], align 4
+; ORIGIN-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
+; ORIGIN-NEXT:    [[TMP7:%.*]] = shl i32 [[_MSLD]], 10
+; ORIGIN-NEXT:    [[TMP8:%.*]] = or i32 [[TMP7]], 0
+; ORIGIN-NEXT:    [[TMP9:%.*]] = shl i32 [[TMP0]], 10
+; ORIGIN-NEXT:    [[TMP10:%.*]] = ptrtoint ptr [[X]] to i64
+; ORIGIN-NEXT:    [[TMP11:%.*]] = xor i64 [[TMP10]], 87960930222080
+; ORIGIN-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; ORIGIN-NEXT:    [[TMP13:%.*]] = add i64 [[TMP11]], 17592186044416
+; ORIGIN-NEXT:    [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
+; ORIGIN-NEXT:    store i32 [[TMP8]], ptr [[TMP12]], align 4
+; ORIGIN-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP8]], 0
+; ORIGIN-NEXT:    br i1 [[_MSCMP]], label %[[BB15:.*]], label %[[BB16:.*]], !prof [[PROF1]]
+; ORIGIN:       [[BB15]]:
+; ORIGIN-NEXT:    store i32 [[TMP6]], ptr [[TMP14]], align 4
+; ORIGIN-NEXT:    br label %[[BB16]]
+; ORIGIN:       [[BB16]]:
+; ORIGIN-NEXT:    store i32 [[TMP9]], ptr [[X]], align 4
+; ORIGIN-NEXT:    ret void
+;
+; CALLS-LABEL: define void @ShlConst(
+; CALLS-SAME: ptr nocapture [[X:%.*]]) #[[ATTR0]] {
+; CALLS-NEXT:  [[ENTRY:.*:]]
+; CALLS-NEXT:    [[TMP0:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    call void @__msan_maybe_warning_8(i64 zeroext [[TMP0]], i32 zeroext [[TMP1]])
+; CALLS-NEXT:    [[TMP2:%.*]] = load i32, ptr [[X]], align 4
+; CALLS-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[X]] to i64
+; CALLS-NEXT:    [[TMP4:%.*]] = xor i64 [[TMP3]], 87960930222080
+; CALLS-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr
+; CALLS-NEXT:    [[TMP6:%.*]] = add i64 [[TMP4]], 17592186044416
+; CALLS-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CALLS-NEXT:    [[_MSLD:%.*]] = load i32, ptr [[TMP5]], align 4
+; CALLS-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+; CALLS-NEXT:    [[TMP9:%.*]] = shl i32 [[_MSLD]], 10
+; CALLS-NEXT:    [[TMP10:%.*]] = or i32 [[TMP9]], 0
+; CALLS-NEXT:    [[TMP11:%.*]] = shl i32 [[TMP2]], 10
+; CALLS-NEXT:    call void @__msan_maybe_warning_8(i64 zeroext [[TMP0]], i32 zeroext [[TMP1]])
+; CALLS-NEXT:    [[TMP12:%.*]] = ptrtoint ptr [[X]] to i64
+; CALLS-NEXT:    [[TMP13:%.*]] = xor i64 [[TMP12]], 87960930222080
+; CALLS-NEXT:    [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
+; CALLS-NEXT:    [[TMP15:%.*]] = add i64 [[TMP13]], 17592186044416
+; CALLS-NEXT:    [[TMP16:%.*]] = inttoptr i64 [[TMP15]] to ptr
+; CALLS-NEXT:    store i32 [[TMP10]], ptr [[TMP14]], align 4
+; CALLS-NEXT:    call void @__msan_maybe_store_origin_4(i32 zeroext [[TMP10]], ptr [[X]], i32 zeroext [[TMP8]])
+; CALLS-NEXT:    store i32 [[TMP11]], ptr [[X]], align 4
+; CALLS-NEXT:    ret void
+;
 entry:
   %0 = load i32, ptr %x, align 4
   %1 = shl i32 %0, 10
@@ -160,17 +618,97 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: @ShlConst
-; CHECK: = load
-; CHECK: = load
-; CHECK: shl
-; CHECK: shl
-; CHECK: store
-; CHECK: store
-; CHECK: ret void
 
 ; Compute shadow for "10 << x": it should have 'sext i1'.
 define void @ShlNonConst(ptr nocapture %x) nounwind uwtable sanitize_memory {
+; CHECK-LABEL: define void @ShlNonConst(
+; CHECK-SAME: ptr nocapture [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[X]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080
+; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT:    [[_MSLD:%.*]] = load i32, ptr [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[_MSLD]], 0
+; CHECK-NEXT:    [[TMP5:%.*]] = sext i1 [[TMP4]] to i32
+; CHECK-NEXT:    [[TMP6:%.*]] = shl i32 0, [[TMP0]]
+; CHECK-NEXT:    [[TMP7:%.*]] = or i32 [[TMP6]], [[TMP5]]
+; CHECK-NEXT:    [[TMP8:%.*]] = shl i32 10, [[TMP0]]
+; CHECK-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[X]] to i64
+; CHECK-NEXT:    [[TMP10:%.*]] = xor i64 [[TMP9]], 87960930222080
+; CHECK-NEXT:    [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr
+; CHECK-NEXT:    store i32 [[TMP7]], ptr [[TMP11]], align 4
+; CHECK-NEXT:    store i32 [[TMP8]], ptr [[X]], align 4
+; CHECK-NEXT:    ret void
+;
+; ORIGIN-LABEL: define void @ShlNonConst(
+; ORIGIN-SAME: ptr nocapture [[X:%.*]]) #[[ATTR0]] {
+; ORIGIN-NEXT:  [[ENTRY:.*:]]
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X]], align 4
+; ORIGIN-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[X]] to i64
+; ORIGIN-NEXT:    [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080
+; ORIGIN-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; ORIGIN-NEXT:    [[TMP4:%.*]] = add i64 [[TMP2]], 17592186044416
+; ORIGIN-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr
+; ORIGIN-NEXT:    [[_MSLD:%.*]] = load i32, ptr [[TMP3]], align 4
+; ORIGIN-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 4
+; ORIGIN-NEXT:    [[TMP7:%.*]] = icmp ne i32 [[_MSLD]], 0
+; ORIGIN-NEXT:    [[TMP8:%.*]] = sext i1 [[TMP7]] to i32
+; ORIGIN-NEXT:    [[TMP9:%.*]] = shl i32 0, [[TMP0]]
+; ORIGIN-NEXT:    [[TMP10:%.*]] = or i32 [[TMP9]], [[TMP8]]
+; ORIGIN-NEXT:    [[TMP11:%.*]] = icmp ne i32 [[_MSLD]], 0
+; ORIGIN-NEXT:    [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[TMP6]], i32 0
+; ORIGIN-NEXT:    [[TMP13:%.*]] = shl i32 10, [[TMP0]]
+; ORIGIN-NEXT:    [[TMP14:%.*]] = ptrtoint ptr [[X]] to i64
+; ORIGIN-NEXT:    [[TMP15:%.*]] = xor i64 [[TMP14]], 87960930222080
+; ORIGIN-NEXT:    [[TMP16:%.*]] = inttoptr i64 [[TMP15]] to ptr
+; ORIGIN-NEXT:    [[TMP17:%.*]] = add i64 [[TMP15]], 17592186044416
+; ORIGIN-NEXT:    [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr
+; ORIGIN-NEXT:    store i32 [[TMP10]], ptr [[TMP16]], align 4
+; ORIGIN-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP10]], 0
+; ORIGIN-NEXT:    br i1 [[_MSCMP]], label %[[BB19:.*]], label %[[BB20:.*]], !prof [[PROF1]]
+; ORIGIN:       [[BB19]]:
+; ORIGIN-NEXT:    store i32 [[TMP12]], ptr [[TMP18]], align 4
+; ORIGIN-NEXT:    br label %[[BB20]]
+; ORIGIN:       [[BB20]]:
+; ORIGIN-NEXT:    store i32 [[TMP13]], ptr [[X]], align 4
+; ORIGIN-NEXT:    ret void
+;
+; CALLS-LABEL: define void @ShlNonConst(
+; CALLS-SAME: ptr nocapture [[X:%.*]]) #[[ATTR0]] {
+; CALLS-NEXT:  [[ENTRY:.*:]]
+; CALLS-NEXT:    [[TMP0:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    call void @__msan_maybe_warning_8(i64 zeroext [[TMP0]], i32 zeroext [[TMP1]])
+; CALLS-NEXT:    [[TMP2:%.*]] = load i32, ptr [[X]], align 4
+; CALLS-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[X]] to i64
+; CALLS-NEXT:    [[TMP4:%.*]] = xor i64 [[TMP3]], 87960930222080
+; CALLS-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr
+; CALLS-NEXT:    [[TMP6:%.*]] = add i64 [[TMP4]], 17592186044416
+; CALLS-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CALLS-NEXT:    [[_MSLD:%.*]] = load i32, ptr [[TMP5]], align 4
+; CALLS-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 4
+; CALLS-NEXT:    [[TMP9:%.*]] = icmp ne i32 [[_MSLD]], 0
+; CALLS-NEXT:    [[TMP10:%.*]] = sext i1 [[TMP9]] to i32
+; CALLS-NEXT:    [[TMP11:%.*]] = shl i32 0, [[TMP2]]
+; CALLS-NEXT:    [[TMP12:%.*]] = or i32 [[TMP11]], [[TMP10]]
+; CALLS-NEXT:    [[TMP13:%.*]] = icmp ne i32 [[_MSLD]], 0
+; CALLS-NEXT:    [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[TMP8]], i32 0
+; CALLS-NEXT:    [[TMP15:%.*]] = shl i32 10, [[TMP2]]
+; CALLS-NEXT:    call void @__msan_maybe_warning_8(i64 zeroext [[TMP0]], i32 zeroext [[TMP1]])
+; CALLS-NEXT:    [[TMP16:%.*]] = ptrtoint ptr [[X]] to i64
+; CALLS-NEXT:    [[TMP17:%.*]] = xor i64 [[TMP16]], 87960930222080
+; CALLS-NEXT:    [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr
+; CALLS-NEXT:    [[TMP19:%.*]] = add i64 [[TMP17]], 17592186044416
+; CALLS-NEXT:    [[TMP20:%.*]] = inttoptr i64 [[TMP19]] to ptr
+; CALLS-NEXT:    store i32 [[TMP12]], ptr [[TMP18]], align 4
+; CALLS-NEXT:    call void @__msan_maybe_store_origin_4(i32 zeroext [[TMP12]], ptr [[X]], i32 zeroext [[TMP14]])
+; CALLS-NEXT:    store i32 [[TMP15]], ptr [[X]], align 4
+; CALLS-NEXT:    ret void
+;
 entry:
   %0 = load i32, ptr %x, align 4
   %1 = shl i32 10, %0
@@ -178,16 +716,88 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: @ShlNonConst
-; CHECK: = load
-; CHECK: = load
-; CHECK: = sext i1
-; CHECK: store
-; CHECK: store
-; CHECK: ret void
 
 ; SExt
 define void @SExt(ptr nocapture %a, ptr nocapture %b) nounwind uwtable sanitize_memory {
+; CHECK-LABEL: define void @SExt(
+; CHECK-SAME: ptr nocapture [[A:%.*]], ptr nocapture [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[B]], align 2
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080
+; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT:    [[_MSLD:%.*]] = load i16, ptr [[TMP3]], align 2
+; CHECK-NEXT:    [[_MSPROP:%.*]] = sext i16 [[_MSLD]] to i32
+; CHECK-NEXT:    [[TMP4:%.*]] = sext i16 [[TMP0]] to i32
+; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
+; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT:    store i32 [[_MSPROP]], ptr [[TMP7]], align 4
+; CHECK-NEXT:    store i32 [[TMP4]], ptr [[A]], align 4
+; CHECK-NEXT:    ret void
+;
+; ORIGIN-LABEL: define void @SExt(
+; ORIGIN-SAME: ptr nocapture [[A:%.*]], ptr nocapture [[B:%.*]]) #[[ATTR0]] {
+; ORIGIN-NEXT:  [[ENTRY:.*:]]
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[TMP0:%.*]] = load i16, ptr [[B]], align 2
+; ORIGIN-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[B]] to i64
+; ORIGIN-NEXT:    [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080
+; ORIGIN-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; ORIGIN-NEXT:    [[TMP4:%.*]] = add i64 [[TMP2]], 17592186044416
+; ORIGIN-NEXT:    [[TMP5:%.*]] = and i64 [[TMP4]], -4
+; ORIGIN-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; ORIGIN-NEXT:    [[_MSLD:%.*]] = load i16, ptr [[TMP3]], align 2
+; ORIGIN-NEXT:    [[TMP7:%.*]] = load i32, ptr [[TMP6]], align 4
+; ORIGIN-NEXT:    [[_MSPROP:%.*]] = sext i16 [[_MSLD]] to i32
+; ORIGIN-NEXT:    [[TMP8:%.*]] = sext i16 [[TMP0]] to i32
+; ORIGIN-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[A]] to i64
+; ORIGIN-NEXT:    [[TMP10:%.*]] = xor i64 [[TMP9]], 87960930222080
+; ORIGIN-NEXT:    [[TMP11:%.*]] = inttoptr i64 [[TMP10]] to ptr
+; ORIGIN-NEXT:    [[TMP12:%.*]] = add i64 [[TMP10]], 17592186044416
+; ORIGIN-NEXT:    [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr
+; ORIGIN-NEXT:    store i32 [[_MSPROP]], ptr [[TMP11]], align 4
+; ORIGIN-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[_MSPROP]], 0
+; ORIGIN-NEXT:    br i1 [[_MSCMP]], label %[[BB14:.*]], label %[[BB15:.*]], !prof [[PROF1]]
+; ORIGIN:       [[BB14]]:
+; ORIGIN-NEXT:    store i32 [[TMP7]], ptr [[TMP13]], align 4
+; ORIGIN-NEXT:    br label %[[BB15]]
+; ORIGIN:       [[BB15]]:
+; ORIGIN-NEXT:    store i32 [[TMP8]], ptr [[A]], align 4
+; ORIGIN-NEXT:    ret void
+;
+; CALLS-LABEL: define void @SExt(
+; CALLS-SAME: ptr nocapture [[A:%.*]], ptr nocapture [[B:%.*]]) #[[ATTR0]] {
+; CALLS-NEXT:  [[ENTRY:.*:]]
+; CALLS-NEXT:    [[TMP0:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 8) to ptr), align 4
+; CALLS-NEXT:    [[TMP2:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP3:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    call void @__msan_maybe_warning_8(i64 zeroext [[TMP0]], i32 zeroext [[TMP1]])
+; CALLS-NEXT:    [[TMP4:%.*]] = load i16, ptr [[B]], align 2
+; CALLS-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[B]] to i64
+; CALLS-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
+; CALLS-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CALLS-NEXT:    [[TMP8:%.*]] = add i64 [[TMP6]], 17592186044416
+; CALLS-NEXT:    [[TMP9:%.*]] = and i64 [[TMP8]], -4
+; CALLS-NEXT:    [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr
+; CALLS-NEXT:    [[_MSLD:%.*]] = load i16, ptr [[TMP7]], align 2
+; CALLS-NEXT:    [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
+; CALLS-NEXT:    [[_MSPROP:%.*]] = sext i16 [[_MSLD]] to i32
+; CALLS-NEXT:    [[TMP12:%.*]] = sext i16 [[TMP4]] to i32
+; CALLS-NEXT:    call void @__msan_maybe_warning_8(i64 zeroext [[TMP2]], i32 zeroext [[TMP3]])
+; CALLS-NEXT:    [[TMP13:%.*]] = ptrtoint ptr [[A]] to i64
+; CALLS-NEXT:    [[TMP14:%.*]] = xor i64 [[TMP13]], 87960930222080
+; CALLS-NEXT:    [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr
+; CALLS-NEXT:    [[TMP16:%.*]] = add i64 [[TMP14]], 17592186044416
+; CALLS-NEXT:    [[TMP17:%.*]] = inttoptr i64 [[TMP16]] to ptr
+; CALLS-NEXT:    store i32 [[_MSPROP]], ptr [[TMP15]], align 4
+; CALLS-NEXT:    call void @__msan_maybe_store_origin_4(i32 zeroext [[_MSPROP]], ptr [[A]], i32 zeroext [[TMP11]])
+; CALLS-NEXT:    store i32 [[TMP12]], ptr [[A]], align 4
+; CALLS-NEXT:    ret void
+;
 entry:
   %0 = load i16, ptr %b, align 2
   %1 = sext i16 %0 to i32
@@ -195,18 +805,31 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: @SExt
-; CHECK: = load
-; CHECK: = load
-; CHECK: = sext
-; CHECK: = sext
-; CHECK: store
-; CHECK: store
-; CHECK: ret void
 
 
 ; memset
 define void @MemSet(ptr nocapture %x) nounwind uwtable sanitize_memory {
+; CHECK-LABEL: define void @MemSet(
+; CHECK-SAME: ptr nocapture [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP0:%.*]] = call ptr @__msan_memset(ptr [[X]], i32 42, i64 10)
+; CHECK-NEXT:    ret void
+;
+; ORIGIN-LABEL: define void @MemSet(
+; ORIGIN-SAME: ptr nocapture [[X:%.*]]) #[[ATTR0]] {
+; ORIGIN-NEXT:  [[ENTRY:.*:]]
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[TMP0:%.*]] = call ptr @__msan_memset(ptr [[X]], i32 42, i64 10)
+; ORIGIN-NEXT:    ret void
+;
+; CALLS-LABEL: define void @MemSet(
+; CALLS-SAME: ptr nocapture [[X:%.*]]) #[[ATTR0]] {
+; CALLS-NEXT:  [[ENTRY:.*:]]
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    [[TMP0:%.*]] = call ptr @__msan_memset(ptr [[X]], i32 42, i64 10)
+; CALLS-NEXT:    ret void
+;
 entry:
   call void @llvm.memset.p0.i64(ptr %x, i8 42, i64 10, i1 false)
   ret void
@@ -214,13 +837,36 @@ entry:
 
 declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind
 
-; CHECK-LABEL: @MemSet
-; CHECK: call ptr @__msan_memset
-; CHECK: ret void
 
 
 ; memcpy
 define void @MemCpy(ptr nocapture %x, ptr nocapture %y) nounwind uwtable sanitize_memory {
+; CHECK-LABEL: define void @MemCpy(
+; CHECK-SAME: ptr nocapture [[X:%.*]], ptr nocapture [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP1:%.*]] = call ptr @__msan_memcpy(ptr [[X]], ptr [[Y]], i64 10)
+; CHECK-NEXT:    ret void
+;
+; ORIGIN-LABEL: define void @MemCpy(
+; ORIGIN-SAME: ptr nocapture [[X:%.*]], ptr nocapture [[Y:%.*]]) #[[ATTR0]] {
+; ORIGIN-NEXT:  [[ENTRY:.*:]]
+; ORIGIN-NEXT:    [[TMP0:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; ORIGIN-NEXT:    [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 8) to ptr), align 4
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[TMP2:%.*]] = call ptr @__msan_memcpy(ptr [[X]], ptr [[Y]], i64 10)
+; ORIGIN-NEXT:    ret void
+;
+; CALLS-LABEL: define void @MemCpy(
+; CALLS-SAME: ptr nocapture [[X:%.*]], ptr nocapture [[Y:%.*]]) #[[ATTR0]] {
+; CALLS-NEXT:  [[ENTRY:.*:]]
+; CALLS-NEXT:    [[TMP0:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 8) to ptr), align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    [[TMP2:%.*]] = call ptr @__msan_memcpy(ptr [[X]], ptr [[Y]], i64 10)
+; CALLS-NEXT:    ret void
+;
 entry:
   call void @llvm.memcpy.p0.p0.i64(ptr %x, ptr %y, i64 10, i1 false)
   ret void
@@ -228,12 +874,30 @@ entry:
 
 declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) nounwind
 
-; CHECK-LABEL: @MemCpy
-; CHECK: call ptr @__msan_memcpy
-; CHECK: ret void
 
 ; memset.inline
 define void @MemSetInline(ptr nocapture %x) nounwind uwtable sanitize_memory {
+; CHECK-LABEL: define void @MemSetInline(
+; CHECK-SAME: ptr nocapture [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP0:%.*]] = call ptr @__msan_memset(ptr [[X]], i32 42, i64 10)
+; CHECK-NEXT:    ret void
+;
+; ORIGIN-LABEL: define void @MemSetInline(
+; ORIGIN-SAME: ptr nocapture [[X:%.*]]) #[[ATTR0]] {
+; ORIGIN-NEXT:  [[ENTRY:.*:]]
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[TMP0:%.*]] = call ptr @__msan_memset(ptr [[X]], i32 42, i64 10)
+; ORIGIN-NEXT:    ret void
+;
+; CALLS-LABEL: define void @MemSetInline(
+; CALLS-SAME: ptr nocapture [[X:%.*]]) #[[ATTR0]] {
+; CALLS-NEXT:  [[ENTRY:.*:]]
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    [[TMP0:%.*]] = call ptr @__msan_memset(ptr [[X]], i32 42, i64 10)
+; CALLS-NEXT:    ret void
+;
 entry:
   call void @llvm.memset.inline.p0.i64(ptr %x, i8 42, i64 10, i1 false)
   ret void
@@ -241,12 +905,35 @@ entry:
 
 declare void @llvm.memset.inline.p0.i64(ptr nocapture, i8, i64, i1) nounwind
 
-; CHECK-LABEL: @MemSetInline
-; CHECK: call ptr @__msan_memset
-; CHECK: ret void
 
 ; memcpy.inline
 define void @MemCpyInline(ptr nocapture %x, ptr nocapture %y) nounwind uwtable sanitize_memory {
+; CHECK-LABEL: define void @MemCpyInline(
+; CHECK-SAME: ptr nocapture [[X:%.*]], ptr nocapture [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP1:%.*]] = call ptr @__msan_memcpy(ptr [[X]], ptr [[Y]], i64 10)
+; CHECK-NEXT:    ret void
+;
+; ORIGIN-LABEL: define void @MemCpyInline(
+; ORIGIN-SAME: ptr nocapture [[X:%.*]], ptr nocapture [[Y:%.*]]) #[[ATTR0]] {
+; ORIGIN-NEXT:  [[ENTRY:.*:]]
+; ORIGIN-NEXT:    [[TMP0:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; ORIGIN-NEXT:    [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 8) to ptr), align 4
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[TMP2:%.*]] = call ptr @__msan_memcpy(ptr [[X]], ptr [[Y]], i64 10)
+; ORIGIN-NEXT:    ret void
+;
+; CALLS-LABEL: define void @MemCpyInline(
+; CALLS-SAME: ptr nocapture [[X:%.*]], ptr nocapture [[Y:%.*]]) #[[ATTR0]] {
+; CALLS-NEXT:  [[ENTRY:.*:]]
+; CALLS-NEXT:    [[TMP0:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 8) to ptr), align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    [[TMP2:%.*]] = call ptr @__msan_memcpy(ptr [[X]], ptr [[Y]], i64 10)
+; CALLS-NEXT:    ret void
+;
 entry:
   call void @llvm.memcpy.inline.p0.p0.i64(ptr %x, ptr %y, i64 10, i1 false)
   ret void
@@ -254,12 +941,35 @@ entry:
 
 declare void @llvm.memcpy.inline.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) nounwind
 
-; CHECK-LABEL: @MemCpyInline
-; CHECK: call ptr @__msan_memcpy
-; CHECK: ret void
 
 ; memmove is lowered to a call
 define void @MemMove(ptr nocapture %x, ptr nocapture %y) nounwind uwtable sanitize_memory {
+; CHECK-LABEL: define void @MemMove(
+; CHECK-SAME: ptr nocapture [[X:%.*]], ptr nocapture [[Y:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP1:%.*]] = call ptr @__msan_memmove(ptr [[X]], ptr [[Y]], i64 10)
+; CHECK-NEXT:    ret void
+;
+; ORIGIN-LABEL: define void @MemMove(
+; ORIGIN-SAME: ptr nocapture [[X:%.*]], ptr nocapture [[Y:%.*]]) #[[ATTR0]] {
+; ORIGIN-NEXT:  [[ENTRY:.*:]]
+; ORIGIN-NEXT:    [[TMP0:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; ORIGIN-NEXT:    [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 8) to ptr), align 4
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[TMP2:%.*]] = call ptr @__msan_memmove(ptr [[X]], ptr [[Y]], i64 10)
+; ORIGIN-NEXT:    ret void
+;
+; CALLS-LABEL: define void @MemMove(
+; CALLS-SAME: ptr nocapture [[X:%.*]], ptr nocapture [[Y:%.*]]) #[[ATTR0]] {
+; CALLS-NEXT:  [[ENTRY:.*:]]
+; CALLS-NEXT:    [[TMP0:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 8) to ptr), align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    [[TMP2:%.*]] = call ptr @__msan_memmove(ptr [[X]], ptr [[Y]], i64 10)
+; CALLS-NEXT:    ret void
+;
 entry:
   call void @llvm.memmove.p0.p0.i64(ptr %x, ptr %y, i64 10, i1 false)
   ret void
@@ -267,9 +977,6 @@ entry:
 
 declare void @llvm.memmove.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) nounwind
 
-; CHECK-LABEL: @MemMove
-; CHECK: call ptr @__msan_memmove
-; CHECK: ret void
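
The memset.inline/memcpy.inline/memmove tests above are all lowered to
__msan_memset/__msan_memcpy/__msan_memmove runtime calls (even the .inline
variants, which the intrinsic otherwise promises not to expand into a
libcall), because the destination's shadow has to be copied along with its
data. A minimal, self-contained C++ model of that idea; the ShadowedBuf side
table is purely illustrative, the real shadow mapping lives in compiler-rt:

#include <cassert>
#include <cstdint>
#include <cstring>

// Toy model: each buffer carries per-byte shadow (0xFF = uninitialized byte).
struct ShadowedBuf {
  uint8_t data[16];
  uint8_t shadow[16];
};

// Copy the payload *and* the shadow, so initialized-ness travels with data.
void msanMemcpyModel(ShadowedBuf &dst, const ShadowedBuf &src, size_t n) {
  memcpy(dst.data, src.data, n);
  memcpy(dst.shadow, src.shadow, n);
}

int main() {
  ShadowedBuf a, b;
  memset(a.data, 42, sizeof a.data);
  memset(a.shadow, 0x00, sizeof a.shadow); // a is fully initialized
  memset(b.shadow, 0xFF, sizeof b.shadow); // b starts fully uninitialized
  msanMemcpyModel(b, a, 8);
  assert(b.shadow[0] == 0x00 && b.shadow[8] == 0xFF); // first half now defined
}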
 
 ;; ------------
 ;; Placeholder tests that will fail once element atomic @llvm.mem[cpy|move|set] intrinsics have
@@ -281,28 +988,70 @@ declare void @llvm.memmove.element.unordered.atomic.p0.p0.i64(ptr nocapture writ
 declare void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i32) nounwind
 
 define void @atomic_memcpy(ptr nocapture %x, ptr nocapture %y) nounwind {
-  ; CHECK-LABEL: atomic_memcpy
-  ; CHECK-NEXT: call void @llvm.donothing
-  ; CHECK-NEXT: call void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr align 1 %x, ptr align 2 %y, i64 16, i32 1)
-  ; CHECK-NEXT: ret void
+; CHECK-LABEL: define void @atomic_memcpy(
+; CHECK-SAME: ptr nocapture [[X:%.*]], ptr nocapture [[Y:%.*]]) #[[ATTR5]] {
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr align 1 [[X]], ptr align 2 [[Y]], i64 16, i32 1)
+; CHECK-NEXT:    ret void
+;
+; ORIGIN-LABEL: define void @atomic_memcpy(
+; ORIGIN-SAME: ptr nocapture [[X:%.*]], ptr nocapture [[Y:%.*]]) #[[ATTR5]] {
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr align 1 [[X]], ptr align 2 [[Y]], i64 16, i32 1)
+; ORIGIN-NEXT:    ret void
+;
+; CALLS-LABEL: define void @atomic_memcpy(
+; CALLS-SAME: ptr nocapture [[X:%.*]], ptr nocapture [[Y:%.*]]) #[[ATTR5]] {
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    call void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr align 1 [[X]], ptr align 2 [[Y]], i64 16, i32 1)
+; CALLS-NEXT:    ret void
+;
   call void @llvm.memcpy.element.unordered.atomic.p0.p0.i64(ptr align 1 %x, ptr align 2 %y, i64 16, i32 1)
   ret void
 }
 
 define void @atomic_memmove(ptr nocapture %x, ptr nocapture %y) nounwind {
-  ; CHECK-LABEL: atomic_memmove
-  ; CHECK-NEXT: call void @llvm.donothing
-  ; CHECK-NEXT: call void @llvm.memmove.element.unordered.atomic.p0.p0.i64(ptr align 1 %x, ptr align 2 %y, i64 16, i32 1)
-  ; CHECK-NEXT: ret void
+; CHECK-LABEL: define void @atomic_memmove(
+; CHECK-SAME: ptr nocapture [[X:%.*]], ptr nocapture [[Y:%.*]]) #[[ATTR5]] {
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    call void @llvm.memmove.element.unordered.atomic.p0.p0.i64(ptr align 1 [[X]], ptr align 2 [[Y]], i64 16, i32 1)
+; CHECK-NEXT:    ret void
+;
+; ORIGIN-LABEL: define void @atomic_memmove(
+; ORIGIN-SAME: ptr nocapture [[X:%.*]], ptr nocapture [[Y:%.*]]) #[[ATTR5]] {
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    call void @llvm.memmove.element.unordered.atomic.p0.p0.i64(ptr align 1 [[X]], ptr align 2 [[Y]], i64 16, i32 1)
+; ORIGIN-NEXT:    ret void
+;
+; CALLS-LABEL: define void @atomic_memmove(
+; CALLS-SAME: ptr nocapture [[X:%.*]], ptr nocapture [[Y:%.*]]) #[[ATTR5]] {
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    call void @llvm.memmove.element.unordered.atomic.p0.p0.i64(ptr align 1 [[X]], ptr align 2 [[Y]], i64 16, i32 1)
+; CALLS-NEXT:    ret void
+;
   call void @llvm.memmove.element.unordered.atomic.p0.p0.i64(ptr align 1 %x, ptr align 2 %y, i64 16, i32 1)
   ret void
 }
 
 define void @atomic_memset(ptr nocapture %x) nounwind {
-  ; CHECK-LABEL: atomic_memset
-  ; CHECK-NEXT: call void @llvm.donothing
-  ; CHECK-NEXT: call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 1 %x, i8 88, i64 16, i32 1)
-  ; CHECK-NEXT: ret void
+; CHECK-LABEL: define void @atomic_memset(
+; CHECK-SAME: ptr nocapture [[X:%.*]]) #[[ATTR5]] {
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 1 [[X]], i8 88, i64 16, i32 1)
+; CHECK-NEXT:    ret void
+;
+; ORIGIN-LABEL: define void @atomic_memset(
+; ORIGIN-SAME: ptr nocapture [[X:%.*]]) #[[ATTR5]] {
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 1 [[X]], i8 88, i64 16, i32 1)
+; ORIGIN-NEXT:    ret void
+;
+; CALLS-LABEL: define void @atomic_memset(
+; CALLS-SAME: ptr nocapture [[X:%.*]]) #[[ATTR5]] {
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 1 [[X]], i8 88, i64 16, i32 1)
+; CALLS-NEXT:    ret void
+;
   call void @llvm.memset.element.unordered.atomic.p0.i64(ptr align 1 %x, i8 88, i64 16, i32 1)
   ret void
 }
@@ -313,442 +1062,1904 @@ define void @atomic_memset(ptr nocapture %x) nounwind {
 ; Check that we propagate shadow for "select"
 
 define i32 @Select(i32 %a, i32 %b, i1 %c) nounwind uwtable readnone sanitize_memory {
+; CHECK-LABEL: define i32 @Select(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i1 [[C:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i1, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[C]], i32 [[TMP1]], i32 [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = xor i32 [[A]], [[B]]
+; CHECK-NEXT:    [[TMP5:%.*]] = or i32 [[TMP4]], [[TMP1]]
+; CHECK-NEXT:    [[TMP6:%.*]] = or i32 [[TMP5]], [[TMP2]]
+; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select i1 [[TMP0]], i32 [[TMP6]], i32 [[TMP3]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[C]], i32 [[A]], i32 [[B]]
+; CHECK-NEXT:    store i32 [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+; ORIGIN-LABEL: define i32 @Select(
+; ORIGIN-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i1 [[C:%.*]]) #[[ATTR0]] {
+; ORIGIN-NEXT:  [[ENTRY:.*:]]
+; ORIGIN-NEXT:    [[TMP0:%.*]] = load i1, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; ORIGIN-NEXT:    [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4
+; ORIGIN-NEXT:    [[TMP2:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; ORIGIN-NEXT:    [[TMP3:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; ORIGIN-NEXT:    [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; ORIGIN-NEXT:    [[TMP5:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 8) to ptr), align 4
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[TMP6:%.*]] = select i1 [[C]], i32 [[TMP2]], i32 [[TMP4]]
+; ORIGIN-NEXT:    [[TMP7:%.*]] = xor i32 [[A]], [[B]]
+; ORIGIN-NEXT:    [[TMP8:%.*]] = or i32 [[TMP7]], [[TMP2]]
+; ORIGIN-NEXT:    [[TMP9:%.*]] = or i32 [[TMP8]], [[TMP4]]
+; ORIGIN-NEXT:    [[_MSPROP_SELECT:%.*]] = select i1 [[TMP0]], i32 [[TMP9]], i32 [[TMP6]]
+; ORIGIN-NEXT:    [[TMP10:%.*]] = select i1 [[C]], i32 [[TMP3]], i32 [[TMP5]]
+; ORIGIN-NEXT:    [[TMP11:%.*]] = select i1 [[TMP0]], i32 [[TMP1]], i32 [[TMP10]]
+; ORIGIN-NEXT:    [[COND:%.*]] = select i1 [[C]], i32 [[A]], i32 [[B]]
+; ORIGIN-NEXT:    store i32 [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    store i32 [[TMP11]], ptr @__msan_retval_origin_tls, align 4
+; ORIGIN-NEXT:    ret i32 [[COND]]
+;
+; CALLS-LABEL: define i32 @Select(
+; CALLS-SAME: i32 [[A:%.*]], i32 [[B:%.*]], i1 [[C:%.*]]) #[[ATTR0]] {
+; CALLS-NEXT:  [[ENTRY:.*:]]
+; CALLS-NEXT:    [[TMP0:%.*]] = load i1, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4
+; CALLS-NEXT:    [[TMP2:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP3:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CALLS-NEXT:    [[TMP5:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 8) to ptr), align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    [[TMP6:%.*]] = select i1 [[C]], i32 [[TMP2]], i32 [[TMP4]]
+; CALLS-NEXT:    [[TMP7:%.*]] = xor i32 [[A]], [[B]]
+; CALLS-NEXT:    [[TMP8:%.*]] = or i32 [[TMP7]], [[TMP2]]
+; CALLS-NEXT:    [[TMP9:%.*]] = or i32 [[TMP8]], [[TMP4]]
+; CALLS-NEXT:    [[_MSPROP_SELECT:%.*]] = select i1 [[TMP0]], i32 [[TMP9]], i32 [[TMP6]]
+; CALLS-NEXT:    [[TMP10:%.*]] = select i1 [[C]], i32 [[TMP3]], i32 [[TMP5]]
+; CALLS-NEXT:    [[TMP11:%.*]] = select i1 [[TMP0]], i32 [[TMP1]], i32 [[TMP10]]
+; CALLS-NEXT:    [[COND:%.*]] = select i1 [[C]], i32 [[A]], i32 [[B]]
+; CALLS-NEXT:    store i32 [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    store i32 [[TMP11]], ptr @__msan_retval_origin_tls, align 4
+; CALLS-NEXT:    ret i32 [[COND]]
+;
 entry:
   %cond = select i1 %c, i32 %a, i32 %b
   ret i32 %cond
 }
 
-; CHECK-LABEL: @Select
-; CHECK: select i1
-; CHECK-DAG: or i32
-; CHECK-DAG: xor i32
-; CHECK: or i32
-; CHECK-DAG: select i1
-; ORIGINS-DAG: select
-; ORIGINS-DAG: select
-; CHECK-DAG: select i1
-; CHECK: store i32{{.*}}@__msan_retval_tls
-; ORIGINS: store i32{{.*}}@__msan_retval_origin_tls
-; CHECK: ret i32
-
-
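
The Select checks above mirror the scalar select shadow rule: with a
well-defined condition, the result shadow is just the shadow of the chosen
arm; with a poisoned condition, every bit that differs between the arms, or is
unknown in either arm, becomes unknown. A small runnable C++ model of the same
bit logic (function and parameter names are illustrative only):

#include <cassert>
#include <cstdint>

// Shadow for `%cond = select i1 %c, i32 %a, i32 %b`, given operand shadows
// Sa/Sb and condition shadow Sc.
uint32_t selectShadow(bool c, bool Sc, uint32_t a, uint32_t Sa, uint32_t b,
                      uint32_t Sb) {
  uint32_t chosen = c ? Sa : Sb;      // clean condition: shadow of that arm
  uint32_t mixed = (a ^ b) | Sa | Sb; // poisoned condition: pessimistic mix
  return Sc ? mixed : chosen;
}

int main() {
  // Poisoned condition but identical, fully-defined arms: result is defined.
  assert(selectShadow(true, true, 7, 0, 7, 0) == 0);
  // Poisoned condition, arms differ in the low two bits: those bits poison.
  assert(selectShadow(true, true, 6, 0, 5, 0) == 3);
}
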
 ; Check that we propagate origin for "select" with vector condition.
 ; Select condition is flattened to i1, which is then used to select one of the
 ; argument origins.
 
 define <8 x i16> @SelectVector(<8 x i16> %a, <8 x i16> %b, <8 x i1> %c) nounwind uwtable readnone sanitize_memory {
+; CHECK-LABEL: define <8 x i16> @SelectVector(
+; CHECK-SAME: <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[C:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i1>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[C]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = xor <8 x i16> [[A]], [[B]]
+; CHECK-NEXT:    [[TMP5:%.*]] = or <8 x i16> [[TMP4]], [[TMP1]]
+; CHECK-NEXT:    [[TMP6:%.*]] = or <8 x i16> [[TMP5]], [[TMP2]]
+; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP0]], <8 x i16> [[TMP6]], <8 x i16> [[TMP3]]
+; CHECK-NEXT:    [[COND:%.*]] = select <8 x i1> [[C]], <8 x i16> [[A]], <8 x i16> [[B]]
+; CHECK-NEXT:    store <8 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret <8 x i16> [[COND]]
+;
+; ORIGIN-LABEL: define <8 x i16> @SelectVector(
+; ORIGIN-SAME: <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[C:%.*]]) #[[ATTR0]] {
+; ORIGIN-NEXT:  [[ENTRY:.*:]]
+; ORIGIN-NEXT:    [[TMP0:%.*]] = load <8 x i1>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; ORIGIN-NEXT:    [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4
+; ORIGIN-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
+; ORIGIN-NEXT:    [[TMP3:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; ORIGIN-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; ORIGIN-NEXT:    [[TMP5:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[TMP6:%.*]] = select <8 x i1> [[C]], <8 x i16> [[TMP2]], <8 x i16> [[TMP4]]
+; ORIGIN-NEXT:    [[TMP7:%.*]] = xor <8 x i16> [[A]], [[B]]
+; ORIGIN-NEXT:    [[TMP8:%.*]] = or <8 x i16> [[TMP7]], [[TMP2]]
+; ORIGIN-NEXT:    [[TMP9:%.*]] = or <8 x i16> [[TMP8]], [[TMP4]]
+; ORIGIN-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP0]], <8 x i16> [[TMP9]], <8 x i16> [[TMP6]]
+; ORIGIN-NEXT:    [[TMP10:%.*]] = bitcast <8 x i1> [[C]] to i8
+; ORIGIN-NEXT:    [[TMP11:%.*]] = icmp ne i8 [[TMP10]], 0
+; ORIGIN-NEXT:    [[TMP12:%.*]] = bitcast <8 x i1> [[TMP0]] to i8
+; ORIGIN-NEXT:    [[TMP13:%.*]] = icmp ne i8 [[TMP12]], 0
+; ORIGIN-NEXT:    [[TMP14:%.*]] = select i1 [[TMP11]], i32 [[TMP3]], i32 [[TMP5]]
+; ORIGIN-NEXT:    [[TMP15:%.*]] = select i1 [[TMP13]], i32 [[TMP1]], i32 [[TMP14]]
+; ORIGIN-NEXT:    [[COND:%.*]] = select <8 x i1> [[C]], <8 x i16> [[A]], <8 x i16> [[B]]
+; ORIGIN-NEXT:    store <8 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    store i32 [[TMP15]], ptr @__msan_retval_origin_tls, align 4
+; ORIGIN-NEXT:    ret <8 x i16> [[COND]]
+;
+; CALLS-LABEL: define <8 x i16> @SelectVector(
+; CALLS-SAME: <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[C:%.*]]) #[[ATTR0]] {
+; CALLS-NEXT:  [[ENTRY:.*:]]
+; CALLS-NEXT:    [[TMP0:%.*]] = load <8 x i1>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4
+; CALLS-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP3:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CALLS-NEXT:    [[TMP5:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    [[TMP6:%.*]] = select <8 x i1> [[C]], <8 x i16> [[TMP2]], <8 x i16> [[TMP4]]
+; CALLS-NEXT:    [[TMP7:%.*]] = xor <8 x i16> [[A]], [[B]]
+; CALLS-NEXT:    [[TMP8:%.*]] = or <8 x i16> [[TMP7]], [[TMP2]]
+; CALLS-NEXT:    [[TMP9:%.*]] = or <8 x i16> [[TMP8]], [[TMP4]]
+; CALLS-NEXT:    [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP0]], <8 x i16> [[TMP9]], <8 x i16> [[TMP6]]
+; CALLS-NEXT:    [[TMP10:%.*]] = bitcast <8 x i1> [[C]] to i8
+; CALLS-NEXT:    [[TMP11:%.*]] = icmp ne i8 [[TMP10]], 0
+; CALLS-NEXT:    [[TMP12:%.*]] = bitcast <8 x i1> [[TMP0]] to i8
+; CALLS-NEXT:    [[TMP13:%.*]] = icmp ne i8 [[TMP12]], 0
+; CALLS-NEXT:    [[TMP14:%.*]] = select i1 [[TMP11]], i32 [[TMP3]], i32 [[TMP5]]
+; CALLS-NEXT:    [[TMP15:%.*]] = select i1 [[TMP13]], i32 [[TMP1]], i32 [[TMP14]]
+; CALLS-NEXT:    [[COND:%.*]] = select <8 x i1> [[C]], <8 x i16> [[A]], <8 x i16> [[B]]
+; CALLS-NEXT:    store <8 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    store i32 [[TMP15]], ptr @__msan_retval_origin_tls, align 4
+; CALLS-NEXT:    ret <8 x i16> [[COND]]
+;
 entry:
   %cond = select <8 x i1> %c, <8 x i16> %a, <8 x i16> %b
   ret <8 x i16> %cond
 }
 
-; CHECK-LABEL: @SelectVector
-; CHECK: select <8 x i1>
-; CHECK-DAG: or <8 x i16>
-; CHECK-DAG: xor <8 x i16>
-; CHECK: or <8 x i16>
-; CHECK-DAG: select <8 x i1>
-; ORIGINS-DAG: select
-; ORIGINS-DAG: select
-; CHECK-DAG: select <8 x i1>
-; CHECK: store <8 x i16>{{.*}}@__msan_retval_tls
-; ORIGINS: store i32{{.*}}@__msan_retval_origin_tls
-; CHECK: ret <8 x i16>
-
-
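
Because an origin is a single i32 per value, the vector condition and its
shadow are flattened first (bitcast <8 x i1> to i8, then icmp ne 0, i.e. "any
lane set") before picking among the three candidate origins, exactly as the
ORIGIN/CALLS lines above show. A sketch with illustrative names:

#include <cassert>
#include <cstdint>

bool anyLane(uint8_t mask) { return mask != 0; } // flattened <8 x i1>

// c/Sc: flattened condition and condition shadow; Oc/Oa/Ob: the origins of
// the condition and of the two arguments.
uint32_t selectVectorOrigin(uint8_t c, uint8_t Sc, uint32_t Oc, uint32_t Oa,
                            uint32_t Ob) {
  uint32_t argOrigin = anyLane(c) ? Oa : Ob;
  return anyLane(Sc) ? Oc : argOrigin; // a poisoned condition wins
}

int main() {
  assert(selectVectorOrigin(0x01, 0x00, 111, 222, 333) == 222);
  assert(selectVectorOrigin(0x00, 0x80, 111, 222, 333) == 111);
}
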
 ; Check that we propagate origin for "select" with scalar condition and vector
 ; arguments. Select condition shadow is sign-extended to the vector type and
 ; mixed into the result shadow.
 
 define <8 x i16> @SelectVector2(<8 x i16> %a, <8 x i16> %b, i1 %c) nounwind uwtable readnone sanitize_memory {
+; CHECK-LABEL: define <8 x i16> @SelectVector2(
+; CHECK-SAME: <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i1 [[C:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i1, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[C]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = xor <8 x i16> [[A]], [[B]]
+; CHECK-NEXT:    [[TMP5:%.*]] = or <8 x i16> [[TMP4]], [[TMP1]]
+; CHECK-NEXT:    [[TMP6:%.*]] = or <8 x i16> [[TMP5]], [[TMP2]]
+; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select i1 [[TMP0]], <8 x i16> [[TMP6]], <8 x i16> [[TMP3]]
+; CHECK-NEXT:    [[COND:%.*]] = select i1 [[C]], <8 x i16> [[A]], <8 x i16> [[B]]
+; CHECK-NEXT:    store <8 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret <8 x i16> [[COND]]
+;
+; ORIGIN-LABEL: define <8 x i16> @SelectVector2(
+; ORIGIN-SAME: <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i1 [[C:%.*]]) #[[ATTR0]] {
+; ORIGIN-NEXT:  [[ENTRY:.*:]]
+; ORIGIN-NEXT:    [[TMP0:%.*]] = load i1, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; ORIGIN-NEXT:    [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4
+; ORIGIN-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
+; ORIGIN-NEXT:    [[TMP3:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; ORIGIN-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; ORIGIN-NEXT:    [[TMP5:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[TMP6:%.*]] = select i1 [[C]], <8 x i16> [[TMP2]], <8 x i16> [[TMP4]]
+; ORIGIN-NEXT:    [[TMP7:%.*]] = xor <8 x i16> [[A]], [[B]]
+; ORIGIN-NEXT:    [[TMP8:%.*]] = or <8 x i16> [[TMP7]], [[TMP2]]
+; ORIGIN-NEXT:    [[TMP9:%.*]] = or <8 x i16> [[TMP8]], [[TMP4]]
+; ORIGIN-NEXT:    [[_MSPROP_SELECT:%.*]] = select i1 [[TMP0]], <8 x i16> [[TMP9]], <8 x i16> [[TMP6]]
+; ORIGIN-NEXT:    [[TMP10:%.*]] = select i1 [[C]], i32 [[TMP3]], i32 [[TMP5]]
+; ORIGIN-NEXT:    [[TMP11:%.*]] = select i1 [[TMP0]], i32 [[TMP1]], i32 [[TMP10]]
+; ORIGIN-NEXT:    [[COND:%.*]] = select i1 [[C]], <8 x i16> [[A]], <8 x i16> [[B]]
+; ORIGIN-NEXT:    store <8 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    store i32 [[TMP11]], ptr @__msan_retval_origin_tls, align 4
+; ORIGIN-NEXT:    ret <8 x i16> [[COND]]
+;
+; CALLS-LABEL: define <8 x i16> @SelectVector2(
+; CALLS-SAME: <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i1 [[C:%.*]]) #[[ATTR0]] {
+; CALLS-NEXT:  [[ENTRY:.*:]]
+; CALLS-NEXT:    [[TMP0:%.*]] = load i1, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4
+; CALLS-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP3:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    [[TMP4:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CALLS-NEXT:    [[TMP5:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    [[TMP6:%.*]] = select i1 [[C]], <8 x i16> [[TMP2]], <8 x i16> [[TMP4]]
+; CALLS-NEXT:    [[TMP7:%.*]] = xor <8 x i16> [[A]], [[B]]
+; CALLS-NEXT:    [[TMP8:%.*]] = or <8 x i16> [[TMP7]], [[TMP2]]
+; CALLS-NEXT:    [[TMP9:%.*]] = or <8 x i16> [[TMP8]], [[TMP4]]
+; CALLS-NEXT:    [[_MSPROP_SELECT:%.*]] = select i1 [[TMP0]], <8 x i16> [[TMP9]], <8 x i16> [[TMP6]]
+; CALLS-NEXT:    [[TMP10:%.*]] = select i1 [[C]], i32 [[TMP3]], i32 [[TMP5]]
+; CALLS-NEXT:    [[TMP11:%.*]] = select i1 [[TMP0]], i32 [[TMP1]], i32 [[TMP10]]
+; CALLS-NEXT:    [[COND:%.*]] = select i1 [[C]], <8 x i16> [[A]], <8 x i16> [[B]]
+; CALLS-NEXT:    store <8 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    store i32 [[TMP11]], ptr @__msan_retval_origin_tls, align 4
+; CALLS-NEXT:    ret <8 x i16> [[COND]]
+;
 entry:
   %cond = select i1 %c, <8 x i16> %a, <8 x i16> %b
   ret <8 x i16> %cond
 }
 
-; CHECK-LABEL: @SelectVector2
-; CHECK: select i1
-; CHECK-DAG: or <8 x i16>
-; CHECK-DAG: xor <8 x i16>
-; CHECK: or <8 x i16>
-; CHECK-DAG: select i1
-; ORIGINS-DAG: select i1
-; ORIGINS-DAG: select i1
-; CHECK-DAG: select i1
-; CHECK: ret <8 x i16>
-
-
 define { i64, i64 } @SelectStruct(i1 zeroext %x, { i64, i64 } %a, { i64, i64 } %b) readnone sanitize_memory {
+; CHECK-LABEL: define { i64, i64 } @SelectStruct(
+; CHECK-SAME: i1 zeroext [[X:%.*]], { i64, i64 } [[A:%.*]], { i64, i64 } [[B:%.*]]) #[[ATTR6:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i1, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load { i64, i64 }, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load { i64, i64 }, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[X]], { i64, i64 } [[TMP1]], { i64, i64 } [[TMP2]]
+; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select i1 [[TMP0]], { i64, i64 } { i64 -1, i64 -1 }, { i64, i64 } [[TMP3]]
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[X]], { i64, i64 } [[A]], { i64, i64 } [[B]]
+; CHECK-NEXT:    store { i64, i64 } [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret { i64, i64 } [[C]]
+;
+; ORIGIN-LABEL: define { i64, i64 } @SelectStruct(
+; ORIGIN-SAME: i1 zeroext [[X:%.*]], { i64, i64 } [[A:%.*]], { i64, i64 } [[B:%.*]]) #[[ATTR6:[0-9]+]] {
+; ORIGIN-NEXT:  [[ENTRY:.*:]]
+; ORIGIN-NEXT:    [[TMP0:%.*]] = load i1, ptr @__msan_param_tls, align 8
+; ORIGIN-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; ORIGIN-NEXT:    [[TMP2:%.*]] = load { i64, i64 }, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; ORIGIN-NEXT:    [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 8) to ptr), align 4
+; ORIGIN-NEXT:    [[TMP4:%.*]] = load { i64, i64 }, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; ORIGIN-NEXT:    [[TMP5:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 24) to ptr), align 4
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[TMP6:%.*]] = select i1 [[X]], { i64, i64 } [[TMP2]], { i64, i64 } [[TMP4]]
+; ORIGIN-NEXT:    [[_MSPROP_SELECT:%.*]] = select i1 [[TMP0]], { i64, i64 } { i64 -1, i64 -1 }, { i64, i64 } [[TMP6]]
+; ORIGIN-NEXT:    [[TMP7:%.*]] = select i1 [[X]], i32 [[TMP3]], i32 [[TMP5]]
+; ORIGIN-NEXT:    [[TMP8:%.*]] = select i1 [[TMP0]], i32 [[TMP1]], i32 [[TMP7]]
+; ORIGIN-NEXT:    [[C:%.*]] = select i1 [[X]], { i64, i64 } [[A]], { i64, i64 } [[B]]
+; ORIGIN-NEXT:    store { i64, i64 } [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    store i32 [[TMP8]], ptr @__msan_retval_origin_tls, align 4
+; ORIGIN-NEXT:    ret { i64, i64 } [[C]]
+;
+; CALLS-LABEL: define { i64, i64 } @SelectStruct(
+; CALLS-SAME: i1 zeroext [[X:%.*]], { i64, i64 } [[A:%.*]], { i64, i64 } [[B:%.*]]) #[[ATTR6:[0-9]+]] {
+; CALLS-NEXT:  [[ENTRY:.*:]]
+; CALLS-NEXT:    [[TMP0:%.*]] = load i1, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    [[TMP2:%.*]] = load { i64, i64 }, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CALLS-NEXT:    [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 8) to ptr), align 4
+; CALLS-NEXT:    [[TMP4:%.*]] = load { i64, i64 }, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CALLS-NEXT:    [[TMP5:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 24) to ptr), align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    [[TMP6:%.*]] = select i1 [[X]], { i64, i64 } [[TMP2]], { i64, i64 } [[TMP4]]
+; CALLS-NEXT:    [[_MSPROP_SELECT:%.*]] = select i1 [[TMP0]], { i64, i64 } { i64 -1, i64 -1 }, { i64, i64 } [[TMP6]]
+; CALLS-NEXT:    [[TMP7:%.*]] = select i1 [[X]], i32 [[TMP3]], i32 [[TMP5]]
+; CALLS-NEXT:    [[TMP8:%.*]] = select i1 [[TMP0]], i32 [[TMP1]], i32 [[TMP7]]
+; CALLS-NEXT:    [[C:%.*]] = select i1 [[X]], { i64, i64 } [[A]], { i64, i64 } [[B]]
+; CALLS-NEXT:    store { i64, i64 } [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    store i32 [[TMP8]], ptr @__msan_retval_origin_tls, align 4
+; CALLS-NEXT:    ret { i64, i64 } [[C]]
+;
 entry:
   %c = select i1 %x, { i64, i64 } %a, { i64, i64 } %b
   ret { i64, i64 } %c
 }
 
-; CHECK-LABEL: @SelectStruct
-; CHECK: select i1 {{.*}}, { i64, i64 }
-; CHECK-NEXT: select i1 {{.*}}, { i64, i64 } { i64 -1, i64 -1 }, { i64, i64 }
-; ORIGINS: select i1
-; ORIGINS: select i1
-; CHECK-NEXT: select i1 {{.*}}, { i64, i64 }
-; CHECK: ret { i64, i64 }
-
-
 define { ptr, double } @SelectStruct2(i1 zeroext %x, { ptr, double } %a, { ptr, double } %b) readnone sanitize_memory {
+; CHECK-LABEL: define { ptr, double } @SelectStruct2(
+; CHECK-SAME: i1 zeroext [[X:%.*]], { ptr, double } [[A:%.*]], { ptr, double } [[B:%.*]]) #[[ATTR6]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i1, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load { i64, i64 }, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load { i64, i64 }, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[X]], { i64, i64 } [[TMP1]], { i64, i64 } [[TMP2]]
+; CHECK-NEXT:    [[_MSPROP_SELECT:%.*]] = select i1 [[TMP0]], { i64, i64 } { i64 -1, i64 -1 }, { i64, i64 } [[TMP3]]
+; CHECK-NEXT:    [[C:%.*]] = select i1 [[X]], { ptr, double } [[A]], { ptr, double } [[B]]
+; CHECK-NEXT:    store { i64, i64 } [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret { ptr, double } [[C]]
+;
+; ORIGIN-LABEL: define { ptr, double } @SelectStruct2(
+; ORIGIN-SAME: i1 zeroext [[X:%.*]], { ptr, double } [[A:%.*]], { ptr, double } [[B:%.*]]) #[[ATTR6]] {
+; ORIGIN-NEXT:  [[ENTRY:.*:]]
+; ORIGIN-NEXT:    [[TMP0:%.*]] = load i1, ptr @__msan_param_tls, align 8
+; ORIGIN-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; ORIGIN-NEXT:    [[TMP2:%.*]] = load { i64, i64 }, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; ORIGIN-NEXT:    [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 8) to ptr), align 4
+; ORIGIN-NEXT:    [[TMP4:%.*]] = load { i64, i64 }, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; ORIGIN-NEXT:    [[TMP5:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 24) to ptr), align 4
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[TMP6:%.*]] = select i1 [[X]], { i64, i64 } [[TMP2]], { i64, i64 } [[TMP4]]
+; ORIGIN-NEXT:    [[_MSPROP_SELECT:%.*]] = select i1 [[TMP0]], { i64, i64 } { i64 -1, i64 -1 }, { i64, i64 } [[TMP6]]
+; ORIGIN-NEXT:    [[TMP7:%.*]] = select i1 [[X]], i32 [[TMP3]], i32 [[TMP5]]
+; ORIGIN-NEXT:    [[TMP8:%.*]] = select i1 [[TMP0]], i32 [[TMP1]], i32 [[TMP7]]
+; ORIGIN-NEXT:    [[C:%.*]] = select i1 [[X]], { ptr, double } [[A]], { ptr, double } [[B]]
+; ORIGIN-NEXT:    store { i64, i64 } [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    store i32 [[TMP8]], ptr @__msan_retval_origin_tls, align 4
+; ORIGIN-NEXT:    ret { ptr, double } [[C]]
+;
+; CALLS-LABEL: define { ptr, double } @SelectStruct2(
+; CALLS-SAME: i1 zeroext [[X:%.*]], { ptr, double } [[A:%.*]], { ptr, double } [[B:%.*]]) #[[ATTR6]] {
+; CALLS-NEXT:  [[ENTRY:.*:]]
+; CALLS-NEXT:    [[TMP0:%.*]] = load i1, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    [[TMP2:%.*]] = load { i64, i64 }, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CALLS-NEXT:    [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 8) to ptr), align 4
+; CALLS-NEXT:    [[TMP4:%.*]] = load { i64, i64 }, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CALLS-NEXT:    [[TMP5:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 24) to ptr), align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    [[TMP6:%.*]] = select i1 [[X]], { i64, i64 } [[TMP2]], { i64, i64 } [[TMP4]]
+; CALLS-NEXT:    [[_MSPROP_SELECT:%.*]] = select i1 [[TMP0]], { i64, i64 } { i64 -1, i64 -1 }, { i64, i64 } [[TMP6]]
+; CALLS-NEXT:    [[TMP7:%.*]] = select i1 [[X]], i32 [[TMP3]], i32 [[TMP5]]
+; CALLS-NEXT:    [[TMP8:%.*]] = select i1 [[TMP0]], i32 [[TMP1]], i32 [[TMP7]]
+; CALLS-NEXT:    [[C:%.*]] = select i1 [[X]], { ptr, double } [[A]], { ptr, double } [[B]]
+; CALLS-NEXT:    store { i64, i64 } [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    store i32 [[TMP8]], ptr @__msan_retval_origin_tls, align 4
+; CALLS-NEXT:    ret { ptr, double } [[C]]
+;
 entry:
   %c = select i1 %x, { ptr, double } %a, { ptr, double } %b
   ret { ptr, double } %c
 }
 
-; CHECK-LABEL: @SelectStruct2
-; CHECK: select i1 {{.*}}, { i64, i64 }
-; CHECK-NEXT: select i1 {{.*}}, { i64, i64 } { i64 -1, i64 -1 }, { i64, i64 }
-; ORIGINS: select i1
-; ORIGINS: select i1
-; CHECK-NEXT: select i1 {{.*}}, { ptr, double }
-; CHECK: ret { ptr, double }
-
-
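
The two SelectStruct tests above show the aggregate case: there is no bitwise
xor/or over { i64, i64 }, so when the condition shadow is set the whole result
shadow is conservatively all-ones, which is where the literal
{ i64 -1, i64 -1 } in the checks comes from. A hedged C++ analogue:

#include <cassert>
#include <cstdint>

struct Shadow2 { uint64_t a, b; };

// Aggregate select: no per-bit mix is possible, so an unknown condition
// poisons the entire result.
Shadow2 selectStructShadow(bool c, bool Sc, Shadow2 Sa, Shadow2 Sb) {
  Shadow2 chosen = c ? Sa : Sb;
  return Sc ? Shadow2{~0ull, ~0ull} : chosen;
}

int main() {
  Shadow2 s = selectStructShadow(true, /*Sc=*/true, {0, 0}, {0, 0});
  assert(s.a == ~0ull && s.b == ~0ull); // fully poisoned despite clean arms
}
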
 define ptr @IntToPtr(i64 %x) nounwind uwtable readnone sanitize_memory {
+; CHECK-LABEL: define ptr @IntToPtr(
+; CHECK-SAME: i64 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP1:%.*]] = inttoptr i64 [[X]] to ptr
+; CHECK-NEXT:    store i64 [[TMP0]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret ptr [[TMP1]]
+;
+; ORIGIN-LABEL: define ptr @IntToPtr(
+; ORIGIN-SAME: i64 [[X:%.*]]) #[[ATTR0]] {
+; ORIGIN-NEXT:  [[ENTRY:.*:]]
+; ORIGIN-NEXT:    [[TMP0:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; ORIGIN-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[TMP2:%.*]] = inttoptr i64 [[X]] to ptr
+; ORIGIN-NEXT:    store i64 [[TMP0]], ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    store i32 [[TMP1]], ptr @__msan_retval_origin_tls, align 4
+; ORIGIN-NEXT:    ret ptr [[TMP2]]
+;
+; CALLS-LABEL: define ptr @IntToPtr(
+; CALLS-SAME: i64 [[X:%.*]]) #[[ATTR0]] {
+; CALLS-NEXT:  [[ENTRY:.*:]]
+; CALLS-NEXT:    [[TMP0:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    [[TMP2:%.*]] = inttoptr i64 [[X]] to ptr
+; CALLS-NEXT:    store i64 [[TMP0]], ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    store i32 [[TMP1]], ptr @__msan_retval_origin_tls, align 4
+; CALLS-NEXT:    ret ptr [[TMP2]]
+;
 entry:
   %0 = inttoptr i64 %x to ptr
   ret ptr %0
 }
 
-; CHECK-LABEL: @IntToPtr
-; CHECK: load i64, ptr{{.*}}__msan_param_tls
-; ORIGINS-NEXT: load i32, ptr{{.*}}__msan_param_origin_tls
-; CHECK-NEXT: call void @llvm.donothing
-; CHECK-NEXT: inttoptr
-; CHECK-NEXT: store i64{{.*}}__msan_retval_tls
-; CHECK: ret ptr
-
-
 define ptr @IntToPtr_ZExt(i16 %x) nounwind uwtable readnone sanitize_memory {
+; CHECK-LABEL: define ptr @IntToPtr_ZExt(
+; CHECK-SAME: i16 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSPROP_INTTOPTR:%.*]] = zext i16 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = inttoptr i16 [[X]] to ptr
+; CHECK-NEXT:    store i64 [[_MSPROP_INTTOPTR]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret ptr [[TMP1]]
+;
+; ORIGIN-LABEL: define ptr @IntToPtr_ZExt(
+; ORIGIN-SAME: i16 [[X:%.*]]) #[[ATTR0]] {
+; ORIGIN-NEXT:  [[ENTRY:.*:]]
+; ORIGIN-NEXT:    [[TMP0:%.*]] = load i16, ptr @__msan_param_tls, align 8
+; ORIGIN-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[_MSPROP_INTTOPTR:%.*]] = zext i16 [[TMP0]] to i64
+; ORIGIN-NEXT:    [[TMP2:%.*]] = inttoptr i16 [[X]] to ptr
+; ORIGIN-NEXT:    store i64 [[_MSPROP_INTTOPTR]], ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    store i32 [[TMP1]], ptr @__msan_retval_origin_tls, align 4
+; ORIGIN-NEXT:    ret ptr [[TMP2]]
+;
+; CALLS-LABEL: define ptr @IntToPtr_ZExt(
+; CALLS-SAME: i16 [[X:%.*]]) #[[ATTR0]] {
+; CALLS-NEXT:  [[ENTRY:.*:]]
+; CALLS-NEXT:    [[TMP0:%.*]] = load i16, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    [[_MSPROP_INTTOPTR:%.*]] = zext i16 [[TMP0]] to i64
+; CALLS-NEXT:    [[TMP2:%.*]] = inttoptr i16 [[X]] to ptr
+; CALLS-NEXT:    store i64 [[_MSPROP_INTTOPTR]], ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    store i32 [[TMP1]], ptr @__msan_retval_origin_tls, align 4
+; CALLS-NEXT:    ret ptr [[TMP2]]
+;
 entry:
   %0 = inttoptr i16 %x to ptr
   ret ptr %0
 }
 
-; CHECK-LABEL: @IntToPtr_ZExt
-; CHECK: load i16, ptr{{.*}}__msan_param_tls
-; CHECK: zext
-; CHECK-NEXT: inttoptr
-; CHECK-NEXT: store i64{{.*}}__msan_retval_tls
-; CHECK: ret ptr
 
 
 ; Check that we insert exactly one check on udiv
 ; (2nd arg shadow is checked, 1st arg shadow is propagated)
 
 define i32 @Div(i32 %a, i32 %b) nounwind uwtable readnone sanitize_memory {
+; CHECK-LABEL: define i32 @Div(
+; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP0]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label %[[BB2:.*]], label %[[BB3:.*]], !prof [[PROF1]]
+; CHECK:       [[BB2]]:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR12]]
+; CHECK-NEXT:    unreachable
+; CHECK:       [[BB3]]:
+; CHECK-NEXT:    [[DIV:%.*]] = udiv i32 [[A]], [[B]]
+; CHECK-NEXT:    store i32 [[TMP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i32 [[DIV]]
+;
+; ORIGIN-LABEL: define i32 @Div(
+; ORIGIN-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) #[[ATTR0]] {
+; ORIGIN-NEXT:  [[ENTRY:.*:]]
+; ORIGIN-NEXT:    [[TMP0:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; ORIGIN-NEXT:    [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 8) to ptr), align 4
+; ORIGIN-NEXT:    [[TMP2:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; ORIGIN-NEXT:    [[TMP3:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP0]], 0
+; ORIGIN-NEXT:    br i1 [[_MSCMP]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]]
+; ORIGIN:       [[BB4]]:
+; ORIGIN-NEXT:    call void @__msan_warning_with_origin_noreturn(i32 [[TMP1]]) #[[ATTR12]]
+; ORIGIN-NEXT:    unreachable
+; ORIGIN:       [[BB5]]:
+; ORIGIN-NEXT:    [[DIV:%.*]] = udiv i32 [[A]], [[B]]
+; ORIGIN-NEXT:    store i32 [[TMP2]], ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    store i32 [[TMP3]], ptr @__msan_retval_origin_tls, align 4
+; ORIGIN-NEXT:    ret i32 [[DIV]]
+;
+; CALLS-LABEL: define i32 @Div(
+; CALLS-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) #[[ATTR0]] {
+; CALLS-NEXT:  [[ENTRY:.*:]]
+; CALLS-NEXT:    [[TMP0:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 8) to ptr), align 4
+; CALLS-NEXT:    [[TMP2:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP3:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    call void @__msan_maybe_warning_4(i32 zeroext [[TMP0]], i32 zeroext [[TMP1]])
+; CALLS-NEXT:    [[DIV:%.*]] = udiv i32 [[A]], [[B]]
+; CALLS-NEXT:    store i32 [[TMP2]], ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    store i32 [[TMP3]], ptr @__msan_retval_origin_tls, align 4
+; CALLS-NEXT:    ret i32 [[DIV]]
+;
 entry:
   %div = udiv i32 %a, %b
   ret i32 %div
 }
 
-; CHECK-LABEL: @Div
-; CHECK: icmp
-; NOORIGINS: call void @__msan_warning_noreturn()
-; ORIGINS: call void @__msan_warning_with_origin_noreturn(i32
-; CHECK-NOT: icmp
-; CHECK: udiv
-; CHECK-NOT: icmp
-; CHECK: ret i32
-
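
The udiv lines above make the check-versus-propagate split concrete: the
divisor's shadow is *checked* (a branch to __msan_warning_noreturn, or the
__msan_maybe_warning_4 call in the CALLS flavor), while the dividend's shadow
is simply *propagated* as the result shadow. A runnable model, with
msanWarningModel standing in for the real reporting hook:

#include <cassert>
#include <cstdint>

static bool warned = false;
void msanWarningModel() { warned = true; }

// Exactly one check on udiv: on the divisor shadow only.
uint32_t udivShadow(uint32_t Sa /*dividend*/, uint32_t Sb /*divisor*/) {
  if (Sb != 0)
    msanWarningModel();
  return Sa; // result shadow = dividend shadow
}

int main() {
  assert(udivShadow(0xF0, 0) == 0xF0 && !warned); // clean divisor: no report
  udivShadow(0, 1); // any poisoned divisor bit reports
  assert(warned);
}
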
 ; Check that fdiv, unlike udiv, simply propagates shadow.
 
 define float @FDiv(float %a, float %b) nounwind uwtable readnone sanitize_memory {
+; CHECK-LABEL: define float @FDiv(
+; CHECK-SAME: float [[A:%.*]], float [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[C:%.*]] = fdiv float [[A]], [[B]]
+; CHECK-NEXT:    store i32 [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret float [[C]]
+;
+; ORIGIN-LABEL: define float @FDiv(
+; ORIGIN-SAME: float [[A:%.*]], float [[B:%.*]]) #[[ATTR0]] {
+; ORIGIN-NEXT:  [[ENTRY:.*:]]
+; ORIGIN-NEXT:    [[TMP0:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; ORIGIN-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; ORIGIN-NEXT:    [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; ORIGIN-NEXT:    [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 8) to ptr), align 4
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[_MSPROP:%.*]] = or i32 [[TMP0]], [[TMP2]]
+; ORIGIN-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP2]], 0
+; ORIGIN-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 [[TMP1]]
+; ORIGIN-NEXT:    [[C:%.*]] = fdiv float [[A]], [[B]]
+; ORIGIN-NEXT:    store i32 [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    store i32 [[TMP5]], ptr @__msan_retval_origin_tls, align 4
+; ORIGIN-NEXT:    ret float [[C]]
+;
+; CALLS-LABEL: define float @FDiv(
+; CALLS-SAME: float [[A:%.*]], float [[B:%.*]]) #[[ATTR0]] {
+; CALLS-NEXT:  [[ENTRY:.*:]]
+; CALLS-NEXT:    [[TMP0:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CALLS-NEXT:    [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 8) to ptr), align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    [[_MSPROP:%.*]] = or i32 [[TMP0]], [[TMP2]]
+; CALLS-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP2]], 0
+; CALLS-NEXT:    [[TMP5:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 [[TMP1]]
+; CALLS-NEXT:    [[C:%.*]] = fdiv float [[A]], [[B]]
+; CALLS-NEXT:    store i32 [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    store i32 [[TMP5]], ptr @__msan_retval_origin_tls, align 4
+; CALLS-NEXT:    ret float [[C]]
+;
 entry:
   %c = fdiv float %a, %b
   ret float %c
 }
 
-; CHECK-LABEL: @FDiv
-; CHECK: %[[SA:.*]] = load i32,{{.*}}@__msan_param_tls
-; CHECK: %[[SB:.*]] = load i32,{{.*}}@__msan_param_tls
-; CHECK: %[[SC:.*]] = or i32 %[[SA]], %[[SB]]
-; CHECK: = fdiv float
-; CHECK: store i32 %[[SC]], ptr {{.*}}@__msan_retval_tls
-; CHECK: ret float
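
fdiv performs no check at all: the result shadow is the OR of the operand
shadows, and in the origin-tracking flavors the origin is taken from the
second operand when its shadow is nonzero, otherwise from the first. A short
sketch (illustrative names):

#include <cassert>
#include <cstdint>

uint32_t fdivShadow(uint32_t Sa, uint32_t Sb) { return Sa | Sb; }

uint32_t fdivOrigin(uint32_t Sb, uint32_t Oa, uint32_t Ob) {
  return Sb != 0 ? Ob : Oa; // matches the ORIGIN select above
}

int main() {
  assert(fdivShadow(0x0F, 0xF0) == 0xFF);
  assert(fdivOrigin(/*Sb=*/1, /*Oa=*/111, /*Ob=*/222) == 222);
}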
 
 ; Check that fneg simply propagates shadow.
 
 define float @FNeg(float %a) nounwind uwtable readnone sanitize_memory {
+; CHECK-LABEL: define float @FNeg(
+; CHECK-SAME: float [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[C:%.*]] = fneg float [[A]]
+; CHECK-NEXT:    store i32 [[TMP0]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret float [[C]]
+;
+; ORIGIN-LABEL: define float @FNeg(
+; ORIGIN-SAME: float [[A:%.*]]) #[[ATTR0]] {
+; ORIGIN-NEXT:  [[ENTRY:.*:]]
+; ORIGIN-NEXT:    [[TMP0:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; ORIGIN-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[C:%.*]] = fneg float [[A]]
+; ORIGIN-NEXT:    store i32 [[TMP0]], ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    store i32 [[TMP1]], ptr @__msan_retval_origin_tls, align 4
+; ORIGIN-NEXT:    ret float [[C]]
+;
+; CALLS-LABEL: define float @FNeg(
+; CALLS-SAME: float [[A:%.*]]) #[[ATTR0]] {
+; CALLS-NEXT:  [[ENTRY:.*:]]
+; CALLS-NEXT:    [[TMP0:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    [[C:%.*]] = fneg float [[A]]
+; CALLS-NEXT:    store i32 [[TMP0]], ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    store i32 [[TMP1]], ptr @__msan_retval_origin_tls, align 4
+; CALLS-NEXT:    ret float [[C]]
+;
 entry:
   %c = fneg float %a
   ret float %c
 }
 
-; CHECK-LABEL: @FNeg
-; CHECK: %[[SA:.*]] = load i32,{{.*}}@__msan_param_tls
-; ORIGINS: %[[SB:.*]] = load i32,{{.*}}@__msan_param_origin_tls
-; CHECK: = fneg float
-; CHECK: store i32 %[[SA]], ptr {{.*}}@__msan_retval_tls
-; ORIGINS: store i32{{.*}}@__msan_retval_origin_tls
-; CHECK: ret float
-
-; Check that we propagate shadow for x<0, x>=0, etc (i.e. sign bit tests)
-
 define zeroext i1 @ICmpSLTZero(i32 %x) nounwind uwtable readnone sanitize_memory {
+; CHECK-LABEL: define zeroext i1 @ICmpSLTZero(
+; CHECK-SAME: i32 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr i32 [[TMP2]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = xor i32 [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP3]], -1
+; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[X]], [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = or i32 [[TMP6]], [[TMP4]]
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp slt i32 [[TMP7]], 0
+; CHECK-NEXT:    [[TMP9:%.*]] = shl i32 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP10:%.*]] = lshr i32 [[TMP9]], 1
+; CHECK-NEXT:    [[TMP11:%.*]] = xor i32 [[TMP1]], [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = xor i32 [[TMP11]], -1
+; CHECK-NEXT:    [[TMP13:%.*]] = and i32 [[X]], [[TMP12]]
+; CHECK-NEXT:    [[TMP14:%.*]] = or i32 [[TMP13]], [[TMP10]]
+; CHECK-NEXT:    [[TMP15:%.*]] = icmp slt i32 [[TMP14]], 0
+; CHECK-NEXT:    [[TMP16:%.*]] = xor i1 [[TMP8]], [[TMP15]]
+; CHECK-NEXT:    [[TMP17:%.*]] = icmp slt i32 [[X]], 0
+; CHECK-NEXT:    store i1 [[TMP16]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i1 [[TMP17]]
+;
+; ORIGIN-LABEL: define zeroext i1 @ICmpSLTZero(
+; ORIGIN-SAME: i32 [[X:%.*]]) #[[ATTR0]] {
+; ORIGIN-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; ORIGIN-NEXT:    [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[TMP3:%.*]] = shl i32 [[TMP1]], 1
+; ORIGIN-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 1
+; ORIGIN-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP1]], [[TMP4]]
+; ORIGIN-NEXT:    [[TMP6:%.*]] = xor i32 [[TMP4]], -1
+; ORIGIN-NEXT:    [[TMP7:%.*]] = and i32 [[X]], [[TMP6]]
+; ORIGIN-NEXT:    [[TMP8:%.*]] = or i32 [[TMP7]], [[TMP5]]
+; ORIGIN-NEXT:    [[TMP9:%.*]] = icmp slt i32 [[TMP8]], 0
+; ORIGIN-NEXT:    [[TMP10:%.*]] = shl i32 [[TMP1]], 1
+; ORIGIN-NEXT:    [[TMP11:%.*]] = lshr i32 [[TMP10]], 1
+; ORIGIN-NEXT:    [[TMP12:%.*]] = xor i32 [[TMP1]], [[TMP11]]
+; ORIGIN-NEXT:    [[TMP13:%.*]] = xor i32 [[TMP12]], -1
+; ORIGIN-NEXT:    [[TMP14:%.*]] = and i32 [[X]], [[TMP13]]
+; ORIGIN-NEXT:    [[TMP15:%.*]] = or i32 [[TMP14]], [[TMP11]]
+; ORIGIN-NEXT:    [[TMP16:%.*]] = icmp slt i32 [[TMP15]], 0
+; ORIGIN-NEXT:    [[TMP17:%.*]] = xor i1 [[TMP9]], [[TMP16]]
+; ORIGIN-NEXT:    [[TMP18:%.*]] = icmp slt i32 [[X]], 0
+; ORIGIN-NEXT:    store i1 [[TMP17]], ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    store i32 [[TMP2]], ptr @__msan_retval_origin_tls, align 4
+; ORIGIN-NEXT:    ret i1 [[TMP18]]
+;
+; CALLS-LABEL: define zeroext i1 @ICmpSLTZero(
+; CALLS-SAME: i32 [[X:%.*]]) #[[ATTR0]] {
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    [[TMP3:%.*]] = shl i32 [[TMP1]], 1
+; CALLS-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 1
+; CALLS-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP1]], [[TMP4]]
+; CALLS-NEXT:    [[TMP6:%.*]] = xor i32 [[TMP4]], -1
+; CALLS-NEXT:    [[TMP7:%.*]] = and i32 [[X]], [[TMP6]]
+; CALLS-NEXT:    [[TMP8:%.*]] = or i32 [[TMP7]], [[TMP5]]
+; CALLS-NEXT:    [[TMP9:%.*]] = icmp slt i32 [[TMP8]], 0
+; CALLS-NEXT:    [[TMP10:%.*]] = shl i32 [[TMP1]], 1
+; CALLS-NEXT:    [[TMP11:%.*]] = lshr i32 [[TMP10]], 1
+; CALLS-NEXT:    [[TMP12:%.*]] = xor i32 [[TMP1]], [[TMP11]]
+; CALLS-NEXT:    [[TMP13:%.*]] = xor i32 [[TMP12]], -1
+; CALLS-NEXT:    [[TMP14:%.*]] = and i32 [[X]], [[TMP13]]
+; CALLS-NEXT:    [[TMP15:%.*]] = or i32 [[TMP14]], [[TMP11]]
+; CALLS-NEXT:    [[TMP16:%.*]] = icmp slt i32 [[TMP15]], 0
+; CALLS-NEXT:    [[TMP17:%.*]] = xor i1 [[TMP9]], [[TMP16]]
+; CALLS-NEXT:    [[TMP18:%.*]] = icmp slt i32 [[X]], 0
+; CALLS-NEXT:    store i1 [[TMP17]], ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    store i32 [[TMP2]], ptr @__msan_retval_origin_tls, align 4
+; CALLS-NEXT:    ret i1 [[TMP18]]
+;
   %1 = icmp slt i32 %x, 0
   ret i1 %1
 }
 
-; CHECK-LABEL: @ICmpSLTZero
-; CHECK: icmp slt
-; CHECK-NOT: call void @__msan_warning
-; CHECK: icmp slt
-; CHECK-NOT: call void @__msan_warning
-; CHECK: ret i1
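
The sign-bit tests here exercise MSan's exact handling of relational icmp:
build the smallest and the largest value of %x consistent with its shadow (the
shl/lshr-by-1 pairs split off the shadow's sign bit, which is minimized and
maximized in the opposite direction from the low bits), evaluate the predicate
on both, and mark the result defined iff the two verdicts agree. A runnable
model for `icmp slt i32 %x, 0` (names are illustrative, not the pass's API):

#include <cassert>
#include <cstdint>

// Shadow of `icmp slt i32 %x, 0`; in s, a 1-bit means "this bit of x is
// unknown".
bool sltZeroShadow(int32_t x, uint32_t s) {
  uint32_t sLow = s & 0x7fffffffu;  // unknown non-sign bits
  uint32_t sSign = s & 0x80000000u; // unknown sign bit
  // Minimize: force the unknown sign bit to 1, unknown low bits to 0.
  int32_t xmin = (int32_t)(((uint32_t)x & ~sLow) | sSign);
  // Maximize: force the unknown sign bit to 0, unknown low bits to 1.
  int32_t xmax = (int32_t)(((uint32_t)x & ~sSign) | sLow);
  return (xmin < 0) != (xmax < 0); // poisoned iff the verdicts disagree
}

int main() {
  assert(!sltZeroShadow(-5, 0));           // fully defined x: defined result
  assert(sltZeroShadow(0, 0x80000000u));   // unknown sign bit decides slt
  assert(!sltZeroShadow(64, 0x0000000fu)); // low bits cannot flip the sign
}

The sge/sgt/sle variants below use the same min/max construction and differ
only in the predicate applied to the two bounds.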
 
 define zeroext i1 @ICmpSGEZero(i32 %x) nounwind uwtable readnone sanitize_memory {
+; CHECK-LABEL: define zeroext i1 @ICmpSGEZero(
+; CHECK-SAME: i32 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr i32 [[TMP2]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = xor i32 [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP3]], -1
+; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[X]], [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = or i32 [[TMP6]], [[TMP4]]
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp sge i32 [[TMP7]], 0
+; CHECK-NEXT:    [[TMP9:%.*]] = shl i32 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP10:%.*]] = lshr i32 [[TMP9]], 1
+; CHECK-NEXT:    [[TMP11:%.*]] = xor i32 [[TMP1]], [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = xor i32 [[TMP11]], -1
+; CHECK-NEXT:    [[TMP13:%.*]] = and i32 [[X]], [[TMP12]]
+; CHECK-NEXT:    [[TMP14:%.*]] = or i32 [[TMP13]], [[TMP10]]
+; CHECK-NEXT:    [[TMP15:%.*]] = icmp sge i32 [[TMP14]], 0
+; CHECK-NEXT:    [[TMP16:%.*]] = xor i1 [[TMP8]], [[TMP15]]
+; CHECK-NEXT:    [[TMP17:%.*]] = icmp sge i32 [[X]], 0
+; CHECK-NEXT:    store i1 [[TMP16]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i1 [[TMP17]]
+;
+; ORIGIN-LABEL: define zeroext i1 @ICmpSGEZero(
+; ORIGIN-SAME: i32 [[X:%.*]]) #[[ATTR0]] {
+; ORIGIN-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; ORIGIN-NEXT:    [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[TMP3:%.*]] = shl i32 [[TMP1]], 1
+; ORIGIN-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 1
+; ORIGIN-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP1]], [[TMP4]]
+; ORIGIN-NEXT:    [[TMP6:%.*]] = xor i32 [[TMP4]], -1
+; ORIGIN-NEXT:    [[TMP7:%.*]] = and i32 [[X]], [[TMP6]]
+; ORIGIN-NEXT:    [[TMP8:%.*]] = or i32 [[TMP7]], [[TMP5]]
+; ORIGIN-NEXT:    [[TMP9:%.*]] = icmp sge i32 [[TMP8]], 0
+; ORIGIN-NEXT:    [[TMP10:%.*]] = shl i32 [[TMP1]], 1
+; ORIGIN-NEXT:    [[TMP11:%.*]] = lshr i32 [[TMP10]], 1
+; ORIGIN-NEXT:    [[TMP12:%.*]] = xor i32 [[TMP1]], [[TMP11]]
+; ORIGIN-NEXT:    [[TMP13:%.*]] = xor i32 [[TMP12]], -1
+; ORIGIN-NEXT:    [[TMP14:%.*]] = and i32 [[X]], [[TMP13]]
+; ORIGIN-NEXT:    [[TMP15:%.*]] = or i32 [[TMP14]], [[TMP11]]
+; ORIGIN-NEXT:    [[TMP16:%.*]] = icmp sge i32 [[TMP15]], 0
+; ORIGIN-NEXT:    [[TMP17:%.*]] = xor i1 [[TMP9]], [[TMP16]]
+; ORIGIN-NEXT:    [[TMP18:%.*]] = icmp sge i32 [[X]], 0
+; ORIGIN-NEXT:    store i1 [[TMP17]], ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    store i32 [[TMP2]], ptr @__msan_retval_origin_tls, align 4
+; ORIGIN-NEXT:    ret i1 [[TMP18]]
+;
+; CALLS-LABEL: define zeroext i1 @ICmpSGEZero(
+; CALLS-SAME: i32 [[X:%.*]]) #[[ATTR0]] {
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    [[TMP3:%.*]] = shl i32 [[TMP1]], 1
+; CALLS-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 1
+; CALLS-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP1]], [[TMP4]]
+; CALLS-NEXT:    [[TMP6:%.*]] = xor i32 [[TMP4]], -1
+; CALLS-NEXT:    [[TMP7:%.*]] = and i32 [[X]], [[TMP6]]
+; CALLS-NEXT:    [[TMP8:%.*]] = or i32 [[TMP7]], [[TMP5]]
+; CALLS-NEXT:    [[TMP9:%.*]] = icmp sge i32 [[TMP8]], 0
+; CALLS-NEXT:    [[TMP10:%.*]] = shl i32 [[TMP1]], 1
+; CALLS-NEXT:    [[TMP11:%.*]] = lshr i32 [[TMP10]], 1
+; CALLS-NEXT:    [[TMP12:%.*]] = xor i32 [[TMP1]], [[TMP11]]
+; CALLS-NEXT:    [[TMP13:%.*]] = xor i32 [[TMP12]], -1
+; CALLS-NEXT:    [[TMP14:%.*]] = and i32 [[X]], [[TMP13]]
+; CALLS-NEXT:    [[TMP15:%.*]] = or i32 [[TMP14]], [[TMP11]]
+; CALLS-NEXT:    [[TMP16:%.*]] = icmp sge i32 [[TMP15]], 0
+; CALLS-NEXT:    [[TMP17:%.*]] = xor i1 [[TMP9]], [[TMP16]]
+; CALLS-NEXT:    [[TMP18:%.*]] = icmp sge i32 [[X]], 0
+; CALLS-NEXT:    store i1 [[TMP17]], ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    store i32 [[TMP2]], ptr @__msan_retval_origin_tls, align 4
+; CALLS-NEXT:    ret i1 [[TMP18]]
+;
   %1 = icmp sge i32 %x, 0
   ret i1 %1
 }
 
-; CHECK-LABEL: @ICmpSGEZero
-; CHECK: icmp slt
-; CHECK-NOT: call void @__msan_warning
-; CHECK: icmp sge
-; CHECK-NOT: call void @__msan_warning
-; CHECK: ret i1
 
 define zeroext i1 @ICmpSGTZero(i32 %x) nounwind uwtable readnone sanitize_memory {
+; CHECK-LABEL: define zeroext i1 @ICmpSGTZero(
+; CHECK-SAME: i32 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr i32 [[TMP2]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = xor i32 [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP4]], -1
+; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[X]], [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = or i32 [[TMP6]], [[TMP3]]
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i32 0, [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = shl i32 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP10:%.*]] = lshr i32 [[TMP9]], 1
+; CHECK-NEXT:    [[TMP11:%.*]] = xor i32 [[TMP1]], [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = xor i32 [[TMP10]], -1
+; CHECK-NEXT:    [[TMP13:%.*]] = and i32 [[X]], [[TMP12]]
+; CHECK-NEXT:    [[TMP14:%.*]] = or i32 [[TMP13]], [[TMP11]]
+; CHECK-NEXT:    [[TMP15:%.*]] = icmp sgt i32 0, [[TMP14]]
+; CHECK-NEXT:    [[TMP16:%.*]] = xor i1 [[TMP8]], [[TMP15]]
+; CHECK-NEXT:    [[TMP17:%.*]] = icmp sgt i32 0, [[X]]
+; CHECK-NEXT:    store i1 [[TMP16]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i1 [[TMP17]]
+;
+; ORIGIN-LABEL: define zeroext i1 @ICmpSGTZero(
+; ORIGIN-SAME: i32 [[X:%.*]]) #[[ATTR0]] {
+; ORIGIN-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; ORIGIN-NEXT:    [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[TMP3:%.*]] = shl i32 [[TMP1]], 1
+; ORIGIN-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 1
+; ORIGIN-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP1]], [[TMP4]]
+; ORIGIN-NEXT:    [[TMP6:%.*]] = xor i32 [[TMP5]], -1
+; ORIGIN-NEXT:    [[TMP7:%.*]] = and i32 [[X]], [[TMP6]]
+; ORIGIN-NEXT:    [[TMP8:%.*]] = or i32 [[TMP7]], [[TMP4]]
+; ORIGIN-NEXT:    [[TMP9:%.*]] = icmp sgt i32 0, [[TMP8]]
+; ORIGIN-NEXT:    [[TMP10:%.*]] = shl i32 [[TMP1]], 1
+; ORIGIN-NEXT:    [[TMP11:%.*]] = lshr i32 [[TMP10]], 1
+; ORIGIN-NEXT:    [[TMP12:%.*]] = xor i32 [[TMP1]], [[TMP11]]
+; ORIGIN-NEXT:    [[TMP13:%.*]] = xor i32 [[TMP11]], -1
+; ORIGIN-NEXT:    [[TMP14:%.*]] = and i32 [[X]], [[TMP13]]
+; ORIGIN-NEXT:    [[TMP15:%.*]] = or i32 [[TMP14]], [[TMP12]]
+; ORIGIN-NEXT:    [[TMP16:%.*]] = icmp sgt i32 0, [[TMP15]]
+; ORIGIN-NEXT:    [[TMP17:%.*]] = xor i1 [[TMP9]], [[TMP16]]
+; ORIGIN-NEXT:    [[TMP18:%.*]] = icmp ne i32 [[TMP1]], 0
+; ORIGIN-NEXT:    [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP2]], i32 0
+; ORIGIN-NEXT:    [[TMP20:%.*]] = icmp sgt i32 0, [[X]]
+; ORIGIN-NEXT:    store i1 [[TMP17]], ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    store i32 [[TMP19]], ptr @__msan_retval_origin_tls, align 4
+; ORIGIN-NEXT:    ret i1 [[TMP20]]
+;
+; CALLS-LABEL: define zeroext i1 @ICmpSGTZero(
+; CALLS-SAME: i32 [[X:%.*]]) #[[ATTR0]] {
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    [[TMP3:%.*]] = shl i32 [[TMP1]], 1
+; CALLS-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 1
+; CALLS-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP1]], [[TMP4]]
+; CALLS-NEXT:    [[TMP6:%.*]] = xor i32 [[TMP5]], -1
+; CALLS-NEXT:    [[TMP7:%.*]] = and i32 [[X]], [[TMP6]]
+; CALLS-NEXT:    [[TMP8:%.*]] = or i32 [[TMP7]], [[TMP4]]
+; CALLS-NEXT:    [[TMP9:%.*]] = icmp sgt i32 0, [[TMP8]]
+; CALLS-NEXT:    [[TMP10:%.*]] = shl i32 [[TMP1]], 1
+; CALLS-NEXT:    [[TMP11:%.*]] = lshr i32 [[TMP10]], 1
+; CALLS-NEXT:    [[TMP12:%.*]] = xor i32 [[TMP1]], [[TMP11]]
+; CALLS-NEXT:    [[TMP13:%.*]] = xor i32 [[TMP11]], -1
+; CALLS-NEXT:    [[TMP14:%.*]] = and i32 [[X]], [[TMP13]]
+; CALLS-NEXT:    [[TMP15:%.*]] = or i32 [[TMP14]], [[TMP12]]
+; CALLS-NEXT:    [[TMP16:%.*]] = icmp sgt i32 0, [[TMP15]]
+; CALLS-NEXT:    [[TMP17:%.*]] = xor i1 [[TMP9]], [[TMP16]]
+; CALLS-NEXT:    [[TMP18:%.*]] = icmp ne i32 [[TMP1]], 0
+; CALLS-NEXT:    [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP2]], i32 0
+; CALLS-NEXT:    [[TMP20:%.*]] = icmp sgt i32 0, [[X]]
+; CALLS-NEXT:    store i1 [[TMP17]], ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    store i32 [[TMP19]], ptr @__msan_retval_origin_tls, align 4
+; CALLS-NEXT:    ret i1 [[TMP20]]
+;
   %1 = icmp sgt i32 0, %x
   ret i1 %1
 }
 
-; CHECK-LABEL: @ICmpSGTZero
-; CHECK: icmp slt
-; CHECK-NOT: call void @__msan_warning
-; CHECK: icmp sgt
-; CHECK-NOT: call void @__msan_warning
-; CHECK: ret i1
 
 define zeroext i1 @ICmpSLEZero(i32 %x) nounwind uwtable readnone sanitize_memory {
+; CHECK-LABEL: define zeroext i1 @ICmpSLEZero(
+; CHECK-SAME: i32 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr i32 [[TMP2]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = xor i32 [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP4]], -1
+; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[X]], [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = or i32 [[TMP6]], [[TMP3]]
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp sle i32 0, [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = shl i32 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP10:%.*]] = lshr i32 [[TMP9]], 1
+; CHECK-NEXT:    [[TMP11:%.*]] = xor i32 [[TMP1]], [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = xor i32 [[TMP10]], -1
+; CHECK-NEXT:    [[TMP13:%.*]] = and i32 [[X]], [[TMP12]]
+; CHECK-NEXT:    [[TMP14:%.*]] = or i32 [[TMP13]], [[TMP11]]
+; CHECK-NEXT:    [[TMP15:%.*]] = icmp sle i32 0, [[TMP14]]
+; CHECK-NEXT:    [[TMP16:%.*]] = xor i1 [[TMP8]], [[TMP15]]
+; CHECK-NEXT:    [[TMP17:%.*]] = icmp sle i32 0, [[X]]
+; CHECK-NEXT:    store i1 [[TMP16]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i1 [[TMP17]]
+;
+; ORIGIN-LABEL: define zeroext i1 @ICmpSLEZero(
+; ORIGIN-SAME: i32 [[X:%.*]]) #[[ATTR0]] {
+; ORIGIN-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; ORIGIN-NEXT:    [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[TMP3:%.*]] = shl i32 [[TMP1]], 1
+; ORIGIN-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 1
+; ORIGIN-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP1]], [[TMP4]]
+; ORIGIN-NEXT:    [[TMP6:%.*]] = xor i32 [[TMP5]], -1
+; ORIGIN-NEXT:    [[TMP7:%.*]] = and i32 [[X]], [[TMP6]]
+; ORIGIN-NEXT:    [[TMP8:%.*]] = or i32 [[TMP7]], [[TMP4]]
+; ORIGIN-NEXT:    [[TMP9:%.*]] = icmp sle i32 0, [[TMP8]]
+; ORIGIN-NEXT:    [[TMP10:%.*]] = shl i32 [[TMP1]], 1
+; ORIGIN-NEXT:    [[TMP11:%.*]] = lshr i32 [[TMP10]], 1
+; ORIGIN-NEXT:    [[TMP12:%.*]] = xor i32 [[TMP1]], [[TMP11]]
+; ORIGIN-NEXT:    [[TMP13:%.*]] = xor i32 [[TMP11]], -1
+; ORIGIN-NEXT:    [[TMP14:%.*]] = and i32 [[X]], [[TMP13]]
+; ORIGIN-NEXT:    [[TMP15:%.*]] = or i32 [[TMP14]], [[TMP12]]
+; ORIGIN-NEXT:    [[TMP16:%.*]] = icmp sle i32 0, [[TMP15]]
+; ORIGIN-NEXT:    [[TMP17:%.*]] = xor i1 [[TMP9]], [[TMP16]]
+; ORIGIN-NEXT:    [[TMP18:%.*]] = icmp ne i32 [[TMP1]], 0
+; ORIGIN-NEXT:    [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP2]], i32 0
+; ORIGIN-NEXT:    [[TMP20:%.*]] = icmp sle i32 0, [[X]]
+; ORIGIN-NEXT:    store i1 [[TMP17]], ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    store i32 [[TMP19]], ptr @__msan_retval_origin_tls, align 4
+; ORIGIN-NEXT:    ret i1 [[TMP20]]
+;
+; CALLS-LABEL: define zeroext i1 @ICmpSLEZero(
+; CALLS-SAME: i32 [[X:%.*]]) #[[ATTR0]] {
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    [[TMP3:%.*]] = shl i32 [[TMP1]], 1
+; CALLS-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 1
+; CALLS-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP1]], [[TMP4]]
+; CALLS-NEXT:    [[TMP6:%.*]] = xor i32 [[TMP5]], -1
+; CALLS-NEXT:    [[TMP7:%.*]] = and i32 [[X]], [[TMP6]]
+; CALLS-NEXT:    [[TMP8:%.*]] = or i32 [[TMP7]], [[TMP4]]
+; CALLS-NEXT:    [[TMP9:%.*]] = icmp sle i32 0, [[TMP8]]
+; CALLS-NEXT:    [[TMP10:%.*]] = shl i32 [[TMP1]], 1
+; CALLS-NEXT:    [[TMP11:%.*]] = lshr i32 [[TMP10]], 1
+; CALLS-NEXT:    [[TMP12:%.*]] = xor i32 [[TMP1]], [[TMP11]]
+; CALLS-NEXT:    [[TMP13:%.*]] = xor i32 [[TMP11]], -1
+; CALLS-NEXT:    [[TMP14:%.*]] = and i32 [[X]], [[TMP13]]
+; CALLS-NEXT:    [[TMP15:%.*]] = or i32 [[TMP14]], [[TMP12]]
+; CALLS-NEXT:    [[TMP16:%.*]] = icmp sle i32 0, [[TMP15]]
+; CALLS-NEXT:    [[TMP17:%.*]] = xor i1 [[TMP9]], [[TMP16]]
+; CALLS-NEXT:    [[TMP18:%.*]] = icmp ne i32 [[TMP1]], 0
+; CALLS-NEXT:    [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP2]], i32 0
+; CALLS-NEXT:    [[TMP20:%.*]] = icmp sle i32 0, [[X]]
+; CALLS-NEXT:    store i1 [[TMP17]], ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    store i32 [[TMP19]], ptr @__msan_retval_origin_tls, align 4
+; CALLS-NEXT:    ret i1 [[TMP20]]
+;
   %1 = icmp sle i32 0, %x
   ret i1 %1
 }
 
-; CHECK-LABEL: @ICmpSLEZero
-; CHECK: icmp slt
-; CHECK-NOT: call void @__msan_warning
-; CHECK: icmp sle
-; CHECK-NOT: call void @__msan_warning
-; CHECK: ret i1
 
 
 ; Check that we propagate shadow for x<=-1, x>-1, etc (i.e. sign bit tests)
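[editor's note: the shl/lshr/xor sequences in these sign-bit tests compute, from the operand and its shadow, the smallest and largest signed values the operand could actually take: poisoned bits below the sign bit are forced to 0 or 1, and a poisoned sign bit is forced the opposite way. The comparison result is poisoned exactly when it differs between the two extremes; the vector variants further below do the same computation lane-wise. A minimal C sketch of the idea (illustrative names, not the pass's code):

    #include <stdint.h>
    #include <stdbool.h>

    /* Shadow of "x <= -1" for a value x whose shadow sx marks poisoned bits. */
    bool sle_allones_shadow(uint32_t x, uint32_t sx) {
      uint32_t rest = (sx << 1) >> 1;              /* poisoned non-sign bits     */
      uint32_t sign = sx ^ rest;                   /* poisoned sign bit, if any  */
      int32_t hi = (int32_t)((x & ~sign) | rest);  /* sign->0, rest->1: maximum  */
      int32_t lo = (int32_t)((x & ~rest) | sign);  /* sign->1, rest->0: minimum  */
      return (hi <= -1) != (lo <= -1);  /* poisoned iff the extremes disagree */
    }
]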
 
 define zeroext i1 @ICmpSLTAllOnes(i32 %x) nounwind uwtable readnone sanitize_memory {
+; CHECK-LABEL: define zeroext i1 @ICmpSLTAllOnes(
+; CHECK-SAME: i32 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr i32 [[TMP2]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = xor i32 [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP4]], -1
+; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[X]], [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = or i32 [[TMP6]], [[TMP3]]
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp slt i32 -1, [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = shl i32 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP10:%.*]] = lshr i32 [[TMP9]], 1
+; CHECK-NEXT:    [[TMP11:%.*]] = xor i32 [[TMP1]], [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = xor i32 [[TMP10]], -1
+; CHECK-NEXT:    [[TMP13:%.*]] = and i32 [[X]], [[TMP12]]
+; CHECK-NEXT:    [[TMP14:%.*]] = or i32 [[TMP13]], [[TMP11]]
+; CHECK-NEXT:    [[TMP15:%.*]] = icmp slt i32 -1, [[TMP14]]
+; CHECK-NEXT:    [[TMP16:%.*]] = xor i1 [[TMP8]], [[TMP15]]
+; CHECK-NEXT:    [[TMP17:%.*]] = icmp slt i32 -1, [[X]]
+; CHECK-NEXT:    store i1 [[TMP16]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i1 [[TMP17]]
+;
+; ORIGIN-LABEL: define zeroext i1 @ICmpSLTAllOnes(
+; ORIGIN-SAME: i32 [[X:%.*]]) #[[ATTR0]] {
+; ORIGIN-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; ORIGIN-NEXT:    [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[TMP3:%.*]] = shl i32 [[TMP1]], 1
+; ORIGIN-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 1
+; ORIGIN-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP1]], [[TMP4]]
+; ORIGIN-NEXT:    [[TMP6:%.*]] = xor i32 [[TMP5]], -1
+; ORIGIN-NEXT:    [[TMP7:%.*]] = and i32 [[X]], [[TMP6]]
+; ORIGIN-NEXT:    [[TMP8:%.*]] = or i32 [[TMP7]], [[TMP4]]
+; ORIGIN-NEXT:    [[TMP9:%.*]] = icmp slt i32 -1, [[TMP8]]
+; ORIGIN-NEXT:    [[TMP10:%.*]] = shl i32 [[TMP1]], 1
+; ORIGIN-NEXT:    [[TMP11:%.*]] = lshr i32 [[TMP10]], 1
+; ORIGIN-NEXT:    [[TMP12:%.*]] = xor i32 [[TMP1]], [[TMP11]]
+; ORIGIN-NEXT:    [[TMP13:%.*]] = xor i32 [[TMP11]], -1
+; ORIGIN-NEXT:    [[TMP14:%.*]] = and i32 [[X]], [[TMP13]]
+; ORIGIN-NEXT:    [[TMP15:%.*]] = or i32 [[TMP14]], [[TMP12]]
+; ORIGIN-NEXT:    [[TMP16:%.*]] = icmp slt i32 -1, [[TMP15]]
+; ORIGIN-NEXT:    [[TMP17:%.*]] = xor i1 [[TMP9]], [[TMP16]]
+; ORIGIN-NEXT:    [[TMP18:%.*]] = icmp ne i32 [[TMP1]], 0
+; ORIGIN-NEXT:    [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP2]], i32 0
+; ORIGIN-NEXT:    [[TMP20:%.*]] = icmp slt i32 -1, [[X]]
+; ORIGIN-NEXT:    store i1 [[TMP17]], ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    store i32 [[TMP19]], ptr @__msan_retval_origin_tls, align 4
+; ORIGIN-NEXT:    ret i1 [[TMP20]]
+;
+; CALLS-LABEL: define zeroext i1 @ICmpSLTAllOnes(
+; CALLS-SAME: i32 [[X:%.*]]) #[[ATTR0]] {
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    [[TMP3:%.*]] = shl i32 [[TMP1]], 1
+; CALLS-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 1
+; CALLS-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP1]], [[TMP4]]
+; CALLS-NEXT:    [[TMP6:%.*]] = xor i32 [[TMP5]], -1
+; CALLS-NEXT:    [[TMP7:%.*]] = and i32 [[X]], [[TMP6]]
+; CALLS-NEXT:    [[TMP8:%.*]] = or i32 [[TMP7]], [[TMP4]]
+; CALLS-NEXT:    [[TMP9:%.*]] = icmp slt i32 -1, [[TMP8]]
+; CALLS-NEXT:    [[TMP10:%.*]] = shl i32 [[TMP1]], 1
+; CALLS-NEXT:    [[TMP11:%.*]] = lshr i32 [[TMP10]], 1
+; CALLS-NEXT:    [[TMP12:%.*]] = xor i32 [[TMP1]], [[TMP11]]
+; CALLS-NEXT:    [[TMP13:%.*]] = xor i32 [[TMP11]], -1
+; CALLS-NEXT:    [[TMP14:%.*]] = and i32 [[X]], [[TMP13]]
+; CALLS-NEXT:    [[TMP15:%.*]] = or i32 [[TMP14]], [[TMP12]]
+; CALLS-NEXT:    [[TMP16:%.*]] = icmp slt i32 -1, [[TMP15]]
+; CALLS-NEXT:    [[TMP17:%.*]] = xor i1 [[TMP9]], [[TMP16]]
+; CALLS-NEXT:    [[TMP18:%.*]] = icmp ne i32 [[TMP1]], 0
+; CALLS-NEXT:    [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP2]], i32 0
+; CALLS-NEXT:    [[TMP20:%.*]] = icmp slt i32 -1, [[X]]
+; CALLS-NEXT:    store i1 [[TMP17]], ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    store i32 [[TMP19]], ptr @__msan_retval_origin_tls, align 4
+; CALLS-NEXT:    ret i1 [[TMP20]]
+;
   %1 = icmp slt i32 -1, %x
   ret i1 %1
 }
 
-; CHECK-LABEL: @ICmpSLTAllOnes
-; CHECK: icmp slt
-; CHECK-NOT: call void @__msan_warning
-; CHECK: icmp slt
-; CHECK-NOT: call void @__msan_warning
-; CHECK: ret i1
 
 define zeroext i1 @ICmpSGEAllOnes(i32 %x) nounwind uwtable readnone sanitize_memory {
+; CHECK-LABEL: define zeroext i1 @ICmpSGEAllOnes(
+; CHECK-SAME: i32 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr i32 [[TMP2]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = xor i32 [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP4]], -1
+; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[X]], [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = or i32 [[TMP6]], [[TMP3]]
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp sge i32 -1, [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = shl i32 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP10:%.*]] = lshr i32 [[TMP9]], 1
+; CHECK-NEXT:    [[TMP11:%.*]] = xor i32 [[TMP1]], [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = xor i32 [[TMP10]], -1
+; CHECK-NEXT:    [[TMP13:%.*]] = and i32 [[X]], [[TMP12]]
+; CHECK-NEXT:    [[TMP14:%.*]] = or i32 [[TMP13]], [[TMP11]]
+; CHECK-NEXT:    [[TMP15:%.*]] = icmp sge i32 -1, [[TMP14]]
+; CHECK-NEXT:    [[TMP16:%.*]] = xor i1 [[TMP8]], [[TMP15]]
+; CHECK-NEXT:    [[TMP17:%.*]] = icmp sge i32 -1, [[X]]
+; CHECK-NEXT:    store i1 [[TMP16]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i1 [[TMP17]]
+;
+; ORIGIN-LABEL: define zeroext i1 @ICmpSGEAllOnes(
+; ORIGIN-SAME: i32 [[X:%.*]]) #[[ATTR0]] {
+; ORIGIN-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; ORIGIN-NEXT:    [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[TMP3:%.*]] = shl i32 [[TMP1]], 1
+; ORIGIN-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 1
+; ORIGIN-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP1]], [[TMP4]]
+; ORIGIN-NEXT:    [[TMP6:%.*]] = xor i32 [[TMP5]], -1
+; ORIGIN-NEXT:    [[TMP7:%.*]] = and i32 [[X]], [[TMP6]]
+; ORIGIN-NEXT:    [[TMP8:%.*]] = or i32 [[TMP7]], [[TMP4]]
+; ORIGIN-NEXT:    [[TMP9:%.*]] = icmp sge i32 -1, [[TMP8]]
+; ORIGIN-NEXT:    [[TMP10:%.*]] = shl i32 [[TMP1]], 1
+; ORIGIN-NEXT:    [[TMP11:%.*]] = lshr i32 [[TMP10]], 1
+; ORIGIN-NEXT:    [[TMP12:%.*]] = xor i32 [[TMP1]], [[TMP11]]
+; ORIGIN-NEXT:    [[TMP13:%.*]] = xor i32 [[TMP11]], -1
+; ORIGIN-NEXT:    [[TMP14:%.*]] = and i32 [[X]], [[TMP13]]
+; ORIGIN-NEXT:    [[TMP15:%.*]] = or i32 [[TMP14]], [[TMP12]]
+; ORIGIN-NEXT:    [[TMP16:%.*]] = icmp sge i32 -1, [[TMP15]]
+; ORIGIN-NEXT:    [[TMP17:%.*]] = xor i1 [[TMP9]], [[TMP16]]
+; ORIGIN-NEXT:    [[TMP18:%.*]] = icmp ne i32 [[TMP1]], 0
+; ORIGIN-NEXT:    [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP2]], i32 0
+; ORIGIN-NEXT:    [[TMP20:%.*]] = icmp sge i32 -1, [[X]]
+; ORIGIN-NEXT:    store i1 [[TMP17]], ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    store i32 [[TMP19]], ptr @__msan_retval_origin_tls, align 4
+; ORIGIN-NEXT:    ret i1 [[TMP20]]
+;
+; CALLS-LABEL: define zeroext i1 @ICmpSGEAllOnes(
+; CALLS-SAME: i32 [[X:%.*]]) #[[ATTR0]] {
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    [[TMP3:%.*]] = shl i32 [[TMP1]], 1
+; CALLS-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 1
+; CALLS-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP1]], [[TMP4]]
+; CALLS-NEXT:    [[TMP6:%.*]] = xor i32 [[TMP5]], -1
+; CALLS-NEXT:    [[TMP7:%.*]] = and i32 [[X]], [[TMP6]]
+; CALLS-NEXT:    [[TMP8:%.*]] = or i32 [[TMP7]], [[TMP4]]
+; CALLS-NEXT:    [[TMP9:%.*]] = icmp sge i32 -1, [[TMP8]]
+; CALLS-NEXT:    [[TMP10:%.*]] = shl i32 [[TMP1]], 1
+; CALLS-NEXT:    [[TMP11:%.*]] = lshr i32 [[TMP10]], 1
+; CALLS-NEXT:    [[TMP12:%.*]] = xor i32 [[TMP1]], [[TMP11]]
+; CALLS-NEXT:    [[TMP13:%.*]] = xor i32 [[TMP11]], -1
+; CALLS-NEXT:    [[TMP14:%.*]] = and i32 [[X]], [[TMP13]]
+; CALLS-NEXT:    [[TMP15:%.*]] = or i32 [[TMP14]], [[TMP12]]
+; CALLS-NEXT:    [[TMP16:%.*]] = icmp sge i32 -1, [[TMP15]]
+; CALLS-NEXT:    [[TMP17:%.*]] = xor i1 [[TMP9]], [[TMP16]]
+; CALLS-NEXT:    [[TMP18:%.*]] = icmp ne i32 [[TMP1]], 0
+; CALLS-NEXT:    [[TMP19:%.*]] = select i1 [[TMP18]], i32 [[TMP2]], i32 0
+; CALLS-NEXT:    [[TMP20:%.*]] = icmp sge i32 -1, [[X]]
+; CALLS-NEXT:    store i1 [[TMP17]], ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    store i32 [[TMP19]], ptr @__msan_retval_origin_tls, align 4
+; CALLS-NEXT:    ret i1 [[TMP20]]
+;
   %1 = icmp sge i32 -1, %x
   ret i1 %1
 }
 
-; CHECK-LABEL: @ICmpSGEAllOnes
-; CHECK: icmp slt
-; CHECK-NOT: call void @__msan_warning
-; CHECK: icmp sge
-; CHECK-NOT: call void @__msan_warning
-; CHECK: ret i1
 
 define zeroext i1 @ICmpSGTAllOnes(i32 %x) nounwind uwtable readnone sanitize_memory {
+; CHECK-LABEL: define zeroext i1 @ICmpSGTAllOnes(
+; CHECK-SAME: i32 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr i32 [[TMP2]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = xor i32 [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP3]], -1
+; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[X]], [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = or i32 [[TMP6]], [[TMP4]]
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i32 [[TMP7]], -1
+; CHECK-NEXT:    [[TMP9:%.*]] = shl i32 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP10:%.*]] = lshr i32 [[TMP9]], 1
+; CHECK-NEXT:    [[TMP11:%.*]] = xor i32 [[TMP1]], [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = xor i32 [[TMP11]], -1
+; CHECK-NEXT:    [[TMP13:%.*]] = and i32 [[X]], [[TMP12]]
+; CHECK-NEXT:    [[TMP14:%.*]] = or i32 [[TMP13]], [[TMP10]]
+; CHECK-NEXT:    [[TMP15:%.*]] = icmp sgt i32 [[TMP14]], -1
+; CHECK-NEXT:    [[TMP16:%.*]] = xor i1 [[TMP8]], [[TMP15]]
+; CHECK-NEXT:    [[TMP17:%.*]] = icmp sgt i32 [[X]], -1
+; CHECK-NEXT:    store i1 [[TMP16]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i1 [[TMP17]]
+;
+; ORIGIN-LABEL: define zeroext i1 @ICmpSGTAllOnes(
+; ORIGIN-SAME: i32 [[X:%.*]]) #[[ATTR0]] {
+; ORIGIN-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; ORIGIN-NEXT:    [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[TMP3:%.*]] = shl i32 [[TMP1]], 1
+; ORIGIN-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 1
+; ORIGIN-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP1]], [[TMP4]]
+; ORIGIN-NEXT:    [[TMP6:%.*]] = xor i32 [[TMP4]], -1
+; ORIGIN-NEXT:    [[TMP7:%.*]] = and i32 [[X]], [[TMP6]]
+; ORIGIN-NEXT:    [[TMP8:%.*]] = or i32 [[TMP7]], [[TMP5]]
+; ORIGIN-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], -1
+; ORIGIN-NEXT:    [[TMP10:%.*]] = shl i32 [[TMP1]], 1
+; ORIGIN-NEXT:    [[TMP11:%.*]] = lshr i32 [[TMP10]], 1
+; ORIGIN-NEXT:    [[TMP12:%.*]] = xor i32 [[TMP1]], [[TMP11]]
+; ORIGIN-NEXT:    [[TMP13:%.*]] = xor i32 [[TMP12]], -1
+; ORIGIN-NEXT:    [[TMP14:%.*]] = and i32 [[X]], [[TMP13]]
+; ORIGIN-NEXT:    [[TMP15:%.*]] = or i32 [[TMP14]], [[TMP11]]
+; ORIGIN-NEXT:    [[TMP16:%.*]] = icmp sgt i32 [[TMP15]], -1
+; ORIGIN-NEXT:    [[TMP17:%.*]] = xor i1 [[TMP9]], [[TMP16]]
+; ORIGIN-NEXT:    [[TMP18:%.*]] = icmp sgt i32 [[X]], -1
+; ORIGIN-NEXT:    store i1 [[TMP17]], ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    store i32 [[TMP2]], ptr @__msan_retval_origin_tls, align 4
+; ORIGIN-NEXT:    ret i1 [[TMP18]]
+;
+; CALLS-LABEL: define zeroext i1 @ICmpSGTAllOnes(
+; CALLS-SAME: i32 [[X:%.*]]) #[[ATTR0]] {
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    [[TMP3:%.*]] = shl i32 [[TMP1]], 1
+; CALLS-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 1
+; CALLS-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP1]], [[TMP4]]
+; CALLS-NEXT:    [[TMP6:%.*]] = xor i32 [[TMP4]], -1
+; CALLS-NEXT:    [[TMP7:%.*]] = and i32 [[X]], [[TMP6]]
+; CALLS-NEXT:    [[TMP8:%.*]] = or i32 [[TMP7]], [[TMP5]]
+; CALLS-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP8]], -1
+; CALLS-NEXT:    [[TMP10:%.*]] = shl i32 [[TMP1]], 1
+; CALLS-NEXT:    [[TMP11:%.*]] = lshr i32 [[TMP10]], 1
+; CALLS-NEXT:    [[TMP12:%.*]] = xor i32 [[TMP1]], [[TMP11]]
+; CALLS-NEXT:    [[TMP13:%.*]] = xor i32 [[TMP12]], -1
+; CALLS-NEXT:    [[TMP14:%.*]] = and i32 [[X]], [[TMP13]]
+; CALLS-NEXT:    [[TMP15:%.*]] = or i32 [[TMP14]], [[TMP11]]
+; CALLS-NEXT:    [[TMP16:%.*]] = icmp sgt i32 [[TMP15]], -1
+; CALLS-NEXT:    [[TMP17:%.*]] = xor i1 [[TMP9]], [[TMP16]]
+; CALLS-NEXT:    [[TMP18:%.*]] = icmp sgt i32 [[X]], -1
+; CALLS-NEXT:    store i1 [[TMP17]], ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    store i32 [[TMP2]], ptr @__msan_retval_origin_tls, align 4
+; CALLS-NEXT:    ret i1 [[TMP18]]
+;
   %1 = icmp sgt i32 %x, -1
   ret i1 %1
 }
 
-; CHECK-LABEL: @ICmpSGTAllOnes
-; CHECK: icmp slt
-; CHECK-NOT: call void @__msan_warning
-; CHECK: icmp sgt
-; CHECK-NOT: call void @__msan_warning
-; CHECK: ret i1
 
 define zeroext i1 @ICmpSLEAllOnes(i32 %x) nounwind uwtable readnone sanitize_memory {
+; CHECK-LABEL: define zeroext i1 @ICmpSLEAllOnes(
+; CHECK-SAME: i32 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr i32 [[TMP2]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = xor i32 [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP3]], -1
+; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[X]], [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = or i32 [[TMP6]], [[TMP4]]
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp sle i32 [[TMP7]], -1
+; CHECK-NEXT:    [[TMP9:%.*]] = shl i32 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP10:%.*]] = lshr i32 [[TMP9]], 1
+; CHECK-NEXT:    [[TMP11:%.*]] = xor i32 [[TMP1]], [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = xor i32 [[TMP11]], -1
+; CHECK-NEXT:    [[TMP13:%.*]] = and i32 [[X]], [[TMP12]]
+; CHECK-NEXT:    [[TMP14:%.*]] = or i32 [[TMP13]], [[TMP10]]
+; CHECK-NEXT:    [[TMP15:%.*]] = icmp sle i32 [[TMP14]], -1
+; CHECK-NEXT:    [[TMP16:%.*]] = xor i1 [[TMP8]], [[TMP15]]
+; CHECK-NEXT:    [[TMP17:%.*]] = icmp sle i32 [[X]], -1
+; CHECK-NEXT:    store i1 [[TMP16]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i1 [[TMP17]]
+;
+; ORIGIN-LABEL: define zeroext i1 @ICmpSLEAllOnes(
+; ORIGIN-SAME: i32 [[X:%.*]]) #[[ATTR0]] {
+; ORIGIN-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; ORIGIN-NEXT:    [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[TMP3:%.*]] = shl i32 [[TMP1]], 1
+; ORIGIN-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 1
+; ORIGIN-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP1]], [[TMP4]]
+; ORIGIN-NEXT:    [[TMP6:%.*]] = xor i32 [[TMP4]], -1
+; ORIGIN-NEXT:    [[TMP7:%.*]] = and i32 [[X]], [[TMP6]]
+; ORIGIN-NEXT:    [[TMP8:%.*]] = or i32 [[TMP7]], [[TMP5]]
+; ORIGIN-NEXT:    [[TMP9:%.*]] = icmp sle i32 [[TMP8]], -1
+; ORIGIN-NEXT:    [[TMP10:%.*]] = shl i32 [[TMP1]], 1
+; ORIGIN-NEXT:    [[TMP11:%.*]] = lshr i32 [[TMP10]], 1
+; ORIGIN-NEXT:    [[TMP12:%.*]] = xor i32 [[TMP1]], [[TMP11]]
+; ORIGIN-NEXT:    [[TMP13:%.*]] = xor i32 [[TMP12]], -1
+; ORIGIN-NEXT:    [[TMP14:%.*]] = and i32 [[X]], [[TMP13]]
+; ORIGIN-NEXT:    [[TMP15:%.*]] = or i32 [[TMP14]], [[TMP11]]
+; ORIGIN-NEXT:    [[TMP16:%.*]] = icmp sle i32 [[TMP15]], -1
+; ORIGIN-NEXT:    [[TMP17:%.*]] = xor i1 [[TMP9]], [[TMP16]]
+; ORIGIN-NEXT:    [[TMP18:%.*]] = icmp sle i32 [[X]], -1
+; ORIGIN-NEXT:    store i1 [[TMP17]], ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    store i32 [[TMP2]], ptr @__msan_retval_origin_tls, align 4
+; ORIGIN-NEXT:    ret i1 [[TMP18]]
+;
+; CALLS-LABEL: define zeroext i1 @ICmpSLEAllOnes(
+; CALLS-SAME: i32 [[X:%.*]]) #[[ATTR0]] {
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    [[TMP3:%.*]] = shl i32 [[TMP1]], 1
+; CALLS-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 1
+; CALLS-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP1]], [[TMP4]]
+; CALLS-NEXT:    [[TMP6:%.*]] = xor i32 [[TMP4]], -1
+; CALLS-NEXT:    [[TMP7:%.*]] = and i32 [[X]], [[TMP6]]
+; CALLS-NEXT:    [[TMP8:%.*]] = or i32 [[TMP7]], [[TMP5]]
+; CALLS-NEXT:    [[TMP9:%.*]] = icmp sle i32 [[TMP8]], -1
+; CALLS-NEXT:    [[TMP10:%.*]] = shl i32 [[TMP1]], 1
+; CALLS-NEXT:    [[TMP11:%.*]] = lshr i32 [[TMP10]], 1
+; CALLS-NEXT:    [[TMP12:%.*]] = xor i32 [[TMP1]], [[TMP11]]
+; CALLS-NEXT:    [[TMP13:%.*]] = xor i32 [[TMP12]], -1
+; CALLS-NEXT:    [[TMP14:%.*]] = and i32 [[X]], [[TMP13]]
+; CALLS-NEXT:    [[TMP15:%.*]] = or i32 [[TMP14]], [[TMP11]]
+; CALLS-NEXT:    [[TMP16:%.*]] = icmp sle i32 [[TMP15]], -1
+; CALLS-NEXT:    [[TMP17:%.*]] = xor i1 [[TMP9]], [[TMP16]]
+; CALLS-NEXT:    [[TMP18:%.*]] = icmp sle i32 [[X]], -1
+; CALLS-NEXT:    store i1 [[TMP17]], ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    store i32 [[TMP2]], ptr @__msan_retval_origin_tls, align 4
+; CALLS-NEXT:    ret i1 [[TMP18]]
+;
   %1 = icmp sle i32 %x, -1
   ret i1 %1
 }
 
-; CHECK-LABEL: @ICmpSLEAllOnes
-; CHECK: icmp slt
-; CHECK-NOT: call void @__msan_warning
-; CHECK: icmp sle
-; CHECK-NOT: call void @__msan_warning
-; CHECK: ret i1
 
 
 ; Check that we propagate shadow for x<0, x>=0, etc (i.e. sign bit tests)
 ; of the vector arguments.
 
 define <2 x i1> @ICmpSLT_vector_Zero(<2 x ptr> %x) nounwind uwtable readnone sanitize_memory {
+; CHECK-LABEL: define <2 x i1> @ICmpSLT_vector_Zero(
+; CHECK-SAME: <2 x ptr> [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint <2 x ptr> [[X]] to <2 x i64>
+; CHECK-NEXT:    [[TMP3:%.*]] = shl <2 x i64> [[TMP1]], <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP4:%.*]] = lshr <2 x i64> [[TMP3]], <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP5:%.*]] = xor <2 x i64> [[TMP1]], [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = xor <2 x i64> [[TMP4]], <i64 -1, i64 -1>
+; CHECK-NEXT:    [[TMP7:%.*]] = and <2 x i64> [[TMP2]], [[TMP6]]
+; CHECK-NEXT:    [[TMP8:%.*]] = or <2 x i64> [[TMP7]], [[TMP5]]
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp slt <2 x i64> [[TMP8]], zeroinitializer
+; CHECK-NEXT:    [[TMP10:%.*]] = shl <2 x i64> [[TMP1]], <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP11:%.*]] = lshr <2 x i64> [[TMP10]], <i64 1, i64 1>
+; CHECK-NEXT:    [[TMP12:%.*]] = xor <2 x i64> [[TMP1]], [[TMP11]]
+; CHECK-NEXT:    [[TMP13:%.*]] = xor <2 x i64> [[TMP12]], <i64 -1, i64 -1>
+; CHECK-NEXT:    [[TMP14:%.*]] = and <2 x i64> [[TMP2]], [[TMP13]]
+; CHECK-NEXT:    [[TMP15:%.*]] = or <2 x i64> [[TMP14]], [[TMP11]]
+; CHECK-NEXT:    [[TMP16:%.*]] = icmp slt <2 x i64> [[TMP15]], zeroinitializer
+; CHECK-NEXT:    [[TMP17:%.*]] = xor <2 x i1> [[TMP9]], [[TMP16]]
+; CHECK-NEXT:    [[TMP18:%.*]] = icmp slt <2 x ptr> [[X]], zeroinitializer
+; CHECK-NEXT:    store <2 x i1> [[TMP17]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret <2 x i1> [[TMP18]]
+;
+; ORIGIN-LABEL: define <2 x i1> @ICmpSLT_vector_Zero(
+; ORIGIN-SAME: <2 x ptr> [[X:%.*]]) #[[ATTR0]] {
+; ORIGIN-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; ORIGIN-NEXT:    [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[TMP3:%.*]] = ptrtoint <2 x ptr> [[X]] to <2 x i64>
+; ORIGIN-NEXT:    [[TMP4:%.*]] = shl <2 x i64> [[TMP1]], <i64 1, i64 1>
+; ORIGIN-NEXT:    [[TMP5:%.*]] = lshr <2 x i64> [[TMP4]], <i64 1, i64 1>
+; ORIGIN-NEXT:    [[TMP6:%.*]] = xor <2 x i64> [[TMP1]], [[TMP5]]
+; ORIGIN-NEXT:    [[TMP7:%.*]] = xor <2 x i64> [[TMP5]], <i64 -1, i64 -1>
+; ORIGIN-NEXT:    [[TMP8:%.*]] = and <2 x i64> [[TMP3]], [[TMP7]]
+; ORIGIN-NEXT:    [[TMP9:%.*]] = or <2 x i64> [[TMP8]], [[TMP6]]
+; ORIGIN-NEXT:    [[TMP10:%.*]] = icmp slt <2 x i64> [[TMP9]], zeroinitializer
+; ORIGIN-NEXT:    [[TMP11:%.*]] = shl <2 x i64> [[TMP1]], <i64 1, i64 1>
+; ORIGIN-NEXT:    [[TMP12:%.*]] = lshr <2 x i64> [[TMP11]], <i64 1, i64 1>
+; ORIGIN-NEXT:    [[TMP13:%.*]] = xor <2 x i64> [[TMP1]], [[TMP12]]
+; ORIGIN-NEXT:    [[TMP14:%.*]] = xor <2 x i64> [[TMP13]], <i64 -1, i64 -1>
+; ORIGIN-NEXT:    [[TMP15:%.*]] = and <2 x i64> [[TMP3]], [[TMP14]]
+; ORIGIN-NEXT:    [[TMP16:%.*]] = or <2 x i64> [[TMP15]], [[TMP12]]
+; ORIGIN-NEXT:    [[TMP17:%.*]] = icmp slt <2 x i64> [[TMP16]], zeroinitializer
+; ORIGIN-NEXT:    [[TMP18:%.*]] = xor <2 x i1> [[TMP10]], [[TMP17]]
+; ORIGIN-NEXT:    [[TMP19:%.*]] = icmp slt <2 x ptr> [[X]], zeroinitializer
+; ORIGIN-NEXT:    store <2 x i1> [[TMP18]], ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    store i32 [[TMP2]], ptr @__msan_retval_origin_tls, align 4
+; ORIGIN-NEXT:    ret <2 x i1> [[TMP19]]
+;
+; CALLS-LABEL: define <2 x i1> @ICmpSLT_vector_Zero(
+; CALLS-SAME: <2 x ptr> [[X:%.*]]) #[[ATTR0]] {
+; CALLS-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    [[TMP3:%.*]] = ptrtoint <2 x ptr> [[X]] to <2 x i64>
+; CALLS-NEXT:    [[TMP4:%.*]] = shl <2 x i64> [[TMP1]], <i64 1, i64 1>
+; CALLS-NEXT:    [[TMP5:%.*]] = lshr <2 x i64> [[TMP4]], <i64 1, i64 1>
+; CALLS-NEXT:    [[TMP6:%.*]] = xor <2 x i64> [[TMP1]], [[TMP5]]
+; CALLS-NEXT:    [[TMP7:%.*]] = xor <2 x i64> [[TMP5]], <i64 -1, i64 -1>
+; CALLS-NEXT:    [[TMP8:%.*]] = and <2 x i64> [[TMP3]], [[TMP7]]
+; CALLS-NEXT:    [[TMP9:%.*]] = or <2 x i64> [[TMP8]], [[TMP6]]
+; CALLS-NEXT:    [[TMP10:%.*]] = icmp slt <2 x i64> [[TMP9]], zeroinitializer
+; CALLS-NEXT:    [[TMP11:%.*]] = shl <2 x i64> [[TMP1]], <i64 1, i64 1>
+; CALLS-NEXT:    [[TMP12:%.*]] = lshr <2 x i64> [[TMP11]], <i64 1, i64 1>
+; CALLS-NEXT:    [[TMP13:%.*]] = xor <2 x i64> [[TMP1]], [[TMP12]]
+; CALLS-NEXT:    [[TMP14:%.*]] = xor <2 x i64> [[TMP13]], <i64 -1, i64 -1>
+; CALLS-NEXT:    [[TMP15:%.*]] = and <2 x i64> [[TMP3]], [[TMP14]]
+; CALLS-NEXT:    [[TMP16:%.*]] = or <2 x i64> [[TMP15]], [[TMP12]]
+; CALLS-NEXT:    [[TMP17:%.*]] = icmp slt <2 x i64> [[TMP16]], zeroinitializer
+; CALLS-NEXT:    [[TMP18:%.*]] = xor <2 x i1> [[TMP10]], [[TMP17]]
+; CALLS-NEXT:    [[TMP19:%.*]] = icmp slt <2 x ptr> [[X]], zeroinitializer
+; CALLS-NEXT:    store <2 x i1> [[TMP18]], ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    store i32 [[TMP2]], ptr @__msan_retval_origin_tls, align 4
+; CALLS-NEXT:    ret <2 x i1> [[TMP19]]
+;
   %1 = icmp slt <2 x ptr> %x, zeroinitializer
   ret <2 x i1> %1
 }
 
-; CHECK-LABEL: @ICmpSLT_vector_Zero
-; CHECK: icmp slt <2 x i64>
-; CHECK-NOT: call void @__msan_warning
-; CHECK: icmp slt <2 x ptr>
-; CHECK-NOT: call void @__msan_warning
-; CHECK: ret <2 x i1>
 
 ; Check that we propagate shadow for x<=-1, x>-1, etc (i.e. sign bit tests)
 ; of the vector arguments.
 
 define <2 x i1> @ICmpSLT_vector_AllOnes(<2 x i32> %x) nounwind uwtable readnone sanitize_memory {
+; CHECK-LABEL: define <2 x i1> @ICmpSLT_vector_AllOnes(
+; CHECK-SAME: <2 x i32> [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i32> [[TMP1]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr <2 x i32> [[TMP2]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP4:%.*]] = xor <2 x i32> [[TMP1]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = xor <2 x i32> [[TMP4]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP6:%.*]] = and <2 x i32> [[X]], [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = or <2 x i32> [[TMP6]], [[TMP3]]
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp slt <2 x i32> <i32 -1, i32 -1>, [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = shl <2 x i32> [[TMP1]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP10:%.*]] = lshr <2 x i32> [[TMP9]], <i32 1, i32 1>
+; CHECK-NEXT:    [[TMP11:%.*]] = xor <2 x i32> [[TMP1]], [[TMP10]]
+; CHECK-NEXT:    [[TMP12:%.*]] = xor <2 x i32> [[TMP10]], <i32 -1, i32 -1>
+; CHECK-NEXT:    [[TMP13:%.*]] = and <2 x i32> [[X]], [[TMP12]]
+; CHECK-NEXT:    [[TMP14:%.*]] = or <2 x i32> [[TMP13]], [[TMP11]]
+; CHECK-NEXT:    [[TMP15:%.*]] = icmp slt <2 x i32> <i32 -1, i32 -1>, [[TMP14]]
+; CHECK-NEXT:    [[TMP16:%.*]] = xor <2 x i1> [[TMP8]], [[TMP15]]
+; CHECK-NEXT:    [[TMP17:%.*]] = icmp slt <2 x i32> <i32 -1, i32 -1>, [[X]]
+; CHECK-NEXT:    store <2 x i1> [[TMP16]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret <2 x i1> [[TMP17]]
+;
+; ORIGIN-LABEL: define <2 x i1> @ICmpSLT_vector_AllOnes(
+; ORIGIN-SAME: <2 x i32> [[X:%.*]]) #[[ATTR0]] {
+; ORIGIN-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
+; ORIGIN-NEXT:    [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[TMP3:%.*]] = shl <2 x i32> [[TMP1]], <i32 1, i32 1>
+; ORIGIN-NEXT:    [[TMP4:%.*]] = lshr <2 x i32> [[TMP3]], <i32 1, i32 1>
+; ORIGIN-NEXT:    [[TMP5:%.*]] = xor <2 x i32> [[TMP1]], [[TMP4]]
+; ORIGIN-NEXT:    [[TMP6:%.*]] = xor <2 x i32> [[TMP5]], <i32 -1, i32 -1>
+; ORIGIN-NEXT:    [[TMP7:%.*]] = and <2 x i32> [[X]], [[TMP6]]
+; ORIGIN-NEXT:    [[TMP8:%.*]] = or <2 x i32> [[TMP7]], [[TMP4]]
+; ORIGIN-NEXT:    [[TMP9:%.*]] = icmp slt <2 x i32> <i32 -1, i32 -1>, [[TMP8]]
+; ORIGIN-NEXT:    [[TMP10:%.*]] = shl <2 x i32> [[TMP1]], <i32 1, i32 1>
+; ORIGIN-NEXT:    [[TMP11:%.*]] = lshr <2 x i32> [[TMP10]], <i32 1, i32 1>
+; ORIGIN-NEXT:    [[TMP12:%.*]] = xor <2 x i32> [[TMP1]], [[TMP11]]
+; ORIGIN-NEXT:    [[TMP13:%.*]] = xor <2 x i32> [[TMP11]], <i32 -1, i32 -1>
+; ORIGIN-NEXT:    [[TMP14:%.*]] = and <2 x i32> [[X]], [[TMP13]]
+; ORIGIN-NEXT:    [[TMP15:%.*]] = or <2 x i32> [[TMP14]], [[TMP12]]
+; ORIGIN-NEXT:    [[TMP16:%.*]] = icmp slt <2 x i32> <i32 -1, i32 -1>, [[TMP15]]
+; ORIGIN-NEXT:    [[TMP17:%.*]] = xor <2 x i1> [[TMP9]], [[TMP16]]
+; ORIGIN-NEXT:    [[TMP18:%.*]] = bitcast <2 x i32> [[TMP1]] to i64
+; ORIGIN-NEXT:    [[TMP19:%.*]] = icmp ne i64 [[TMP18]], 0
+; ORIGIN-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP2]], i32 0
+; ORIGIN-NEXT:    [[TMP21:%.*]] = icmp slt <2 x i32> <i32 -1, i32 -1>, [[X]]
+; ORIGIN-NEXT:    store <2 x i1> [[TMP17]], ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    store i32 [[TMP20]], ptr @__msan_retval_origin_tls, align 4
+; ORIGIN-NEXT:    ret <2 x i1> [[TMP21]]
+;
+; CALLS-LABEL: define <2 x i1> @ICmpSLT_vector_AllOnes(
+; CALLS-SAME: <2 x i32> [[X:%.*]]) #[[ATTR0]] {
+; CALLS-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    [[TMP3:%.*]] = shl <2 x i32> [[TMP1]], <i32 1, i32 1>
+; CALLS-NEXT:    [[TMP4:%.*]] = lshr <2 x i32> [[TMP3]], <i32 1, i32 1>
+; CALLS-NEXT:    [[TMP5:%.*]] = xor <2 x i32> [[TMP1]], [[TMP4]]
+; CALLS-NEXT:    [[TMP6:%.*]] = xor <2 x i32> [[TMP5]], <i32 -1, i32 -1>
+; CALLS-NEXT:    [[TMP7:%.*]] = and <2 x i32> [[X]], [[TMP6]]
+; CALLS-NEXT:    [[TMP8:%.*]] = or <2 x i32> [[TMP7]], [[TMP4]]
+; CALLS-NEXT:    [[TMP9:%.*]] = icmp slt <2 x i32> <i32 -1, i32 -1>, [[TMP8]]
+; CALLS-NEXT:    [[TMP10:%.*]] = shl <2 x i32> [[TMP1]], <i32 1, i32 1>
+; CALLS-NEXT:    [[TMP11:%.*]] = lshr <2 x i32> [[TMP10]], <i32 1, i32 1>
+; CALLS-NEXT:    [[TMP12:%.*]] = xor <2 x i32> [[TMP1]], [[TMP11]]
+; CALLS-NEXT:    [[TMP13:%.*]] = xor <2 x i32> [[TMP11]], <i32 -1, i32 -1>
+; CALLS-NEXT:    [[TMP14:%.*]] = and <2 x i32> [[X]], [[TMP13]]
+; CALLS-NEXT:    [[TMP15:%.*]] = or <2 x i32> [[TMP14]], [[TMP12]]
+; CALLS-NEXT:    [[TMP16:%.*]] = icmp slt <2 x i32> <i32 -1, i32 -1>, [[TMP15]]
+; CALLS-NEXT:    [[TMP17:%.*]] = xor <2 x i1> [[TMP9]], [[TMP16]]
+; CALLS-NEXT:    [[TMP18:%.*]] = bitcast <2 x i32> [[TMP1]] to i64
+; CALLS-NEXT:    [[TMP19:%.*]] = icmp ne i64 [[TMP18]], 0
+; CALLS-NEXT:    [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[TMP2]], i32 0
+; CALLS-NEXT:    [[TMP21:%.*]] = icmp slt <2 x i32> <i32 -1, i32 -1>, [[X]]
+; CALLS-NEXT:    store <2 x i1> [[TMP17]], ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    store i32 [[TMP20]], ptr @__msan_retval_origin_tls, align 4
+; CALLS-NEXT:    ret <2 x i1> [[TMP21]]
+;
   %1 = icmp slt <2 x i32> <i32 -1, i32 -1>, %x
   ret <2 x i1> %1
 }
 
-; CHECK-LABEL: @ICmpSLT_vector_AllOnes
-; CHECK: icmp slt <2 x i32>
-; CHECK-NOT: call void @__msan_warning
-; CHECK: icmp slt <2 x i32>
-; CHECK-NOT: call void @__msan_warning
-; CHECK: ret <2 x i1>
 
 
 ; Check that we propagate shadow for unsigned relational comparisons with
 ; constants
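[editor's note: with no sign bit to special-case, the unsigned extremes are simply the value with every poisoned bit cleared and with every poisoned bit set, matching the and/or pair in the checks below. A minimal C sketch (illustrative names):

    #include <stdint.h>
    #include <stdbool.h>

    /* Shadow of "x > 7" for unsigned x with shadow sx. */
    bool ugt_const_shadow(uint32_t x, uint32_t sx) {
      uint32_t lo = x & ~sx;        /* every poisoned bit taken as 0      */
      uint32_t hi = x | sx;         /* every poisoned bit taken as 1      */
      return (lo > 7) != (hi > 7);  /* poisoned iff the extremes disagree */
    }
]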
 
 define zeroext i1 @ICmpUGTConst(i32 %x) nounwind uwtable readnone sanitize_memory {
+; CHECK-LABEL: define zeroext i1 @ICmpUGTConst(
+; CHECK-SAME: i32 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[TMP0]], -1
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[X]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp ugt i32 [[TMP2]], 7
+; CHECK-NEXT:    [[TMP4:%.*]] = or i32 [[X]], [[TMP0]]
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ugt i32 [[TMP4]], 7
+; CHECK-NEXT:    [[TMP6:%.*]] = xor i1 [[TMP3]], [[TMP5]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[X]], 7
+; CHECK-NEXT:    store i1 [[TMP6]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+; ORIGIN-LABEL: define zeroext i1 @ICmpUGTConst(
+; ORIGIN-SAME: i32 [[X:%.*]]) #[[ATTR0]] {
+; ORIGIN-NEXT:  [[ENTRY:.*:]]
+; ORIGIN-NEXT:    [[TMP0:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; ORIGIN-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[TMP2:%.*]] = xor i32 [[TMP0]], -1
+; ORIGIN-NEXT:    [[TMP3:%.*]] = and i32 [[X]], [[TMP2]]
+; ORIGIN-NEXT:    [[TMP4:%.*]] = icmp ugt i32 [[TMP3]], 7
+; ORIGIN-NEXT:    [[TMP5:%.*]] = or i32 [[X]], [[TMP0]]
+; ORIGIN-NEXT:    [[TMP6:%.*]] = icmp ugt i32 [[TMP5]], 7
+; ORIGIN-NEXT:    [[TMP7:%.*]] = xor i1 [[TMP4]], [[TMP6]]
+; ORIGIN-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[X]], 7
+; ORIGIN-NEXT:    store i1 [[TMP7]], ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    store i32 [[TMP1]], ptr @__msan_retval_origin_tls, align 4
+; ORIGIN-NEXT:    ret i1 [[CMP]]
+;
+; CALLS-LABEL: define zeroext i1 @ICmpUGTConst(
+; CALLS-SAME: i32 [[X:%.*]]) #[[ATTR0]] {
+; CALLS-NEXT:  [[ENTRY:.*:]]
+; CALLS-NEXT:    [[TMP0:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    [[TMP2:%.*]] = xor i32 [[TMP0]], -1
+; CALLS-NEXT:    [[TMP3:%.*]] = and i32 [[X]], [[TMP2]]
+; CALLS-NEXT:    [[TMP4:%.*]] = icmp ugt i32 [[TMP3]], 7
+; CALLS-NEXT:    [[TMP5:%.*]] = or i32 [[X]], [[TMP0]]
+; CALLS-NEXT:    [[TMP6:%.*]] = icmp ugt i32 [[TMP5]], 7
+; CALLS-NEXT:    [[TMP7:%.*]] = xor i1 [[TMP4]], [[TMP6]]
+; CALLS-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[X]], 7
+; CALLS-NEXT:    store i1 [[TMP7]], ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    store i32 [[TMP1]], ptr @__msan_retval_origin_tls, align 4
+; CALLS-NEXT:    ret i1 [[CMP]]
+;
 entry:
   %cmp = icmp ugt i32 %x, 7
   ret i1 %cmp
 }
 
-; CHECK-LABEL: @ICmpUGTConst
-; CHECK: icmp ugt i32
-; CHECK-NOT: call void @__msan_warning
-; CHECK: icmp ugt i32
-; CHECK-NOT: call void @__msan_warning
-; CHECK: icmp ugt i32
-; CHECK-NOT: call void @__msan_warning
-; CHECK: ret i1
 
 
 ; Check that loads of shadow have the same alignment as the original loads.
 ; Check that loads of origin have the alignment of max(4, original alignment).
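[editor's note: the magic numbers in these checks come from the Linux x86-64 MSan mapping: the shadow address is the application address xor'ed with 87960930222080 (0x500000000000) and keeps the load's alignment, while the origin sits a further 17592186044416 (0x100000000000) beyond the shadow, rounded down to its 4-byte cell, which is why origin loads get alignment max(4, original). A sketch of the address computation, assuming that mapping:

    #include <stdint.h>

    /* Linux x86-64 mapping, matching the constants in the IR below. */
    uintptr_t msan_shadow(uintptr_t addr) {
      return addr ^ 0x500000000000ULL;    /* 87960930222080 */
    }
    uintptr_t msan_origin(uintptr_t addr) {
      /* 17592186044416 past the shadow, in 4-byte cells (hence "and -4"). */
      return (msan_shadow(addr) + 0x100000000000ULL) & ~(uintptr_t)3;
    }
]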
 
 define i32 @ShadowLoadAlignmentLarge() nounwind uwtable sanitize_memory {
+; CHECK-LABEL: define i32 @ShadowLoadAlignmentLarge(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[Y:%.*]] = alloca i32, align 64
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[Y]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080
+; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 64 [[TMP3]], i8 -1, i64 4, i1 false)
+; CHECK-NEXT:    [[TMP4:%.*]] = load volatile i32, ptr [[Y]], align 64
+; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[Y]] to i64
+; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
+; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT:    [[_MSLD:%.*]] = load i32, ptr [[TMP7]], align 64
+; CHECK-NEXT:    store i32 [[_MSLD]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+; ORIGIN-LABEL: define i32 @ShadowLoadAlignmentLarge(
+; ORIGIN-SAME: ) #[[ATTR0]] {
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[Y:%.*]] = alloca i32, align 64
+; ORIGIN-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[Y]] to i64
+; ORIGIN-NEXT:    [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080
+; ORIGIN-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; ORIGIN-NEXT:    [[TMP4:%.*]] = add i64 [[TMP2]], 17592186044416
+; ORIGIN-NEXT:    [[TMP5:%.*]] = and i64 [[TMP4]], -4
+; ORIGIN-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; ORIGIN-NEXT:    call void @llvm.memset.p0.i64(ptr align 64 [[TMP3]], i8 -1, i64 4, i1 false)
+; ORIGIN-NEXT:    call void @__msan_set_alloca_origin_with_descr(ptr [[Y]], i64 4, ptr @[[GLOB0:[0-9]+]], ptr @[[GLOB1:[0-9]+]])
+; ORIGIN-NEXT:    [[TMP7:%.*]] = load volatile i32, ptr [[Y]], align 64
+; ORIGIN-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[Y]] to i64
+; ORIGIN-NEXT:    [[TMP9:%.*]] = xor i64 [[TMP8]], 87960930222080
+; ORIGIN-NEXT:    [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr
+; ORIGIN-NEXT:    [[TMP11:%.*]] = add i64 [[TMP9]], 17592186044416
+; ORIGIN-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; ORIGIN-NEXT:    [[_MSLD:%.*]] = load i32, ptr [[TMP10]], align 64
+; ORIGIN-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 64
+; ORIGIN-NEXT:    store i32 [[_MSLD]], ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    store i32 [[TMP13]], ptr @__msan_retval_origin_tls, align 4
+; ORIGIN-NEXT:    ret i32 [[TMP7]]
+;
+; CALLS-LABEL: define i32 @ShadowLoadAlignmentLarge(
+; CALLS-SAME: ) #[[ATTR0]] {
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    [[Y:%.*]] = alloca i32, align 64
+; CALLS-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[Y]] to i64
+; CALLS-NEXT:    [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080
+; CALLS-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CALLS-NEXT:    [[TMP4:%.*]] = add i64 [[TMP2]], 17592186044416
+; CALLS-NEXT:    [[TMP5:%.*]] = and i64 [[TMP4]], -4
+; CALLS-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CALLS-NEXT:    call void @llvm.memset.p0.i64(ptr align 64 [[TMP3]], i8 -1, i64 4, i1 false)
+; CALLS-NEXT:    call void @__msan_set_alloca_origin_with_descr(ptr [[Y]], i64 4, ptr @[[GLOB0:[0-9]+]], ptr @[[GLOB1:[0-9]+]])
+; CALLS-NEXT:    [[TMP7:%.*]] = load volatile i32, ptr [[Y]], align 64
+; CALLS-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[Y]] to i64
+; CALLS-NEXT:    [[TMP9:%.*]] = xor i64 [[TMP8]], 87960930222080
+; CALLS-NEXT:    [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr
+; CALLS-NEXT:    [[TMP11:%.*]] = add i64 [[TMP9]], 17592186044416
+; CALLS-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CALLS-NEXT:    [[_MSLD:%.*]] = load i32, ptr [[TMP10]], align 64
+; CALLS-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 64
+; CALLS-NEXT:    store i32 [[_MSLD]], ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    store i32 [[TMP13]], ptr @__msan_retval_origin_tls, align 4
+; CALLS-NEXT:    ret i32 [[TMP7]]
+;
   %y = alloca i32, align 64
   %1 = load volatile i32, ptr %y, align 64
   ret i32 %1
 }
 
-; CHECK-LABEL: @ShadowLoadAlignmentLarge
-; CHECK: load volatile i32, ptr {{.*}} align 64
-; CHECK: load i32, ptr {{.*}} align 64
-; CHECK: ret i32
 
 define i32 @ShadowLoadAlignmentSmall() nounwind uwtable sanitize_memory {
+; CHECK-LABEL: define i32 @ShadowLoadAlignmentSmall(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[Y:%.*]] = alloca i32, align 2
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[Y]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080
+; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 2 [[TMP3]], i8 -1, i64 4, i1 false)
+; CHECK-NEXT:    [[TMP4:%.*]] = load volatile i32, ptr [[Y]], align 2
+; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[Y]] to i64
+; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
+; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT:    [[_MSLD:%.*]] = load i32, ptr [[TMP7]], align 2
+; CHECK-NEXT:    store i32 [[_MSLD]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i32 [[TMP4]]
+;
+; ORIGIN-LABEL: define i32 @ShadowLoadAlignmentSmall(
+; ORIGIN-SAME: ) #[[ATTR0]] {
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[Y:%.*]] = alloca i32, align 2
+; ORIGIN-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[Y]] to i64
+; ORIGIN-NEXT:    [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080
+; ORIGIN-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; ORIGIN-NEXT:    [[TMP4:%.*]] = add i64 [[TMP2]], 17592186044416
+; ORIGIN-NEXT:    [[TMP5:%.*]] = and i64 [[TMP4]], -4
+; ORIGIN-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; ORIGIN-NEXT:    call void @llvm.memset.p0.i64(ptr align 2 [[TMP3]], i8 -1, i64 4, i1 false)
+; ORIGIN-NEXT:    call void @__msan_set_alloca_origin_with_descr(ptr [[Y]], i64 4, ptr @[[GLOB2:[0-9]+]], ptr @[[GLOB3:[0-9]+]])
+; ORIGIN-NEXT:    [[TMP7:%.*]] = load volatile i32, ptr [[Y]], align 2
+; ORIGIN-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[Y]] to i64
+; ORIGIN-NEXT:    [[TMP9:%.*]] = xor i64 [[TMP8]], 87960930222080
+; ORIGIN-NEXT:    [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr
+; ORIGIN-NEXT:    [[TMP11:%.*]] = add i64 [[TMP9]], 17592186044416
+; ORIGIN-NEXT:    [[TMP12:%.*]] = and i64 [[TMP11]], -4
+; ORIGIN-NEXT:    [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr
+; ORIGIN-NEXT:    [[_MSLD:%.*]] = load i32, ptr [[TMP10]], align 2
+; ORIGIN-NEXT:    [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
+; ORIGIN-NEXT:    store i32 [[_MSLD]], ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    store i32 [[TMP14]], ptr @__msan_retval_origin_tls, align 4
+; ORIGIN-NEXT:    ret i32 [[TMP7]]
+;
+; CALLS-LABEL: define i32 @ShadowLoadAlignmentSmall(
+; CALLS-SAME: ) #[[ATTR0]] {
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    [[Y:%.*]] = alloca i32, align 2
+; CALLS-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[Y]] to i64
+; CALLS-NEXT:    [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080
+; CALLS-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CALLS-NEXT:    [[TMP4:%.*]] = add i64 [[TMP2]], 17592186044416
+; CALLS-NEXT:    [[TMP5:%.*]] = and i64 [[TMP4]], -4
+; CALLS-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CALLS-NEXT:    call void @llvm.memset.p0.i64(ptr align 2 [[TMP3]], i8 -1, i64 4, i1 false)
+; CALLS-NEXT:    call void @__msan_set_alloca_origin_with_descr(ptr [[Y]], i64 4, ptr @[[GLOB2:[0-9]+]], ptr @[[GLOB3:[0-9]+]])
+; CALLS-NEXT:    [[TMP7:%.*]] = load volatile i32, ptr [[Y]], align 2
+; CALLS-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[Y]] to i64
+; CALLS-NEXT:    [[TMP9:%.*]] = xor i64 [[TMP8]], 87960930222080
+; CALLS-NEXT:    [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr
+; CALLS-NEXT:    [[TMP11:%.*]] = add i64 [[TMP9]], 17592186044416
+; CALLS-NEXT:    [[TMP12:%.*]] = and i64 [[TMP11]], -4
+; CALLS-NEXT:    [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr
+; CALLS-NEXT:    [[_MSLD:%.*]] = load i32, ptr [[TMP10]], align 2
+; CALLS-NEXT:    [[TMP14:%.*]] = load i32, ptr [[TMP13]], align 4
+; CALLS-NEXT:    store i32 [[_MSLD]], ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    store i32 [[TMP14]], ptr @__msan_retval_origin_tls, align 4
+; CALLS-NEXT:    ret i32 [[TMP7]]
+;
   %y = alloca i32, align 2
   %1 = load volatile i32, ptr %y, align 2
   ret i32 %1
 }
 
-; CHECK-LABEL: @ShadowLoadAlignmentSmall
-; CHECK: load volatile i32, ptr {{.*}} align 2
-; CHECK: load i32, ptr {{.*}} align 2
-; ORIGINS: load i32, ptr {{.*}} align 4
-; CHECK: ret i32
-
-
 ; Test vector manipulation instructions.
 ; Check that the same bit manipulation is applied to the shadow values.
 ; Check that there is a zero test of the shadow of %idx argument, where present.
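[editor's note: the element shadow moves through the same extractelement/insertelement as the data, while the index must itself be fully initialized: its shadow is branched on (calling __msan_warning_noreturn), or handed to __msan_maybe_warning_4 under -msan-instrumentation-with-call-threshold=0. Schematically, in C (the shadow_* parameters are illustrative, not the real shadow ABI):

    #include <stdint.h>

    void __msan_warning_noreturn(void);  /* provided by the MSan runtime */

    /* Instrumented "y = vec[idx]", schematically. */
    int32_t extract(const int32_t vec[4], const int32_t shadow_vec[4],
                    int32_t idx, int32_t shadow_idx, int32_t *shadow_y) {
      if (shadow_idx != 0)          /* the index must be fully initialized */
        __msan_warning_noreturn();
      *shadow_y = shadow_vec[idx];  /* element shadow follows the data */
      return vec[idx];
    }
]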
 
 define i32 @ExtractElement(<4 x i32> %vec, i32 %idx) sanitize_memory {
+; CHECK-LABEL: define i32 @ExtractElement(
+; CHECK-SAME: <4 x i32> [[VEC:%.*]], i32 [[IDX:%.*]]) #[[ATTR6]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSPROP:%.*]] = extractelement <4 x i32> [[TMP2]], i32 [[IDX]]
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label %[[BB3:.*]], label %[[BB4:.*]], !prof [[PROF1]]
+; CHECK:       [[BB3]]:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR12]]
+; CHECK-NEXT:    unreachable
+; CHECK:       [[BB4]]:
+; CHECK-NEXT:    [[X:%.*]] = extractelement <4 x i32> [[VEC]], i32 [[IDX]]
+; CHECK-NEXT:    store i32 [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i32 [[X]]
+;
+; ORIGIN-LABEL: define i32 @ExtractElement(
+; ORIGIN-SAME: <4 x i32> [[VEC:%.*]], i32 [[IDX:%.*]]) #[[ATTR6]] {
+; ORIGIN-NEXT:    [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; ORIGIN-NEXT:    [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4
+; ORIGIN-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; ORIGIN-NEXT:    [[TMP4:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[_MSPROP:%.*]] = extractelement <4 x i32> [[TMP3]], i32 [[IDX]]
+; ORIGIN-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; ORIGIN-NEXT:    br i1 [[_MSCMP]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1]]
+; ORIGIN:       [[BB5]]:
+; ORIGIN-NEXT:    call void @__msan_warning_with_origin_noreturn(i32 [[TMP2]]) #[[ATTR12]]
+; ORIGIN-NEXT:    unreachable
+; ORIGIN:       [[BB6]]:
+; ORIGIN-NEXT:    [[X:%.*]] = extractelement <4 x i32> [[VEC]], i32 [[IDX]]
+; ORIGIN-NEXT:    store i32 [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    store i32 [[TMP4]], ptr @__msan_retval_origin_tls, align 4
+; ORIGIN-NEXT:    ret i32 [[X]]
+;
+; CALLS-LABEL: define i32 @ExtractElement(
+; CALLS-SAME: <4 x i32> [[VEC:%.*]], i32 [[IDX:%.*]]) #[[ATTR6]] {
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CALLS-NEXT:    [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4
+; CALLS-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP4:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    [[_MSPROP:%.*]] = extractelement <4 x i32> [[TMP3]], i32 [[IDX]]
+; CALLS-NEXT:    call void @__msan_maybe_warning_4(i32 zeroext [[TMP1]], i32 zeroext [[TMP2]])
+; CALLS-NEXT:    [[X:%.*]] = extractelement <4 x i32> [[VEC]], i32 [[IDX]]
+; CALLS-NEXT:    store i32 [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    store i32 [[TMP4]], ptr @__msan_retval_origin_tls, align 4
+; CALLS-NEXT:    ret i32 [[X]]
+;
   %x = extractelement <4 x i32> %vec, i32 %idx
   ret i32 %x
 }
 
-; CHECK-LABEL: @ExtractElement
-; CHECK: extractelement
-; NOORIGINS: call void @__msan_warning_noreturn()
-; ORIGINS: call void @__msan_warning_with_origin_noreturn(i32
-; CHECK: extractelement
-; CHECK: ret i32
-
 define <4 x i32> @InsertElement(<4 x i32> %vec, i32 %idx, i32 %x) sanitize_memory {
+; CHECK-LABEL: define <4 x i32> @InsertElement(
+; CHECK-SAME: <4 x i32> [[VEC:%.*]], i32 [[IDX:%.*]], i32 [[X:%.*]]) #[[ATTR6]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSPROP:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP3]], i32 [[IDX]]
+; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP]], label %[[BB4:.*]], label %[[BB5:.*]], !prof [[PROF1]]
+; CHECK:       [[BB4]]:
+; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR12]]
+; CHECK-NEXT:    unreachable
+; CHECK:       [[BB5]]:
+; CHECK-NEXT:    [[VEC1:%.*]] = insertelement <4 x i32> [[VEC]], i32 [[X]], i32 [[IDX]]
+; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret <4 x i32> [[VEC1]]
+;
+; ORIGIN-LABEL: define <4 x i32> @InsertElement(
+; ORIGIN-SAME: <4 x i32> [[VEC:%.*]], i32 [[IDX:%.*]], i32 [[X:%.*]]) #[[ATTR6]] {
+; ORIGIN-NEXT:    [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; ORIGIN-NEXT:    [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4
+; ORIGIN-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; ORIGIN-NEXT:    [[TMP4:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; ORIGIN-NEXT:    [[TMP5:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; ORIGIN-NEXT:    [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 24) to ptr), align 4
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[_MSPROP:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[TMP5]], i32 [[IDX]]
+; ORIGIN-NEXT:    [[TMP7:%.*]] = icmp ne i32 [[TMP5]], 0
+; ORIGIN-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 [[TMP4]]
+; ORIGIN-NEXT:    [[TMP9:%.*]] = icmp ne i32 [[TMP1]], 0
+; ORIGIN-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP2]], i32 [[TMP8]]
+; ORIGIN-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; ORIGIN-NEXT:    br i1 [[_MSCMP]], label %[[BB11:.*]], label %[[BB12:.*]], !prof [[PROF1]]
+; ORIGIN:       [[BB11]]:
+; ORIGIN-NEXT:    call void @__msan_warning_with_origin_noreturn(i32 [[TMP2]]) #[[ATTR12]]
+; ORIGIN-NEXT:    unreachable
+; ORIGIN:       [[BB12]]:
+; ORIGIN-NEXT:    [[VEC1:%.*]] = insertelement <4 x i32> [[VEC]], i32 [[X]], i32 [[IDX]]
+; ORIGIN-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    store i32 [[TMP10]], ptr @__msan_retval_origin_tls, align 4
+; ORIGIN-NEXT:    ret <4 x i32> [[VEC1]]
+;
+; CALLS-LABEL: define <4 x i32> @InsertElement(
+; CALLS-SAME: <4 x i32> [[VEC:%.*]], i32 [[IDX:%.*]], i32 [[X:%.*]]) #[[ATTR6]] {
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CALLS-NEXT:    [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4
+; CALLS-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP4:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    [[TMP5:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CALLS-NEXT:    [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 24) to ptr), align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    [[_MSPROP:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[TMP5]], i32 [[IDX]]
+; CALLS-NEXT:    [[TMP7:%.*]] = icmp ne i32 [[TMP5]], 0
+; CALLS-NEXT:    [[TMP8:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 [[TMP4]]
+; CALLS-NEXT:    [[TMP9:%.*]] = icmp ne i32 [[TMP1]], 0
+; CALLS-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i32 [[TMP2]], i32 [[TMP8]]
+; CALLS-NEXT:    call void @__msan_maybe_warning_4(i32 zeroext [[TMP1]], i32 zeroext [[TMP2]])
+; CALLS-NEXT:    [[VEC1:%.*]] = insertelement <4 x i32> [[VEC]], i32 [[X]], i32 [[IDX]]
+; CALLS-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    store i32 [[TMP10]], ptr @__msan_retval_origin_tls, align 4
+; CALLS-NEXT:    ret <4 x i32> [[VEC1]]
+;
   %vec1 = insertelement <4 x i32> %vec, i32 %x, i32 %idx
   ret <4 x i32> %vec1
 }
 
-; CHECK-LABEL: @InsertElement
-; CHECK: insertelement
-; NOORIGINS: call void @__msan_warning_noreturn()
-; ORIGINS: call void @__msan_warning_with_origin_noreturn(i32
-; CHECK: insertelement
-; CHECK: ret <4 x i32>
-
 define <4 x i32> @ShuffleVector(<4 x i32> %vec, <4 x i32> %vec1) sanitize_memory {
+; CHECK-LABEL: define <4 x i32> @ShuffleVector(
+; CHECK-SAME: <4 x i32> [[VEC:%.*]], <4 x i32> [[VEC1:%.*]]) #[[ATTR6]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[_MSPROP:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+; CHECK-NEXT:    [[VEC2:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> [[VEC1]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+; CHECK-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret <4 x i32> [[VEC2]]
+;
+; ORIGIN-LABEL: define <4 x i32> @ShuffleVector(
+; ORIGIN-SAME: <4 x i32> [[VEC:%.*]], <4 x i32> [[VEC1:%.*]]) #[[ATTR6]] {
+; ORIGIN-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; ORIGIN-NEXT:    [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; ORIGIN-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; ORIGIN-NEXT:    [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[_MSPROP:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP3]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+; ORIGIN-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+; ORIGIN-NEXT:    [[TMP6:%.*]] = icmp ne i128 [[TMP5]], 0
+; ORIGIN-NEXT:    [[TMP7:%.*]] = select i1 [[TMP6]], i32 [[TMP4]], i32 [[TMP2]]
+; ORIGIN-NEXT:    [[VEC2:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> [[VEC1]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+; ORIGIN-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    store i32 [[TMP7]], ptr @__msan_retval_origin_tls, align 4
+; ORIGIN-NEXT:    ret <4 x i32> [[VEC2]]
+;
+; CALLS-LABEL: define <4 x i32> @ShuffleVector(
+; CALLS-SAME: <4 x i32> [[VEC:%.*]], <4 x i32> [[VEC1:%.*]]) #[[ATTR6]] {
+; CALLS-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CALLS-NEXT:    [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    [[_MSPROP:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP3]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+; CALLS-NEXT:    [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+; CALLS-NEXT:    [[TMP6:%.*]] = icmp ne i128 [[TMP5]], 0
+; CALLS-NEXT:    [[TMP7:%.*]] = select i1 [[TMP6]], i32 [[TMP4]], i32 [[TMP2]]
+; CALLS-NEXT:    [[VEC2:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> [[VEC1]], <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+; CALLS-NEXT:    store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    store i32 [[TMP7]], ptr @__msan_retval_origin_tls, align 4
+; CALLS-NEXT:    ret <4 x i32> [[VEC2]]
+;
   %vec2 = shufflevector <4 x i32> %vec, <4 x i32> %vec1,
-                        <4 x i32> <i32 0, i32 4, i32 1, i32 5>
+  <4 x i32> <i32 0, i32 4, i32 1, i32 5>
   ret <4 x i32> %vec2
 }
 
-; CHECK-LABEL: @ShuffleVector
-; CHECK: shufflevector
-; CHECK-NOT: call void @__msan_warning
-; CHECK: shufflevector
-; CHECK: ret <4 x i32>
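
The shadow here is propagated by shuffling the two operand shadows with
exactly the same mask as the data (the [[_MSPROP]] line), so each result lane
inherits the shadow of the source lane it came from. A small C++ sketch of
that rule, modeling <4 x i32> as an array; the names are illustrative, not
from the pass:

  #include <array>
  #include <cstdint>

  using V4 = std::array<uint32_t, 4>;

  // Same mask as the data shuffle: result lane i takes its shadow from the
  // source lane that produced its data; indices >= 4 select from the second
  // operand, as in the IR mask <i32 0, i32 4, i32 1, i32 5>.
  V4 shuffle_shadow(const V4 &s1, const V4 &s2, const std::array<int, 4> &m) {
    V4 r{};
    for (int i = 0; i < 4; ++i)
      r[i] = m[i] < 4 ? s1[m[i]] : s2[m[i] - 4];
    return r;
  }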
 
 
 ; Test bswap intrinsic instrumentation
 define i32 @BSwap(i32 %x) nounwind uwtable readnone sanitize_memory {
+; CHECK-LABEL: define i32 @BSwap(
+; CHECK-SAME: i32 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; CHECK-NEXT:    [[Y:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[X]])
+; CHECK-NEXT:    store i32 [[TMP2]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i32 [[Y]]
+;
+; ORIGIN-LABEL: define i32 @BSwap(
+; ORIGIN-SAME: i32 [[X:%.*]]) #[[ATTR0]] {
+; ORIGIN-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; ORIGIN-NEXT:    [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; ORIGIN-NEXT:    [[Y:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[X]])
+; ORIGIN-NEXT:    store i32 [[TMP3]], ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    store i32 [[TMP2]], ptr @__msan_retval_origin_tls, align 4
+; ORIGIN-NEXT:    ret i32 [[Y]]
+;
+; CALLS-LABEL: define i32 @BSwap(
+; CALLS-SAME: i32 [[X:%.*]]) #[[ATTR0]] {
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    [[TMP3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
+; CALLS-NEXT:    [[Y:%.*]] = tail call i32 @llvm.bswap.i32(i32 [[X]])
+; CALLS-NEXT:    store i32 [[TMP3]], ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    store i32 [[TMP2]], ptr @__msan_retval_origin_tls, align 4
+; CALLS-NEXT:    ret i32 [[Y]]
+;
   %y = tail call i32 @llvm.bswap.i32(i32 %x)
   ret i32 %y
 }
 
 declare i32 @llvm.bswap.i32(i32) nounwind readnone
 
-; CHECK-LABEL: @BSwap
-; CHECK-NOT: call void @__msan_warning
-; CHECK: @llvm.bswap.i32
-; CHECK-NOT: call void @__msan_warning
-; CHECK: @llvm.bswap.i32
-; CHECK-NOT: call void @__msan_warning
-; CHECK: ret i32
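
Note that the shadow of a bswap is just the bswap of the shadow, since shadow
bits travel with their data bits; the 32-bit origin id is passed through
unchanged. A one-line sketch, assuming the GCC/Clang builtin:

  #include <cstdint>

  // Mirrors the `call i32 @llvm.bswap.i32(i32 [[TMP1]])` in the checks:
  // byte-swapping the value byte-swaps its shadow bit-for-bit.
  uint32_t bswap_shadow(uint32_t x_shadow) {
    return __builtin_bswap32(x_shadow);
  }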
 
 ; Test handling of vectors of pointers.
 ; Check that the shadow of such a vector is a vector of integers.
 
 define <8 x ptr> @VectorOfPointers(ptr %p) nounwind uwtable sanitize_memory {
+; CHECK-LABEL: define <8 x ptr> @VectorOfPointers(
+; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[X:%.*]] = load <8 x ptr>, ptr [[P]], align 64
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[P]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080
+; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT:    [[_MSLD:%.*]] = load <8 x i64>, ptr [[TMP3]], align 64
+; CHECK-NEXT:    store <8 x i64> [[_MSLD]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret <8 x ptr> [[X]]
+;
+; ORIGIN-LABEL: define <8 x ptr> @VectorOfPointers(
+; ORIGIN-SAME: ptr [[P:%.*]]) #[[ATTR0]] {
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[X:%.*]] = load <8 x ptr>, ptr [[P]], align 64
+; ORIGIN-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[P]] to i64
+; ORIGIN-NEXT:    [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080
+; ORIGIN-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; ORIGIN-NEXT:    [[TMP4:%.*]] = add i64 [[TMP2]], 17592186044416
+; ORIGIN-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr
+; ORIGIN-NEXT:    [[_MSLD:%.*]] = load <8 x i64>, ptr [[TMP3]], align 64
+; ORIGIN-NEXT:    [[TMP6:%.*]] = load i32, ptr [[TMP5]], align 64
+; ORIGIN-NEXT:    store <8 x i64> [[_MSLD]], ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    store i32 [[TMP6]], ptr @__msan_retval_origin_tls, align 4
+; ORIGIN-NEXT:    ret <8 x ptr> [[X]]
+;
+; CALLS-LABEL: define <8 x ptr> @VectorOfPointers(
+; CALLS-SAME: ptr [[P:%.*]]) #[[ATTR0]] {
+; CALLS-NEXT:    [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    call void @__msan_maybe_warning_8(i64 zeroext [[TMP1]], i32 zeroext [[TMP2]])
+; CALLS-NEXT:    [[X:%.*]] = load <8 x ptr>, ptr [[P]], align 64
+; CALLS-NEXT:    [[TMP3:%.*]] = ptrtoint ptr [[P]] to i64
+; CALLS-NEXT:    [[TMP4:%.*]] = xor i64 [[TMP3]], 87960930222080
+; CALLS-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr
+; CALLS-NEXT:    [[TMP6:%.*]] = add i64 [[TMP4]], 17592186044416
+; CALLS-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CALLS-NEXT:    [[_MSLD:%.*]] = load <8 x i64>, ptr [[TMP5]], align 64
+; CALLS-NEXT:    [[TMP8:%.*]] = load i32, ptr [[TMP7]], align 64
+; CALLS-NEXT:    store <8 x i64> [[_MSLD]], ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    store i32 [[TMP8]], ptr @__msan_retval_origin_tls, align 4
+; CALLS-NEXT:    ret <8 x ptr> [[X]]
+;
   %x = load <8 x ptr>, ptr %p
   ret <8 x ptr> %x
 }
 
-; CHECK-LABEL: @VectorOfPointers
-; CHECK: load <8 x ptr>, ptr
-; CHECK: load <8 x i64>, ptr
-; CHECK: store <8 x i64> {{.*}} @__msan_retval_tls
-; CHECK: ret <8 x ptr>
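
An aside on the two magic numbers that recur throughout these checks:
87960930222080 is 0x500000000000, the xor mask of the x86-64 Linux shadow
mapping, and 17592186044416 is 0x100000000000, the offset from a shadow
address to its origin address. A minimal sketch, with an arbitrary example
address:

  #include <cstdint>
  #include <cstdio>

  int main() {
    uint64_t app = 0x7f0123456780;                 // example app address
    uint64_t shadow = app ^ 0x500000000000ULL;     // the `xor` in the checks
    uint64_t origin = shadow + 0x100000000000ULL;  // the `add` under ORIGIN
    std::printf("shadow=%#llx origin=%#llx\n",
                (unsigned long long)shadow, (unsigned long long)origin);
  }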
 
 ; Test handling of va_copy.
 
 declare void @llvm.va_copy(ptr, ptr) nounwind
 
 define void @VACopy(ptr %p1, ptr %p2) nounwind uwtable sanitize_memory {
+; CHECK-LABEL: define void @VACopy(
+; CHECK-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[P1]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080
+; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP3]], i8 0, i64 24, i1 false)
+; CHECK-NEXT:    call void @llvm.va_copy.p0(ptr [[P1]], ptr [[P2]]) #[[ATTR5]]
+; CHECK-NEXT:    ret void
+;
+; ORIGIN-LABEL: define void @VACopy(
+; ORIGIN-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] {
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[P1]] to i64
+; ORIGIN-NEXT:    [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080
+; ORIGIN-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; ORIGIN-NEXT:    [[TMP4:%.*]] = add i64 [[TMP2]], 17592186044416
+; ORIGIN-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr
+; ORIGIN-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP3]], i8 0, i64 24, i1 false)
+; ORIGIN-NEXT:    call void @llvm.va_copy.p0(ptr [[P1]], ptr [[P2]]) #[[ATTR5]]
+; ORIGIN-NEXT:    ret void
+;
+; CALLS-LABEL: define void @VACopy(
+; CALLS-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) #[[ATTR0]] {
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[P1]] to i64
+; CALLS-NEXT:    [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080
+; CALLS-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CALLS-NEXT:    [[TMP4:%.*]] = add i64 [[TMP2]], 17592186044416
+; CALLS-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr
+; CALLS-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP3]], i8 0, i64 24, i1 false)
+; CALLS-NEXT:    call void @llvm.va_copy.p0(ptr [[P1]], ptr [[P2]]) #[[ATTR5]]
+; CALLS-NEXT:    ret void
+;
   call void @llvm.va_copy(ptr %p1, ptr %p2) nounwind
   ret void
 }
 
-; CHECK-LABEL: @VACopy
-; CHECK: call void @llvm.memset.p0.i64({{.*}}, i8 0, i64 24, i1 false)
-; CHECK: ret void
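
The 24-byte memset unpoisons the destination va_list itself; 24 is
sizeof(__va_list_tag) on x86-64. A layout sketch under that ABI (the struct
name is illustrative):

  #include <cstdint>

  struct VaListTag64 {        // stand-in for the SysV x86-64 __va_list_tag
    uint32_t gp_offset;       // byte offset of the next GP register slot
    uint32_t fp_offset;       // byte offset of the next FP/SSE register slot
    void *overflow_arg_area;  // stack-passed arguments
    void *reg_save_area;      // spilled register arguments
  };
  static_assert(sizeof(VaListTag64) == 24,
                "matches the 24-byte memset in the checks");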
 
 
 ; Test that va_start instrumentation does not use va_arg_tls*.
@@ -759,6 +2970,207 @@ declare void @llvm.va_start(ptr) nounwind
 
 ; Function Attrs: nounwind uwtable
 define void @VAStart(i32 %x, ...) sanitize_memory {
+; CHECK-LABEL: define void @VAStart(
+; CHECK-SAME: i32 [[X:%.*]], ...) #[[ATTR6]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = add i64 176, [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = alloca i8, i64 [[TMP1]], align 8
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP2]], i8 0, i64 [[TMP1]], i1 false)
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 800)
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP2]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP3]], i1 false)
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = ptrtoint ptr [[X_ADDR]] to i64
+; CHECK-NEXT:    [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
+; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[TMP7]], i8 -1, i64 4, i1 false)
+; CHECK-NEXT:    [[VA:%.*]] = alloca [1 x %struct.__va_list_tag], align 16
+; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[VA]] to i64
+; CHECK-NEXT:    [[TMP9:%.*]] = xor i64 [[TMP8]], 87960930222080
+; CHECK-NEXT:    [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 16 [[TMP10]], i8 -1, i64 24, i1 false)
+; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[X_ADDR]] to i64
+; CHECK-NEXT:    [[TMP12:%.*]] = xor i64 [[TMP11]], 87960930222080
+; CHECK-NEXT:    [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr
+; CHECK-NEXT:    store i32 [[TMP4]], ptr [[TMP13]], align 4
+; CHECK-NEXT:    store i32 [[X]], ptr [[X_ADDR]], align 4
+; CHECK-NEXT:    [[TMP14:%.*]] = ptrtoint ptr [[VA]] to i64
+; CHECK-NEXT:    [[TMP15:%.*]] = xor i64 [[TMP14]], 87960930222080
+; CHECK-NEXT:    [[TMP16:%.*]] = inttoptr i64 [[TMP15]] to ptr
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP16]], i8 0, i64 24, i1 false)
+; CHECK-NEXT:    call void @llvm.va_start.p0(ptr [[VA]])
+; CHECK-NEXT:    [[TMP17:%.*]] = ptrtoint ptr [[VA]] to i64
+; CHECK-NEXT:    [[TMP18:%.*]] = add i64 [[TMP17]], 16
+; CHECK-NEXT:    [[TMP19:%.*]] = inttoptr i64 [[TMP18]] to ptr
+; CHECK-NEXT:    [[TMP20:%.*]] = load ptr, ptr [[TMP19]], align 8
+; CHECK-NEXT:    [[TMP21:%.*]] = ptrtoint ptr [[TMP20]] to i64
+; CHECK-NEXT:    [[TMP22:%.*]] = xor i64 [[TMP21]], 87960930222080
+; CHECK-NEXT:    [[TMP23:%.*]] = inttoptr i64 [[TMP22]] to ptr
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP23]], ptr align 16 [[TMP2]], i64 176, i1 false)
+; CHECK-NEXT:    [[TMP24:%.*]] = ptrtoint ptr [[VA]] to i64
+; CHECK-NEXT:    [[TMP25:%.*]] = add i64 [[TMP24]], 8
+; CHECK-NEXT:    [[TMP26:%.*]] = inttoptr i64 [[TMP25]] to ptr
+; CHECK-NEXT:    [[TMP27:%.*]] = load ptr, ptr [[TMP26]], align 8
+; CHECK-NEXT:    [[TMP28:%.*]] = ptrtoint ptr [[TMP27]] to i64
+; CHECK-NEXT:    [[TMP29:%.*]] = xor i64 [[TMP28]], 87960930222080
+; CHECK-NEXT:    [[TMP30:%.*]] = inttoptr i64 [[TMP29]] to ptr
+; CHECK-NEXT:    [[TMP31:%.*]] = getelementptr i8, ptr [[TMP2]], i32 176
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP30]], ptr align 16 [[TMP31]], i64 [[TMP0]], i1 false)
+; CHECK-NEXT:    ret void
+;
+; ORIGIN-LABEL: define void @VAStart(
+; ORIGIN-SAME: i32 [[X:%.*]], ...) #[[ATTR6]] {
+; ORIGIN-NEXT:  [[ENTRY:.*:]]
+; ORIGIN-NEXT:    [[TMP0:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
+; ORIGIN-NEXT:    [[TMP1:%.*]] = add i64 176, [[TMP0]]
+; ORIGIN-NEXT:    [[TMP2:%.*]] = alloca i8, i64 [[TMP1]], align 8
+; ORIGIN-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP2]], i8 0, i64 [[TMP1]], i1 false)
+; ORIGIN-NEXT:    [[TMP3:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 800)
+; ORIGIN-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP2]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP3]], i1 false)
+; ORIGIN-NEXT:    [[TMP4:%.*]] = alloca i8, i64 [[TMP1]], align 8
+; ORIGIN-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP4]], ptr align 8 @__msan_va_arg_origin_tls, i64 [[TMP3]], i1 false)
+; ORIGIN-NEXT:    [[TMP5:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; ORIGIN-NEXT:    [[TMP6:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
+; ORIGIN-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[X_ADDR]] to i64
+; ORIGIN-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
+; ORIGIN-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
+; ORIGIN-NEXT:    [[TMP10:%.*]] = add i64 [[TMP8]], 17592186044416
+; ORIGIN-NEXT:    [[TMP11:%.*]] = and i64 [[TMP10]], -4
+; ORIGIN-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; ORIGIN-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[TMP9]], i8 -1, i64 4, i1 false)
+; ORIGIN-NEXT:    call void @__msan_set_alloca_origin_with_descr(ptr [[X_ADDR]], i64 4, ptr @[[GLOB4:[0-9]+]], ptr @[[GLOB5:[0-9]+]])
+; ORIGIN-NEXT:    [[VA:%.*]] = alloca [1 x %struct.__va_list_tag], align 16
+; ORIGIN-NEXT:    [[TMP13:%.*]] = ptrtoint ptr [[VA]] to i64
+; ORIGIN-NEXT:    [[TMP14:%.*]] = xor i64 [[TMP13]], 87960930222080
+; ORIGIN-NEXT:    [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr
+; ORIGIN-NEXT:    [[TMP16:%.*]] = add i64 [[TMP14]], 17592186044416
+; ORIGIN-NEXT:    [[TMP17:%.*]] = and i64 [[TMP16]], -4
+; ORIGIN-NEXT:    [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr
+; ORIGIN-NEXT:    call void @llvm.memset.p0.i64(ptr align 16 [[TMP15]], i8 -1, i64 24, i1 false)
+; ORIGIN-NEXT:    call void @__msan_set_alloca_origin_with_descr(ptr [[VA]], i64 24, ptr @[[GLOB6:[0-9]+]], ptr @[[GLOB7:[0-9]+]])
+; ORIGIN-NEXT:    [[TMP19:%.*]] = ptrtoint ptr [[X_ADDR]] to i64
+; ORIGIN-NEXT:    [[TMP20:%.*]] = xor i64 [[TMP19]], 87960930222080
+; ORIGIN-NEXT:    [[TMP21:%.*]] = inttoptr i64 [[TMP20]] to ptr
+; ORIGIN-NEXT:    [[TMP22:%.*]] = add i64 [[TMP20]], 17592186044416
+; ORIGIN-NEXT:    [[TMP23:%.*]] = inttoptr i64 [[TMP22]] to ptr
+; ORIGIN-NEXT:    store i32 [[TMP5]], ptr [[TMP21]], align 4
+; ORIGIN-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP5]], 0
+; ORIGIN-NEXT:    br i1 [[_MSCMP]], label %[[BB24:.*]], label %[[BB25:.*]], !prof [[PROF1]]
+; ORIGIN:       [[BB24]]:
+; ORIGIN-NEXT:    store i32 [[TMP6]], ptr [[TMP23]], align 4
+; ORIGIN-NEXT:    br label %[[BB25]]
+; ORIGIN:       [[BB25]]:
+; ORIGIN-NEXT:    store i32 [[X]], ptr [[X_ADDR]], align 4
+; ORIGIN-NEXT:    [[TMP26:%.*]] = ptrtoint ptr [[VA]] to i64
+; ORIGIN-NEXT:    [[TMP27:%.*]] = xor i64 [[TMP26]], 87960930222080
+; ORIGIN-NEXT:    [[TMP28:%.*]] = inttoptr i64 [[TMP27]] to ptr
+; ORIGIN-NEXT:    [[TMP29:%.*]] = add i64 [[TMP27]], 17592186044416
+; ORIGIN-NEXT:    [[TMP30:%.*]] = inttoptr i64 [[TMP29]] to ptr
+; ORIGIN-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP28]], i8 0, i64 24, i1 false)
+; ORIGIN-NEXT:    call void @llvm.va_start.p0(ptr [[VA]])
+; ORIGIN-NEXT:    [[TMP31:%.*]] = ptrtoint ptr [[VA]] to i64
+; ORIGIN-NEXT:    [[TMP32:%.*]] = add i64 [[TMP31]], 16
+; ORIGIN-NEXT:    [[TMP33:%.*]] = inttoptr i64 [[TMP32]] to ptr
+; ORIGIN-NEXT:    [[TMP34:%.*]] = load ptr, ptr [[TMP33]], align 8
+; ORIGIN-NEXT:    [[TMP35:%.*]] = ptrtoint ptr [[TMP34]] to i64
+; ORIGIN-NEXT:    [[TMP36:%.*]] = xor i64 [[TMP35]], 87960930222080
+; ORIGIN-NEXT:    [[TMP37:%.*]] = inttoptr i64 [[TMP36]] to ptr
+; ORIGIN-NEXT:    [[TMP38:%.*]] = add i64 [[TMP36]], 17592186044416
+; ORIGIN-NEXT:    [[TMP39:%.*]] = inttoptr i64 [[TMP38]] to ptr
+; ORIGIN-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP37]], ptr align 16 [[TMP2]], i64 176, i1 false)
+; ORIGIN-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP39]], ptr align 16 [[TMP4]], i64 176, i1 false)
+; ORIGIN-NEXT:    [[TMP40:%.*]] = ptrtoint ptr [[VA]] to i64
+; ORIGIN-NEXT:    [[TMP41:%.*]] = add i64 [[TMP40]], 8
+; ORIGIN-NEXT:    [[TMP42:%.*]] = inttoptr i64 [[TMP41]] to ptr
+; ORIGIN-NEXT:    [[TMP43:%.*]] = load ptr, ptr [[TMP42]], align 8
+; ORIGIN-NEXT:    [[TMP44:%.*]] = ptrtoint ptr [[TMP43]] to i64
+; ORIGIN-NEXT:    [[TMP45:%.*]] = xor i64 [[TMP44]], 87960930222080
+; ORIGIN-NEXT:    [[TMP46:%.*]] = inttoptr i64 [[TMP45]] to ptr
+; ORIGIN-NEXT:    [[TMP47:%.*]] = add i64 [[TMP45]], 17592186044416
+; ORIGIN-NEXT:    [[TMP48:%.*]] = inttoptr i64 [[TMP47]] to ptr
+; ORIGIN-NEXT:    [[TMP49:%.*]] = getelementptr i8, ptr [[TMP2]], i32 176
+; ORIGIN-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP46]], ptr align 16 [[TMP49]], i64 [[TMP0]], i1 false)
+; ORIGIN-NEXT:    [[TMP50:%.*]] = getelementptr i8, ptr [[TMP4]], i32 176
+; ORIGIN-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP48]], ptr align 16 [[TMP50]], i64 [[TMP0]], i1 false)
+; ORIGIN-NEXT:    ret void
+;
+; CALLS-LABEL: define void @VAStart(
+; CALLS-SAME: i32 [[X:%.*]], ...) #[[ATTR6]] {
+; CALLS-NEXT:  [[ENTRY:.*:]]
+; CALLS-NEXT:    [[TMP0:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CALLS-NEXT:    [[TMP1:%.*]] = add i64 176, [[TMP0]]
+; CALLS-NEXT:    [[TMP2:%.*]] = alloca i8, i64 [[TMP1]], align 8
+; CALLS-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP2]], i8 0, i64 [[TMP1]], i1 false)
+; CALLS-NEXT:    [[TMP3:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 800)
+; CALLS-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP2]], ptr align 8 @__msan_va_arg_tls, i64 [[TMP3]], i1 false)
+; CALLS-NEXT:    [[TMP4:%.*]] = alloca i8, i64 [[TMP1]], align 8
+; CALLS-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[TMP4]], ptr align 8 @__msan_va_arg_origin_tls, i64 [[TMP3]], i1 false)
+; CALLS-NEXT:    [[TMP5:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP6:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
+; CALLS-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[X_ADDR]] to i64
+; CALLS-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
+; CALLS-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
+; CALLS-NEXT:    [[TMP10:%.*]] = add i64 [[TMP8]], 17592186044416
+; CALLS-NEXT:    [[TMP11:%.*]] = and i64 [[TMP10]], -4
+; CALLS-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CALLS-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[TMP9]], i8 -1, i64 4, i1 false)
+; CALLS-NEXT:    call void @__msan_set_alloca_origin_with_descr(ptr [[X_ADDR]], i64 4, ptr @[[GLOB4:[0-9]+]], ptr @[[GLOB5:[0-9]+]])
+; CALLS-NEXT:    [[VA:%.*]] = alloca [1 x %struct.__va_list_tag], align 16
+; CALLS-NEXT:    [[TMP13:%.*]] = ptrtoint ptr [[VA]] to i64
+; CALLS-NEXT:    [[TMP14:%.*]] = xor i64 [[TMP13]], 87960930222080
+; CALLS-NEXT:    [[TMP15:%.*]] = inttoptr i64 [[TMP14]] to ptr
+; CALLS-NEXT:    [[TMP16:%.*]] = add i64 [[TMP14]], 17592186044416
+; CALLS-NEXT:    [[TMP17:%.*]] = and i64 [[TMP16]], -4
+; CALLS-NEXT:    [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr
+; CALLS-NEXT:    call void @llvm.memset.p0.i64(ptr align 16 [[TMP15]], i8 -1, i64 24, i1 false)
+; CALLS-NEXT:    call void @__msan_set_alloca_origin_with_descr(ptr [[VA]], i64 24, ptr @[[GLOB6:[0-9]+]], ptr @[[GLOB7:[0-9]+]])
+; CALLS-NEXT:    [[TMP19:%.*]] = ptrtoint ptr [[X_ADDR]] to i64
+; CALLS-NEXT:    [[TMP20:%.*]] = xor i64 [[TMP19]], 87960930222080
+; CALLS-NEXT:    [[TMP21:%.*]] = inttoptr i64 [[TMP20]] to ptr
+; CALLS-NEXT:    [[TMP22:%.*]] = add i64 [[TMP20]], 17592186044416
+; CALLS-NEXT:    [[TMP23:%.*]] = inttoptr i64 [[TMP22]] to ptr
+; CALLS-NEXT:    store i32 [[TMP5]], ptr [[TMP21]], align 4
+; CALLS-NEXT:    call void @__msan_maybe_store_origin_4(i32 zeroext [[TMP5]], ptr [[X_ADDR]], i32 zeroext [[TMP6]])
+; CALLS-NEXT:    store i32 [[X]], ptr [[X_ADDR]], align 4
+; CALLS-NEXT:    [[TMP24:%.*]] = ptrtoint ptr [[VA]] to i64
+; CALLS-NEXT:    [[TMP25:%.*]] = xor i64 [[TMP24]], 87960930222080
+; CALLS-NEXT:    [[TMP26:%.*]] = inttoptr i64 [[TMP25]] to ptr
+; CALLS-NEXT:    [[TMP27:%.*]] = add i64 [[TMP25]], 17592186044416
+; CALLS-NEXT:    [[TMP28:%.*]] = inttoptr i64 [[TMP27]] to ptr
+; CALLS-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP26]], i8 0, i64 24, i1 false)
+; CALLS-NEXT:    call void @llvm.va_start.p0(ptr [[VA]])
+; CALLS-NEXT:    [[TMP29:%.*]] = ptrtoint ptr [[VA]] to i64
+; CALLS-NEXT:    [[TMP30:%.*]] = add i64 [[TMP29]], 16
+; CALLS-NEXT:    [[TMP31:%.*]] = inttoptr i64 [[TMP30]] to ptr
+; CALLS-NEXT:    [[TMP32:%.*]] = load ptr, ptr [[TMP31]], align 8
+; CALLS-NEXT:    [[TMP33:%.*]] = ptrtoint ptr [[TMP32]] to i64
+; CALLS-NEXT:    [[TMP34:%.*]] = xor i64 [[TMP33]], 87960930222080
+; CALLS-NEXT:    [[TMP35:%.*]] = inttoptr i64 [[TMP34]] to ptr
+; CALLS-NEXT:    [[TMP36:%.*]] = add i64 [[TMP34]], 17592186044416
+; CALLS-NEXT:    [[TMP37:%.*]] = inttoptr i64 [[TMP36]] to ptr
+; CALLS-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP35]], ptr align 16 [[TMP2]], i64 176, i1 false)
+; CALLS-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP37]], ptr align 16 [[TMP4]], i64 176, i1 false)
+; CALLS-NEXT:    [[TMP38:%.*]] = ptrtoint ptr [[VA]] to i64
+; CALLS-NEXT:    [[TMP39:%.*]] = add i64 [[TMP38]], 8
+; CALLS-NEXT:    [[TMP40:%.*]] = inttoptr i64 [[TMP39]] to ptr
+; CALLS-NEXT:    [[TMP41:%.*]] = load ptr, ptr [[TMP40]], align 8
+; CALLS-NEXT:    [[TMP42:%.*]] = ptrtoint ptr [[TMP41]] to i64
+; CALLS-NEXT:    [[TMP43:%.*]] = xor i64 [[TMP42]], 87960930222080
+; CALLS-NEXT:    [[TMP44:%.*]] = inttoptr i64 [[TMP43]] to ptr
+; CALLS-NEXT:    [[TMP45:%.*]] = add i64 [[TMP43]], 17592186044416
+; CALLS-NEXT:    [[TMP46:%.*]] = inttoptr i64 [[TMP45]] to ptr
+; CALLS-NEXT:    [[TMP47:%.*]] = getelementptr i8, ptr [[TMP2]], i32 176
+; CALLS-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP44]], ptr align 16 [[TMP47]], i64 [[TMP0]], i1 false)
+; CALLS-NEXT:    [[TMP48:%.*]] = getelementptr i8, ptr [[TMP4]], i32 176
+; CALLS-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 16 [[TMP46]], ptr align 16 [[TMP48]], i64 [[TMP0]], i1 false)
+; CALLS-NEXT:    ret void
+;
 entry:
   %x.addr = alloca i32, align 4
   %va = alloca [1 x %struct.__va_list_tag], align 16
@@ -767,31 +3179,128 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: @VAStart
-; CHECK: call void @llvm.va_start
-; CHECK-NOT: @__msan_va_arg_tls
-; CHECK-NOT: @__msan_va_arg_overflow_size_tls
-; CHECK: ret void
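
Two constants recur in the VAStart checks: 176 is the size of the SysV x86-64
register save area that va_start spills (6 general-purpose registers of 8
bytes plus 8 SSE registers of 16 bytes), and 800 is the fixed size of the
__msan_va_arg_tls buffer, which the umin clamps the copy to. The arithmetic,
as a sketch (constant names are mine, not the pass's):

  // Sizes behind the 176 and 800 in the checks above (SysV x86-64).
  enum : unsigned {
    kGpRegs = 6, kGpRegSize = 8,     // rdi, rsi, rdx, rcx, r8, r9
    kSseRegs = 8, kSseRegSize = 16,  // xmm0..xmm7
    kRegSaveAreaSize = kGpRegs * kGpRegSize + kSseRegs * kSseRegSize,
    kVaArgTLSSize = 800              // hence `umin(..., 800)` above
  };
  static_assert(kRegSaveAreaSize == 176, "the 176-byte copies in the checks");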
 
 
 ; Test handling of volatile stores.
 ; Check that MemorySanitizer does not add a check of the value being stored.
 
 define void @VolatileStore(ptr nocapture %p, i32 %x) nounwind uwtable sanitize_memory {
+; CHECK-LABEL: define void @VolatileStore(
+; CHECK-SAME: ptr nocapture [[P:%.*]], i32 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[P]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080
+; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT:    store i32 [[TMP0]], ptr [[TMP3]], align 4
+; CHECK-NEXT:    store volatile i32 [[X]], ptr [[P]], align 4
+; CHECK-NEXT:    ret void
+;
+; ORIGIN-LABEL: define void @VolatileStore(
+; ORIGIN-SAME: ptr nocapture [[P:%.*]], i32 [[X:%.*]]) #[[ATTR0]] {
+; ORIGIN-NEXT:  [[ENTRY:.*:]]
+; ORIGIN-NEXT:    [[TMP0:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; ORIGIN-NEXT:    [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 8) to ptr), align 4
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[P]] to i64
+; ORIGIN-NEXT:    [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080
+; ORIGIN-NEXT:    [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; ORIGIN-NEXT:    [[TMP5:%.*]] = add i64 [[TMP3]], 17592186044416
+; ORIGIN-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; ORIGIN-NEXT:    store i32 [[TMP0]], ptr [[TMP4]], align 4
+; ORIGIN-NEXT:    [[_MSCMP:%.*]] = icmp ne i32 [[TMP0]], 0
+; ORIGIN-NEXT:    br i1 [[_MSCMP]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]]
+; ORIGIN:       [[BB7]]:
+; ORIGIN-NEXT:    store i32 [[TMP1]], ptr [[TMP6]], align 4
+; ORIGIN-NEXT:    br label %[[BB8]]
+; ORIGIN:       [[BB8]]:
+; ORIGIN-NEXT:    store volatile i32 [[X]], ptr [[P]], align 4
+; ORIGIN-NEXT:    ret void
+;
+; CALLS-LABEL: define void @VolatileStore(
+; CALLS-SAME: ptr nocapture [[P:%.*]], i32 [[X:%.*]]) #[[ATTR0]] {
+; CALLS-NEXT:  [[ENTRY:.*:]]
+; CALLS-NEXT:    [[TMP0:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CALLS-NEXT:    [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 8) to ptr), align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    call void @__msan_maybe_warning_8(i64 zeroext [[TMP0]], i32 zeroext [[TMP1]])
+; CALLS-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[P]] to i64
+; CALLS-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080
+; CALLS-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CALLS-NEXT:    [[TMP7:%.*]] = add i64 [[TMP5]], 17592186044416
+; CALLS-NEXT:    [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
+; CALLS-NEXT:    store i32 [[TMP2]], ptr [[TMP6]], align 4
+; CALLS-NEXT:    call void @__msan_maybe_store_origin_4(i32 zeroext [[TMP2]], ptr [[P]], i32 zeroext [[TMP3]])
+; CALLS-NEXT:    store volatile i32 [[X]], ptr [[P]], align 4
+; CALLS-NEXT:    ret void
+;
 entry:
   store volatile i32 %x, ptr %p, align 4
   ret void
 }
 
-; CHECK-LABEL: @VolatileStore
-; CHECK-NOT: @__msan_warning_with_origin
-; CHECK: ret void
 
 
 ; Test that checks are omitted and the returned value is always initialized
 ; if the sanitize_memory attribute is missing.
 
 define i32 @NoSanitizeMemory(i32 %x) uwtable {
+; CHECK-LABEL: define i32 @NoSanitizeMemory(
+; CHECK-SAME: i32 [[X:%.*]]) #[[ATTR8:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP0:%.*]] = xor i32 [[X]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 -1, [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT:    [[_MSPROP_ICMP:%.*]] = and i1 false, [[TMP2]]
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL]], label %[[IF_END:.*]], label %[[IF_THEN:.*]]
+; CHECK:       [[IF_THEN]]:
+; CHECK-NEXT:    tail call void @bar()
+; CHECK-NEXT:    br label %[[IF_END]]
+; CHECK:       [[IF_END]]:
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i32 [[X]]
+;
+; ORIGIN-LABEL: define i32 @NoSanitizeMemory(
+; ORIGIN-SAME: i32 [[X:%.*]]) #[[ATTR8:[0-9]+]] {
+; ORIGIN-NEXT:  [[ENTRY:.*:]]
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[TMP0:%.*]] = xor i32 [[X]], 0
+; ORIGIN-NEXT:    [[TMP1:%.*]] = and i32 -1, [[TMP0]]
+; ORIGIN-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; ORIGIN-NEXT:    [[_MSPROP_ICMP:%.*]] = and i1 false, [[TMP2]]
+; ORIGIN-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X]], 0
+; ORIGIN-NEXT:    br i1 [[TOBOOL]], label %[[IF_END:.*]], label %[[IF_THEN:.*]]
+; ORIGIN:       [[IF_THEN]]:
+; ORIGIN-NEXT:    tail call void @bar()
+; ORIGIN-NEXT:    br label %[[IF_END]]
+; ORIGIN:       [[IF_END]]:
+; ORIGIN-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    store i32 0, ptr @__msan_retval_origin_tls, align 4
+; ORIGIN-NEXT:    ret i32 [[X]]
+;
+; CALLS-LABEL: define i32 @NoSanitizeMemory(
+; CALLS-SAME: i32 [[X:%.*]]) #[[ATTR8:[0-9]+]] {
+; CALLS-NEXT:  [[ENTRY:.*:]]
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    [[TMP0:%.*]] = xor i32 [[X]], 0
+; CALLS-NEXT:    [[TMP1:%.*]] = and i32 -1, [[TMP0]]
+; CALLS-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CALLS-NEXT:    [[_MSPROP_ICMP:%.*]] = and i1 false, [[TMP2]]
+; CALLS-NEXT:    [[TOBOOL:%.*]] = icmp eq i32 [[X]], 0
+; CALLS-NEXT:    br i1 [[TOBOOL]], label %[[IF_END:.*]], label %[[IF_THEN:.*]]
+; CALLS:       [[IF_THEN]]:
+; CALLS-NEXT:    tail call void @bar()
+; CALLS-NEXT:    br label %[[IF_END]]
+; CALLS:       [[IF_END]]:
+; CALLS-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    store i32 0, ptr @__msan_retval_origin_tls, align 4
+; CALLS-NEXT:    ret i32 [[X]]
+;
 entry:
   %tobool = icmp eq i32 %x, 0
   br i1 %tobool, label %if.end, label %if.then
@@ -806,17 +3315,67 @@ if.end:                                           ; preds = %entry, %if.then
 
 declare void @bar()
 
-; CHECK-LABEL: @NoSanitizeMemory
-; CHECK-NOT: @__msan_warning_with_origin
-; CHECK: store i32 0, {{.*}} @__msan_retval_tls
-; CHECK-NOT: @__msan_warning_with_origin
-; CHECK: ret i32
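
Operationally, "returned value is always initialized" means the function
stores 0 to the retval shadow slot before every ret, so an instrumented
caller reading @__msan_retval_tls sees a clean value. A stand-in model (the
thread_local below is hypothetical, only there to show the contract):

  #include <cstdint>

  thread_local uint32_t retval_shadow_model = 0;  // models @__msan_retval_tls

  uint32_t no_sanitize_ret(uint32_t x) {
    retval_shadow_model = 0;  // mirrors `store i32 0, ptr @__msan_retval_tls`
    return x;
  }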
 
 
 ; Test that stack allocations are unpoisoned in functions missing the
 ; sanitize_memory attribute.
 
 define i32 @NoSanitizeMemoryAlloca() {
+; CHECK-LABEL: define i32 @NoSanitizeMemoryAlloca() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[P:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = ptrtoint ptr [[P]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = xor i64 [[TMP0]], 87960930222080
+; CHECK-NEXT:    [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[TMP2]], i8 0, i64 4, i1 false)
+; CHECK-NEXT:    store i64 0, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    [[X:%.*]] = call i32 @NoSanitizeMemoryAllocaHelper(ptr [[P]])
+; CHECK-NEXT:    [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i32 [[X]]
+;
+; ORIGIN-LABEL: define i32 @NoSanitizeMemoryAlloca() {
+; ORIGIN-NEXT:  [[ENTRY:.*:]]
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[P:%.*]] = alloca i32, align 4
+; ORIGIN-NEXT:    [[TMP0:%.*]] = ptrtoint ptr [[P]] to i64
+; ORIGIN-NEXT:    [[TMP1:%.*]] = xor i64 [[TMP0]], 87960930222080
+; ORIGIN-NEXT:    [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr
+; ORIGIN-NEXT:    [[TMP3:%.*]] = add i64 [[TMP1]], 17592186044416
+; ORIGIN-NEXT:    [[TMP4:%.*]] = and i64 [[TMP3]], -4
+; ORIGIN-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr
+; ORIGIN-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[TMP2]], i8 0, i64 4, i1 false)
+; ORIGIN-NEXT:    store i64 0, ptr @__msan_param_tls, align 8
+; ORIGIN-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    [[X:%.*]] = call i32 @NoSanitizeMemoryAllocaHelper(ptr [[P]])
+; ORIGIN-NEXT:    [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    [[TMP6:%.*]] = load i32, ptr @__msan_retval_origin_tls, align 4
+; ORIGIN-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    store i32 0, ptr @__msan_retval_origin_tls, align 4
+; ORIGIN-NEXT:    ret i32 [[X]]
+;
+; CALLS-LABEL: define i32 @NoSanitizeMemoryAlloca() {
+; CALLS-NEXT:  [[ENTRY:.*:]]
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    [[P:%.*]] = alloca i32, align 4
+; CALLS-NEXT:    [[TMP0:%.*]] = ptrtoint ptr [[P]] to i64
+; CALLS-NEXT:    [[TMP1:%.*]] = xor i64 [[TMP0]], 87960930222080
+; CALLS-NEXT:    [[TMP2:%.*]] = inttoptr i64 [[TMP1]] to ptr
+; CALLS-NEXT:    [[TMP3:%.*]] = add i64 [[TMP1]], 17592186044416
+; CALLS-NEXT:    [[TMP4:%.*]] = and i64 [[TMP3]], -4
+; CALLS-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr
+; CALLS-NEXT:    call void @llvm.memset.p0.i64(ptr align 4 [[TMP2]], i8 0, i64 4, i1 false)
+; CALLS-NEXT:    store i64 0, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    [[X:%.*]] = call i32 @NoSanitizeMemoryAllocaHelper(ptr [[P]])
+; CALLS-NEXT:    [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    [[TMP6:%.*]] = load i32, ptr @__msan_retval_origin_tls, align 4
+; CALLS-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    store i32 0, ptr @__msan_retval_origin_tls, align 4
+; CALLS-NEXT:    ret i32 [[X]]
+;
 entry:
   %p = alloca i32, align 4
   %x = call i32 @NoSanitizeMemoryAllocaHelper(ptr %p)
@@ -825,16 +3384,46 @@ entry:
 
 declare i32 @NoSanitizeMemoryAllocaHelper(ptr %p)
 
-; CHECK-LABEL: @NoSanitizeMemoryAlloca
-; CHECK: call void @llvm.memset.p0.i64(ptr align 4 {{.*}}, i8 0, i64 4, i1 false)
-; CHECK: call i32 @NoSanitizeMemoryAllocaHelper(ptr
-; CHECK: ret i32
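
Note the polarity: instrumented functions poison fresh allocas with 0xFF (the
`i8 -1` memsets in earlier tests), while a function without sanitize_memory
clears the alloca's shadow with `i8 0`, so it never hands poisoned stack
memory to an instrumented callee. A sketch of that unpoisoning, reusing the
mapping constant from above (only meaningful inside an MSan process):

  #include <cstddef>
  #include <cstdint>
  #include <cstring>

  constexpr uint64_t kShadowXorMask = 0x500000000000ULL;  // 87960930222080

  // Models the `memset(..., i8 0, i64 4, ...)` in the checks: clear the
  // shadow of [p, p+n) so the memory reads as fully initialized.
  void unpoison_stack(void *p, size_t n) {
    std::memset(reinterpret_cast<void *>(
                    reinterpret_cast<uintptr_t>(p) ^ kShadowXorMask),
                0, n);
  }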
 
 
 ; Test that undef is unpoisoned in functions missing the
 ; sanitize_memory attribute.
 
 define i32 @NoSanitizeMemoryUndef() {
+; CHECK-LABEL: define i32 @NoSanitizeMemoryUndef() {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    store i32 0, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    [[X:%.*]] = call i32 @NoSanitizeMemoryUndefHelper(i32 undef)
+; CHECK-NEXT:    [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i32 [[X]]
+;
+; ORIGIN-LABEL: define i32 @NoSanitizeMemoryUndef() {
+; ORIGIN-NEXT:  [[ENTRY:.*:]]
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    store i32 0, ptr @__msan_param_tls, align 8
+; ORIGIN-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    [[X:%.*]] = call i32 @NoSanitizeMemoryUndefHelper(i32 undef)
+; ORIGIN-NEXT:    [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    [[TMP0:%.*]] = load i32, ptr @__msan_retval_origin_tls, align 4
+; ORIGIN-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    store i32 0, ptr @__msan_retval_origin_tls, align 4
+; ORIGIN-NEXT:    ret i32 [[X]]
+;
+; CALLS-LABEL: define i32 @NoSanitizeMemoryUndef() {
+; CALLS-NEXT:  [[ENTRY:.*:]]
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    store i32 0, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    [[X:%.*]] = call i32 @NoSanitizeMemoryUndefHelper(i32 undef)
+; CALLS-NEXT:    [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    [[TMP0:%.*]] = load i32, ptr @__msan_retval_origin_tls, align 4
+; CALLS-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    store i32 0, ptr @__msan_retval_origin_tls, align 4
+; CALLS-NEXT:    ret i32 [[X]]
+;
 entry:
   %x = call i32 @NoSanitizeMemoryUndefHelper(i32 undef)
   ret i32 %x
@@ -842,15 +3431,70 @@ entry:
 
 declare i32 @NoSanitizeMemoryUndefHelper(i32 %x)
 
-; CHECK-LABEL: @NoSanitizeMemoryUndef
-; CHECK: store i32 0, ptr @__msan_param_tls
-; CHECK: call i32 @NoSanitizeMemoryUndefHelper(i32 undef)
-; CHECK: ret i32
 
 
 ; Test PHINode instrumentation in ignorelisted functions
 
 define i32 @NoSanitizeMemoryPHI(i32 %x) {
+; CHECK-LABEL: define i32 @NoSanitizeMemoryPHI(
+; CHECK-SAME: i32 [[X:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP0:%.*]] = xor i32 [[X]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 -1, [[TMP0]]
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT:    [[_MSPROP_ICMP:%.*]] = and i1 false, [[TMP2]]
+; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[X]], 0
+; CHECK-NEXT:    br i1 [[TOBOOL]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+; CHECK:       [[COND_TRUE]]:
+; CHECK-NEXT:    br label %[[COND_END:.*]]
+; CHECK:       [[COND_FALSE]]:
+; CHECK-NEXT:    br label %[[COND_END]]
+; CHECK:       [[COND_END]]:
+; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ undef, %[[COND_TRUE]] ], [ undef, %[[COND_FALSE]] ]
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i32 [[COND]]
+;
+; ORIGIN-LABEL: define i32 @NoSanitizeMemoryPHI(
+; ORIGIN-SAME: i32 [[X:%.*]]) {
+; ORIGIN-NEXT:  [[ENTRY:.*:]]
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[TMP0:%.*]] = xor i32 [[X]], 0
+; ORIGIN-NEXT:    [[TMP1:%.*]] = and i32 -1, [[TMP0]]
+; ORIGIN-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; ORIGIN-NEXT:    [[_MSPROP_ICMP:%.*]] = and i1 false, [[TMP2]]
+; ORIGIN-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[X]], 0
+; ORIGIN-NEXT:    br i1 [[TOBOOL]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+; ORIGIN:       [[COND_TRUE]]:
+; ORIGIN-NEXT:    br label %[[COND_END:.*]]
+; ORIGIN:       [[COND_FALSE]]:
+; ORIGIN-NEXT:    br label %[[COND_END]]
+; ORIGIN:       [[COND_END]]:
+; ORIGIN-NEXT:    [[COND:%.*]] = phi i32 [ undef, %[[COND_TRUE]] ], [ undef, %[[COND_FALSE]] ]
+; ORIGIN-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    store i32 0, ptr @__msan_retval_origin_tls, align 4
+; ORIGIN-NEXT:    ret i32 [[COND]]
+;
+; CALLS-LABEL: define i32 @NoSanitizeMemoryPHI(
+; CALLS-SAME: i32 [[X:%.*]]) {
+; CALLS-NEXT:  [[ENTRY:.*:]]
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    [[TMP0:%.*]] = xor i32 [[X]], 0
+; CALLS-NEXT:    [[TMP1:%.*]] = and i32 -1, [[TMP0]]
+; CALLS-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CALLS-NEXT:    [[_MSPROP_ICMP:%.*]] = and i1 false, [[TMP2]]
+; CALLS-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[X]], 0
+; CALLS-NEXT:    br i1 [[TOBOOL]], label %[[COND_TRUE:.*]], label %[[COND_FALSE:.*]]
+; CALLS:       [[COND_TRUE]]:
+; CALLS-NEXT:    br label %[[COND_END:.*]]
+; CALLS:       [[COND_FALSE]]:
+; CALLS-NEXT:    br label %[[COND_END]]
+; CALLS:       [[COND_END]]:
+; CALLS-NEXT:    [[COND:%.*]] = phi i32 [ undef, %[[COND_TRUE]] ], [ undef, %[[COND_FALSE]] ]
+; CALLS-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    store i32 0, ptr @__msan_retval_origin_tls, align 4
+; CALLS-NEXT:    ret i32 [[COND]]
+;
 entry:
   %tobool = icmp ne i32 %x, 0
   br i1 %tobool, label %cond.true, label %cond.false
@@ -866,9 +3510,6 @@ cond.end:                                         ; preds = %cond.false, %cond.t
   ret i32 %cond
 }
 
-; CHECK: [[A:%.*]] = phi i32 [ undef, %cond.true ], [ undef, %cond.false ]
-; CHECK: store i32 0, ptr @__msan_retval_tls
-; CHECK: ret i32 [[A]]
 
 
 ; Test that there are no __msan_param_origin_tls stores when
@@ -876,6 +3517,46 @@ cond.end:                                         ; preds = %cond.false, %cond.t
 ; in functions missing sanitize_memory attribute).
 
 define i32 @NoSanitizeMemoryParamTLS(ptr nocapture readonly %x) {
+; CHECK-LABEL: define i32 @NoSanitizeMemoryParamTLS(
+; CHECK-SAME: ptr nocapture readonly [[X:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X]], align 4
+; CHECK-NEXT:    store i32 0, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 @NoSanitizeMemoryParamTLSHelper(i32 [[TMP0]])
+; CHECK-NEXT:    [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i32 [[CALL]]
+;
+; ORIGIN-LABEL: define i32 @NoSanitizeMemoryParamTLS(
+; ORIGIN-SAME: ptr nocapture readonly [[X:%.*]]) {
+; ORIGIN-NEXT:  [[ENTRY:.*:]]
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X]], align 4
+; ORIGIN-NEXT:    store i32 0, ptr @__msan_param_tls, align 8
+; ORIGIN-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    [[CALL:%.*]] = tail call i32 @NoSanitizeMemoryParamTLSHelper(i32 [[TMP0]])
+; ORIGIN-NEXT:    [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_retval_origin_tls, align 4
+; ORIGIN-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    store i32 0, ptr @__msan_retval_origin_tls, align 4
+; ORIGIN-NEXT:    ret i32 [[CALL]]
+;
+; CALLS-LABEL: define i32 @NoSanitizeMemoryParamTLS(
+; CALLS-SAME: ptr nocapture readonly [[X:%.*]]) {
+; CALLS-NEXT:  [[ENTRY:.*:]]
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X]], align 4
+; CALLS-NEXT:    store i32 0, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    [[CALL:%.*]] = tail call i32 @NoSanitizeMemoryParamTLSHelper(i32 [[TMP0]])
+; CALLS-NEXT:    [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_retval_origin_tls, align 4
+; CALLS-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    store i32 0, ptr @__msan_retval_origin_tls, align 4
+; CALLS-NEXT:    ret i32 [[CALL]]
+;
 entry:
   %0 = load i32, ptr %x, align 4
   %call = tail call i32 @NoSanitizeMemoryParamTLSHelper(i32 %0)
@@ -884,51 +3565,107 @@ entry:
 
 declare i32 @NoSanitizeMemoryParamTLSHelper(i32 %x)
 
-; CHECK-LABEL: define i32 @NoSanitizeMemoryParamTLS(
-; CHECK-NOT: __msan_param_origin_tls
-; CHECK: ret i32
 
 
 ; Test argument shadow alignment
 
 define <2 x i64> @ArgumentShadowAlignment(i64 %a, <2 x i64> %b) sanitize_memory {
+; CHECK-LABEL: define <2 x i64> @ArgumentShadowAlignment(
+; CHECK-SAME: i64 [[A:%.*]], <2 x i64> [[B:%.*]]) #[[ATTR6]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    store <2 x i64> [[TMP0]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret <2 x i64> [[B]]
+;
+; ORIGIN-LABEL: define <2 x i64> @ArgumentShadowAlignment(
+; ORIGIN-SAME: i64 [[A:%.*]], <2 x i64> [[B:%.*]]) #[[ATTR6]] {
+; ORIGIN-NEXT:  [[ENTRY:.*:]]
+; ORIGIN-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; ORIGIN-NEXT:    [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 8) to ptr), align 4
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    store <2 x i64> [[TMP0]], ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    store i32 [[TMP1]], ptr @__msan_retval_origin_tls, align 4
+; ORIGIN-NEXT:    ret <2 x i64> [[B]]
+;
+; CALLS-LABEL: define <2 x i64> @ArgumentShadowAlignment(
+; CALLS-SAME: i64 [[A:%.*]], <2 x i64> [[B:%.*]]) #[[ATTR6]] {
+; CALLS-NEXT:  [[ENTRY:.*:]]
+; CALLS-NEXT:    [[TMP0:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 8) to ptr), align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    store <2 x i64> [[TMP0]], ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    store i32 [[TMP1]], ptr @__msan_retval_origin_tls, align 4
+; CALLS-NEXT:    ret <2 x i64> [[B]]
+;
 entry:
   ret <2 x i64> %b
 }
 
-; CHECK-LABEL: @ArgumentShadowAlignment
-; CHECK: load <2 x i64>, ptr {{.*}} @__msan_param_tls {{.*}}, align 8
-; CHECK: store <2 x i64> {{.*}}, ptr @__msan_retval_tls, align 8
-; CHECK: ret <2 x i64>
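
The point of the `align 8` here: param/retval TLS is laid out as 8-byte
slots, so even a 16-byte <2 x i64> shadow is only guaranteed 8-byte
alignment. A stand-in model of the buffer (hypothetical name, 800 bytes as in
the umin clamp seen earlier):

  #include <cstdint>

  // Models @__msan_param_tls: 100 eight-byte slots; a vector shadow that
  // spans two slots is accessed with `align 8`, as the checks require.
  thread_local uint64_t param_tls_model[800 / 8] = {};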
 
 
 ; Test origin propagation for insertvalue
 
 define { i64, i32 } @make_pair_64_32(i64 %x, i32 %y) sanitize_memory {
+; CHECK-LABEL: define { i64, i32 } @make_pair_64_32(
+; CHECK-SAME: i64 [[X:%.*]], i32 [[Y:%.*]]) #[[ATTR6]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue { i64, i32 } { i64 -1, i32 -1 }, i64 [[TMP0]], 0
+; CHECK-NEXT:    [[A:%.*]] = insertvalue { i64, i32 } undef, i64 [[X]], 0
+; CHECK-NEXT:    [[TMP3:%.*]] = insertvalue { i64, i32 } [[TMP2]], i32 [[TMP1]], 1
+; CHECK-NEXT:    [[B:%.*]] = insertvalue { i64, i32 } [[A]], i32 [[Y]], 1
+; CHECK-NEXT:    store { i64, i32 } [[TMP3]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret { i64, i32 } [[B]]
+;
+; ORIGIN-LABEL: define { i64, i32 } @make_pair_64_32(
+; ORIGIN-SAME: i64 [[X:%.*]], i32 [[Y:%.*]]) #[[ATTR6]] {
+; ORIGIN-NEXT:  [[ENTRY:.*:]]
+; ORIGIN-NEXT:    [[TMP0:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; ORIGIN-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; ORIGIN-NEXT:    [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; ORIGIN-NEXT:    [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 8) to ptr), align 4
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[TMP4:%.*]] = insertvalue { i64, i32 } { i64 -1, i32 -1 }, i64 [[TMP0]], 0
+; ORIGIN-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP0]], 0
+; ORIGIN-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP1]], i32 0
+; ORIGIN-NEXT:    [[A:%.*]] = insertvalue { i64, i32 } undef, i64 [[X]], 0
+; ORIGIN-NEXT:    [[TMP7:%.*]] = insertvalue { i64, i32 } [[TMP4]], i32 [[TMP2]], 1
+; ORIGIN-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP2]], 0
+; ORIGIN-NEXT:    [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP3]], i32 [[TMP6]]
+; ORIGIN-NEXT:    [[B:%.*]] = insertvalue { i64, i32 } [[A]], i32 [[Y]], 1
+; ORIGIN-NEXT:    store { i64, i32 } [[TMP7]], ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    store i32 [[TMP9]], ptr @__msan_retval_origin_tls, align 4
+; ORIGIN-NEXT:    ret { i64, i32 } [[B]]
+;
+; CALLS-LABEL: define { i64, i32 } @make_pair_64_32(
+; CALLS-SAME: i64 [[X:%.*]], i32 [[Y:%.*]]) #[[ATTR6]] {
+; CALLS-NEXT:  [[ENTRY:.*:]]
+; CALLS-NEXT:    [[TMP0:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CALLS-NEXT:    [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 8) to ptr), align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    [[TMP4:%.*]] = insertvalue { i64, i32 } { i64 -1, i32 -1 }, i64 [[TMP0]], 0
+; CALLS-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[TMP0]], 0
+; CALLS-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], i32 [[TMP1]], i32 0
+; CALLS-NEXT:    [[A:%.*]] = insertvalue { i64, i32 } undef, i64 [[X]], 0
+; CALLS-NEXT:    [[TMP7:%.*]] = insertvalue { i64, i32 } [[TMP4]], i32 [[TMP2]], 1
+; CALLS-NEXT:    [[TMP8:%.*]] = icmp ne i32 [[TMP2]], 0
+; CALLS-NEXT:    [[TMP9:%.*]] = select i1 [[TMP8]], i32 [[TMP3]], i32 [[TMP6]]
+; CALLS-NEXT:    [[B:%.*]] = insertvalue { i64, i32 } [[A]], i32 [[Y]], 1
+; CALLS-NEXT:    store { i64, i32 } [[TMP7]], ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    store i32 [[TMP9]], ptr @__msan_retval_origin_tls, align 4
+; CALLS-NEXT:    ret { i64, i32 } [[B]]
+;
 entry:
   %a = insertvalue { i64, i32 } undef, i64 %x, 0
   %b = insertvalue { i64, i32 } %a, i32 %y, 1
   ret { i64, i32 } %b
 }
 
-; ORIGINS: @make_pair_64_32
-; First element shadow
-; ORIGINS: insertvalue { i64, i32 } { i64 -1, i32 -1 }, i64 {{.*}}, 0
-; First element origin
-; ORIGINS: icmp ne i64
-; ORIGINS: select i1
-; First element app value
-; ORIGINS: insertvalue { i64, i32 } undef, i64 {{.*}}, 0
-; Second element shadow
-; ORIGINS: insertvalue { i64, i32 } {{.*}}, i32 {{.*}}, 1
-; Second element origin
-; ORIGINS: icmp ne i32
-; ORIGINS: select i1
-; Second element app value
-; ORIGINS: insertvalue { i64, i32 } {{.*}}, i32 {{.*}}, 1
-; ORIGINS: ret { i64, i32 }
-
-
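
The ORIGINS comments removed above described the per-element origin selection
that the autogenerated checks now spell out: when an element is inserted, its
origin replaces the aggregate's current origin only if the element's shadow
is non-zero. A sketch of that select (names are illustrative):

  #include <cstdint>

  // The icmp-ne/select pairs in the ORIGIN and CALLS checks: keep the
  // inserted element's origin only when the element may be uninitialized.
  uint32_t insert_origin(uint64_t elem_shadow, uint32_t elem_origin,
                         uint32_t agg_origin) {
    return elem_shadow != 0 ? elem_origin : agg_origin;
  }
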
 ; Test shadow propagation for aggregates passed through ellipsis.
 
 %struct.StructByVal = type { i32, i32, i32, i32 }
@@ -936,6 +3673,219 @@ entry:
 declare void @VAArgStructFn(i32 %guard, ...)
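
In the ORIGIN checks below, note the zext/shl-32/or triples: each 32-bit
origin id is replicated into both halves of the 8-byte
__msan_va_arg_origin_tls slot that a 64-bit va_arg shadow occupies. As a
sketch:

  #include <cstdint>

  // Mirrors `zext` + `shl ..., 32` + `or` in the ORIGIN checks: one origin
  // id covers both 4-byte halves of an 8-byte va_arg slot.
  uint64_t widen_origin(uint32_t o32) {
    uint64_t o = o32;
    return o | (o << 32);
  }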
 
 define void @VAArgStruct(ptr nocapture %s) sanitize_memory {
+; CHECK-LABEL: define void @VAArgStruct(
+; CHECK-SAME: ptr nocapture [[S:%.*]]) #[[ATTR6]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[AGG_TMP2:%.*]] = alloca [[STRUCT_STRUCTBYVAL:%.*]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[AGG_TMP2]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080
+; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP3]], i8 -1, i64 16, i1 false)
+; CHECK-NEXT:    [[AGG_TMP_SROA_0_0_COPYLOAD:%.*]] = load i64, ptr [[S]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[S]] to i64
+; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080
+; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 4
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP0]], 0
+; CHECK-NEXT:    [[_MSPROP1:%.*]] = or i64 [[_MSPROP]], 0
+; CHECK-NEXT:    [[AGG_TMP_SROA_2_0__SROA_IDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCTBYVAL]], ptr [[S]], i64 0, i32 2
+; CHECK-NEXT:    [[AGG_TMP_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[AGG_TMP_SROA_2_0__SROA_IDX]], align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[AGG_TMP_SROA_2_0__SROA_IDX]] to i64
+; CHECK-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
+; CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
+; CHECK-NEXT:    [[_MSLD2:%.*]] = load i64, ptr [[TMP9]], align 4
+; CHECK-NEXT:    [[TMP10:%.*]] = call ptr @__msan_memcpy(ptr [[AGG_TMP2]], ptr [[S]], i64 16)
+; CHECK-NEXT:    store i32 -1, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    store i64 [[_MSLD]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    store i64 [[_MSLD2]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT:    store i64 [[_MSLD]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT:    store i64 [[_MSLD2]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[AGG_TMP2]] to i64
+; CHECK-NEXT:    [[TMP12:%.*]] = xor i64 [[TMP11]], 87960930222080
+; CHECK-NEXT:    [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), ptr align 8 [[TMP13]], i64 16, i1 false)
+; CHECK-NEXT:    store i64 [[_MSLD]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    store i64 [[_MSLD2]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT:    store i64 [[_MSLD]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT:    store i64 [[_MSLD2]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT:    [[TMP14:%.*]] = ptrtoint ptr [[AGG_TMP2]] to i64
+; CHECK-NEXT:    [[TMP15:%.*]] = xor i64 [[TMP14]], 87960930222080
+; CHECK-NEXT:    [[TMP16:%.*]] = inttoptr i64 [[TMP15]] to ptr
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 176) to ptr), ptr align 8 [[TMP16]], i64 16, i1 false)
+; CHECK-NEXT:    store i64 16, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    call void (i32, ...) @VAArgStructFn(i32 undef, i64 [[AGG_TMP_SROA_0_0_COPYLOAD]], i64 [[AGG_TMP_SROA_2_0_COPYLOAD]], i64 [[AGG_TMP_SROA_0_0_COPYLOAD]], i64 [[AGG_TMP_SROA_2_0_COPYLOAD]], ptr byval([[STRUCT_STRUCTBYVAL]]) align 8 [[AGG_TMP2]])
+; CHECK-NEXT:    ret void
+;
+; ORIGIN-LABEL: define void @VAArgStruct(
+; ORIGIN-SAME: ptr nocapture [[S:%.*]]) #[[ATTR6]] {
+; ORIGIN-NEXT:  [[ENTRY:.*:]]
+; ORIGIN-NEXT:    [[TMP0:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; ORIGIN-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[AGG_TMP2:%.*]] = alloca [[STRUCT_STRUCTBYVAL:%.*]], align 8
+; ORIGIN-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[AGG_TMP2]] to i64
+; ORIGIN-NEXT:    [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080
+; ORIGIN-NEXT:    [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; ORIGIN-NEXT:    [[TMP5:%.*]] = add i64 [[TMP3]], 17592186044416
+; ORIGIN-NEXT:    [[TMP6:%.*]] = and i64 [[TMP5]], -4
+; ORIGIN-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; ORIGIN-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP4]], i8 -1, i64 16, i1 false)
+; ORIGIN-NEXT:    call void @__msan_set_alloca_origin_with_descr(ptr [[AGG_TMP2]], i64 16, ptr @[[GLOB8:[0-9]+]], ptr @[[GLOB9:[0-9]+]])
+; ORIGIN-NEXT:    [[AGG_TMP_SROA_0_0_COPYLOAD:%.*]] = load i64, ptr [[S]], align 4
+; ORIGIN-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[S]] to i64
+; ORIGIN-NEXT:    [[TMP9:%.*]] = xor i64 [[TMP8]], 87960930222080
+; ORIGIN-NEXT:    [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr
+; ORIGIN-NEXT:    [[TMP11:%.*]] = add i64 [[TMP9]], 17592186044416
+; ORIGIN-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; ORIGIN-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP10]], align 4
+; ORIGIN-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
+; ORIGIN-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP0]], 0
+; ORIGIN-NEXT:    [[_MSPROP1:%.*]] = or i64 [[_MSPROP]], 0
+; ORIGIN-NEXT:    [[AGG_TMP_SROA_2_0__SROA_IDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCTBYVAL]], ptr [[S]], i64 0, i32 2
+; ORIGIN-NEXT:    [[AGG_TMP_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[AGG_TMP_SROA_2_0__SROA_IDX]], align 4
+; ORIGIN-NEXT:    [[TMP14:%.*]] = ptrtoint ptr [[AGG_TMP_SROA_2_0__SROA_IDX]] to i64
+; ORIGIN-NEXT:    [[TMP15:%.*]] = xor i64 [[TMP14]], 87960930222080
+; ORIGIN-NEXT:    [[TMP16:%.*]] = inttoptr i64 [[TMP15]] to ptr
+; ORIGIN-NEXT:    [[TMP17:%.*]] = add i64 [[TMP15]], 17592186044416
+; ORIGIN-NEXT:    [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr
+; ORIGIN-NEXT:    [[_MSLD2:%.*]] = load i64, ptr [[TMP16]], align 4
+; ORIGIN-NEXT:    [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4
+; ORIGIN-NEXT:    [[TMP20:%.*]] = call ptr @__msan_memcpy(ptr [[AGG_TMP2]], ptr [[S]], i64 16)
+; ORIGIN-NEXT:    store i32 -1, ptr @__msan_param_tls, align 8
+; ORIGIN-NEXT:    store i32 0, ptr @__msan_param_origin_tls, align 4
+; ORIGIN-NEXT:    store i64 [[_MSLD]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; ORIGIN-NEXT:    store i32 [[TMP13]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 8) to ptr), align 4
+; ORIGIN-NEXT:    store i64 [[_MSLD2]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; ORIGIN-NEXT:    store i32 [[TMP19]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4
+; ORIGIN-NEXT:    store i64 [[_MSLD]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; ORIGIN-NEXT:    store i32 [[TMP13]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 24) to ptr), align 4
+; ORIGIN-NEXT:    store i64 [[_MSLD2]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; ORIGIN-NEXT:    store i32 [[TMP19]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4
+; ORIGIN-NEXT:    [[TMP21:%.*]] = ptrtoint ptr [[AGG_TMP2]] to i64
+; ORIGIN-NEXT:    [[TMP22:%.*]] = xor i64 [[TMP21]], 87960930222080
+; ORIGIN-NEXT:    [[TMP23:%.*]] = inttoptr i64 [[TMP22]] to ptr
+; ORIGIN-NEXT:    [[TMP24:%.*]] = add i64 [[TMP22]], 17592186044416
+; ORIGIN-NEXT:    [[TMP25:%.*]] = inttoptr i64 [[TMP24]] to ptr
+; ORIGIN-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), ptr align 8 [[TMP23]], i64 16, i1 false)
+; ORIGIN-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 40) to ptr), ptr align 4 [[TMP25]], i64 16, i1 false)
+; ORIGIN-NEXT:    store i64 [[_MSLD]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8
+; ORIGIN-NEXT:    [[TMP26:%.*]] = zext i32 [[TMP13]] to i64
+; ORIGIN-NEXT:    [[TMP27:%.*]] = shl i64 [[TMP26]], 32
+; ORIGIN-NEXT:    [[TMP28:%.*]] = or i64 [[TMP26]], [[TMP27]]
+; ORIGIN-NEXT:    store i64 [[TMP28]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_origin_tls to i64), i64 8) to ptr), align 8
+; ORIGIN-NEXT:    store i64 [[_MSLD2]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8
+; ORIGIN-NEXT:    [[TMP29:%.*]] = zext i32 [[TMP19]] to i64
+; ORIGIN-NEXT:    [[TMP30:%.*]] = shl i64 [[TMP29]], 32
+; ORIGIN-NEXT:    [[TMP31:%.*]] = or i64 [[TMP29]], [[TMP30]]
+; ORIGIN-NEXT:    store i64 [[TMP31]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_origin_tls to i64), i64 16) to ptr), align 8
+; ORIGIN-NEXT:    store i64 [[_MSLD]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 24) to ptr), align 8
+; ORIGIN-NEXT:    [[TMP32:%.*]] = zext i32 [[TMP13]] to i64
+; ORIGIN-NEXT:    [[TMP33:%.*]] = shl i64 [[TMP32]], 32
+; ORIGIN-NEXT:    [[TMP34:%.*]] = or i64 [[TMP32]], [[TMP33]]
+; ORIGIN-NEXT:    store i64 [[TMP34]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_origin_tls to i64), i64 24) to ptr), align 8
+; ORIGIN-NEXT:    store i64 [[_MSLD2]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 32) to ptr), align 8
+; ORIGIN-NEXT:    [[TMP35:%.*]] = zext i32 [[TMP19]] to i64
+; ORIGIN-NEXT:    [[TMP36:%.*]] = shl i64 [[TMP35]], 32
+; ORIGIN-NEXT:    [[TMP37:%.*]] = or i64 [[TMP35]], [[TMP36]]
+; ORIGIN-NEXT:    store i64 [[TMP37]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_origin_tls to i64), i64 32) to ptr), align 8
+; ORIGIN-NEXT:    [[TMP38:%.*]] = ptrtoint ptr [[AGG_TMP2]] to i64
+; ORIGIN-NEXT:    [[TMP39:%.*]] = xor i64 [[TMP38]], 87960930222080
+; ORIGIN-NEXT:    [[TMP40:%.*]] = inttoptr i64 [[TMP39]] to ptr
+; ORIGIN-NEXT:    [[TMP41:%.*]] = add i64 [[TMP39]], 17592186044416
+; ORIGIN-NEXT:    [[TMP42:%.*]] = inttoptr i64 [[TMP41]] to ptr
+; ORIGIN-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 176) to ptr), ptr align 8 [[TMP40]], i64 16, i1 false)
+; ORIGIN-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_origin_tls to i64), i64 176) to ptr), ptr align 8 [[TMP42]], i64 16, i1 false)
+; ORIGIN-NEXT:    store i64 16, ptr @__msan_va_arg_overflow_size_tls, align 8
+; ORIGIN-NEXT:    call void (i32, ...) @VAArgStructFn(i32 undef, i64 [[AGG_TMP_SROA_0_0_COPYLOAD]], i64 [[AGG_TMP_SROA_2_0_COPYLOAD]], i64 [[AGG_TMP_SROA_0_0_COPYLOAD]], i64 [[AGG_TMP_SROA_2_0_COPYLOAD]], ptr byval([[STRUCT_STRUCTBYVAL]]) align 8 [[AGG_TMP2]])
+; ORIGIN-NEXT:    ret void
+;
+; CALLS-LABEL: define void @VAArgStruct(
+; CALLS-SAME: ptr nocapture [[S:%.*]]) #[[ATTR6]] {
+; CALLS-NEXT:  [[ENTRY:.*:]]
+; CALLS-NEXT:    [[TMP0:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    [[AGG_TMP2:%.*]] = alloca [[STRUCT_STRUCTBYVAL:%.*]], align 8
+; CALLS-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[AGG_TMP2]] to i64
+; CALLS-NEXT:    [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080
+; CALLS-NEXT:    [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CALLS-NEXT:    [[TMP5:%.*]] = add i64 [[TMP3]], 17592186044416
+; CALLS-NEXT:    [[TMP6:%.*]] = and i64 [[TMP5]], -4
+; CALLS-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CALLS-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP4]], i8 -1, i64 16, i1 false)
+; CALLS-NEXT:    call void @__msan_set_alloca_origin_with_descr(ptr [[AGG_TMP2]], i64 16, ptr @[[GLOB8:[0-9]+]], ptr @[[GLOB9:[0-9]+]])
+; CALLS-NEXT:    call void @__msan_maybe_warning_8(i64 zeroext [[TMP0]], i32 zeroext [[TMP1]])
+; CALLS-NEXT:    [[AGG_TMP_SROA_0_0_COPYLOAD:%.*]] = load i64, ptr [[S]], align 4
+; CALLS-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[S]] to i64
+; CALLS-NEXT:    [[TMP9:%.*]] = xor i64 [[TMP8]], 87960930222080
+; CALLS-NEXT:    [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr
+; CALLS-NEXT:    [[TMP11:%.*]] = add i64 [[TMP9]], 17592186044416
+; CALLS-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CALLS-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP10]], align 4
+; CALLS-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
+; CALLS-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP0]], 0
+; CALLS-NEXT:    [[_MSPROP1:%.*]] = or i64 [[_MSPROP]], 0
+; CALLS-NEXT:    [[AGG_TMP_SROA_2_0__SROA_IDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCTBYVAL]], ptr [[S]], i64 0, i32 2
+; CALLS-NEXT:    call void @__msan_maybe_warning_8(i64 zeroext [[_MSPROP1]], i32 zeroext [[TMP1]])
+; CALLS-NEXT:    [[AGG_TMP_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[AGG_TMP_SROA_2_0__SROA_IDX]], align 4
+; CALLS-NEXT:    [[TMP14:%.*]] = ptrtoint ptr [[AGG_TMP_SROA_2_0__SROA_IDX]] to i64
+; CALLS-NEXT:    [[TMP15:%.*]] = xor i64 [[TMP14]], 87960930222080
+; CALLS-NEXT:    [[TMP16:%.*]] = inttoptr i64 [[TMP15]] to ptr
+; CALLS-NEXT:    [[TMP17:%.*]] = add i64 [[TMP15]], 17592186044416
+; CALLS-NEXT:    [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr
+; CALLS-NEXT:    [[_MSLD2:%.*]] = load i64, ptr [[TMP16]], align 4
+; CALLS-NEXT:    [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4
+; CALLS-NEXT:    [[TMP20:%.*]] = call ptr @__msan_memcpy(ptr [[AGG_TMP2]], ptr [[S]], i64 16)
+; CALLS-NEXT:    store i32 -1, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    store i32 0, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    store i64 [[_MSLD]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CALLS-NEXT:    store i32 [[TMP13]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 8) to ptr), align 4
+; CALLS-NEXT:    store i64 [[_MSLD2]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CALLS-NEXT:    store i32 [[TMP19]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4
+; CALLS-NEXT:    store i64 [[_MSLD]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CALLS-NEXT:    store i32 [[TMP13]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 24) to ptr), align 4
+; CALLS-NEXT:    store i64 [[_MSLD2]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CALLS-NEXT:    store i32 [[TMP19]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4
+; CALLS-NEXT:    [[TMP21:%.*]] = ptrtoint ptr [[AGG_TMP2]] to i64
+; CALLS-NEXT:    [[TMP22:%.*]] = xor i64 [[TMP21]], 87960930222080
+; CALLS-NEXT:    [[TMP23:%.*]] = inttoptr i64 [[TMP22]] to ptr
+; CALLS-NEXT:    [[TMP24:%.*]] = add i64 [[TMP22]], 17592186044416
+; CALLS-NEXT:    [[TMP25:%.*]] = inttoptr i64 [[TMP24]] to ptr
+; CALLS-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), ptr align 8 [[TMP23]], i64 16, i1 false)
+; CALLS-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 40) to ptr), ptr align 4 [[TMP25]], i64 16, i1 false)
+; CALLS-NEXT:    store i64 [[_MSLD]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8
+; CALLS-NEXT:    [[TMP26:%.*]] = zext i32 [[TMP13]] to i64
+; CALLS-NEXT:    [[TMP27:%.*]] = shl i64 [[TMP26]], 32
+; CALLS-NEXT:    [[TMP28:%.*]] = or i64 [[TMP26]], [[TMP27]]
+; CALLS-NEXT:    store i64 [[TMP28]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_origin_tls to i64), i64 8) to ptr), align 8
+; CALLS-NEXT:    store i64 [[_MSLD2]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8
+; CALLS-NEXT:    [[TMP29:%.*]] = zext i32 [[TMP19]] to i64
+; CALLS-NEXT:    [[TMP30:%.*]] = shl i64 [[TMP29]], 32
+; CALLS-NEXT:    [[TMP31:%.*]] = or i64 [[TMP29]], [[TMP30]]
+; CALLS-NEXT:    store i64 [[TMP31]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_origin_tls to i64), i64 16) to ptr), align 8
+; CALLS-NEXT:    store i64 [[_MSLD]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 24) to ptr), align 8
+; CALLS-NEXT:    [[TMP32:%.*]] = zext i32 [[TMP13]] to i64
+; CALLS-NEXT:    [[TMP33:%.*]] = shl i64 [[TMP32]], 32
+; CALLS-NEXT:    [[TMP34:%.*]] = or i64 [[TMP32]], [[TMP33]]
+; CALLS-NEXT:    store i64 [[TMP34]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_origin_tls to i64), i64 24) to ptr), align 8
+; CALLS-NEXT:    store i64 [[_MSLD2]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 32) to ptr), align 8
+; CALLS-NEXT:    [[TMP35:%.*]] = zext i32 [[TMP19]] to i64
+; CALLS-NEXT:    [[TMP36:%.*]] = shl i64 [[TMP35]], 32
+; CALLS-NEXT:    [[TMP37:%.*]] = or i64 [[TMP35]], [[TMP36]]
+; CALLS-NEXT:    store i64 [[TMP37]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_origin_tls to i64), i64 32) to ptr), align 8
+; CALLS-NEXT:    [[TMP38:%.*]] = ptrtoint ptr [[AGG_TMP2]] to i64
+; CALLS-NEXT:    [[TMP39:%.*]] = xor i64 [[TMP38]], 87960930222080
+; CALLS-NEXT:    [[TMP40:%.*]] = inttoptr i64 [[TMP39]] to ptr
+; CALLS-NEXT:    [[TMP41:%.*]] = add i64 [[TMP39]], 17592186044416
+; CALLS-NEXT:    [[TMP42:%.*]] = inttoptr i64 [[TMP41]] to ptr
+; CALLS-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 176) to ptr), ptr align 8 [[TMP40]], i64 16, i1 false)
+; CALLS-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_origin_tls to i64), i64 176) to ptr), ptr align 8 [[TMP42]], i64 16, i1 false)
+; CALLS-NEXT:    store i64 16, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CALLS-NEXT:    call void (i32, ...) @VAArgStructFn(i32 undef, i64 [[AGG_TMP_SROA_0_0_COPYLOAD]], i64 [[AGG_TMP_SROA_2_0_COPYLOAD]], i64 [[AGG_TMP_SROA_0_0_COPYLOAD]], i64 [[AGG_TMP_SROA_2_0_COPYLOAD]], ptr byval([[STRUCT_STRUCTBYVAL]]) align 8 [[AGG_TMP2]])
+; CALLS-NEXT:    ret void
+;
 entry:
   %agg.tmp2 = alloca %struct.StructByVal, align 8
   %agg.tmp.sroa.0.0.copyload = load i64, ptr %s, align 4
@@ -946,27 +3896,223 @@ entry:
   ret void
 }
 
-; "undef" and the first 2 structs go to general purpose registers;
-; the third struct goes to the overflow area byval
-
-; CHECK-LABEL: @VAArgStruct
-; undef not stored to __msan_va_arg_tls - it's a fixed argument
-; first struct through general purpose registers
-; CHECK: store i64 {{.*}}, ptr {{.*}}@__msan_va_arg_tls{{.*}}, i64 8){{.*}}, align 8
-; CHECK: store i64 {{.*}}, ptr {{.*}}@__msan_va_arg_tls{{.*}}, i64 16){{.*}}, align 8
-; second struct through general purpose registers
-; CHECK: store i64 {{.*}}, ptr {{.*}}@__msan_va_arg_tls{{.*}}, i64 24){{.*}}, align 8
-; CHECK: store i64 {{.*}}, ptr {{.*}}@__msan_va_arg_tls{{.*}}, i64 32){{.*}}, align 8
-; third struct through the overflow area byval
-; CHECK: ptrtoint ptr {{.*}} to i64
-; CHECK: call void @llvm.memcpy.p0.p0.i64{{.*}}@__msan_va_arg_tls {{.*}}, i64 176
-; CHECK: store i64 16, ptr @__msan_va_arg_overflow_size_tls
-; CHECK: call void (i32, ...) @VAArgStructFn
-; CHECK: ret void
 
 ; Same code compiled without SSE (see attributes below).
 ; The register save area is only 48 bytes instead of 176.
 define void @VAArgStructNoSSE(ptr nocapture %s) sanitize_memory #0 {
+; CHECK-LABEL: define void @VAArgStructNoSSE(
+; CHECK-SAME: ptr nocapture [[S:%.*]]) #[[ATTR9:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[AGG_TMP2:%.*]] = alloca [[STRUCT_STRUCTBYVAL:%.*]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[AGG_TMP2]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i64 [[TMP1]], 87960930222080
+; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP3]], i8 -1, i64 16, i1 false)
+; CHECK-NEXT:    [[AGG_TMP_SROA_0_0_COPYLOAD:%.*]] = load i64, ptr [[S]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[S]] to i64
+; CHECK-NEXT:    [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080
+; CHECK-NEXT:    [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 4
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP0]], 0
+; CHECK-NEXT:    [[_MSPROP1:%.*]] = or i64 [[_MSPROP]], 0
+; CHECK-NEXT:    [[AGG_TMP_SROA_2_0__SROA_IDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCTBYVAL]], ptr [[S]], i64 0, i32 2
+; CHECK-NEXT:    [[AGG_TMP_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[AGG_TMP_SROA_2_0__SROA_IDX]], align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = ptrtoint ptr [[AGG_TMP_SROA_2_0__SROA_IDX]] to i64
+; CHECK-NEXT:    [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
+; CHECK-NEXT:    [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
+; CHECK-NEXT:    [[_MSLD2:%.*]] = load i64, ptr [[TMP9]], align 4
+; CHECK-NEXT:    [[TMP10:%.*]] = call ptr @__msan_memcpy(ptr [[AGG_TMP2]], ptr [[S]], i64 16)
+; CHECK-NEXT:    store i32 -1, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    store i64 [[_MSLD]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    store i64 [[_MSLD2]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT:    store i64 [[_MSLD]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT:    store i64 [[_MSLD2]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT:    [[TMP11:%.*]] = ptrtoint ptr [[AGG_TMP2]] to i64
+; CHECK-NEXT:    [[TMP12:%.*]] = xor i64 [[TMP11]], 87960930222080
+; CHECK-NEXT:    [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), ptr align 8 [[TMP13]], i64 16, i1 false)
+; CHECK-NEXT:    store i64 [[_MSLD]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    store i64 [[_MSLD2]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT:    store i64 [[_MSLD]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT:    store i64 [[_MSLD2]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT:    [[TMP14:%.*]] = ptrtoint ptr [[AGG_TMP2]] to i64
+; CHECK-NEXT:    [[TMP15:%.*]] = xor i64 [[TMP14]], 87960930222080
+; CHECK-NEXT:    [[TMP16:%.*]] = inttoptr i64 [[TMP15]] to ptr
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 48) to ptr), ptr align 8 [[TMP16]], i64 16, i1 false)
+; CHECK-NEXT:    store i64 16, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CHECK-NEXT:    call void (i32, ...) @VAArgStructFn(i32 undef, i64 [[AGG_TMP_SROA_0_0_COPYLOAD]], i64 [[AGG_TMP_SROA_2_0_COPYLOAD]], i64 [[AGG_TMP_SROA_0_0_COPYLOAD]], i64 [[AGG_TMP_SROA_2_0_COPYLOAD]], ptr byval([[STRUCT_STRUCTBYVAL]]) align 8 [[AGG_TMP2]])
+; CHECK-NEXT:    ret void
+;
+; ORIGIN-LABEL: define void @VAArgStructNoSSE(
+; ORIGIN-SAME: ptr nocapture [[S:%.*]]) #[[ATTR9:[0-9]+]] {
+; ORIGIN-NEXT:  [[ENTRY:.*:]]
+; ORIGIN-NEXT:    [[TMP0:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; ORIGIN-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    [[AGG_TMP2:%.*]] = alloca [[STRUCT_STRUCTBYVAL:%.*]], align 8
+; ORIGIN-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[AGG_TMP2]] to i64
+; ORIGIN-NEXT:    [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080
+; ORIGIN-NEXT:    [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; ORIGIN-NEXT:    [[TMP5:%.*]] = add i64 [[TMP3]], 17592186044416
+; ORIGIN-NEXT:    [[TMP6:%.*]] = and i64 [[TMP5]], -4
+; ORIGIN-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; ORIGIN-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP4]], i8 -1, i64 16, i1 false)
+; ORIGIN-NEXT:    call void @__msan_set_alloca_origin_with_descr(ptr [[AGG_TMP2]], i64 16, ptr @[[GLOB10:[0-9]+]], ptr @[[GLOB11:[0-9]+]])
+; ORIGIN-NEXT:    [[AGG_TMP_SROA_0_0_COPYLOAD:%.*]] = load i64, ptr [[S]], align 4
+; ORIGIN-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[S]] to i64
+; ORIGIN-NEXT:    [[TMP9:%.*]] = xor i64 [[TMP8]], 87960930222080
+; ORIGIN-NEXT:    [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr
+; ORIGIN-NEXT:    [[TMP11:%.*]] = add i64 [[TMP9]], 17592186044416
+; ORIGIN-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; ORIGIN-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP10]], align 4
+; ORIGIN-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
+; ORIGIN-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP0]], 0
+; ORIGIN-NEXT:    [[_MSPROP1:%.*]] = or i64 [[_MSPROP]], 0
+; ORIGIN-NEXT:    [[AGG_TMP_SROA_2_0__SROA_IDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCTBYVAL]], ptr [[S]], i64 0, i32 2
+; ORIGIN-NEXT:    [[AGG_TMP_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[AGG_TMP_SROA_2_0__SROA_IDX]], align 4
+; ORIGIN-NEXT:    [[TMP14:%.*]] = ptrtoint ptr [[AGG_TMP_SROA_2_0__SROA_IDX]] to i64
+; ORIGIN-NEXT:    [[TMP15:%.*]] = xor i64 [[TMP14]], 87960930222080
+; ORIGIN-NEXT:    [[TMP16:%.*]] = inttoptr i64 [[TMP15]] to ptr
+; ORIGIN-NEXT:    [[TMP17:%.*]] = add i64 [[TMP15]], 17592186044416
+; ORIGIN-NEXT:    [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr
+; ORIGIN-NEXT:    [[_MSLD2:%.*]] = load i64, ptr [[TMP16]], align 4
+; ORIGIN-NEXT:    [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4
+; ORIGIN-NEXT:    [[TMP20:%.*]] = call ptr @__msan_memcpy(ptr [[AGG_TMP2]], ptr [[S]], i64 16)
+; ORIGIN-NEXT:    store i32 -1, ptr @__msan_param_tls, align 8
+; ORIGIN-NEXT:    store i32 0, ptr @__msan_param_origin_tls, align 4
+; ORIGIN-NEXT:    store i64 [[_MSLD]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; ORIGIN-NEXT:    store i32 [[TMP13]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 8) to ptr), align 4
+; ORIGIN-NEXT:    store i64 [[_MSLD2]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; ORIGIN-NEXT:    store i32 [[TMP19]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4
+; ORIGIN-NEXT:    store i64 [[_MSLD]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; ORIGIN-NEXT:    store i32 [[TMP13]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 24) to ptr), align 4
+; ORIGIN-NEXT:    store i64 [[_MSLD2]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; ORIGIN-NEXT:    store i32 [[TMP19]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4
+; ORIGIN-NEXT:    [[TMP21:%.*]] = ptrtoint ptr [[AGG_TMP2]] to i64
+; ORIGIN-NEXT:    [[TMP22:%.*]] = xor i64 [[TMP21]], 87960930222080
+; ORIGIN-NEXT:    [[TMP23:%.*]] = inttoptr i64 [[TMP22]] to ptr
+; ORIGIN-NEXT:    [[TMP24:%.*]] = add i64 [[TMP22]], 17592186044416
+; ORIGIN-NEXT:    [[TMP25:%.*]] = inttoptr i64 [[TMP24]] to ptr
+; ORIGIN-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), ptr align 8 [[TMP23]], i64 16, i1 false)
+; ORIGIN-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 40) to ptr), ptr align 4 [[TMP25]], i64 16, i1 false)
+; ORIGIN-NEXT:    store i64 [[_MSLD]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8
+; ORIGIN-NEXT:    [[TMP26:%.*]] = zext i32 [[TMP13]] to i64
+; ORIGIN-NEXT:    [[TMP27:%.*]] = shl i64 [[TMP26]], 32
+; ORIGIN-NEXT:    [[TMP28:%.*]] = or i64 [[TMP26]], [[TMP27]]
+; ORIGIN-NEXT:    store i64 [[TMP28]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_origin_tls to i64), i64 8) to ptr), align 8
+; ORIGIN-NEXT:    store i64 [[_MSLD2]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8
+; ORIGIN-NEXT:    [[TMP29:%.*]] = zext i32 [[TMP19]] to i64
+; ORIGIN-NEXT:    [[TMP30:%.*]] = shl i64 [[TMP29]], 32
+; ORIGIN-NEXT:    [[TMP31:%.*]] = or i64 [[TMP29]], [[TMP30]]
+; ORIGIN-NEXT:    store i64 [[TMP31]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_origin_tls to i64), i64 16) to ptr), align 8
+; ORIGIN-NEXT:    store i64 [[_MSLD]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 24) to ptr), align 8
+; ORIGIN-NEXT:    [[TMP32:%.*]] = zext i32 [[TMP13]] to i64
+; ORIGIN-NEXT:    [[TMP33:%.*]] = shl i64 [[TMP32]], 32
+; ORIGIN-NEXT:    [[TMP34:%.*]] = or i64 [[TMP32]], [[TMP33]]
+; ORIGIN-NEXT:    store i64 [[TMP34]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_origin_tls to i64), i64 24) to ptr), align 8
+; ORIGIN-NEXT:    store i64 [[_MSLD2]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 32) to ptr), align 8
+; ORIGIN-NEXT:    [[TMP35:%.*]] = zext i32 [[TMP19]] to i64
+; ORIGIN-NEXT:    [[TMP36:%.*]] = shl i64 [[TMP35]], 32
+; ORIGIN-NEXT:    [[TMP37:%.*]] = or i64 [[TMP35]], [[TMP36]]
+; ORIGIN-NEXT:    store i64 [[TMP37]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_origin_tls to i64), i64 32) to ptr), align 8
+; ORIGIN-NEXT:    [[TMP38:%.*]] = ptrtoint ptr [[AGG_TMP2]] to i64
+; ORIGIN-NEXT:    [[TMP39:%.*]] = xor i64 [[TMP38]], 87960930222080
+; ORIGIN-NEXT:    [[TMP40:%.*]] = inttoptr i64 [[TMP39]] to ptr
+; ORIGIN-NEXT:    [[TMP41:%.*]] = add i64 [[TMP39]], 17592186044416
+; ORIGIN-NEXT:    [[TMP42:%.*]] = inttoptr i64 [[TMP41]] to ptr
+; ORIGIN-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 48) to ptr), ptr align 8 [[TMP40]], i64 16, i1 false)
+; ORIGIN-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_origin_tls to i64), i64 48) to ptr), ptr align 8 [[TMP42]], i64 16, i1 false)
+; ORIGIN-NEXT:    store i64 16, ptr @__msan_va_arg_overflow_size_tls, align 8
+; ORIGIN-NEXT:    call void (i32, ...) @VAArgStructFn(i32 undef, i64 [[AGG_TMP_SROA_0_0_COPYLOAD]], i64 [[AGG_TMP_SROA_2_0_COPYLOAD]], i64 [[AGG_TMP_SROA_0_0_COPYLOAD]], i64 [[AGG_TMP_SROA_2_0_COPYLOAD]], ptr byval([[STRUCT_STRUCTBYVAL]]) align 8 [[AGG_TMP2]])
+; ORIGIN-NEXT:    ret void
+;
+; CALLS-LABEL: define void @VAArgStructNoSSE(
+; CALLS-SAME: ptr nocapture [[S:%.*]]) #[[ATTR9:[0-9]+]] {
+; CALLS-NEXT:  [[ENTRY:.*:]]
+; CALLS-NEXT:    [[TMP0:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    [[AGG_TMP2:%.*]] = alloca [[STRUCT_STRUCTBYVAL:%.*]], align 8
+; CALLS-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[AGG_TMP2]] to i64
+; CALLS-NEXT:    [[TMP3:%.*]] = xor i64 [[TMP2]], 87960930222080
+; CALLS-NEXT:    [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr
+; CALLS-NEXT:    [[TMP5:%.*]] = add i64 [[TMP3]], 17592186044416
+; CALLS-NEXT:    [[TMP6:%.*]] = and i64 [[TMP5]], -4
+; CALLS-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CALLS-NEXT:    call void @llvm.memset.p0.i64(ptr align 8 [[TMP4]], i8 -1, i64 16, i1 false)
+; CALLS-NEXT:    call void @__msan_set_alloca_origin_with_descr(ptr [[AGG_TMP2]], i64 16, ptr @[[GLOB10:[0-9]+]], ptr @[[GLOB11:[0-9]+]])
+; CALLS-NEXT:    call void @__msan_maybe_warning_8(i64 zeroext [[TMP0]], i32 zeroext [[TMP1]])
+; CALLS-NEXT:    [[AGG_TMP_SROA_0_0_COPYLOAD:%.*]] = load i64, ptr [[S]], align 4
+; CALLS-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[S]] to i64
+; CALLS-NEXT:    [[TMP9:%.*]] = xor i64 [[TMP8]], 87960930222080
+; CALLS-NEXT:    [[TMP10:%.*]] = inttoptr i64 [[TMP9]] to ptr
+; CALLS-NEXT:    [[TMP11:%.*]] = add i64 [[TMP9]], 17592186044416
+; CALLS-NEXT:    [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CALLS-NEXT:    [[_MSLD:%.*]] = load i64, ptr [[TMP10]], align 4
+; CALLS-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
+; CALLS-NEXT:    [[_MSPROP:%.*]] = or i64 [[TMP0]], 0
+; CALLS-NEXT:    [[_MSPROP1:%.*]] = or i64 [[_MSPROP]], 0
+; CALLS-NEXT:    [[AGG_TMP_SROA_2_0__SROA_IDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCTBYVAL]], ptr [[S]], i64 0, i32 2
+; CALLS-NEXT:    call void @__msan_maybe_warning_8(i64 zeroext [[_MSPROP1]], i32 zeroext [[TMP1]])
+; CALLS-NEXT:    [[AGG_TMP_SROA_2_0_COPYLOAD:%.*]] = load i64, ptr [[AGG_TMP_SROA_2_0__SROA_IDX]], align 4
+; CALLS-NEXT:    [[TMP14:%.*]] = ptrtoint ptr [[AGG_TMP_SROA_2_0__SROA_IDX]] to i64
+; CALLS-NEXT:    [[TMP15:%.*]] = xor i64 [[TMP14]], 87960930222080
+; CALLS-NEXT:    [[TMP16:%.*]] = inttoptr i64 [[TMP15]] to ptr
+; CALLS-NEXT:    [[TMP17:%.*]] = add i64 [[TMP15]], 17592186044416
+; CALLS-NEXT:    [[TMP18:%.*]] = inttoptr i64 [[TMP17]] to ptr
+; CALLS-NEXT:    [[_MSLD2:%.*]] = load i64, ptr [[TMP16]], align 4
+; CALLS-NEXT:    [[TMP19:%.*]] = load i32, ptr [[TMP18]], align 4
+; CALLS-NEXT:    [[TMP20:%.*]] = call ptr @__msan_memcpy(ptr [[AGG_TMP2]], ptr [[S]], i64 16)
+; CALLS-NEXT:    store i32 -1, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    store i32 0, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    store i64 [[_MSLD]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CALLS-NEXT:    store i32 [[TMP13]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 8) to ptr), align 4
+; CALLS-NEXT:    store i64 [[_MSLD2]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CALLS-NEXT:    store i32 [[TMP19]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 16) to ptr), align 4
+; CALLS-NEXT:    store i64 [[_MSLD]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CALLS-NEXT:    store i32 [[TMP13]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 24) to ptr), align 4
+; CALLS-NEXT:    store i64 [[_MSLD2]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CALLS-NEXT:    store i32 [[TMP19]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 32) to ptr), align 4
+; CALLS-NEXT:    [[TMP21:%.*]] = ptrtoint ptr [[AGG_TMP2]] to i64
+; CALLS-NEXT:    [[TMP22:%.*]] = xor i64 [[TMP21]], 87960930222080
+; CALLS-NEXT:    [[TMP23:%.*]] = inttoptr i64 [[TMP22]] to ptr
+; CALLS-NEXT:    [[TMP24:%.*]] = add i64 [[TMP22]], 17592186044416
+; CALLS-NEXT:    [[TMP25:%.*]] = inttoptr i64 [[TMP24]] to ptr
+; CALLS-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), ptr align 8 [[TMP23]], i64 16, i1 false)
+; CALLS-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 4 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_origin_tls to i64), i64 40) to ptr), ptr align 4 [[TMP25]], i64 16, i1 false)
+; CALLS-NEXT:    store i64 [[_MSLD]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 8) to ptr), align 8
+; CALLS-NEXT:    [[TMP26:%.*]] = zext i32 [[TMP13]] to i64
+; CALLS-NEXT:    [[TMP27:%.*]] = shl i64 [[TMP26]], 32
+; CALLS-NEXT:    [[TMP28:%.*]] = or i64 [[TMP26]], [[TMP27]]
+; CALLS-NEXT:    store i64 [[TMP28]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_origin_tls to i64), i64 8) to ptr), align 8
+; CALLS-NEXT:    store i64 [[_MSLD2]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 16) to ptr), align 8
+; CALLS-NEXT:    [[TMP29:%.*]] = zext i32 [[TMP19]] to i64
+; CALLS-NEXT:    [[TMP30:%.*]] = shl i64 [[TMP29]], 32
+; CALLS-NEXT:    [[TMP31:%.*]] = or i64 [[TMP29]], [[TMP30]]
+; CALLS-NEXT:    store i64 [[TMP31]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_origin_tls to i64), i64 16) to ptr), align 8
+; CALLS-NEXT:    store i64 [[_MSLD]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 24) to ptr), align 8
+; CALLS-NEXT:    [[TMP32:%.*]] = zext i32 [[TMP13]] to i64
+; CALLS-NEXT:    [[TMP33:%.*]] = shl i64 [[TMP32]], 32
+; CALLS-NEXT:    [[TMP34:%.*]] = or i64 [[TMP32]], [[TMP33]]
+; CALLS-NEXT:    store i64 [[TMP34]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_origin_tls to i64), i64 24) to ptr), align 8
+; CALLS-NEXT:    store i64 [[_MSLD2]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 32) to ptr), align 8
+; CALLS-NEXT:    [[TMP35:%.*]] = zext i32 [[TMP19]] to i64
+; CALLS-NEXT:    [[TMP36:%.*]] = shl i64 [[TMP35]], 32
+; CALLS-NEXT:    [[TMP37:%.*]] = or i64 [[TMP35]], [[TMP36]]
+; CALLS-NEXT:    store i64 [[TMP37]], ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_origin_tls to i64), i64 32) to ptr), align 8
+; CALLS-NEXT:    [[TMP38:%.*]] = ptrtoint ptr [[AGG_TMP2]] to i64
+; CALLS-NEXT:    [[TMP39:%.*]] = xor i64 [[TMP38]], 87960930222080
+; CALLS-NEXT:    [[TMP40:%.*]] = inttoptr i64 [[TMP39]] to ptr
+; CALLS-NEXT:    [[TMP41:%.*]] = add i64 [[TMP39]], 17592186044416
+; CALLS-NEXT:    [[TMP42:%.*]] = inttoptr i64 [[TMP41]] to ptr
+; CALLS-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_tls to i64), i64 48) to ptr), ptr align 8 [[TMP40]], i64 16, i1 false)
+; CALLS-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr align 8 inttoptr (i64 add (i64 ptrtoint (ptr @__msan_va_arg_origin_tls to i64), i64 48) to ptr), ptr align 8 [[TMP42]], i64 16, i1 false)
+; CALLS-NEXT:    store i64 16, ptr @__msan_va_arg_overflow_size_tls, align 8
+; CALLS-NEXT:    call void (i32, ...) @VAArgStructFn(i32 undef, i64 [[AGG_TMP_SROA_0_0_COPYLOAD]], i64 [[AGG_TMP_SROA_2_0_COPYLOAD]], i64 [[AGG_TMP_SROA_0_0_COPYLOAD]], i64 [[AGG_TMP_SROA_2_0_COPYLOAD]], ptr byval([[STRUCT_STRUCTBYVAL]]) align 8 [[AGG_TMP2]])
+; CALLS-NEXT:    ret void
+;
 entry:
   %agg.tmp2 = alloca %struct.StructByVal, align 8
   %agg.tmp.sroa.0.0.copyload = load i64, ptr %s, align 4
@@ -979,11 +4125,46 @@ entry:
 
 attributes #0 = { "target-features"="+fxsr,+x87,-sse" }
 
-; CHECK: call void @llvm.memcpy.p0.p0.i64{{.*}}@__msan_va_arg_tls {{.*}}, i64 48
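
As an aside on the 48 vs. 176 figures checked above: they fall out of the x86-64 SysV register save area. A minimal sketch of the arithmetic (the constant names are illustrative, not the pass's own):

    // Register save area sizes on x86-64 SysV; names invented for illustration.
    constexpr unsigned kGPRegs = 6;    // rdi, rsi, rdx, rcx, r8, r9
    constexpr unsigned kGPSlot = 8;    // bytes per general-purpose slot
    constexpr unsigned kXMMRegs = 8;   // xmm0..xmm7
    constexpr unsigned kXMMSlot = 16;  // bytes per vector slot

    constexpr unsigned kSaveAreaSSE   = kGPRegs * kGPSlot + kXMMRegs * kXMMSlot;
    constexpr unsigned kSaveAreaNoSSE = kGPRegs * kGPSlot;

    static_assert(kSaveAreaSSE == 176, "GP + XMM slots; overflow area starts here");
    static_assert(kSaveAreaNoSSE == 48, "GP slots only when SSE is disabled");

This is why the byval struct's shadow is memcpy'd into __msan_va_arg_tls at offset 176 in @VAArgStruct but at offset 48 in @VAArgStructNoSSE.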
 
 declare i32 @InnerTailCall(i32 %a)
 
 define void @MismatchedReturnTypeTailCall(i32 %a) sanitize_memory {
+; CHECK-LABEL: define void @MismatchedReturnTypeTailCall(
+; CHECK-SAME: i32 [[A:%.*]]) #[[ATTR6]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    store i32 [[TMP1]], ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    [[B:%.*]] = tail call i32 @InnerTailCall(i32 [[A]])
+; CHECK-NEXT:    [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret void
+;
+; ORIGIN-LABEL: define void @MismatchedReturnTypeTailCall(
+; ORIGIN-SAME: i32 [[A:%.*]]) #[[ATTR6]] {
+; ORIGIN-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; ORIGIN-NEXT:    [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    store i32 [[TMP1]], ptr @__msan_param_tls, align 8
+; ORIGIN-NEXT:    store i32 [[TMP2]], ptr @__msan_param_origin_tls, align 4
+; ORIGIN-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    [[B:%.*]] = tail call i32 @InnerTailCall(i32 [[A]])
+; ORIGIN-NEXT:    [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8
+; ORIGIN-NEXT:    [[TMP3:%.*]] = load i32, ptr @__msan_retval_origin_tls, align 4
+; ORIGIN-NEXT:    ret void
+;
+; CALLS-LABEL: define void @MismatchedReturnTypeTailCall(
+; CALLS-SAME: i32 [[A:%.*]]) #[[ATTR6]] {
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    store i32 [[TMP1]], ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    store i32 [[TMP2]], ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    store i32 0, ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    [[B:%.*]] = tail call i32 @InnerTailCall(i32 [[A]])
+; CALLS-NEXT:    [[_MSRET:%.*]] = load i32, ptr @__msan_retval_tls, align 8
+; CALLS-NEXT:    [[TMP3:%.*]] = load i32, ptr @__msan_retval_origin_tls, align 4
+; CALLS-NEXT:    ret void
+;
   %b = tail call i32 @InnerTailCall(i32 %a)
   ret void
 }
@@ -991,14 +4172,39 @@ define void @MismatchedReturnTypeTailCall(i32 %a) sanitize_memory {
 ; We used to strip off the 'tail' modifier, but now that we unpoison the
 ; return slot shadow before the call, we don't need to anymore.
 
-; CHECK-LABEL: define void @MismatchedReturnTypeTailCall
-; CHECK: tail call i32 @InnerTailCall
-; CHECK: ret void
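
A freestanding model of the hand-off the comment above relies on may help; the thread-local below merely stands in for the real __msan_retval_tls, and the function names are invented:

    #include <cstdint>

    thread_local int32_t retval_shadow_tls;  // stand-in for __msan_retval_tls

    int32_t inner_tail_call(int32_t a);  // callee; may or may not be instrumented

    void caller(int32_t a) {
      retval_shadow_tls = 0;       // unpoison the return slot *before* the call,
      (void)inner_tail_call(a);    // so the 'tail' marker can stay on the call
      int32_t ms_ret = retval_shadow_tls;  // defined either way: the callee
      (void)ms_ret;                // overwrote the slot, or our zero is still there
    }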
 
 
 declare i32 @MustTailCall(i32 %a)
 
 define i32 @CallMustTailCall(i32 %a) sanitize_memory {
+; CHECK-LABEL: define i32 @CallMustTailCall(
+; CHECK-SAME: i32 [[A:%.*]]) #[[ATTR6]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    store i32 [[TMP1]], ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[B:%.*]] = musttail call i32 @MustTailCall(i32 [[A]])
+; CHECK-NEXT:    ret i32 [[B]]
+;
+; ORIGIN-LABEL: define i32 @CallMustTailCall(
+; ORIGIN-SAME: i32 [[A:%.*]]) #[[ATTR6]] {
+; ORIGIN-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; ORIGIN-NEXT:    [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    store i32 [[TMP1]], ptr @__msan_param_tls, align 8
+; ORIGIN-NEXT:    store i32 [[TMP2]], ptr @__msan_param_origin_tls, align 4
+; ORIGIN-NEXT:    [[B:%.*]] = musttail call i32 @MustTailCall(i32 [[A]])
+; ORIGIN-NEXT:    ret i32 [[B]]
+;
+; CALLS-LABEL: define i32 @CallMustTailCall(
+; CALLS-SAME: i32 [[A:%.*]]) #[[ATTR6]] {
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    store i32 [[TMP1]], ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    store i32 [[TMP2]], ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    [[B:%.*]] = musttail call i32 @MustTailCall(i32 [[A]])
+; CALLS-NEXT:    ret i32 [[B]]
+;
   %b = musttail call i32 @MustTailCall(i32 %a)
   ret i32 %b
 }
@@ -1007,14 +4213,37 @@ define i32 @CallMustTailCall(i32 %a) sanitize_memory {
 ; call and the return instruction. And we don't need to, because everything is
 ; taken care of in the callee.
 
-; CHECK-LABEL: define i32 @CallMustTailCall
-; CHECK: musttail call i32 @MustTailCall
-; No instrumentation between call and ret.
-; CHECK-NEXT: ret i32
-
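
The same slot also explains the musttail case below. In a model like the previous sketch (again with an invented stand-in for __msan_retval_tls), the caller genuinely has nothing to add between the call and the ret:

    #include <cstdint>

    thread_local int32_t retval_shadow_tls;  // stand-in for __msan_retval_tls

    int32_t must_tail_callee(int32_t a);  // instrumented; writes the slot itself

    int32_t caller(int32_t a) {
      // Parameter shadow is stored before the call, which musttail permits.
      return must_tail_callee(a);
      // The shadow of the value being returned is already in retval_shadow_tls,
      // left there by the callee; musttail would forbid touching it here anyway.
    }
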
 declare ptr @MismatchingMustTailCall(i32 %a)
 
 define ptr @MismatchingCallMustTailCall(i32 %a) sanitize_memory {
+; CHECK-LABEL: define ptr @MismatchingCallMustTailCall(
+; CHECK-SAME: i32 [[A:%.*]]) #[[ATTR6]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    store i32 [[TMP1]], ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[B:%.*]] = musttail call ptr @MismatchingMustTailCall(i32 [[A]])
+; CHECK-NEXT:    ret ptr [[B]]
+;
+; ORIGIN-LABEL: define ptr @MismatchingCallMustTailCall(
+; ORIGIN-SAME: i32 [[A:%.*]]) #[[ATTR6]] {
+; ORIGIN-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; ORIGIN-NEXT:    [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; ORIGIN-NEXT:    call void @llvm.donothing()
+; ORIGIN-NEXT:    store i32 [[TMP1]], ptr @__msan_param_tls, align 8
+; ORIGIN-NEXT:    store i32 [[TMP2]], ptr @__msan_param_origin_tls, align 4
+; ORIGIN-NEXT:    [[B:%.*]] = musttail call ptr @MismatchingMustTailCall(i32 [[A]])
+; ORIGIN-NEXT:    ret ptr [[B]]
+;
+; CALLS-LABEL: define ptr @MismatchingCallMustTailCall(
+; CALLS-SAME: i32 [[A:%.*]]) #[[ATTR6]] {
+; CALLS-NEXT:    [[TMP1:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    [[TMP2:%.*]] = load i32, ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    call void @llvm.donothing()
+; CALLS-NEXT:    store i32 [[TMP1]], ptr @__msan_param_tls, align 8
+; CALLS-NEXT:    store i32 [[TMP2]], ptr @__msan_param_origin_tls, align 4
+; CALLS-NEXT:    [[B:%.*]] = musttail call ptr @MismatchingMustTailCall(i32 [[A]])
+; CALLS-NEXT:    ret ptr [[B]]
+;
   %b = musttail call ptr @MismatchingMustTailCall(i32 %a)
   ret ptr %b
 }
@@ -1023,14 +4252,8 @@ define ptr @MismatchingCallMustTailCall(i32 %a) sanitize_memory {
 ; call and the return instruction. And we don't need to, because everything is
 ; taken care of in the callee.
 
-; CHECK-LABEL: define ptr @MismatchingCallMustTailCall
-; CHECK: musttail call ptr @MismatchingMustTailCall
-; No instrumentation between call and ret.
-; CHECK-NEXT: ret ptr
 
 
-; CHECK-LABEL: define internal void @msan.module_ctor() #[[#ATTR:]] {
-; CHECK: call void @__msan_init()
 
 ; CHECK-CALLS: declare void @__msan_maybe_warning_1(i8, i32)
 ; CHECK-CALLS: declare void @__msan_maybe_store_origin_1(i8, ptr, i32)
@@ -1040,5 +4263,8 @@ define ptr @MismatchingCallMustTailCall(i32 %a) sanitize_memory {
 ; CHECK-CALLS: declare void @__msan_maybe_store_origin_4(i32, ptr, i32)
 ; CHECK-CALLS: declare void @__msan_maybe_warning_8(i64, i32)
 ; CHECK-CALLS: declare void @__msan_maybe_store_origin_8(i64, ptr, i32)
-
-; CHECK:       attributes #[[#ATTR]] = { nounwind }
+;.
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
+;.
+; ORIGIN: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575}
+;.
diff --git a/llvm/test/Instrumentation/MemorySanitizer/pr32842.ll b/llvm/test/Instrumentation/MemorySanitizer/pr32842.ll
index 381ab1b3a4354f..17fe78d292bd04 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/pr32842.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/pr32842.ll
@@ -1,20 +1,52 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
 ; Regression test for https://bugs.llvm.org/show_bug.cgi?id=32842
 ;
 ; RUN: opt < %s -S -passes=msan 2>&1 | FileCheck %s
-;target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
+; Make sure the shadow of the (x < y) comparison isn't truncated to i1.
+
 define zeroext i1 @_Z1fii(i32 %x, i32 %y) sanitize_memory {
+; CHECK-LABEL: define zeroext i1 @_Z1fii(
+; CHECK-SAME: i32 [[X:%.*]], i32 [[Y:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr @__msan_param_tls, align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT:    call void @llvm.donothing()
+; CHECK-NEXT:    [[TMP28:%.*]] = shl i32 [[TMP0]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = lshr i32 [[TMP28]], 1
+; CHECK-NEXT:    [[TMP4:%.*]] = xor i32 [[TMP0]], [[TMP3]]
+; CHECK-NEXT:    [[TMP5:%.*]] = xor i32 [[TMP3]], -1
+; CHECK-NEXT:    [[TMP6:%.*]] = and i32 [[X]], [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = or i32 [[TMP6]], [[TMP4]]
+; CHECK-NEXT:    [[TMP8:%.*]] = shl i32 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP9:%.*]] = lshr i32 [[TMP8]], 1
+; CHECK-NEXT:    [[TMP10:%.*]] = xor i32 [[TMP1]], [[TMP9]]
+; CHECK-NEXT:    [[TMP11:%.*]] = xor i32 [[TMP10]], -1
+; CHECK-NEXT:    [[TMP12:%.*]] = and i32 [[Y]], [[TMP11]]
+; CHECK-NEXT:    [[TMP13:%.*]] = or i32 [[TMP12]], [[TMP9]]
+; CHECK-NEXT:    [[TMP14:%.*]] = icmp slt i32 [[TMP7]], [[TMP13]]
+; CHECK-NEXT:    [[TMP15:%.*]] = shl i32 [[TMP0]], 1
+; CHECK-NEXT:    [[TMP16:%.*]] = lshr i32 [[TMP15]], 1
+; CHECK-NEXT:    [[TMP17:%.*]] = xor i32 [[TMP0]], [[TMP16]]
+; CHECK-NEXT:    [[TMP18:%.*]] = xor i32 [[TMP17]], -1
+; CHECK-NEXT:    [[TMP19:%.*]] = and i32 [[X]], [[TMP18]]
+; CHECK-NEXT:    [[TMP20:%.*]] = or i32 [[TMP19]], [[TMP16]]
+; CHECK-NEXT:    [[TMP21:%.*]] = shl i32 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP22:%.*]] = lshr i32 [[TMP21]], 1
+; CHECK-NEXT:    [[TMP23:%.*]] = xor i32 [[TMP1]], [[TMP22]]
+; CHECK-NEXT:    [[TMP24:%.*]] = xor i32 [[TMP22]], -1
+; CHECK-NEXT:    [[TMP25:%.*]] = and i32 [[Y]], [[TMP24]]
+; CHECK-NEXT:    [[TMP26:%.*]] = or i32 [[TMP25]], [[TMP23]]
+; CHECK-NEXT:    [[TMP27:%.*]] = icmp slt i32 [[TMP20]], [[TMP26]]
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i1 [[TMP14]], [[TMP27]]
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[X]], [[Y]]
+; CHECK-NEXT:    store i1 [[TMP2]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
 entry:
   %cmp = icmp slt i32 %x, %y
   ret i1 %cmp
 }
-
-; CHECK:      [[X:[^ ]+]] = load{{.*}}__msan_param_tls{{.*}}
-; CHECK:      [[Y:[^ ]+]] = load{{.*}}__msan_param_tls{{.*}}
-; CHECK:      [[OR:[^ ]+]] = or i32 [[X]], [[Y]]
-
-; Make sure the shadow of the (x < y) comparison isn't truncated to i1.
-; CHECK-NOT:  trunc i32 [[OR]] to i1
-; CHECK:      [[CMP:[^ ]+]] = icmp ne i32 [[OR]], 0
-; CHECK:      store i1 [[CMP]],{{.*}}__msan_retval_tls
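
For readers decoding the new expansion: with -msan-handle-icmp-exact, the shadow of a relational compare is derived from the range of values each operand could take given its uninitialized bits, which is what the shl/lshr/xor/and/or chains above build. A sketch of the same computation (mirroring the generated IR, not copied from the pass):

    #include <cstdint>

    struct Val { uint32_t v, s; };  // application value bits, shadow bits (1 = unknown)

    // Smallest / largest signed value consistent with the shadow: an unknown
    // sign bit goes to 1 for the minimum and 0 for the maximum; unknown low
    // bits go to 0 for the minimum and 1 for the maximum.
    int32_t smin(Val a) {
      return (int32_t)((a.v & ~(a.s & 0x7fffffffu)) | (a.s & 0x80000000u));
    }
    int32_t smax(Val a) {
      return (int32_t)((a.v | (a.s & 0x7fffffffu)) & ~(a.s & 0x80000000u));
    }

    // Shadow of (x < y): poisoned iff the two extreme assignments disagree.
    bool slt_shadow(Val x, Val y) {
      return (smin(x) < smax(y)) != (smax(x) < smin(y));
    }

Note the result comes out directly as an i1 xor of two comparisons, so there is no wider shadow left to (mis)truncate, which is exactly what the old CHECK-NOT line guarded against.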

>From aa4280d627d0f2b2d57bf1f9bf574d2f6ab55d38 Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Mon, 21 Oct 2024 11:01:36 -0700
Subject: [PATCH 2/3] comment

Created using spr 1.3.4
---
 .../Transforms/Instrumentation/MemorySanitizer.cpp    | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 962d7cc5bca347..07fa9b729463e8 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -2719,11 +2719,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
 
     auto GetMinMaxUnsigned = [&](Value *V, Value *S) {
       if (IsSigned) {
-        // Map from signed range to unsigned range. Relation A vs B should be
-        // preserved if checked with `getUnsignedPredicate()`.
-        // Calcualting Amin, Amax, Bmin, Bmax also will not be affected, as they
-        // are created by effectively adding/substructing from A or B a value,
-        // derived from shadow, which can't result in overflow.
+        // Sign-flip to map from signed range to unsigned range. Relation A vs B
+        // should be preserved if checked with `getUnsignedPredicate()`.
+        // Relationship between Amin, Amax, Bmin, Bmax will also not be
+        // affected, as they are created by effectively adding/subtracting from
+        // A or B a value derived from shadow, which can't result in overflow,
+        // before or after sign flip.
         APInt MinVal =
             APInt::getSignedMinValue(V->getType()->getScalarSizeInBits());
         V = IRB.CreateXor(V, ConstantInt::get(V->getType(), MinVal));

>From afda5ceed69201fb24a46181462e2087aa768d55 Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka at google.com>
Date: Mon, 21 Oct 2024 11:03:56 -0700
Subject: [PATCH 3/3] comment

Created using spr 1.3.4
---
 llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 07fa9b729463e8..391fb30d95e2ae 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -2723,7 +2723,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
         // should be preserved if checked with `getUnsignedPredicate()`.
         // Relationship between Amin, Amax, Bmin, Bmax will also not be
         // affected, as they are created by effectively adding/subtracting from
-        // A or B a value derived from shadow, which can't result in overflow,
+        // A (or B) a value derived from shadow, with no overflow, either
         // before or after sign flip.
         APInt MinVal =
             APInt::getSignedMinValue(V->getType()->getScalarSizeInBits());
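
To see why the sign flip in the comment is sound, a small self-contained check (illustrative, not the pass's code): XOR-ing both operands with the sign bit maps signed order onto unsigned order, so the unsigned min/max machinery can serve signed predicates unchanged.

    #include <cassert>
    #include <cstdint>

    uint32_t sign_flip(uint32_t v) { return v ^ 0x80000000u; }  // flip the sign bit

    int main() {
      const int32_t pairs[][2] = {{-7, 3}, {3, -7}, {-2, -1}, {5, 9}};
      for (const auto &p : pairs) {
        bool s = p[0] < p[1];                                            // signed order
        bool u = sign_flip((uint32_t)p[0]) < sign_flip((uint32_t)p[1]);  // unsigned order
        assert(s == u);  // the relation survives the flip
      }
      return 0;
    }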


