[llvm] [msan] Handle NEON vector load (PR #130457)
Thurston Dang via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 19 13:42:11 PDT 2025
https://github.com/thurstond updated https://github.com/llvm/llvm-project/pull/130457
>From 0dbdc247434f9e05449cf1690ddd2437855ac08e Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Thu, 6 Mar 2025 02:21:17 +0000
Subject: [PATCH 1/5] [msan] Handle NEON vector load
This adds an explicit handler for:
- llvm.aarch64.neon.ld1x2, llvm.aarch64.neon.ld1x3, llvm.aarch64.neon.ld1x4
- llvm.aarch64.neon.ld2, llvm.aarch64.neon.ld3, llvm.aarch64.neon.ld4
- llvm.aarch64.neon.ld2lane, llvm.aarch64.neon.ld3lane, llvm.aarch64.neon.ld4lane
- llvm.aarch64.neon.ld2r, llvm.aarch64.neon.ld3r, llvm.aarch64.neon.ld4r
instead of relying on the default strict handler.
Updates the tests from https://github.com/llvm/llvm-project/pull/125267
---
.../Instrumentation/MemorySanitizer.cpp | 108 +-
.../MemorySanitizer/AArch64/arm64-ld1.ll | 2029 +++++++++--------
2 files changed, 1212 insertions(+), 925 deletions(-)
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index a077c85ffc410..51e4f26d33d41 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -4071,12 +4071,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
ShadowArgs.append(1, SrcShadowPtr);
ShadowArgs.append(1, Mask);
- CallInst *CI =
- IRB.CreateIntrinsic(I.getType(), I.getIntrinsicID(), ShadowArgs);
- // The intrinsic may require floating-point but shadows can be arbitrary
- // bit patterns, of which some would be interpreted as "invalid"
- // floating-point values (NaN etc.); we assume the intrinsic will happily
- // copy them.
+ CallInst *CI;
+ // The AVX masked load intrinsics do not have integer variants. We use the
+ // floating-point variants, and assume that the intrinsic will happily copy
+ // the shadows even if they are interpreted as "invalid" floating-point
+ // values (NaN etc.).
+ CI = IRB.CreateIntrinsic(I.getType(), I.getIntrinsicID(), ShadowArgs);
setShadow(&I, IRB.CreateBitCast(CI, getShadowTy(&I)));
if (!MS.TrackOrigins)
@@ -4242,6 +4242,82 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOriginForNaryOp(I);
}
+ /// Handle Arm NEON vector load intrinsics (vld*). The result's shadow comes
+ /// from calling the same intrinsic on the shadow address of the source pointer
+ /// (plus, if \p WithLane, the input vectors' shadows and the lane number).
+ ///
+ /// The WithLane instructions (ld[234]lane) are similar to:
+ ///   call {<4 x i32>, <4 x i32>, <4 x i32>}
+ ///     @llvm.aarch64.neon.ld3lane.v4i32.p0
+ ///       (<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i64 %lane, ptr %A)
+ ///
+ /// The other instructions (ld[234], ld1x[234], ld[234]r) are similar to:
+ ///   call {<8 x i8>, <8 x i8>} @llvm.aarch64.neon.ld2.v8i8.p0(ptr %A)
+ void handleNEONVectorLoad(IntrinsicInst &I, bool WithLane) {
+ unsigned int numArgs = I.arg_size();
+
+ // Return type must be a non-empty struct of identical fixed vectors (int/FP)
+ assert(I.getType()->isStructTy());
+ [[maybe_unused]] StructType *RetTy = cast<StructType>(I.getType());
+ assert(RetTy->getNumElements() > 0);
+ assert(isa<FixedVectorType>(RetTy->getElementType(0)));
+ assert(RetTy->getElementType(0)->isIntOrIntVectorTy() ||
+ RetTy->getElementType(0)->isFPOrFPVectorTy());
+ for (unsigned int i = 0; i < RetTy->getNumElements(); i++)
+ assert(RetTy->getElementType(i) == RetTy->getElementType(0));
+
+ if (WithLane) {
+ // Operands: 2, 3 or 4 input vectors, then lane number, then input pointer
+ assert(numArgs >= 4);
+ assert(numArgs <= 6);
+
+ // The return struct has one element per input vector, of the same type
+ assert(RetTy->getNumElements() + 2 == numArgs);
+ for (unsigned int i = 0; i < RetTy->getNumElements(); i++)
+ assert(I.getArgOperand(i)->getType() == RetTy->getElementType(0));
+ } else
+ assert(numArgs == 1); // the only operand is the input pointer
+
+ IRBuilder<> IRB(&I);
+
+ SmallVector<Value *, 6> ShadowArgs; // operands of the shadow intrinsic call
+ if (WithLane) {
+ for (unsigned int i = 0; i < numArgs - 2; i++)
+ ShadowArgs.push_back(getShadow(I.getArgOperand(i)));
+
+ // Lane number, passed verbatim (it selects a lane; it is not shadow data)
+ Value *LaneNumber = I.getArgOperand(numArgs - 2);
+ ShadowArgs.push_back(LaneNumber);
+
+ // TODO: blend shadow of lane number into output shadow?
+ insertShadowCheck(LaneNumber, &I); // for now the lane number gets a check
+ }
+
+ Value *Src = I.getArgOperand(numArgs - 1); // pointer is the last operand
+ assert(Src->getType()->isPointerTy() && "Source is not a pointer!");
+
+ const Align Alignment = Align(1); // no alignment is assumed for the source
+
+ Type *SrcShadowTy = getShadowTy(Src);
+ Value *SrcShadowPtr, *SrcOriginPtr;
+ std::tie(SrcShadowPtr, SrcOriginPtr) =
+ getShadowOriginPtr(Src, IRB, SrcShadowTy, Alignment, /*isStore*/ false);
+ ShadowArgs.push_back(SrcShadowPtr);
+
+ CallInst *CI;
+ // The NEON vector load intrinsics handled by this function all have integer
+ // variants, so the shadow call returns getShadowTy(&I) directly. That is
+ // easier than casting a struct of float vectors to a struct of int vectors.
+ CI = IRB.CreateIntrinsic(getShadowTy(&I), I.getIntrinsicID(), ShadowArgs);
+ setShadow(&I, CI);
+
+ if (!MS.TrackOrigins)
+ return;
+
+ Value *PtrSrcOrigin = IRB.CreateLoad(MS.OriginTy, SrcOriginPtr);
+ setOrigin(&I, PtrSrcOrigin); // origin is taken from the source memory only
+ }
+
/// Handle Arm NEON vector store intrinsics (vst{2,3,4}, vst1x_{2,3,4},
/// and vst{2,3,4}lane).
///
@@ -4946,6 +5022,26 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
handleVectorReduceIntrinsic(I, /*AllowShadowCast=*/true);
break;
+ case Intrinsic::aarch64_neon_ld1x2:
+ case Intrinsic::aarch64_neon_ld1x3:
+ case Intrinsic::aarch64_neon_ld1x4:
+ case Intrinsic::aarch64_neon_ld2:
+ case Intrinsic::aarch64_neon_ld3:
+ case Intrinsic::aarch64_neon_ld4:
+ case Intrinsic::aarch64_neon_ld2r:
+ case Intrinsic::aarch64_neon_ld3r:
+ case Intrinsic::aarch64_neon_ld4r: {
+ handleNEONVectorLoad(I, /*WithLane=*/false);
+ break;
+ }
+
+ case Intrinsic::aarch64_neon_ld2lane:
+ case Intrinsic::aarch64_neon_ld3lane:
+ case Intrinsic::aarch64_neon_ld4lane: {
+ handleNEONVectorLoad(I, /*WithLane=*/true);
+ break;
+ }
+
// Saturating extract narrow
case Intrinsic::aarch64_neon_sqxtn:
case Intrinsic::aarch64_neon_sqxtun:
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-ld1.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-ld1.ll
index 9bae334b2831f..99e9ab939847c 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-ld1.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-ld1.ll
@@ -1,7 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
; RUN: opt < %s -passes=msan -S | FileCheck %s
+;
; Forked from llvm/test/CodeGen/AArch64/arm64-ld1.ll
-; Incorrectly handled (handleUnknownInstruction):
+;
+; Explicitly handled (handleNEONVectorLoad):
; - llvm.aarch64.neon.ld1x2, llvm.aarch64.neon.ld1x3, llvm.aarch64.neon.ld1x4
; - llvm.aarch64.neon.ld2, llvm.aarch64.neon.ld3, llvm.aarch64.neon.ld4
; - llvm.aarch64.neon.ld2lane, llvm.aarch64.neon.ld3lane, llvm.aarch64.neon.ld4lane
@@ -19,20 +21,21 @@ define %struct.__neon_int8x8x2_t @ld2_8b(ptr %A) nounwind #0 {
; and from the argument of the function also defined by ABI (i.e., x0)
; CHECK-LABEL: define %struct.__neon_int8x8x2_t @ld2_8b(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1:![0-9]+]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7:[0-9]+]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP9:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[TMP9]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { <8 x i8>, <8 x i8> } { <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1) }, <8 x i8> [[TMP11]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT8X8X2_T:%.*]] poison, <8 x i8> [[TMP5]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[TMP9]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <8 x i8>, <8 x i8> } [[TMP13]], <8 x i8> [[TMP10]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT8X8X2_T]] [[TMP6]], <8 x i8> [[TMP7]], 1
-; CHECK-NEXT: store { <8 x i8>, <8 x i8> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <8 x i8>, <8 x i8> } [[TMP12]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT8X8X2_T]] [[TMP8]]
;
%tmpvar2 = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2.v8i8.p0(ptr %A)
@@ -43,22 +46,25 @@ define %struct.__neon_int8x8x3_t @ld3_8b(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int8x8x3_t @ld3_8b(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP11:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP11]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } { <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1) }, <8 x i8> [[TMP13]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT8X8X3_T:%.*]] poison, <8 x i8> [[TMP5]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP11]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP15]], <8 x i8> [[TMP17]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT8X8X3_T]] [[TMP6]], <8 x i8> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP11]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP12]], <8 x i8> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT8X8X3_T]] [[TMP8]], <8 x i8> [[TMP9]], 2
-; CHECK-NEXT: store { <8 x i8>, <8 x i8>, <8 x i8> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP16]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT8X8X3_T]] [[TMP10]]
;
%tmpvar2 = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3.v8i8.p0(ptr %A)
@@ -69,24 +75,29 @@ define %struct.__neon_int8x8x4_t @ld4_8b(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int8x8x4_t @ld4_8b(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP13:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP13]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } { <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1) }, <8 x i8> [[TMP15]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT8X8X4_T:%.*]] poison, <8 x i8> [[TMP5]], 0
+; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP13]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP17]], <8 x i8> [[TMP19]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT8X8X4_T]] [[TMP6]], <8 x i8> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP13]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP21]], <8 x i8> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT8X8X4_T]] [[TMP8]], <8 x i8> [[TMP9]], 2
+; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP13]], 3
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 3
+; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP16]], <8 x i8> [[TMP18]], 3
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue [[STRUCT___NEON_INT8X8X4_T]] [[TMP10]], <8 x i8> [[TMP11]], 3
-; CHECK-NEXT: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP20]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT8X8X4_T]] [[TMP12]]
;
%tmpvar2 = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4.v8i8.p0(ptr %A)
@@ -105,20 +116,21 @@ define %struct.__neon_int8x16x2_t @ld2_16b(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int8x16x2_t @ld2_16b(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP9:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP9]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { <16 x i8>, <16 x i8> } { <16 x i8> splat (i8 -1), <16 x i8> splat (i8 -1) }, <16 x i8> [[TMP11]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT8X16X2_T:%.*]] poison, <16 x i8> [[TMP5]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP9]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <16 x i8>, <16 x i8> } [[TMP13]], <16 x i8> [[TMP10]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT8X16X2_T]] [[TMP6]], <16 x i8> [[TMP7]], 1
-; CHECK-NEXT: store { <16 x i8>, <16 x i8> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <16 x i8>, <16 x i8> } [[TMP12]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT8X16X2_T]] [[TMP8]]
;
%tmpvar2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2.v16i8.p0(ptr %A)
@@ -129,22 +141,25 @@ define %struct.__neon_int8x16x3_t @ld3_16b(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int8x16x3_t @ld3_16b(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP11:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP11]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } { <16 x i8> splat (i8 -1), <16 x i8> splat (i8 -1), <16 x i8> splat (i8 -1) }, <16 x i8> [[TMP13]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT8X16X3_T:%.*]] poison, <16 x i8> [[TMP5]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP11]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP15]], <16 x i8> [[TMP17]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT8X16X3_T]] [[TMP6]], <16 x i8> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP11]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP12]], <16 x i8> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT8X16X3_T]] [[TMP8]], <16 x i8> [[TMP9]], 2
-; CHECK-NEXT: store { <16 x i8>, <16 x i8>, <16 x i8> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP16]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT8X16X3_T]] [[TMP10]]
;
%tmpvar2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3.v16i8.p0(ptr %A)
@@ -155,24 +170,29 @@ define %struct.__neon_int8x16x4_t @ld4_16b(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int8x16x4_t @ld4_16b(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP13:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP13]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } { <16 x i8> splat (i8 -1), <16 x i8> splat (i8 -1), <16 x i8> splat (i8 -1), <16 x i8> splat (i8 -1) }, <16 x i8> [[TMP15]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT8X16X4_T:%.*]] poison, <16 x i8> [[TMP5]], 0
+; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP13]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP17]], <16 x i8> [[TMP19]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT8X16X4_T]] [[TMP6]], <16 x i8> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP13]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP21]], <16 x i8> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT8X16X4_T]] [[TMP8]], <16 x i8> [[TMP9]], 2
+; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP13]], 3
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP4]], 3
+; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP16]], <16 x i8> [[TMP18]], 3
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue [[STRUCT___NEON_INT8X16X4_T]] [[TMP10]], <16 x i8> [[TMP11]], 3
-; CHECK-NEXT: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP20]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT8X16X4_T]] [[TMP12]]
;
%tmpvar2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4.v16i8.p0(ptr %A)
@@ -191,20 +211,21 @@ define %struct.__neon_int16x4x2_t @ld2_4h(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int16x4x2_t @ld2_4h(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP9:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[TMP9]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { <4 x i16>, <4 x i16> } { <4 x i16> splat (i16 -1), <4 x i16> splat (i16 -1) }, <4 x i16> [[TMP11]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT16X4X2_T:%.*]] poison, <4 x i16> [[TMP5]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[TMP9]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <4 x i16>, <4 x i16> } [[TMP13]], <4 x i16> [[TMP10]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT16X4X2_T]] [[TMP6]], <4 x i16> [[TMP7]], 1
-; CHECK-NEXT: store { <4 x i16>, <4 x i16> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <4 x i16>, <4 x i16> } [[TMP12]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT16X4X2_T]] [[TMP8]]
;
%tmpvar2 = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2.v4i16.p0(ptr %A)
@@ -215,22 +236,25 @@ define %struct.__neon_int16x4x3_t @ld3_4h(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int16x4x3_t @ld3_4h(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP11:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP11]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } { <4 x i16> splat (i16 -1), <4 x i16> splat (i16 -1), <4 x i16> splat (i16 -1) }, <4 x i16> [[TMP13]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT16X4X3_T:%.*]] poison, <4 x i16> [[TMP5]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP11]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP15]], <4 x i16> [[TMP17]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT16X4X3_T]] [[TMP6]], <4 x i16> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP11]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP12]], <4 x i16> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT16X4X3_T]] [[TMP8]], <4 x i16> [[TMP9]], 2
-; CHECK-NEXT: store { <4 x i16>, <4 x i16>, <4 x i16> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP16]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT16X4X3_T]] [[TMP10]]
;
%tmpvar2 = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3.v4i16.p0(ptr %A)
@@ -241,24 +265,29 @@ define %struct.__neon_int16x4x4_t @ld4_4h(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int16x4x4_t @ld4_4h(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP13:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP13]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } { <4 x i16> splat (i16 -1), <4 x i16> splat (i16 -1), <4 x i16> splat (i16 -1), <4 x i16> splat (i16 -1) }, <4 x i16> [[TMP15]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT16X4X4_T:%.*]] poison, <4 x i16> [[TMP5]], 0
+; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP13]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP17]], <4 x i16> [[TMP19]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT16X4X4_T]] [[TMP6]], <4 x i16> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP13]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP21]], <4 x i16> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT16X4X4_T]] [[TMP8]], <4 x i16> [[TMP9]], 2
+; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP13]], 3
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP4]], 3
+; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP16]], <4 x i16> [[TMP18]], 3
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue [[STRUCT___NEON_INT16X4X4_T]] [[TMP10]], <4 x i16> [[TMP11]], 3
-; CHECK-NEXT: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP20]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT16X4X4_T]] [[TMP12]]
;
%tmpvar2 = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4.v4i16.p0(ptr %A)
@@ -277,20 +306,21 @@ define %struct.__neon_int16x8x2_t @ld2_8h(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int16x8x2_t @ld2_8h(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP9:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[TMP9]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { <8 x i16>, <8 x i16> } { <8 x i16> splat (i16 -1), <8 x i16> splat (i16 -1) }, <8 x i16> [[TMP11]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT16X8X2_T:%.*]] poison, <8 x i16> [[TMP5]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[TMP9]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <8 x i16>, <8 x i16> } [[TMP13]], <8 x i16> [[TMP10]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT16X8X2_T]] [[TMP6]], <8 x i16> [[TMP7]], 1
-; CHECK-NEXT: store { <8 x i16>, <8 x i16> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <8 x i16>, <8 x i16> } [[TMP12]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT16X8X2_T]] [[TMP8]]
;
%tmpvar2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2.v8i16.p0(ptr %A)
@@ -301,22 +331,25 @@ define %struct.__neon_int16x8x3_t @ld3_8h(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int16x8x3_t @ld3_8h(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP11:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[TMP11]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } { <8 x i16> splat (i16 -1), <8 x i16> splat (i16 -1), <8 x i16> splat (i16 -1) }, <8 x i16> [[TMP13]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT16X8X3_T:%.*]] poison, <8 x i16> [[TMP5]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[TMP11]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[TMP15]], <8 x i16> [[TMP17]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT16X8X3_T]] [[TMP6]], <8 x i16> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[TMP11]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[TMP12]], <8 x i16> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT16X8X3_T]] [[TMP8]], <8 x i16> [[TMP9]], 2
-; CHECK-NEXT: store { <8 x i16>, <8 x i16>, <8 x i16> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <8 x i16>, <8 x i16>, <8 x i16> } [[TMP16]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT16X8X3_T]] [[TMP10]]
;
%tmpvar2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3.v8i16.p0(ptr %A)
@@ -327,24 +360,29 @@ define %struct.__neon_int16x8x4_t @ld4_8h(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int16x8x4_t @ld4_8h(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP13:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP13]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } { <8 x i16> splat (i16 -1), <8 x i16> splat (i16 -1), <8 x i16> splat (i16 -1), <8 x i16> splat (i16 -1) }, <8 x i16> [[TMP15]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT16X8X4_T:%.*]] poison, <8 x i16> [[TMP5]], 0
+; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP13]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP17]], <8 x i16> [[TMP19]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT16X8X4_T]] [[TMP6]], <8 x i16> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP13]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP21]], <8 x i16> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT16X8X4_T]] [[TMP8]], <8 x i16> [[TMP9]], 2
+; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP13]], 3
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP4]], 3
+; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP16]], <8 x i16> [[TMP18]], 3
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue [[STRUCT___NEON_INT16X8X4_T]] [[TMP10]], <8 x i16> [[TMP11]], 3
-; CHECK-NEXT: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP20]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT16X8X4_T]] [[TMP12]]
;
%tmpvar2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4.v8i16.p0(ptr %A)
@@ -363,20 +401,21 @@ define %struct.__neon_int32x2x2_t @ld2_2s(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int32x2x2_t @ld2_2s(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP9:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[TMP9]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { <2 x i32>, <2 x i32> } { <2 x i32> splat (i32 -1), <2 x i32> splat (i32 -1) }, <2 x i32> [[TMP11]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT32X2X2_T:%.*]] poison, <2 x i32> [[TMP5]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[TMP9]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <2 x i32>, <2 x i32> } [[TMP13]], <2 x i32> [[TMP10]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT32X2X2_T]] [[TMP6]], <2 x i32> [[TMP7]], 1
-; CHECK-NEXT: store { <2 x i32>, <2 x i32> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <2 x i32>, <2 x i32> } [[TMP12]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT32X2X2_T]] [[TMP8]]
;
%tmpvar2 = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2.v2i32.p0(ptr %A)
@@ -387,22 +426,25 @@ define %struct.__neon_int32x2x3_t @ld3_2s(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int32x2x3_t @ld3_2s(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP11:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP11]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } { <2 x i32> splat (i32 -1), <2 x i32> splat (i32 -1), <2 x i32> splat (i32 -1) }, <2 x i32> [[TMP13]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT32X2X3_T:%.*]] poison, <2 x i32> [[TMP5]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP11]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP15]], <2 x i32> [[TMP17]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT32X2X3_T]] [[TMP6]], <2 x i32> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP11]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP12]], <2 x i32> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT32X2X3_T]] [[TMP8]], <2 x i32> [[TMP9]], 2
-; CHECK-NEXT: store { <2 x i32>, <2 x i32>, <2 x i32> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP16]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT32X2X3_T]] [[TMP10]]
;
%tmpvar2 = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3.v2i32.p0(ptr %A)
@@ -413,24 +455,29 @@ define %struct.__neon_int32x2x4_t @ld4_2s(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int32x2x4_t @ld4_2s(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP13:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP13]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } { <2 x i32> splat (i32 -1), <2 x i32> splat (i32 -1), <2 x i32> splat (i32 -1), <2 x i32> splat (i32 -1) }, <2 x i32> [[TMP15]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT32X2X4_T:%.*]] poison, <2 x i32> [[TMP5]], 0
+; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP13]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP17]], <2 x i32> [[TMP19]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT32X2X4_T]] [[TMP6]], <2 x i32> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP13]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP21]], <2 x i32> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT32X2X4_T]] [[TMP8]], <2 x i32> [[TMP9]], 2
+; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP13]], 3
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP4]], 3
+; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP16]], <2 x i32> [[TMP18]], 3
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue [[STRUCT___NEON_INT32X2X4_T]] [[TMP10]], <2 x i32> [[TMP11]], 3
-; CHECK-NEXT: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP20]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT32X2X4_T]] [[TMP12]]
;
%tmpvar2 = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4.v2i32.p0(ptr %A)
@@ -449,20 +496,21 @@ define %struct.__neon_int32x4x2_t @ld2_4s(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int32x4x2_t @ld2_4s(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP9:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP9]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { <4 x i32>, <4 x i32> } { <4 x i32> splat (i32 -1), <4 x i32> splat (i32 -1) }, <4 x i32> [[TMP11]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT32X4X2_T:%.*]] poison, <4 x i32> [[TMP5]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP9]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <4 x i32>, <4 x i32> } [[TMP13]], <4 x i32> [[TMP10]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT32X4X2_T]] [[TMP6]], <4 x i32> [[TMP7]], 1
-; CHECK-NEXT: store { <4 x i32>, <4 x i32> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <4 x i32>, <4 x i32> } [[TMP12]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT32X4X2_T]] [[TMP8]]
;
%tmpvar2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2.v4i32.p0(ptr %A)
@@ -473,22 +521,25 @@ define %struct.__neon_int32x4x3_t @ld3_4s(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int32x4x3_t @ld3_4s(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP11:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP11]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } { <4 x i32> splat (i32 -1), <4 x i32> splat (i32 -1), <4 x i32> splat (i32 -1) }, <4 x i32> [[TMP13]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT32X4X3_T:%.*]] poison, <4 x i32> [[TMP5]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP11]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP15]], <4 x i32> [[TMP17]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT32X4X3_T]] [[TMP6]], <4 x i32> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP11]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP12]], <4 x i32> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT32X4X3_T]] [[TMP8]], <4 x i32> [[TMP9]], 2
-; CHECK-NEXT: store { <4 x i32>, <4 x i32>, <4 x i32> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP16]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT32X4X3_T]] [[TMP10]]
;
%tmpvar2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3.v4i32.p0(ptr %A)
@@ -499,24 +550,29 @@ define %struct.__neon_int32x4x4_t @ld4_4s(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int32x4x4_t @ld4_4s(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP13:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP13]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } { <4 x i32> splat (i32 -1), <4 x i32> splat (i32 -1), <4 x i32> splat (i32 -1), <4 x i32> splat (i32 -1) }, <4 x i32> [[TMP15]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT32X4X4_T:%.*]] poison, <4 x i32> [[TMP5]], 0
+; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP13]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP17]], <4 x i32> [[TMP19]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT32X4X4_T]] [[TMP6]], <4 x i32> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP13]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP21]], <4 x i32> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT32X4X4_T]] [[TMP8]], <4 x i32> [[TMP9]], 2
+; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP13]], 3
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP4]], 3
+; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP16]], <4 x i32> [[TMP18]], 3
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue [[STRUCT___NEON_INT32X4X4_T]] [[TMP10]], <4 x i32> [[TMP11]], 3
-; CHECK-NEXT: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP20]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT32X4X4_T]] [[TMP12]]
;
%tmpvar2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4.v4i32.p0(ptr %A)
@@ -535,20 +591,21 @@ define %struct.__neon_int64x2x2_t @ld2_2d(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int64x2x2_t @ld2_2d(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP9:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP9]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { <2 x i64>, <2 x i64> } { <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1) }, <2 x i64> [[TMP11]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT64X2X2_T:%.*]] poison, <2 x i64> [[TMP5]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP9]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <2 x i64>, <2 x i64> } [[TMP13]], <2 x i64> [[TMP10]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT64X2X2_T]] [[TMP6]], <2 x i64> [[TMP7]], 1
-; CHECK-NEXT: store { <2 x i64>, <2 x i64> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <2 x i64>, <2 x i64> } [[TMP12]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT64X2X2_T]] [[TMP8]]
;
%tmpvar2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2.v2i64.p0(ptr %A)
@@ -559,22 +616,25 @@ define %struct.__neon_int64x2x3_t @ld3_2d(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int64x2x3_t @ld3_2d(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP11:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP11]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <2 x i64>, <2 x i64>, <2 x i64> } { <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1) }, <2 x i64> [[TMP13]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT64X2X3_T:%.*]] poison, <2 x i64> [[TMP5]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP11]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP15]], <2 x i64> [[TMP17]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT64X2X3_T]] [[TMP6]], <2 x i64> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP11]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP12]], <2 x i64> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT64X2X3_T]] [[TMP8]], <2 x i64> [[TMP9]], 2
-; CHECK-NEXT: store { <2 x i64>, <2 x i64>, <2 x i64> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP16]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT64X2X3_T]] [[TMP10]]
;
%tmpvar2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3.v2i64.p0(ptr %A)
@@ -585,24 +645,29 @@ define %struct.__neon_int64x2x4_t @ld4_2d(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int64x2x4_t @ld4_2d(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP13:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP13]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } { <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1) }, <2 x i64> [[TMP15]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT64X2X4_T:%.*]] poison, <2 x i64> [[TMP5]], 0
+; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP13]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP17]], <2 x i64> [[TMP19]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT64X2X4_T]] [[TMP6]], <2 x i64> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP13]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], <2 x i64> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT64X2X4_T]] [[TMP8]], <2 x i64> [[TMP9]], 2
+; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP13]], 3
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP4]], 3
+; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP16]], <2 x i64> [[TMP18]], 3
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue [[STRUCT___NEON_INT64X2X4_T]] [[TMP10]], <2 x i64> [[TMP11]], 3
-; CHECK-NEXT: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP20]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT64X2X4_T]] [[TMP12]]
;
%tmpvar2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4.v2i64.p0(ptr %A)
@@ -622,20 +687,21 @@ define %struct.__neon_int64x1x2_t @ld2_1di64(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int64x1x2_t @ld2_1di64(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP9:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <1 x i64>, <1 x i64> } [[TMP9]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <1 x i64>, <1 x i64> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { <1 x i64>, <1 x i64> } { <1 x i64> splat (i64 -1), <1 x i64> splat (i64 -1) }, <1 x i64> [[TMP11]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT64X1X2_T:%.*]] poison, <1 x i64> [[TMP5]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <1 x i64>, <1 x i64> } [[TMP9]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <1 x i64>, <1 x i64> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <1 x i64>, <1 x i64> } [[TMP13]], <1 x i64> [[TMP10]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT64X1X2_T]] [[TMP6]], <1 x i64> [[TMP7]], 1
-; CHECK-NEXT: store { <1 x i64>, <1 x i64> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <1 x i64>, <1 x i64> } [[TMP12]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT64X1X2_T]] [[TMP8]]
;
%tmpvar2 = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2.v1i64.p0(ptr %A)
@@ -646,22 +712,25 @@ define %struct.__neon_int64x1x3_t @ld3_1di64(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int64x1x3_t @ld3_1di64(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP11:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[TMP11]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <1 x i64>, <1 x i64>, <1 x i64> } { <1 x i64> splat (i64 -1), <1 x i64> splat (i64 -1), <1 x i64> splat (i64 -1) }, <1 x i64> [[TMP13]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT64X1X3_T:%.*]] poison, <1 x i64> [[TMP5]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[TMP11]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[TMP15]], <1 x i64> [[TMP17]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT64X1X3_T]] [[TMP6]], <1 x i64> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[TMP11]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[TMP12]], <1 x i64> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT64X1X3_T]] [[TMP8]], <1 x i64> [[TMP9]], 2
-; CHECK-NEXT: store { <1 x i64>, <1 x i64>, <1 x i64> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <1 x i64>, <1 x i64>, <1 x i64> } [[TMP16]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT64X1X3_T]] [[TMP10]]
;
%tmpvar2 = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3.v1i64.p0(ptr %A)
@@ -672,24 +741,29 @@ define %struct.__neon_int64x1x4_t @ld4_1di64(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int64x1x4_t @ld4_1di64(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP13:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP13]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } { <1 x i64> splat (i64 -1), <1 x i64> splat (i64 -1), <1 x i64> splat (i64 -1), <1 x i64> splat (i64 -1) }, <1 x i64> [[TMP15]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT64X1X4_T:%.*]] poison, <1 x i64> [[TMP5]], 0
+; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP13]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP17]], <1 x i64> [[TMP19]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT64X1X4_T]] [[TMP6]], <1 x i64> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP13]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP21]], <1 x i64> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT64X1X4_T]] [[TMP8]], <1 x i64> [[TMP9]], 2
+; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP13]], 3
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP4]], 3
+; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP16]], <1 x i64> [[TMP18]], 3
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue [[STRUCT___NEON_INT64X1X4_T]] [[TMP10]], <1 x i64> [[TMP11]], 3
-; CHECK-NEXT: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP20]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT64X1X4_T]] [[TMP12]]
;
%tmpvar2 = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4.v1i64.p0(ptr %A)
@@ -710,20 +784,21 @@ define %struct.__neon_float64x1x2_t @ld2_1df64(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_float64x1x2_t @ld2_1df64(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP9:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <1 x i64>, <1 x i64> } [[TMP9]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <1 x double>, <1 x double> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { <1 x i64>, <1 x i64> } { <1 x i64> splat (i64 -1), <1 x i64> splat (i64 -1) }, <1 x i64> [[TMP11]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_FLOAT64X1X2_T:%.*]] poison, <1 x double> [[TMP5]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <1 x i64>, <1 x i64> } [[TMP9]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <1 x double>, <1 x double> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <1 x i64>, <1 x i64> } [[TMP13]], <1 x i64> [[TMP10]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_FLOAT64X1X2_T]] [[TMP6]], <1 x double> [[TMP7]], 1
-; CHECK-NEXT: store { <1 x i64>, <1 x i64> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <1 x i64>, <1 x i64> } [[TMP12]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_FLOAT64X1X2_T]] [[TMP8]]
;
%tmpvar2 = call %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld2.v1f64.p0(ptr %A)
@@ -734,22 +809,25 @@ define %struct.__neon_float64x1x3_t @ld3_1df64(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_float64x1x3_t @ld3_1df64(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP11:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[TMP11]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <1 x i64>, <1 x i64>, <1 x i64> } { <1 x i64> splat (i64 -1), <1 x i64> splat (i64 -1), <1 x i64> splat (i64 -1) }, <1 x i64> [[TMP13]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_FLOAT64X1X3_T:%.*]] poison, <1 x double> [[TMP5]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[TMP11]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[TMP15]], <1 x i64> [[TMP17]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_FLOAT64X1X3_T]] [[TMP6]], <1 x double> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[TMP11]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[TMP12]], <1 x i64> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_FLOAT64X1X3_T]] [[TMP8]], <1 x double> [[TMP9]], 2
-; CHECK-NEXT: store { <1 x i64>, <1 x i64>, <1 x i64> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <1 x i64>, <1 x i64>, <1 x i64> } [[TMP16]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_FLOAT64X1X3_T]] [[TMP10]]
;
%tmpvar2 = call %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld3.v1f64.p0(ptr %A)
@@ -760,24 +838,29 @@ define %struct.__neon_float64x1x4_t @ld4_1df64(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_float64x1x4_t @ld4_1df64(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP13:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP13]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } { <1 x i64> splat (i64 -1), <1 x i64> splat (i64 -1), <1 x i64> splat (i64 -1), <1 x i64> splat (i64 -1) }, <1 x i64> [[TMP15]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_FLOAT64X1X4_T:%.*]] poison, <1 x double> [[TMP5]], 0
+; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP13]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP17]], <1 x i64> [[TMP19]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_FLOAT64X1X4_T]] [[TMP6]], <1 x double> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP13]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP21]], <1 x i64> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_FLOAT64X1X4_T]] [[TMP8]], <1 x double> [[TMP9]], 2
+; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP13]], 3
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[TMP4]], 3
+; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP16]], <1 x i64> [[TMP18]], 3
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue [[STRUCT___NEON_FLOAT64X1X4_T]] [[TMP10]], <1 x double> [[TMP11]], 3
-; CHECK-NEXT: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP20]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_FLOAT64X1X4_T]] [[TMP12]]
;
%tmpvar2 = call %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld4.v1f64.p0(ptr %A)
@@ -795,26 +878,21 @@ define %struct.__neon_int8x16x2_t @ld2lane_16b(<16 x i8> %L1, <16 x i8> %L2, ptr
; CHECK-SAME: <16 x i8> [[L1:%.*]], <16 x i8> [[L2:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576
+; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr
+; CHECK-NEXT: [[TMP6:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], i64 1, ptr [[TMP5]])
; CHECK-NEXT: [[TMP8:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> [[L1]], <16 x i8> [[L2]], i64 1, ptr [[A]])
+; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP6]], 0
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP8]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <16 x i8>, <16 x i8> } { <16 x i8> splat (i8 -1), <16 x i8> splat (i8 -1) }, <16 x i8> [[TMP13]], 0
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT8X16X2_T:%.*]] poison, <16 x i8> [[TMP9]], 0
+; CHECK-NEXT: [[TMP16:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP6]], 1
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP8]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = insertvalue { <16 x i8>, <16 x i8> } [[TMP15]], <16 x i8> [[TMP16]], 1
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue [[STRUCT___NEON_INT8X16X2_T]] [[TMP10]], <16 x i8> [[TMP11]], 1
-; CHECK-NEXT: store { <16 x i8>, <16 x i8> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <16 x i8>, <16 x i8> } [[TMP14]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT8X16X2_T]] [[TMP12]]
;
%tmpvar2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> %L1, <16 x i8> %L2, i64 1, ptr %A)
@@ -828,31 +906,25 @@ define %struct.__neon_int8x16x3_t @ld3lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[TMP7:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], i64 1, ptr [[TMP6]])
; CHECK-NEXT: [[TMP10:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> [[L1]], <16 x i8> [[L2]], <16 x i8> [[L3]], i64 1, ptr [[A]])
+; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP7]], 0
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP10]], 0
+; CHECK-NEXT: [[TMP18:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } { <16 x i8> splat (i8 -1), <16 x i8> splat (i8 -1), <16 x i8> splat (i8 -1) }, <16 x i8> [[TMP9]], 0
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue [[STRUCT___NEON_INT8X16X3_T:%.*]] poison, <16 x i8> [[TMP11]], 0
+; CHECK-NEXT: [[TMP20:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP7]], 1
; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP10]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP18]], <16 x i8> [[TMP20]], 1
; CHECK-NEXT: [[TMP14:%.*]] = insertvalue [[STRUCT___NEON_INT8X16X3_T]] [[TMP12]], <16 x i8> [[TMP13]], 1
+; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP7]], 2
; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP10]], 2
+; CHECK-NEXT: [[TMP19:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP21]], <16 x i8> [[TMP17]], 2
; CHECK-NEXT: [[TMP16:%.*]] = insertvalue [[STRUCT___NEON_INT8X16X3_T]] [[TMP14]], <16 x i8> [[TMP15]], 2
-; CHECK-NEXT: store { <16 x i8>, <16 x i8>, <16 x i8> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP19]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT8X16X3_T]] [[TMP16]]
;
%tmpvar2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, i64 1, ptr %A)
@@ -867,36 +939,29 @@ define %struct.__neon_int8x16x4_t @ld4lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to i128
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i64 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]]
-; CHECK-NEXT: br i1 [[_MSOR7]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[TMP8:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i64 1, ptr [[TMP7]])
; CHECK-NEXT: [[TMP12:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> [[L1]], <16 x i8> [[L2]], <16 x i8> [[L3]], <16 x i8> [[L4]], i64 1, ptr [[A]])
+; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP8]], 0
; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP12]], 0
+; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } { <16 x i8> splat (i8 -1), <16 x i8> splat (i8 -1), <16 x i8> splat (i8 -1), <16 x i8> splat (i8 -1) }, <16 x i8> [[TMP10]], 0
; CHECK-NEXT: [[TMP14:%.*]] = insertvalue [[STRUCT___NEON_INT8X16X4_T:%.*]] poison, <16 x i8> [[TMP13]], 0
+; CHECK-NEXT: [[TMP23:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP8]], 1
; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP12]], 1
+; CHECK-NEXT: [[TMP25:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP21]], <16 x i8> [[TMP23]], 1
; CHECK-NEXT: [[TMP16:%.*]] = insertvalue [[STRUCT___NEON_INT8X16X4_T]] [[TMP14]], <16 x i8> [[TMP15]], 1
+; CHECK-NEXT: [[TMP26:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP8]], 2
; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP12]], 2
+; CHECK-NEXT: [[TMP27:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP25]], <16 x i8> [[TMP26]], 2
; CHECK-NEXT: [[TMP18:%.*]] = insertvalue [[STRUCT___NEON_INT8X16X4_T]] [[TMP16]], <16 x i8> [[TMP17]], 2
+; CHECK-NEXT: [[TMP22:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP8]], 3
; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP12]], 3
+; CHECK-NEXT: [[TMP24:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP27]], <16 x i8> [[TMP22]], 3
; CHECK-NEXT: [[TMP20:%.*]] = insertvalue [[STRUCT___NEON_INT8X16X4_T]] [[TMP18]], <16 x i8> [[TMP19]], 3
-; CHECK-NEXT: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP24]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT8X16X4_T]] [[TMP20]]
;
%tmpvar2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, i64 1, ptr %A)
@@ -913,26 +978,21 @@ define %struct.__neon_int16x8x2_t @ld2lane_8h(<8 x i16> %L1, <8 x i16> %L2, ptr
; CHECK-SAME: <8 x i16> [[L1:%.*]], <8 x i16> [[L2:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576
+; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr
+; CHECK-NEXT: [[TMP6:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]], i64 1, ptr [[TMP5]])
; CHECK-NEXT: [[TMP8:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16> [[L1]], <8 x i16> [[L2]], i64 1, ptr [[A]])
+; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[TMP6]], 0
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[TMP8]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <8 x i16>, <8 x i16> } { <8 x i16> splat (i16 -1), <8 x i16> splat (i16 -1) }, <8 x i16> [[TMP13]], 0
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT16X8X2_T:%.*]] poison, <8 x i16> [[TMP9]], 0
+; CHECK-NEXT: [[TMP16:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[TMP6]], 1
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[TMP8]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = insertvalue { <8 x i16>, <8 x i16> } [[TMP15]], <8 x i16> [[TMP16]], 1
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue [[STRUCT___NEON_INT16X8X2_T]] [[TMP10]], <8 x i16> [[TMP11]], 1
-; CHECK-NEXT: store { <8 x i16>, <8 x i16> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <8 x i16>, <8 x i16> } [[TMP14]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT16X8X2_T]] [[TMP12]]
;
%tmpvar2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16> %L1, <8 x i16> %L2, i64 1, ptr %A)
@@ -946,31 +1006,25 @@ define %struct.__neon_int16x8x3_t @ld3lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[TMP7:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], i64 1, ptr [[TMP6]])
; CHECK-NEXT: [[TMP10:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16> [[L1]], <8 x i16> [[L2]], <8 x i16> [[L3]], i64 1, ptr [[A]])
+; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[TMP7]], 0
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[TMP10]], 0
+; CHECK-NEXT: [[TMP18:%.*]] = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } { <8 x i16> splat (i16 -1), <8 x i16> splat (i16 -1), <8 x i16> splat (i16 -1) }, <8 x i16> [[TMP9]], 0
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue [[STRUCT___NEON_INT16X8X3_T:%.*]] poison, <8 x i16> [[TMP11]], 0
+; CHECK-NEXT: [[TMP20:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[TMP7]], 1
; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[TMP10]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[TMP18]], <8 x i16> [[TMP20]], 1
; CHECK-NEXT: [[TMP14:%.*]] = insertvalue [[STRUCT___NEON_INT16X8X3_T]] [[TMP12]], <8 x i16> [[TMP13]], 1
+; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[TMP7]], 2
; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[TMP10]], 2
+; CHECK-NEXT: [[TMP19:%.*]] = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[TMP21]], <8 x i16> [[TMP17]], 2
; CHECK-NEXT: [[TMP16:%.*]] = insertvalue [[STRUCT___NEON_INT16X8X3_T]] [[TMP14]], <8 x i16> [[TMP15]], 2
-; CHECK-NEXT: store { <8 x i16>, <8 x i16>, <8 x i16> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <8 x i16>, <8 x i16>, <8 x i16> } [[TMP19]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT16X8X3_T]] [[TMP16]]
;
%tmpvar2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, i64 1, ptr %A)
@@ -985,36 +1039,29 @@ define %struct.__neon_int16x8x4_t @ld4lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP4]] to i128
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i64 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]]
-; CHECK-NEXT: br i1 [[_MSOR7]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[TMP8:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i16> [[TMP4]], i64 1, ptr [[TMP7]])
; CHECK-NEXT: [[TMP12:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> [[L1]], <8 x i16> [[L2]], <8 x i16> [[L3]], <8 x i16> [[L4]], i64 1, ptr [[A]])
+; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP8]], 0
; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP12]], 0
+; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } { <8 x i16> splat (i16 -1), <8 x i16> splat (i16 -1), <8 x i16> splat (i16 -1), <8 x i16> splat (i16 -1) }, <8 x i16> [[TMP10]], 0
; CHECK-NEXT: [[TMP14:%.*]] = insertvalue [[STRUCT___NEON_INT16X8X4_T:%.*]] poison, <8 x i16> [[TMP13]], 0
+; CHECK-NEXT: [[TMP23:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP8]], 1
; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP12]], 1
+; CHECK-NEXT: [[TMP25:%.*]] = insertvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP21]], <8 x i16> [[TMP23]], 1
; CHECK-NEXT: [[TMP16:%.*]] = insertvalue [[STRUCT___NEON_INT16X8X4_T]] [[TMP14]], <8 x i16> [[TMP15]], 1
+; CHECK-NEXT: [[TMP26:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP8]], 2
; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP12]], 2
+; CHECK-NEXT: [[TMP27:%.*]] = insertvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP25]], <8 x i16> [[TMP26]], 2
; CHECK-NEXT: [[TMP18:%.*]] = insertvalue [[STRUCT___NEON_INT16X8X4_T]] [[TMP16]], <8 x i16> [[TMP17]], 2
+; CHECK-NEXT: [[TMP22:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP8]], 3
; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP12]], 3
+; CHECK-NEXT: [[TMP24:%.*]] = insertvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP27]], <8 x i16> [[TMP22]], 3
; CHECK-NEXT: [[TMP20:%.*]] = insertvalue [[STRUCT___NEON_INT16X8X4_T]] [[TMP18]], <8 x i16> [[TMP19]], 3
-; CHECK-NEXT: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP24]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT16X8X4_T]] [[TMP20]]
;
%tmpvar2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, i64 1, ptr %A)
@@ -1031,26 +1078,21 @@ define %struct.__neon_int32x4x2_t @ld2lane_4s(<4 x i32> %L1, <4 x i32> %L2, ptr
; CHECK-SAME: <4 x i32> [[L1:%.*]], <4 x i32> [[L2:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576
+; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr
+; CHECK-NEXT: [[TMP6:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], i64 1, ptr [[TMP5]])
; CHECK-NEXT: [[TMP8:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> [[L1]], <4 x i32> [[L2]], i64 1, ptr [[A]])
+; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP6]], 0
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP8]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <4 x i32>, <4 x i32> } { <4 x i32> splat (i32 -1), <4 x i32> splat (i32 -1) }, <4 x i32> [[TMP13]], 0
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT32X4X2_T:%.*]] poison, <4 x i32> [[TMP9]], 0
+; CHECK-NEXT: [[TMP16:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP6]], 1
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP8]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = insertvalue { <4 x i32>, <4 x i32> } [[TMP15]], <4 x i32> [[TMP16]], 1
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue [[STRUCT___NEON_INT32X4X2_T]] [[TMP10]], <4 x i32> [[TMP11]], 1
-; CHECK-NEXT: store { <4 x i32>, <4 x i32> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <4 x i32>, <4 x i32> } [[TMP14]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT32X4X2_T]] [[TMP12]]
;
%tmpvar2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> %L1, <4 x i32> %L2, i64 1, ptr %A)
@@ -1064,31 +1106,25 @@ define %struct.__neon_int32x4x3_t @ld3lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[TMP7:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], i64 1, ptr [[TMP6]])
; CHECK-NEXT: [[TMP10:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> [[L1]], <4 x i32> [[L2]], <4 x i32> [[L3]], i64 1, ptr [[A]])
+; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP7]], 0
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP10]], 0
+; CHECK-NEXT: [[TMP18:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } { <4 x i32> splat (i32 -1), <4 x i32> splat (i32 -1), <4 x i32> splat (i32 -1) }, <4 x i32> [[TMP9]], 0
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue [[STRUCT___NEON_INT32X4X3_T:%.*]] poison, <4 x i32> [[TMP11]], 0
+; CHECK-NEXT: [[TMP20:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP7]], 1
; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP10]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP18]], <4 x i32> [[TMP20]], 1
; CHECK-NEXT: [[TMP14:%.*]] = insertvalue [[STRUCT___NEON_INT32X4X3_T]] [[TMP12]], <4 x i32> [[TMP13]], 1
+; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP7]], 2
; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP10]], 2
+; CHECK-NEXT: [[TMP19:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP21]], <4 x i32> [[TMP17]], 2
; CHECK-NEXT: [[TMP16:%.*]] = insertvalue [[STRUCT___NEON_INT32X4X3_T]] [[TMP14]], <4 x i32> [[TMP15]], 2
-; CHECK-NEXT: store { <4 x i32>, <4 x i32>, <4 x i32> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP19]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT32X4X3_T]] [[TMP16]]
;
%tmpvar2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i64 1, ptr %A)
@@ -1103,36 +1139,29 @@ define %struct.__neon_int32x4x4_t @ld4lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x
; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP4]] to i128
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i64 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]]
-; CHECK-NEXT: br i1 [[_MSOR7]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[TMP8:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> [[TMP4]], i64 1, ptr [[TMP7]])
; CHECK-NEXT: [[TMP12:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> [[L1]], <4 x i32> [[L2]], <4 x i32> [[L3]], <4 x i32> [[L4]], i64 1, ptr [[A]])
+; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP8]], 0
; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP12]], 0
+; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } { <4 x i32> splat (i32 -1), <4 x i32> splat (i32 -1), <4 x i32> splat (i32 -1), <4 x i32> splat (i32 -1) }, <4 x i32> [[TMP10]], 0
; CHECK-NEXT: [[TMP14:%.*]] = insertvalue [[STRUCT___NEON_INT32X4X4_T:%.*]] poison, <4 x i32> [[TMP13]], 0
+; CHECK-NEXT: [[TMP23:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP8]], 1
; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP12]], 1
+; CHECK-NEXT: [[TMP25:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP21]], <4 x i32> [[TMP23]], 1
; CHECK-NEXT: [[TMP16:%.*]] = insertvalue [[STRUCT___NEON_INT32X4X4_T]] [[TMP14]], <4 x i32> [[TMP15]], 1
+; CHECK-NEXT: [[TMP26:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP8]], 2
; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP12]], 2
+; CHECK-NEXT: [[TMP27:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP25]], <4 x i32> [[TMP26]], 2
; CHECK-NEXT: [[TMP18:%.*]] = insertvalue [[STRUCT___NEON_INT32X4X4_T]] [[TMP16]], <4 x i32> [[TMP17]], 2
+; CHECK-NEXT: [[TMP22:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP8]], 3
; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP12]], 3
+; CHECK-NEXT: [[TMP24:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP27]], <4 x i32> [[TMP22]], 3
; CHECK-NEXT: [[TMP20:%.*]] = insertvalue [[STRUCT___NEON_INT32X4X4_T]] [[TMP18]], <4 x i32> [[TMP19]], 3
-; CHECK-NEXT: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP24]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT32X4X4_T]] [[TMP20]]
;
%tmpvar2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, i64 1, ptr %A)
@@ -1149,26 +1178,21 @@ define %struct.__neon_int64x2x2_t @ld2lane_2d(<2 x i64> %L1, <2 x i64> %L2, ptr
; CHECK-SAME: <2 x i64> [[L1:%.*]], <2 x i64> [[L2:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = xor i64 [[TMP3]], 193514046488576
+; CHECK-NEXT: [[TMP5:%.*]] = inttoptr i64 [[TMP4]] to ptr
+; CHECK-NEXT: [[TMP6:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]], i64 1, ptr [[TMP5]])
; CHECK-NEXT: [[TMP8:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64> [[L1]], <2 x i64> [[L2]], i64 1, ptr [[A]])
+; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP6]], 0
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP8]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <2 x i64>, <2 x i64> } { <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1) }, <2 x i64> [[TMP13]], 0
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT64X2X2_T:%.*]] poison, <2 x i64> [[TMP9]], 0
+; CHECK-NEXT: [[TMP16:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP6]], 1
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP8]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = insertvalue { <2 x i64>, <2 x i64> } [[TMP15]], <2 x i64> [[TMP16]], 1
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue [[STRUCT___NEON_INT64X2X2_T]] [[TMP10]], <2 x i64> [[TMP11]], 1
-; CHECK-NEXT: store { <2 x i64>, <2 x i64> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <2 x i64>, <2 x i64> } [[TMP14]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT64X2X2_T]] [[TMP12]]
;
%tmpvar2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64> %L1, <2 x i64> %L2, i64 1, ptr %A)
@@ -1182,31 +1206,25 @@ define %struct.__neon_int64x2x3_t @ld3lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[TMP7:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], i64 1, ptr [[TMP6]])
; CHECK-NEXT: [[TMP10:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64> [[L1]], <2 x i64> [[L2]], <2 x i64> [[L3]], i64 1, ptr [[A]])
+; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP7]], 0
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP10]], 0
+; CHECK-NEXT: [[TMP18:%.*]] = insertvalue { <2 x i64>, <2 x i64>, <2 x i64> } { <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1) }, <2 x i64> [[TMP9]], 0
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue [[STRUCT___NEON_INT64X2X3_T:%.*]] poison, <2 x i64> [[TMP11]], 0
+; CHECK-NEXT: [[TMP20:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP7]], 1
; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP10]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP18]], <2 x i64> [[TMP20]], 1
; CHECK-NEXT: [[TMP14:%.*]] = insertvalue [[STRUCT___NEON_INT64X2X3_T]] [[TMP12]], <2 x i64> [[TMP13]], 1
+; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP7]], 2
; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP10]], 2
+; CHECK-NEXT: [[TMP19:%.*]] = insertvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], <2 x i64> [[TMP17]], 2
; CHECK-NEXT: [[TMP16:%.*]] = insertvalue [[STRUCT___NEON_INT64X2X3_T]] [[TMP14]], <2 x i64> [[TMP15]], 2
-; CHECK-NEXT: store { <2 x i64>, <2 x i64>, <2 x i64> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP19]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT64X2X3_T]] [[TMP16]]
;
%tmpvar2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, i64 1, ptr %A)
@@ -1221,36 +1239,29 @@ define %struct.__neon_int64x2x4_t @ld4lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x
; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i64> [[TMP4]] to i128
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i64 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]]
-; CHECK-NEXT: br i1 [[_MSOR7]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[TMP8:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i64> [[TMP4]], i64 1, ptr [[TMP7]])
; CHECK-NEXT: [[TMP12:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64> [[L1]], <2 x i64> [[L2]], <2 x i64> [[L3]], <2 x i64> [[L4]], i64 1, ptr [[A]])
+; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 0
; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP12]], 0
+; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } { <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1) }, <2 x i64> [[TMP10]], 0
; CHECK-NEXT: [[TMP14:%.*]] = insertvalue [[STRUCT___NEON_INT64X2X4_T:%.*]] poison, <2 x i64> [[TMP13]], 0
+; CHECK-NEXT: [[TMP23:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 1
; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP12]], 1
+; CHECK-NEXT: [[TMP25:%.*]] = insertvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], <2 x i64> [[TMP23]], 1
; CHECK-NEXT: [[TMP16:%.*]] = insertvalue [[STRUCT___NEON_INT64X2X4_T]] [[TMP14]], <2 x i64> [[TMP15]], 1
+; CHECK-NEXT: [[TMP26:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 2
; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP12]], 2
+; CHECK-NEXT: [[TMP27:%.*]] = insertvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP25]], <2 x i64> [[TMP26]], 2
; CHECK-NEXT: [[TMP18:%.*]] = insertvalue [[STRUCT___NEON_INT64X2X4_T]] [[TMP16]], <2 x i64> [[TMP17]], 2
+; CHECK-NEXT: [[TMP22:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP8]], 3
; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP12]], 3
+; CHECK-NEXT: [[TMP24:%.*]] = insertvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP27]], <2 x i64> [[TMP22]], 3
; CHECK-NEXT: [[TMP20:%.*]] = insertvalue [[STRUCT___NEON_INT64X2X4_T]] [[TMP18]], <2 x i64> [[TMP19]], 3
-; CHECK-NEXT: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP24]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT64X2X4_T]] [[TMP20]]
;
%tmpvar2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, i64 1, ptr %A)
@@ -1268,9 +1279,9 @@ define <8 x i8> @ld1r_8b(ptr %bar) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1:![0-9]+]]
; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7:[0-9]+]]
; CHECK-NEXT: unreachable
; CHECK: 3:
; CHECK-NEXT: [[TMPVAR1:%.*]] = load i8, ptr [[BAR]], align 1
@@ -1565,20 +1576,21 @@ define %struct.__neon_int8x8x2_t @ld2r_8b(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int8x8x2_t @ld2r_8b(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP9:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[TMP9]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { <8 x i8>, <8 x i8> } { <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1) }, <8 x i8> [[TMP11]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT8X8X2_T:%.*]] poison, <8 x i8> [[TMP5]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[TMP9]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <8 x i8>, <8 x i8> } [[TMP13]], <8 x i8> [[TMP10]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT8X8X2_T]] [[TMP6]], <8 x i8> [[TMP7]], 1
-; CHECK-NEXT: store { <8 x i8>, <8 x i8> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <8 x i8>, <8 x i8> } [[TMP12]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT8X8X2_T]] [[TMP8]]
;
%tmpvar2 = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2r.v8i8.p0(ptr %A)
@@ -1589,22 +1601,25 @@ define %struct.__neon_int8x8x3_t @ld3r_8b(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int8x8x3_t @ld3r_8b(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP11:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP11]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } { <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1) }, <8 x i8> [[TMP13]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT8X8X3_T:%.*]] poison, <8 x i8> [[TMP5]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP11]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP15]], <8 x i8> [[TMP17]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT8X8X3_T]] [[TMP6]], <8 x i8> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP11]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP12]], <8 x i8> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT8X8X3_T]] [[TMP8]], <8 x i8> [[TMP9]], 2
-; CHECK-NEXT: store { <8 x i8>, <8 x i8>, <8 x i8> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP16]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT8X8X3_T]] [[TMP10]]
;
%tmpvar2 = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3r.v8i8.p0(ptr %A)
@@ -1615,24 +1630,29 @@ define %struct.__neon_int8x8x4_t @ld4r_8b(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int8x8x4_t @ld4r_8b(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP13:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP13]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } { <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1) }, <8 x i8> [[TMP15]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT8X8X4_T:%.*]] poison, <8 x i8> [[TMP5]], 0
+; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP13]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP17]], <8 x i8> [[TMP19]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT8X8X4_T]] [[TMP6]], <8 x i8> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP13]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP21]], <8 x i8> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT8X8X4_T]] [[TMP8]], <8 x i8> [[TMP9]], 2
+; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP13]], 3
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 3
+; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP16]], <8 x i8> [[TMP18]], 3
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue [[STRUCT___NEON_INT8X8X4_T]] [[TMP10]], <8 x i8> [[TMP11]], 3
-; CHECK-NEXT: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP20]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT8X8X4_T]] [[TMP12]]
;
%tmpvar2 = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4r.v8i8.p0(ptr %A)
@@ -1647,20 +1667,21 @@ define %struct.__neon_int8x16x2_t @ld2r_16b(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int8x16x2_t @ld2r_16b(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP9:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP9]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { <16 x i8>, <16 x i8> } { <16 x i8> splat (i8 -1), <16 x i8> splat (i8 -1) }, <16 x i8> [[TMP11]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT8X16X2_T:%.*]] poison, <16 x i8> [[TMP5]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP9]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <16 x i8>, <16 x i8> } [[TMP13]], <16 x i8> [[TMP10]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT8X16X2_T]] [[TMP6]], <16 x i8> [[TMP7]], 1
-; CHECK-NEXT: store { <16 x i8>, <16 x i8> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <16 x i8>, <16 x i8> } [[TMP12]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT8X16X2_T]] [[TMP8]]
;
%tmpvar2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2r.v16i8.p0(ptr %A)
@@ -1671,22 +1692,25 @@ define %struct.__neon_int8x16x3_t @ld3r_16b(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int8x16x3_t @ld3r_16b(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP11:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP11]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } { <16 x i8> splat (i8 -1), <16 x i8> splat (i8 -1), <16 x i8> splat (i8 -1) }, <16 x i8> [[TMP13]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT8X16X3_T:%.*]] poison, <16 x i8> [[TMP5]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP11]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP15]], <16 x i8> [[TMP17]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT8X16X3_T]] [[TMP6]], <16 x i8> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP11]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP12]], <16 x i8> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT8X16X3_T]] [[TMP8]], <16 x i8> [[TMP9]], 2
-; CHECK-NEXT: store { <16 x i8>, <16 x i8>, <16 x i8> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP16]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT8X16X3_T]] [[TMP10]]
;
%tmpvar2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3r.v16i8.p0(ptr %A)
@@ -1697,24 +1721,29 @@ define %struct.__neon_int8x16x4_t @ld4r_16b(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int8x16x4_t @ld4r_16b(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP13:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP13]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } { <16 x i8> splat (i8 -1), <16 x i8> splat (i8 -1), <16 x i8> splat (i8 -1), <16 x i8> splat (i8 -1) }, <16 x i8> [[TMP15]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT8X16X4_T:%.*]] poison, <16 x i8> [[TMP5]], 0
+; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP13]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP17]], <16 x i8> [[TMP19]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT8X16X4_T]] [[TMP6]], <16 x i8> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP13]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP21]], <16 x i8> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT8X16X4_T]] [[TMP8]], <16 x i8> [[TMP9]], 2
+; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP13]], 3
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP4]], 3
+; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP16]], <16 x i8> [[TMP18]], 3
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue [[STRUCT___NEON_INT8X16X4_T]] [[TMP10]], <16 x i8> [[TMP11]], 3
-; CHECK-NEXT: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP20]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT8X16X4_T]] [[TMP12]]
;
%tmpvar2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4r.v16i8.p0(ptr %A)
@@ -1729,20 +1758,21 @@ define %struct.__neon_int16x4x2_t @ld2r_4h(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int16x4x2_t @ld2r_4h(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP9:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[TMP9]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { <4 x i16>, <4 x i16> } { <4 x i16> splat (i16 -1), <4 x i16> splat (i16 -1) }, <4 x i16> [[TMP11]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT16X4X2_T:%.*]] poison, <4 x i16> [[TMP5]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[TMP9]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <4 x i16>, <4 x i16> } [[TMP13]], <4 x i16> [[TMP10]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT16X4X2_T]] [[TMP6]], <4 x i16> [[TMP7]], 1
-; CHECK-NEXT: store { <4 x i16>, <4 x i16> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <4 x i16>, <4 x i16> } [[TMP12]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT16X4X2_T]] [[TMP8]]
;
%tmpvar2 = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2r.v4i16.p0(ptr %A)
@@ -1753,22 +1783,25 @@ define %struct.__neon_int16x4x3_t @ld3r_4h(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int16x4x3_t @ld3r_4h(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP11:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP11]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } { <4 x i16> splat (i16 -1), <4 x i16> splat (i16 -1), <4 x i16> splat (i16 -1) }, <4 x i16> [[TMP13]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT16X4X3_T:%.*]] poison, <4 x i16> [[TMP5]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP11]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP15]], <4 x i16> [[TMP17]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT16X4X3_T]] [[TMP6]], <4 x i16> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP11]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP12]], <4 x i16> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT16X4X3_T]] [[TMP8]], <4 x i16> [[TMP9]], 2
-; CHECK-NEXT: store { <4 x i16>, <4 x i16>, <4 x i16> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP16]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT16X4X3_T]] [[TMP10]]
;
%tmpvar2 = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3r.v4i16.p0(ptr %A)
@@ -1779,24 +1812,29 @@ define %struct.__neon_int16x4x4_t @ld4r_4h(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int16x4x4_t @ld4r_4h(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP13:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP13]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } { <4 x i16> splat (i16 -1), <4 x i16> splat (i16 -1), <4 x i16> splat (i16 -1), <4 x i16> splat (i16 -1) }, <4 x i16> [[TMP15]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT16X4X4_T:%.*]] poison, <4 x i16> [[TMP5]], 0
+; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP13]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP17]], <4 x i16> [[TMP19]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT16X4X4_T]] [[TMP6]], <4 x i16> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP13]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP21]], <4 x i16> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT16X4X4_T]] [[TMP8]], <4 x i16> [[TMP9]], 2
+; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP13]], 3
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP4]], 3
+; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP16]], <4 x i16> [[TMP18]], 3
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue [[STRUCT___NEON_INT16X4X4_T]] [[TMP10]], <4 x i16> [[TMP11]], 3
-; CHECK-NEXT: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP20]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT16X4X4_T]] [[TMP12]]
;
%tmpvar2 = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4r.v4i16.p0(ptr %A)
@@ -1811,20 +1849,21 @@ define %struct.__neon_int16x8x2_t @ld2r_8h(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int16x8x2_t @ld2r_8h(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP9:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[TMP9]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { <8 x i16>, <8 x i16> } { <8 x i16> splat (i16 -1), <8 x i16> splat (i16 -1) }, <8 x i16> [[TMP11]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT16X8X2_T:%.*]] poison, <8 x i16> [[TMP5]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[TMP9]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <8 x i16>, <8 x i16> } [[TMP13]], <8 x i16> [[TMP10]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT16X8X2_T]] [[TMP6]], <8 x i16> [[TMP7]], 1
-; CHECK-NEXT: store { <8 x i16>, <8 x i16> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <8 x i16>, <8 x i16> } [[TMP12]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT16X8X2_T]] [[TMP8]]
;
%tmpvar2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2r.v8i16.p0(ptr %A)
@@ -1835,22 +1874,25 @@ define %struct.__neon_int16x8x3_t @ld3r_8h(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int16x8x3_t @ld3r_8h(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP11:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[TMP11]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } { <8 x i16> splat (i16 -1), <8 x i16> splat (i16 -1), <8 x i16> splat (i16 -1) }, <8 x i16> [[TMP13]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT16X8X3_T:%.*]] poison, <8 x i16> [[TMP5]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[TMP11]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[TMP15]], <8 x i16> [[TMP17]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT16X8X3_T]] [[TMP6]], <8 x i16> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[TMP11]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[TMP12]], <8 x i16> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT16X8X3_T]] [[TMP8]], <8 x i16> [[TMP9]], 2
-; CHECK-NEXT: store { <8 x i16>, <8 x i16>, <8 x i16> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <8 x i16>, <8 x i16>, <8 x i16> } [[TMP16]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT16X8X3_T]] [[TMP10]]
;
%tmpvar2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3r.v8i16.p0(ptr %A)
@@ -1861,24 +1903,29 @@ define %struct.__neon_int16x8x4_t @ld4r_8h(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int16x8x4_t @ld4r_8h(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP13:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP13]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } { <8 x i16> splat (i16 -1), <8 x i16> splat (i16 -1), <8 x i16> splat (i16 -1), <8 x i16> splat (i16 -1) }, <8 x i16> [[TMP15]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT16X8X4_T:%.*]] poison, <8 x i16> [[TMP5]], 0
+; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP13]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP17]], <8 x i16> [[TMP19]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT16X8X4_T]] [[TMP6]], <8 x i16> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP13]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP21]], <8 x i16> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT16X8X4_T]] [[TMP8]], <8 x i16> [[TMP9]], 2
+; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP13]], 3
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP4]], 3
+; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP16]], <8 x i16> [[TMP18]], 3
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue [[STRUCT___NEON_INT16X8X4_T]] [[TMP10]], <8 x i16> [[TMP11]], 3
-; CHECK-NEXT: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP20]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT16X8X4_T]] [[TMP12]]
;
%tmpvar2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4r.v8i16.p0(ptr %A)
@@ -1893,20 +1940,21 @@ define %struct.__neon_int32x2x2_t @ld2r_2s(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int32x2x2_t @ld2r_2s(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP9:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[TMP9]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { <2 x i32>, <2 x i32> } { <2 x i32> splat (i32 -1), <2 x i32> splat (i32 -1) }, <2 x i32> [[TMP11]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT32X2X2_T:%.*]] poison, <2 x i32> [[TMP5]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[TMP9]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <2 x i32>, <2 x i32> } [[TMP13]], <2 x i32> [[TMP10]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT32X2X2_T]] [[TMP6]], <2 x i32> [[TMP7]], 1
-; CHECK-NEXT: store { <2 x i32>, <2 x i32> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <2 x i32>, <2 x i32> } [[TMP12]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT32X2X2_T]] [[TMP8]]
;
%tmpvar2 = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2r.v2i32.p0(ptr %A)
@@ -1917,22 +1965,25 @@ define %struct.__neon_int32x2x3_t @ld3r_2s(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int32x2x3_t @ld3r_2s(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP11:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP11]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } { <2 x i32> splat (i32 -1), <2 x i32> splat (i32 -1), <2 x i32> splat (i32 -1) }, <2 x i32> [[TMP13]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT32X2X3_T:%.*]] poison, <2 x i32> [[TMP5]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP11]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP15]], <2 x i32> [[TMP17]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT32X2X3_T]] [[TMP6]], <2 x i32> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP11]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP12]], <2 x i32> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT32X2X3_T]] [[TMP8]], <2 x i32> [[TMP9]], 2
-; CHECK-NEXT: store { <2 x i32>, <2 x i32>, <2 x i32> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP16]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT32X2X3_T]] [[TMP10]]
;
%tmpvar2 = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3r.v2i32.p0(ptr %A)
@@ -1943,24 +1994,29 @@ define %struct.__neon_int32x2x4_t @ld4r_2s(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int32x2x4_t @ld4r_2s(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP13:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP13]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } { <2 x i32> splat (i32 -1), <2 x i32> splat (i32 -1), <2 x i32> splat (i32 -1), <2 x i32> splat (i32 -1) }, <2 x i32> [[TMP15]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT32X2X4_T:%.*]] poison, <2 x i32> [[TMP5]], 0
+; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP13]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP17]], <2 x i32> [[TMP19]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT32X2X4_T]] [[TMP6]], <2 x i32> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP13]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP21]], <2 x i32> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT32X2X4_T]] [[TMP8]], <2 x i32> [[TMP9]], 2
+; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP13]], 3
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP4]], 3
+; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP16]], <2 x i32> [[TMP18]], 3
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue [[STRUCT___NEON_INT32X2X4_T]] [[TMP10]], <2 x i32> [[TMP11]], 3
-; CHECK-NEXT: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP20]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT32X2X4_T]] [[TMP12]]
;
%tmpvar2 = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4r.v2i32.p0(ptr %A)
@@ -1975,20 +2031,21 @@ define %struct.__neon_int32x4x2_t @ld2r_4s(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int32x4x2_t @ld2r_4s(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP9:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP9]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { <4 x i32>, <4 x i32> } { <4 x i32> splat (i32 -1), <4 x i32> splat (i32 -1) }, <4 x i32> [[TMP11]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT32X4X2_T:%.*]] poison, <4 x i32> [[TMP5]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP9]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <4 x i32>, <4 x i32> } [[TMP13]], <4 x i32> [[TMP10]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT32X4X2_T]] [[TMP6]], <4 x i32> [[TMP7]], 1
-; CHECK-NEXT: store { <4 x i32>, <4 x i32> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <4 x i32>, <4 x i32> } [[TMP12]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT32X4X2_T]] [[TMP8]]
;
%tmpvar2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2r.v4i32.p0(ptr %A)
@@ -1999,22 +2056,25 @@ define %struct.__neon_int32x4x3_t @ld3r_4s(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int32x4x3_t @ld3r_4s(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP11:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP11]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } { <4 x i32> splat (i32 -1), <4 x i32> splat (i32 -1), <4 x i32> splat (i32 -1) }, <4 x i32> [[TMP13]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT32X4X3_T:%.*]] poison, <4 x i32> [[TMP5]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP11]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP15]], <4 x i32> [[TMP17]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT32X4X3_T]] [[TMP6]], <4 x i32> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP11]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP12]], <4 x i32> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT32X4X3_T]] [[TMP8]], <4 x i32> [[TMP9]], 2
-; CHECK-NEXT: store { <4 x i32>, <4 x i32>, <4 x i32> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP16]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT32X4X3_T]] [[TMP10]]
;
%tmpvar2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3r.v4i32.p0(ptr %A)
@@ -2025,24 +2085,29 @@ define %struct.__neon_int32x4x4_t @ld4r_4s(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int32x4x4_t @ld4r_4s(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP13:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP13]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } { <4 x i32> splat (i32 -1), <4 x i32> splat (i32 -1), <4 x i32> splat (i32 -1), <4 x i32> splat (i32 -1) }, <4 x i32> [[TMP15]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT32X4X4_T:%.*]] poison, <4 x i32> [[TMP5]], 0
+; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP13]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP17]], <4 x i32> [[TMP19]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT32X4X4_T]] [[TMP6]], <4 x i32> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP13]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP21]], <4 x i32> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT32X4X4_T]] [[TMP8]], <4 x i32> [[TMP9]], 2
+; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP13]], 3
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP4]], 3
+; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP16]], <4 x i32> [[TMP18]], 3
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue [[STRUCT___NEON_INT32X4X4_T]] [[TMP10]], <4 x i32> [[TMP11]], 3
-; CHECK-NEXT: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP20]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT32X4X4_T]] [[TMP12]]
;
%tmpvar2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4r.v4i32.p0(ptr %A)
@@ -2057,20 +2122,21 @@ define %struct.__neon_int64x1x2_t @ld2r_1d(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int64x1x2_t @ld2r_1d(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP9:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <1 x i64>, <1 x i64> } [[TMP9]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <1 x i64>, <1 x i64> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { <1 x i64>, <1 x i64> } { <1 x i64> splat (i64 -1), <1 x i64> splat (i64 -1) }, <1 x i64> [[TMP11]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT64X1X2_T:%.*]] poison, <1 x i64> [[TMP5]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <1 x i64>, <1 x i64> } [[TMP9]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <1 x i64>, <1 x i64> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <1 x i64>, <1 x i64> } [[TMP13]], <1 x i64> [[TMP10]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT64X1X2_T]] [[TMP6]], <1 x i64> [[TMP7]], 1
-; CHECK-NEXT: store { <1 x i64>, <1 x i64> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <1 x i64>, <1 x i64> } [[TMP12]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT64X1X2_T]] [[TMP8]]
;
%tmpvar2 = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2r.v1i64.p0(ptr %A)
@@ -2081,22 +2147,25 @@ define %struct.__neon_int64x1x3_t @ld3r_1d(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int64x1x3_t @ld3r_1d(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP11:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[TMP11]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <1 x i64>, <1 x i64>, <1 x i64> } { <1 x i64> splat (i64 -1), <1 x i64> splat (i64 -1), <1 x i64> splat (i64 -1) }, <1 x i64> [[TMP13]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT64X1X3_T:%.*]] poison, <1 x i64> [[TMP5]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[TMP11]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[TMP15]], <1 x i64> [[TMP17]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT64X1X3_T]] [[TMP6]], <1 x i64> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[TMP11]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[TMP12]], <1 x i64> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT64X1X3_T]] [[TMP8]], <1 x i64> [[TMP9]], 2
-; CHECK-NEXT: store { <1 x i64>, <1 x i64>, <1 x i64> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <1 x i64>, <1 x i64>, <1 x i64> } [[TMP16]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT64X1X3_T]] [[TMP10]]
;
%tmpvar2 = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3r.v1i64.p0(ptr %A)
@@ -2107,24 +2176,29 @@ define %struct.__neon_int64x1x4_t @ld4r_1d(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int64x1x4_t @ld4r_1d(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP13:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP13]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } { <1 x i64> splat (i64 -1), <1 x i64> splat (i64 -1), <1 x i64> splat (i64 -1), <1 x i64> splat (i64 -1) }, <1 x i64> [[TMP15]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT64X1X4_T:%.*]] poison, <1 x i64> [[TMP5]], 0
+; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP13]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP17]], <1 x i64> [[TMP19]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT64X1X4_T]] [[TMP6]], <1 x i64> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP13]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP21]], <1 x i64> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT64X1X4_T]] [[TMP8]], <1 x i64> [[TMP9]], 2
+; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP13]], 3
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP4]], 3
+; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP16]], <1 x i64> [[TMP18]], 3
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue [[STRUCT___NEON_INT64X1X4_T]] [[TMP10]], <1 x i64> [[TMP11]], 3
-; CHECK-NEXT: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP20]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT64X1X4_T]] [[TMP12]]
;
%tmpvar2 = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4r.v1i64.p0(ptr %A)
@@ -2139,20 +2213,21 @@ define %struct.__neon_int64x2x2_t @ld2r_2d(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int64x2x2_t @ld2r_2d(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP9:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP9]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { <2 x i64>, <2 x i64> } { <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1) }, <2 x i64> [[TMP11]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT64X2X2_T:%.*]] poison, <2 x i64> [[TMP5]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP9]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <2 x i64>, <2 x i64> } [[TMP13]], <2 x i64> [[TMP10]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT64X2X2_T]] [[TMP6]], <2 x i64> [[TMP7]], 1
-; CHECK-NEXT: store { <2 x i64>, <2 x i64> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <2 x i64>, <2 x i64> } [[TMP12]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT64X2X2_T]] [[TMP8]]
;
%tmpvar2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2r.v2i64.p0(ptr %A)
@@ -2163,22 +2238,25 @@ define %struct.__neon_int64x2x3_t @ld3r_2d(ptr %A) nounwind #0 {
; Make sure we are using the operands defined by the ABI
; CHECK-LABEL: define %struct.__neon_int64x2x3_t @ld3r_2d(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP11:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP11]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <2 x i64>, <2 x i64>, <2 x i64> } { <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1) }, <2 x i64> [[TMP13]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT64X2X3_T:%.*]] poison, <2 x i64> [[TMP5]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP11]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP15]], <2 x i64> [[TMP17]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT64X2X3_T]] [[TMP6]], <2 x i64> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP11]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP12]], <2 x i64> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT64X2X3_T]] [[TMP8]], <2 x i64> [[TMP9]], 2
-; CHECK-NEXT: store { <2 x i64>, <2 x i64>, <2 x i64> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP16]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT64X2X3_T]] [[TMP10]]
;
%tmpvar2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3r.v2i64.p0(ptr %A)
@@ -2186,27 +2264,32 @@ define %struct.__neon_int64x2x3_t @ld3r_2d(ptr %A) nounwind #0 {
}
define %struct.__neon_int64x2x4_t @ld4r_2d(ptr %A) nounwind #0 {
-; Make sure we are using the operands defined by the ABI
-; CHECK-LABEL: define %struct.__neon_int64x2x4_t @ld4r_2d(
-; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; Make sure we are using the operands defined by the ABI
+; CHECK-LABEL: define %struct.__neon_int64x2x4_t @ld4r_2d(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP13:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0(ptr [[A]])
+; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP13]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } { <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1) }, <2 x i64> [[TMP15]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT64X2X4_T:%.*]] poison, <2 x i64> [[TMP5]], 0
+; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP13]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP17]], <2 x i64> [[TMP19]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT64X2X4_T]] [[TMP6]], <2 x i64> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP13]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], <2 x i64> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT64X2X4_T]] [[TMP8]], <2 x i64> [[TMP9]], 2
+; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP13]], 3
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP4]], 3
+; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP16]], <2 x i64> [[TMP18]], 3
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue [[STRUCT___NEON_INT64X2X4_T]] [[TMP10]], <2 x i64> [[TMP11]], 3
-; CHECK-NEXT: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP20]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT64X2X4_T]] [[TMP12]]
;
%tmpvar2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4r.v2i64.p0(ptr %A)
@@ -2883,20 +2966,21 @@ declare %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld1x2.v1f64.p0(ptr) noun
define %struct.__neon_int8x8x2_t @ld1_x2_v8i8(ptr %addr) #0 {
; CHECK-LABEL: define %struct.__neon_int8x8x2_t @ld1_x2_v8i8(
; CHECK-SAME: ptr [[ADDR:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ADDR]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP9:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0(ptr [[ADDR]])
+; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[TMP9]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { <8 x i8>, <8 x i8> } { <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1) }, <8 x i8> [[TMP11]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT8X8X2_T:%.*]] poison, <8 x i8> [[TMP5]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[TMP9]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <8 x i8>, <8 x i8> } [[TMP13]], <8 x i8> [[TMP10]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT8X8X2_T]] [[TMP6]], <8 x i8> [[TMP7]], 1
-; CHECK-NEXT: store { <8 x i8>, <8 x i8> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <8 x i8>, <8 x i8> } [[TMP12]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT8X8X2_T]] [[TMP8]]
;
%val = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld1x2.v8i8.p0(ptr %addr)
@@ -2906,20 +2990,21 @@ define %struct.__neon_int8x8x2_t @ld1_x2_v8i8(ptr %addr) #0 {
define %struct.__neon_int16x4x2_t @ld1_x2_v4i16(ptr %addr) #0 {
; CHECK-LABEL: define %struct.__neon_int16x4x2_t @ld1_x2_v4i16(
; CHECK-SAME: ptr [[ADDR:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ADDR]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP9:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0(ptr [[ADDR]])
+; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[TMP9]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { <4 x i16>, <4 x i16> } { <4 x i16> splat (i16 -1), <4 x i16> splat (i16 -1) }, <4 x i16> [[TMP11]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT16X4X2_T:%.*]] poison, <4 x i16> [[TMP5]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[TMP9]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <4 x i16>, <4 x i16> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <4 x i16>, <4 x i16> } [[TMP13]], <4 x i16> [[TMP10]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT16X4X2_T]] [[TMP6]], <4 x i16> [[TMP7]], 1
-; CHECK-NEXT: store { <4 x i16>, <4 x i16> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <4 x i16>, <4 x i16> } [[TMP12]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT16X4X2_T]] [[TMP8]]
;
%val = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld1x2.v4i16.p0(ptr %addr)
@@ -2929,20 +3014,21 @@ define %struct.__neon_int16x4x2_t @ld1_x2_v4i16(ptr %addr) #0 {
define %struct.__neon_int32x2x2_t @ld1_x2_v2i32(ptr %addr) #0 {
; CHECK-LABEL: define %struct.__neon_int32x2x2_t @ld1_x2_v2i32(
; CHECK-SAME: ptr [[ADDR:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ADDR]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP9:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0(ptr [[ADDR]])
+; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[TMP9]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { <2 x i32>, <2 x i32> } { <2 x i32> splat (i32 -1), <2 x i32> splat (i32 -1) }, <2 x i32> [[TMP11]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT32X2X2_T:%.*]] poison, <2 x i32> [[TMP5]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[TMP9]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <2 x i32>, <2 x i32> } [[TMP13]], <2 x i32> [[TMP10]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT32X2X2_T]] [[TMP6]], <2 x i32> [[TMP7]], 1
-; CHECK-NEXT: store { <2 x i32>, <2 x i32> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <2 x i32>, <2 x i32> } [[TMP12]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT32X2X2_T]] [[TMP8]]
;
%val = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld1x2.v2i32.p0(ptr %addr)
@@ -2952,20 +3038,21 @@ define %struct.__neon_int32x2x2_t @ld1_x2_v2i32(ptr %addr) #0 {
define %struct.__neon_float32x2x2_t @ld1_x2_v2f32(ptr %addr) #0 {
; CHECK-LABEL: define %struct.__neon_float32x2x2_t @ld1_x2_v2f32(
; CHECK-SAME: ptr [[ADDR:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ADDR]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP9:%.*]] = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0(ptr [[ADDR]])
+; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[TMP9]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { <2 x i32>, <2 x i32> } { <2 x i32> splat (i32 -1), <2 x i32> splat (i32 -1) }, <2 x i32> [[TMP11]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_FLOAT32X2X2_T:%.*]] poison, <2 x float> [[TMP5]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <2 x i32>, <2 x i32> } [[TMP9]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <2 x float>, <2 x float> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <2 x i32>, <2 x i32> } [[TMP13]], <2 x i32> [[TMP10]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_FLOAT32X2X2_T]] [[TMP6]], <2 x float> [[TMP7]], 1
-; CHECK-NEXT: store { <2 x i32>, <2 x i32> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <2 x i32>, <2 x i32> } [[TMP12]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_FLOAT32X2X2_T]] [[TMP8]]
;
%val = call %struct.__neon_float32x2x2_t @llvm.aarch64.neon.ld1x2.v2f32.p0(ptr %addr)
@@ -2975,20 +3062,21 @@ define %struct.__neon_float32x2x2_t @ld1_x2_v2f32(ptr %addr) #0 {
define %struct.__neon_int64x1x2_t @ld1_x2_v1i64(ptr %addr) #0 {
; CHECK-LABEL: define %struct.__neon_int64x1x2_t @ld1_x2_v1i64(
; CHECK-SAME: ptr [[ADDR:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ADDR]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP9:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0(ptr [[ADDR]])
+; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <1 x i64>, <1 x i64> } [[TMP9]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <1 x i64>, <1 x i64> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { <1 x i64>, <1 x i64> } { <1 x i64> splat (i64 -1), <1 x i64> splat (i64 -1) }, <1 x i64> [[TMP11]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT64X1X2_T:%.*]] poison, <1 x i64> [[TMP5]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <1 x i64>, <1 x i64> } [[TMP9]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <1 x i64>, <1 x i64> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <1 x i64>, <1 x i64> } [[TMP13]], <1 x i64> [[TMP10]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT64X1X2_T]] [[TMP6]], <1 x i64> [[TMP7]], 1
-; CHECK-NEXT: store { <1 x i64>, <1 x i64> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <1 x i64>, <1 x i64> } [[TMP12]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT64X1X2_T]] [[TMP8]]
;
%val = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld1x2.v1i64.p0(ptr %addr)
@@ -2998,20 +3086,21 @@ define %struct.__neon_int64x1x2_t @ld1_x2_v1i64(ptr %addr) #0 {
define %struct.__neon_float64x1x2_t @ld1_x2_v1f64(ptr %addr) #0 {
; CHECK-LABEL: define %struct.__neon_float64x1x2_t @ld1_x2_v1f64(
; CHECK-SAME: ptr [[ADDR:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ADDR]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP9:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0(ptr [[ADDR]])
+; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <1 x i64>, <1 x i64> } [[TMP9]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <1 x double>, <1 x double> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { <1 x i64>, <1 x i64> } { <1 x i64> splat (i64 -1), <1 x i64> splat (i64 -1) }, <1 x i64> [[TMP11]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_FLOAT64X1X2_T:%.*]] poison, <1 x double> [[TMP5]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <1 x i64>, <1 x i64> } [[TMP9]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <1 x double>, <1 x double> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <1 x i64>, <1 x i64> } [[TMP13]], <1 x i64> [[TMP10]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_FLOAT64X1X2_T]] [[TMP6]], <1 x double> [[TMP7]], 1
-; CHECK-NEXT: store { <1 x i64>, <1 x i64> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <1 x i64>, <1 x i64> } [[TMP12]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_FLOAT64X1X2_T]] [[TMP8]]
;
%val = call %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld1x2.v1f64.p0(ptr %addr)
@@ -3037,20 +3126,21 @@ declare %struct.__neon_float64x2x2_t @llvm.aarch64.neon.ld1x2.v2f64.p0(ptr) noun
define %struct.__neon_int8x16x2_t @ld1_x2_v16i8(ptr %addr) #0 {
; CHECK-LABEL: define %struct.__neon_int8x16x2_t @ld1_x2_v16i8(
; CHECK-SAME: ptr [[ADDR:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ADDR]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP9:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0(ptr [[ADDR]])
+; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP9]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { <16 x i8>, <16 x i8> } { <16 x i8> splat (i8 -1), <16 x i8> splat (i8 -1) }, <16 x i8> [[TMP11]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT8X16X2_T:%.*]] poison, <16 x i8> [[TMP5]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP9]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <16 x i8>, <16 x i8> } [[TMP13]], <16 x i8> [[TMP10]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT8X16X2_T]] [[TMP6]], <16 x i8> [[TMP7]], 1
-; CHECK-NEXT: store { <16 x i8>, <16 x i8> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <16 x i8>, <16 x i8> } [[TMP12]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT8X16X2_T]] [[TMP8]]
;
%val = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld1x2.v16i8.p0(ptr %addr)
@@ -3060,20 +3150,21 @@ define %struct.__neon_int8x16x2_t @ld1_x2_v16i8(ptr %addr) #0 {
define %struct.__neon_int16x8x2_t @ld1_x2_v8i16(ptr %addr) #0 {
; CHECK-LABEL: define %struct.__neon_int16x8x2_t @ld1_x2_v8i16(
; CHECK-SAME: ptr [[ADDR:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ADDR]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP9:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0(ptr [[ADDR]])
+; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[TMP9]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { <8 x i16>, <8 x i16> } { <8 x i16> splat (i16 -1), <8 x i16> splat (i16 -1) }, <8 x i16> [[TMP11]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT16X8X2_T:%.*]] poison, <8 x i16> [[TMP5]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[TMP9]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <8 x i16>, <8 x i16> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <8 x i16>, <8 x i16> } [[TMP13]], <8 x i16> [[TMP10]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT16X8X2_T]] [[TMP6]], <8 x i16> [[TMP7]], 1
-; CHECK-NEXT: store { <8 x i16>, <8 x i16> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <8 x i16>, <8 x i16> } [[TMP12]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT16X8X2_T]] [[TMP8]]
;
%val = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld1x2.v8i16.p0(ptr %addr)
@@ -3083,20 +3174,21 @@ define %struct.__neon_int16x8x2_t @ld1_x2_v8i16(ptr %addr) #0 {
define %struct.__neon_int32x4x2_t @ld1_x2_v4i32(ptr %addr) #0 {
; CHECK-LABEL: define %struct.__neon_int32x4x2_t @ld1_x2_v4i32(
; CHECK-SAME: ptr [[ADDR:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ADDR]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP9:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0(ptr [[ADDR]])
+; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP9]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { <4 x i32>, <4 x i32> } { <4 x i32> splat (i32 -1), <4 x i32> splat (i32 -1) }, <4 x i32> [[TMP11]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT32X4X2_T:%.*]] poison, <4 x i32> [[TMP5]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP9]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <4 x i32>, <4 x i32> } [[TMP13]], <4 x i32> [[TMP10]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT32X4X2_T]] [[TMP6]], <4 x i32> [[TMP7]], 1
-; CHECK-NEXT: store { <4 x i32>, <4 x i32> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <4 x i32>, <4 x i32> } [[TMP12]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT32X4X2_T]] [[TMP8]]
;
%val = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld1x2.v4i32.p0(ptr %addr)
@@ -3106,20 +3198,21 @@ define %struct.__neon_int32x4x2_t @ld1_x2_v4i32(ptr %addr) #0 {
define %struct.__neon_float32x4x2_t @ld1_x2_v4f32(ptr %addr) #0 {
; CHECK-LABEL: define %struct.__neon_float32x4x2_t @ld1_x2_v4f32(
; CHECK-SAME: ptr [[ADDR:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ADDR]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP9:%.*]] = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0(ptr [[ADDR]])
+; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP9]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { <4 x i32>, <4 x i32> } { <4 x i32> splat (i32 -1), <4 x i32> splat (i32 -1) }, <4 x i32> [[TMP11]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_FLOAT32X4X2_T:%.*]] poison, <4 x float> [[TMP5]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <4 x i32>, <4 x i32> } [[TMP9]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <4 x float>, <4 x float> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <4 x i32>, <4 x i32> } [[TMP13]], <4 x i32> [[TMP10]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_FLOAT32X4X2_T]] [[TMP6]], <4 x float> [[TMP7]], 1
-; CHECK-NEXT: store { <4 x i32>, <4 x i32> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <4 x i32>, <4 x i32> } [[TMP12]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_FLOAT32X4X2_T]] [[TMP8]]
;
%val = call %struct.__neon_float32x4x2_t @llvm.aarch64.neon.ld1x2.v4f32.p0(ptr %addr)
@@ -3129,20 +3222,21 @@ define %struct.__neon_float32x4x2_t @ld1_x2_v4f32(ptr %addr) #0 {
define %struct.__neon_int64x2x2_t @ld1_x2_v2i64(ptr %addr) #0 {
; CHECK-LABEL: define %struct.__neon_int64x2x2_t @ld1_x2_v2i64(
; CHECK-SAME: ptr [[ADDR:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ADDR]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP9:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0(ptr [[ADDR]])
+; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP9]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { <2 x i64>, <2 x i64> } { <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1) }, <2 x i64> [[TMP11]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT64X2X2_T:%.*]] poison, <2 x i64> [[TMP5]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP9]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <2 x i64>, <2 x i64> } [[TMP13]], <2 x i64> [[TMP10]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT64X2X2_T]] [[TMP6]], <2 x i64> [[TMP7]], 1
-; CHECK-NEXT: store { <2 x i64>, <2 x i64> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <2 x i64>, <2 x i64> } [[TMP12]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT64X2X2_T]] [[TMP8]]
;
%val = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld1x2.v2i64.p0(ptr %addr)
@@ -3152,20 +3246,21 @@ define %struct.__neon_int64x2x2_t @ld1_x2_v2i64(ptr %addr) #0 {
define %struct.__neon_float64x2x2_t @ld1_x2_v2f64(ptr %addr) #0 {
; CHECK-LABEL: define %struct.__neon_float64x2x2_t @ld1_x2_v2f64(
; CHECK-SAME: ptr [[ADDR:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ADDR]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP9:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0(ptr [[ADDR]])
+; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP9]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = insertvalue { <2 x i64>, <2 x i64> } { <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1) }, <2 x i64> [[TMP11]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_FLOAT64X2X2_T:%.*]] poison, <2 x double> [[TMP5]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { <2 x i64>, <2 x i64> } [[TMP9]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <2 x double>, <2 x double> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <2 x i64>, <2 x i64> } [[TMP13]], <2 x i64> [[TMP10]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_FLOAT64X2X2_T]] [[TMP6]], <2 x double> [[TMP7]], 1
-; CHECK-NEXT: store { <2 x i64>, <2 x i64> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <2 x i64>, <2 x i64> } [[TMP12]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_FLOAT64X2X2_T]] [[TMP8]]
;
%val = call %struct.__neon_float64x2x2_t @llvm.aarch64.neon.ld1x2.v2f64.p0(ptr %addr)
@@ -3182,22 +3277,25 @@ declare %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld1x3.v1f64.p0(ptr) noun
define %struct.__neon_int8x8x3_t @ld1_x3_v8i8(ptr %addr) #0 {
; CHECK-LABEL: define %struct.__neon_int8x8x3_t @ld1_x3_v8i8(
; CHECK-SAME: ptr [[ADDR:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ADDR]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP11:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0(ptr [[ADDR]])
+; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP11]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } { <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1) }, <8 x i8> [[TMP13]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT8X8X3_T:%.*]] poison, <8 x i8> [[TMP5]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP11]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP15]], <8 x i8> [[TMP17]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT8X8X3_T]] [[TMP6]], <8 x i8> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP11]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP12]], <8 x i8> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT8X8X3_T]] [[TMP8]], <8 x i8> [[TMP9]], 2
-; CHECK-NEXT: store { <8 x i8>, <8 x i8>, <8 x i8> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <8 x i8>, <8 x i8>, <8 x i8> } [[TMP16]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT8X8X3_T]] [[TMP10]]
;
%val = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld1x3.v8i8.p0(ptr %addr)
@@ -3207,22 +3305,25 @@ define %struct.__neon_int8x8x3_t @ld1_x3_v8i8(ptr %addr) #0 {
define %struct.__neon_int16x4x3_t @ld1_x3_v4i16(ptr %addr) #0 {
; CHECK-LABEL: define %struct.__neon_int16x4x3_t @ld1_x3_v4i16(
; CHECK-SAME: ptr [[ADDR:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ADDR]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP11:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0(ptr [[ADDR]])
+; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP11]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } { <4 x i16> splat (i16 -1), <4 x i16> splat (i16 -1), <4 x i16> splat (i16 -1) }, <4 x i16> [[TMP13]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT16X4X3_T:%.*]] poison, <4 x i16> [[TMP5]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP11]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP15]], <4 x i16> [[TMP17]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT16X4X3_T]] [[TMP6]], <4 x i16> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP11]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP12]], <4 x i16> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT16X4X3_T]] [[TMP8]], <4 x i16> [[TMP9]], 2
-; CHECK-NEXT: store { <4 x i16>, <4 x i16>, <4 x i16> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <4 x i16>, <4 x i16>, <4 x i16> } [[TMP16]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT16X4X3_T]] [[TMP10]]
;
%val = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld1x3.v4i16.p0(ptr %addr)
@@ -3232,22 +3333,25 @@ define %struct.__neon_int16x4x3_t @ld1_x3_v4i16(ptr %addr) #0 {
define %struct.__neon_int32x2x3_t @ld1_x3_v2i32(ptr %addr) #0 {
; CHECK-LABEL: define %struct.__neon_int32x2x3_t @ld1_x3_v2i32(
; CHECK-SAME: ptr [[ADDR:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ADDR]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP11:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0(ptr [[ADDR]])
+; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP11]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } { <2 x i32> splat (i32 -1), <2 x i32> splat (i32 -1), <2 x i32> splat (i32 -1) }, <2 x i32> [[TMP13]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT32X2X3_T:%.*]] poison, <2 x i32> [[TMP5]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP11]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP15]], <2 x i32> [[TMP17]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT32X2X3_T]] [[TMP6]], <2 x i32> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP11]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP12]], <2 x i32> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT32X2X3_T]] [[TMP8]], <2 x i32> [[TMP9]], 2
-; CHECK-NEXT: store { <2 x i32>, <2 x i32>, <2 x i32> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP16]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT32X2X3_T]] [[TMP10]]
;
%val = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld1x3.v2i32.p0(ptr %addr)
@@ -3257,22 +3361,25 @@ define %struct.__neon_int32x2x3_t @ld1_x3_v2i32(ptr %addr) #0 {
define %struct.__neon_float32x2x3_t @ld1_x3_v2f32(ptr %addr) #0 {
; CHECK-LABEL: define %struct.__neon_float32x2x3_t @ld1_x3_v2f32(
; CHECK-SAME: ptr [[ADDR:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ADDR]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP11:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0(ptr [[ADDR]])
+; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP11]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } { <2 x i32> splat (i32 -1), <2 x i32> splat (i32 -1), <2 x i32> splat (i32 -1) }, <2 x i32> [[TMP13]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_FLOAT32X2X3_T:%.*]] poison, <2 x float> [[TMP5]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP11]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP15]], <2 x i32> [[TMP17]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_FLOAT32X2X3_T]] [[TMP6]], <2 x float> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP11]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP12]], <2 x i32> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_FLOAT32X2X3_T]] [[TMP8]], <2 x float> [[TMP9]], 2
-; CHECK-NEXT: store { <2 x i32>, <2 x i32>, <2 x i32> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <2 x i32>, <2 x i32>, <2 x i32> } [[TMP16]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_FLOAT32X2X3_T]] [[TMP10]]
;
%val = call %struct.__neon_float32x2x3_t @llvm.aarch64.neon.ld1x3.v2f32.p0(ptr %addr)
@@ -3282,22 +3389,25 @@ define %struct.__neon_float32x2x3_t @ld1_x3_v2f32(ptr %addr) #0 {
define %struct.__neon_int64x1x3_t @ld1_x3_v1i64(ptr %addr) #0 {
; CHECK-LABEL: define %struct.__neon_int64x1x3_t @ld1_x3_v1i64(
; CHECK-SAME: ptr [[ADDR:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ADDR]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP11:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0(ptr [[ADDR]])
+; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[TMP11]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <1 x i64>, <1 x i64>, <1 x i64> } { <1 x i64> splat (i64 -1), <1 x i64> splat (i64 -1), <1 x i64> splat (i64 -1) }, <1 x i64> [[TMP13]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT64X1X3_T:%.*]] poison, <1 x i64> [[TMP5]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[TMP11]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[TMP15]], <1 x i64> [[TMP17]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT64X1X3_T]] [[TMP6]], <1 x i64> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[TMP11]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[TMP12]], <1 x i64> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT64X1X3_T]] [[TMP8]], <1 x i64> [[TMP9]], 2
-; CHECK-NEXT: store { <1 x i64>, <1 x i64>, <1 x i64> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <1 x i64>, <1 x i64>, <1 x i64> } [[TMP16]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT64X1X3_T]] [[TMP10]]
;
%val = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld1x3.v1i64.p0(ptr %addr)
@@ -3307,22 +3417,25 @@ define %struct.__neon_int64x1x3_t @ld1_x3_v1i64(ptr %addr) #0 {
define %struct.__neon_float64x1x3_t @ld1_x3_v1f64(ptr %addr) #0 {
; CHECK-LABEL: define %struct.__neon_float64x1x3_t @ld1_x3_v1f64(
; CHECK-SAME: ptr [[ADDR:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ADDR]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP11:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0(ptr [[ADDR]])
+; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[TMP11]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <1 x i64>, <1 x i64>, <1 x i64> } { <1 x i64> splat (i64 -1), <1 x i64> splat (i64 -1), <1 x i64> splat (i64 -1) }, <1 x i64> [[TMP13]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_FLOAT64X1X3_T:%.*]] poison, <1 x double> [[TMP5]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[TMP11]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[TMP15]], <1 x i64> [[TMP17]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_FLOAT64X1X3_T]] [[TMP6]], <1 x double> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[TMP11]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <1 x i64>, <1 x i64>, <1 x i64> } [[TMP12]], <1 x i64> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_FLOAT64X1X3_T]] [[TMP8]], <1 x double> [[TMP9]], 2
-; CHECK-NEXT: store { <1 x i64>, <1 x i64>, <1 x i64> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <1 x i64>, <1 x i64>, <1 x i64> } [[TMP16]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_FLOAT64X1X3_T]] [[TMP10]]
;
%val = call %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld1x3.v1f64.p0(ptr %addr)
@@ -3339,22 +3452,25 @@ declare %struct.__neon_float64x2x3_t @llvm.aarch64.neon.ld1x3.v2f64.p0(ptr) noun
define %struct.__neon_int8x16x3_t @ld1_x3_v16i8(ptr %addr) #0 {
; CHECK-LABEL: define %struct.__neon_int8x16x3_t @ld1_x3_v16i8(
; CHECK-SAME: ptr [[ADDR:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ADDR]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP11:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0(ptr [[ADDR]])
+; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP11]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } { <16 x i8> splat (i8 -1), <16 x i8> splat (i8 -1), <16 x i8> splat (i8 -1) }, <16 x i8> [[TMP13]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT8X16X3_T:%.*]] poison, <16 x i8> [[TMP5]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP11]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP15]], <16 x i8> [[TMP17]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT8X16X3_T]] [[TMP6]], <16 x i8> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP11]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP12]], <16 x i8> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT8X16X3_T]] [[TMP8]], <16 x i8> [[TMP9]], 2
-; CHECK-NEXT: store { <16 x i8>, <16 x i8>, <16 x i8> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <16 x i8>, <16 x i8>, <16 x i8> } [[TMP16]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT8X16X3_T]] [[TMP10]]
;
%val = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld1x3.v16i8.p0(ptr %addr)
@@ -3364,22 +3480,25 @@ define %struct.__neon_int8x16x3_t @ld1_x3_v16i8(ptr %addr) #0 {
define %struct.__neon_int16x8x3_t @ld1_x3_v8i16(ptr %addr) #0 {
; CHECK-LABEL: define %struct.__neon_int16x8x3_t @ld1_x3_v8i16(
; CHECK-SAME: ptr [[ADDR:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ADDR]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP11:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0(ptr [[ADDR]])
+; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[TMP11]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } { <8 x i16> splat (i16 -1), <8 x i16> splat (i16 -1), <8 x i16> splat (i16 -1) }, <8 x i16> [[TMP13]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT16X8X3_T:%.*]] poison, <8 x i16> [[TMP5]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[TMP11]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[TMP15]], <8 x i16> [[TMP17]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT16X8X3_T]] [[TMP6]], <8 x i16> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[TMP11]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } [[TMP12]], <8 x i16> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT16X8X3_T]] [[TMP8]], <8 x i16> [[TMP9]], 2
-; CHECK-NEXT: store { <8 x i16>, <8 x i16>, <8 x i16> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <8 x i16>, <8 x i16>, <8 x i16> } [[TMP16]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT16X8X3_T]] [[TMP10]]
;
%val = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld1x3.v8i16.p0(ptr %addr)
@@ -3389,22 +3508,25 @@ define %struct.__neon_int16x8x3_t @ld1_x3_v8i16(ptr %addr) #0 {
define %struct.__neon_int32x4x3_t @ld1_x3_v4i32(ptr %addr) #0 {
; CHECK-LABEL: define %struct.__neon_int32x4x3_t @ld1_x3_v4i32(
; CHECK-SAME: ptr [[ADDR:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ADDR]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP11:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0(ptr [[ADDR]])
+; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP11]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } { <4 x i32> splat (i32 -1), <4 x i32> splat (i32 -1), <4 x i32> splat (i32 -1) }, <4 x i32> [[TMP13]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT32X4X3_T:%.*]] poison, <4 x i32> [[TMP5]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP11]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP15]], <4 x i32> [[TMP17]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT32X4X3_T]] [[TMP6]], <4 x i32> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP11]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP12]], <4 x i32> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT32X4X3_T]] [[TMP8]], <4 x i32> [[TMP9]], 2
-; CHECK-NEXT: store { <4 x i32>, <4 x i32>, <4 x i32> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP16]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT32X4X3_T]] [[TMP10]]
;
%val = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld1x3.v4i32.p0(ptr %addr)
@@ -3414,22 +3536,25 @@ define %struct.__neon_int32x4x3_t @ld1_x3_v4i32(ptr %addr) #0 {
define %struct.__neon_float32x4x3_t @ld1_x3_v4f32(ptr %addr) #0 {
; CHECK-LABEL: define %struct.__neon_float32x4x3_t @ld1_x3_v4f32(
; CHECK-SAME: ptr [[ADDR:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ADDR]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP11:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0(ptr [[ADDR]])
+; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP11]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } { <4 x i32> splat (i32 -1), <4 x i32> splat (i32 -1), <4 x i32> splat (i32 -1) }, <4 x i32> [[TMP13]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_FLOAT32X4X3_T:%.*]] poison, <4 x float> [[TMP5]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP11]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP15]], <4 x i32> [[TMP17]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_FLOAT32X4X3_T]] [[TMP6]], <4 x float> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP11]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP12]], <4 x i32> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_FLOAT32X4X3_T]] [[TMP8]], <4 x float> [[TMP9]], 2
-; CHECK-NEXT: store { <4 x i32>, <4 x i32>, <4 x i32> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <4 x i32>, <4 x i32>, <4 x i32> } [[TMP16]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_FLOAT32X4X3_T]] [[TMP10]]
;
%val = call %struct.__neon_float32x4x3_t @llvm.aarch64.neon.ld1x3.v4f32.p0(ptr %addr)
@@ -3439,22 +3564,25 @@ define %struct.__neon_float32x4x3_t @ld1_x3_v4f32(ptr %addr) #0 {
define %struct.__neon_int64x2x3_t @ld1_x3_v2i64(ptr %addr) #0 {
; CHECK-LABEL: define %struct.__neon_int64x2x3_t @ld1_x3_v2i64(
; CHECK-SAME: ptr [[ADDR:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ADDR]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP11:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0(ptr [[ADDR]])
+; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP11]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <2 x i64>, <2 x i64>, <2 x i64> } { <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1) }, <2 x i64> [[TMP13]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT64X2X3_T:%.*]] poison, <2 x i64> [[TMP5]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP11]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP15]], <2 x i64> [[TMP17]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT64X2X3_T]] [[TMP6]], <2 x i64> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP11]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP12]], <2 x i64> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT64X2X3_T]] [[TMP8]], <2 x i64> [[TMP9]], 2
-; CHECK-NEXT: store { <2 x i64>, <2 x i64>, <2 x i64> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP16]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT64X2X3_T]] [[TMP10]]
;
%val = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld1x3.v2i64.p0(ptr %addr)
@@ -3464,22 +3592,25 @@ define %struct.__neon_int64x2x3_t @ld1_x3_v2i64(ptr %addr) #0 {
define %struct.__neon_float64x2x3_t @ld1_x3_v2f64(ptr %addr) #0 {
; CHECK-LABEL: define %struct.__neon_float64x2x3_t @ld1_x3_v2f64(
; CHECK-SAME: ptr [[ADDR:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ADDR]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP11:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0(ptr [[ADDR]])
+; CHECK-NEXT: [[TMP13:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP11]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <2 x double>, <2 x double>, <2 x double> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = insertvalue { <2 x i64>, <2 x i64>, <2 x i64> } { <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1) }, <2 x i64> [[TMP13]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_FLOAT64X2X3_T:%.*]] poison, <2 x double> [[TMP5]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP11]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <2 x double>, <2 x double>, <2 x double> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP12:%.*]] = insertvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP15]], <2 x i64> [[TMP17]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_FLOAT64X2X3_T]] [[TMP6]], <2 x double> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP11]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <2 x double>, <2 x double>, <2 x double> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP12]], <2 x i64> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_FLOAT64X2X3_T]] [[TMP8]], <2 x double> [[TMP9]], 2
-; CHECK-NEXT: store { <2 x i64>, <2 x i64>, <2 x i64> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <2 x i64>, <2 x i64>, <2 x i64> } [[TMP16]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_FLOAT64X2X3_T]] [[TMP10]]
;
%val = call %struct.__neon_float64x2x3_t @llvm.aarch64.neon.ld1x3.v2f64.p0(ptr %addr)
@@ -3496,24 +3627,29 @@ declare %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld1x4.v1f64.p0(ptr) noun
define %struct.__neon_int8x8x4_t @ld1_x4_v8i8(ptr %addr) #0 {
; CHECK-LABEL: define %struct.__neon_int8x8x4_t @ld1_x4_v8i8(
; CHECK-SAME: ptr [[ADDR:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ADDR]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP13:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0(ptr [[ADDR]])
+; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP13]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } { <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1), <8 x i8> splat (i8 -1) }, <8 x i8> [[TMP15]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT8X8X4_T:%.*]] poison, <8 x i8> [[TMP5]], 0
+; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP13]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP17]], <8 x i8> [[TMP19]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT8X8X4_T]] [[TMP6]], <8 x i8> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP13]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP21]], <8 x i8> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT8X8X4_T]] [[TMP8]], <8 x i8> [[TMP9]], 2
+; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP13]], 3
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP4]], 3
+; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP16]], <8 x i8> [[TMP18]], 3
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue [[STRUCT___NEON_INT8X8X4_T]] [[TMP10]], <8 x i8> [[TMP11]], 3
-; CHECK-NEXT: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[TMP20]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT8X8X4_T]] [[TMP12]]
;
%val = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld1x4.v8i8.p0(ptr %addr)
@@ -3523,24 +3659,29 @@ define %struct.__neon_int8x8x4_t @ld1_x4_v8i8(ptr %addr) #0 {
define %struct.__neon_int16x4x4_t @ld1_x4_v4i16(ptr %addr) #0 {
; CHECK-LABEL: define %struct.__neon_int16x4x4_t @ld1_x4_v4i16(
; CHECK-SAME: ptr [[ADDR:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ADDR]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP13:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0(ptr [[ADDR]])
+; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP13]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } { <4 x i16> splat (i16 -1), <4 x i16> splat (i16 -1), <4 x i16> splat (i16 -1), <4 x i16> splat (i16 -1) }, <4 x i16> [[TMP15]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT16X4X4_T:%.*]] poison, <4 x i16> [[TMP5]], 0
+; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP13]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP17]], <4 x i16> [[TMP19]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT16X4X4_T]] [[TMP6]], <4 x i16> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP13]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP21]], <4 x i16> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT16X4X4_T]] [[TMP8]], <4 x i16> [[TMP9]], 2
+; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP13]], 3
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP4]], 3
+; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP16]], <4 x i16> [[TMP18]], 3
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue [[STRUCT___NEON_INT16X4X4_T]] [[TMP10]], <4 x i16> [[TMP11]], 3
-; CHECK-NEXT: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } [[TMP20]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT16X4X4_T]] [[TMP12]]
;
%val = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld1x4.v4i16.p0(ptr %addr)
@@ -3550,24 +3691,29 @@ define %struct.__neon_int16x4x4_t @ld1_x4_v4i16(ptr %addr) #0 {
define %struct.__neon_int32x2x4_t @ld1_x4_v2i32(ptr %addr) #0 {
; CHECK-LABEL: define %struct.__neon_int32x2x4_t @ld1_x4_v2i32(
; CHECK-SAME: ptr [[ADDR:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ADDR]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP13:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0(ptr [[ADDR]])
+; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP13]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } { <2 x i32> splat (i32 -1), <2 x i32> splat (i32 -1), <2 x i32> splat (i32 -1), <2 x i32> splat (i32 -1) }, <2 x i32> [[TMP15]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT32X2X4_T:%.*]] poison, <2 x i32> [[TMP5]], 0
+; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP13]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP17]], <2 x i32> [[TMP19]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT32X2X4_T]] [[TMP6]], <2 x i32> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP13]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP21]], <2 x i32> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT32X2X4_T]] [[TMP8]], <2 x i32> [[TMP9]], 2
+; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP13]], 3
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP4]], 3
+; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP16]], <2 x i32> [[TMP18]], 3
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue [[STRUCT___NEON_INT32X2X4_T]] [[TMP10]], <2 x i32> [[TMP11]], 3
-; CHECK-NEXT: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP20]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT32X2X4_T]] [[TMP12]]
;
%val = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld1x4.v2i32.p0(ptr %addr)
@@ -3577,24 +3723,29 @@ define %struct.__neon_int32x2x4_t @ld1_x4_v2i32(ptr %addr) #0 {
define %struct.__neon_float32x2x4_t @ld1_x4_v2f32(ptr %addr) #0 {
; CHECK-LABEL: define %struct.__neon_float32x2x4_t @ld1_x4_v2f32(
; CHECK-SAME: ptr [[ADDR:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ADDR]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP13:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0(ptr [[ADDR]])
+; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP13]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } { <2 x i32> splat (i32 -1), <2 x i32> splat (i32 -1), <2 x i32> splat (i32 -1), <2 x i32> splat (i32 -1) }, <2 x i32> [[TMP15]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_FLOAT32X2X4_T:%.*]] poison, <2 x float> [[TMP5]], 0
+; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP13]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP17]], <2 x i32> [[TMP19]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_FLOAT32X2X4_T]] [[TMP6]], <2 x float> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP13]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP21]], <2 x i32> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_FLOAT32X2X4_T]] [[TMP8]], <2 x float> [[TMP9]], 2
+; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP13]], 3
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <2 x float>, <2 x float>, <2 x float>, <2 x float> } [[TMP4]], 3
+; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP16]], <2 x i32> [[TMP18]], 3
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue [[STRUCT___NEON_FLOAT32X2X4_T]] [[TMP10]], <2 x float> [[TMP11]], 3
-; CHECK-NEXT: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } [[TMP20]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_FLOAT32X2X4_T]] [[TMP12]]
;
%val = call %struct.__neon_float32x2x4_t @llvm.aarch64.neon.ld1x4.v2f32.p0(ptr %addr)
@@ -3604,24 +3755,29 @@ define %struct.__neon_float32x2x4_t @ld1_x4_v2f32(ptr %addr) #0 {
define %struct.__neon_int64x1x4_t @ld1_x4_v1i64(ptr %addr) #0 {
; CHECK-LABEL: define %struct.__neon_int64x1x4_t @ld1_x4_v1i64(
; CHECK-SAME: ptr [[ADDR:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ADDR]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP13:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0(ptr [[ADDR]])
+; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP13]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } { <1 x i64> splat (i64 -1), <1 x i64> splat (i64 -1), <1 x i64> splat (i64 -1), <1 x i64> splat (i64 -1) }, <1 x i64> [[TMP15]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT64X1X4_T:%.*]] poison, <1 x i64> [[TMP5]], 0
+; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP13]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP17]], <1 x i64> [[TMP19]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT64X1X4_T]] [[TMP6]], <1 x i64> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP13]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP21]], <1 x i64> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT64X1X4_T]] [[TMP8]], <1 x i64> [[TMP9]], 2
+; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP13]], 3
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP4]], 3
+; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP16]], <1 x i64> [[TMP18]], 3
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue [[STRUCT___NEON_INT64X1X4_T]] [[TMP10]], <1 x i64> [[TMP11]], 3
-; CHECK-NEXT: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP20]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT64X1X4_T]] [[TMP12]]
;
%val = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld1x4.v1i64.p0(ptr %addr)
@@ -3631,24 +3787,29 @@ define %struct.__neon_int64x1x4_t @ld1_x4_v1i64(ptr %addr) #0 {
define %struct.__neon_float64x1x4_t @ld1_x4_v1f64(ptr %addr) #0 {
; CHECK-LABEL: define %struct.__neon_float64x1x4_t @ld1_x4_v1f64(
; CHECK-SAME: ptr [[ADDR:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ADDR]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP13:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0(ptr [[ADDR]])
+; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP13]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } { <1 x i64> splat (i64 -1), <1 x i64> splat (i64 -1), <1 x i64> splat (i64 -1), <1 x i64> splat (i64 -1) }, <1 x i64> [[TMP15]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_FLOAT64X1X4_T:%.*]] poison, <1 x double> [[TMP5]], 0
+; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP13]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP17]], <1 x i64> [[TMP19]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_FLOAT64X1X4_T]] [[TMP6]], <1 x double> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP13]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP21]], <1 x i64> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_FLOAT64X1X4_T]] [[TMP8]], <1 x double> [[TMP9]], 2
+; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP13]], 3
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <1 x double>, <1 x double>, <1 x double>, <1 x double> } [[TMP4]], 3
+; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP16]], <1 x i64> [[TMP18]], 3
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue [[STRUCT___NEON_FLOAT64X1X4_T]] [[TMP10]], <1 x double> [[TMP11]], 3
-; CHECK-NEXT: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[TMP20]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_FLOAT64X1X4_T]] [[TMP12]]
;
%val = call %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld1x4.v1f64.p0(ptr %addr)
@@ -3665,24 +3826,29 @@ declare %struct.__neon_float64x2x4_t @llvm.aarch64.neon.ld1x4.v2f64.p0(ptr) noun
define %struct.__neon_int8x16x4_t @ld1_x4_v16i8(ptr %addr) #0 {
; CHECK-LABEL: define %struct.__neon_int8x16x4_t @ld1_x4_v16i8(
; CHECK-SAME: ptr [[ADDR:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ADDR]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP13:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0(ptr [[ADDR]])
+; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP13]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } { <16 x i8> splat (i8 -1), <16 x i8> splat (i8 -1), <16 x i8> splat (i8 -1), <16 x i8> splat (i8 -1) }, <16 x i8> [[TMP15]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT8X16X4_T:%.*]] poison, <16 x i8> [[TMP5]], 0
+; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP13]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP17]], <16 x i8> [[TMP19]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT8X16X4_T]] [[TMP6]], <16 x i8> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP13]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP21]], <16 x i8> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT8X16X4_T]] [[TMP8]], <16 x i8> [[TMP9]], 2
+; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP13]], 3
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP4]], 3
+; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP16]], <16 x i8> [[TMP18]], 3
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue [[STRUCT___NEON_INT8X16X4_T]] [[TMP10]], <16 x i8> [[TMP11]], 3
-; CHECK-NEXT: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[TMP20]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT8X16X4_T]] [[TMP12]]
;
%val = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld1x4.v16i8.p0(ptr %addr)
@@ -3692,24 +3858,29 @@ define %struct.__neon_int8x16x4_t @ld1_x4_v16i8(ptr %addr) #0 {
define %struct.__neon_int16x8x4_t @ld1_x4_v8i16(ptr %addr) #0 {
; CHECK-LABEL: define %struct.__neon_int16x8x4_t @ld1_x4_v8i16(
; CHECK-SAME: ptr [[ADDR:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ADDR]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP13:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0(ptr [[ADDR]])
+; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP13]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } { <8 x i16> splat (i16 -1), <8 x i16> splat (i16 -1), <8 x i16> splat (i16 -1), <8 x i16> splat (i16 -1) }, <8 x i16> [[TMP15]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT16X8X4_T:%.*]] poison, <8 x i16> [[TMP5]], 0
+; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP13]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP17]], <8 x i16> [[TMP19]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT16X8X4_T]] [[TMP6]], <8 x i16> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP13]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP21]], <8 x i16> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT16X8X4_T]] [[TMP8]], <8 x i16> [[TMP9]], 2
+; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP13]], 3
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP4]], 3
+; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP16]], <8 x i16> [[TMP18]], 3
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue [[STRUCT___NEON_INT16X8X4_T]] [[TMP10]], <8 x i16> [[TMP11]], 3
-; CHECK-NEXT: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } [[TMP20]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT16X8X4_T]] [[TMP12]]
;
%val = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld1x4.v8i16.p0(ptr %addr)
@@ -3719,24 +3890,29 @@ define %struct.__neon_int16x8x4_t @ld1_x4_v8i16(ptr %addr) #0 {
define %struct.__neon_int32x4x4_t @ld1_x4_v4i32(ptr %addr) #0 {
; CHECK-LABEL: define %struct.__neon_int32x4x4_t @ld1_x4_v4i32(
; CHECK-SAME: ptr [[ADDR:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ADDR]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP13:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0(ptr [[ADDR]])
+; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP13]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } { <4 x i32> splat (i32 -1), <4 x i32> splat (i32 -1), <4 x i32> splat (i32 -1), <4 x i32> splat (i32 -1) }, <4 x i32> [[TMP15]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT32X4X4_T:%.*]] poison, <4 x i32> [[TMP5]], 0
+; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP13]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP17]], <4 x i32> [[TMP19]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT32X4X4_T]] [[TMP6]], <4 x i32> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP13]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP21]], <4 x i32> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT32X4X4_T]] [[TMP8]], <4 x i32> [[TMP9]], 2
+; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP13]], 3
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP4]], 3
+; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP16]], <4 x i32> [[TMP18]], 3
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue [[STRUCT___NEON_INT32X4X4_T]] [[TMP10]], <4 x i32> [[TMP11]], 3
-; CHECK-NEXT: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP20]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT32X4X4_T]] [[TMP12]]
;
%val = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld1x4.v4i32.p0(ptr %addr)
@@ -3746,24 +3922,29 @@ define %struct.__neon_int32x4x4_t @ld1_x4_v4i32(ptr %addr) #0 {
define %struct.__neon_float32x4x4_t @ld1_x4_v4f32(ptr %addr) #0 {
; CHECK-LABEL: define %struct.__neon_float32x4x4_t @ld1_x4_v4f32(
; CHECK-SAME: ptr [[ADDR:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ADDR]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP13:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0(ptr [[ADDR]])
+; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP13]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } { <4 x i32> splat (i32 -1), <4 x i32> splat (i32 -1), <4 x i32> splat (i32 -1), <4 x i32> splat (i32 -1) }, <4 x i32> [[TMP15]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_FLOAT32X4X4_T:%.*]] poison, <4 x float> [[TMP5]], 0
+; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP13]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP17]], <4 x i32> [[TMP19]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_FLOAT32X4X4_T]] [[TMP6]], <4 x float> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP13]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP21]], <4 x i32> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_FLOAT32X4X4_T]] [[TMP8]], <4 x float> [[TMP9]], 2
+; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP13]], 3
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } [[TMP4]], 3
+; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP16]], <4 x i32> [[TMP18]], 3
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue [[STRUCT___NEON_FLOAT32X4X4_T]] [[TMP10]], <4 x float> [[TMP11]], 3
-; CHECK-NEXT: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } [[TMP20]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_FLOAT32X4X4_T]] [[TMP12]]
;
%val = call %struct.__neon_float32x4x4_t @llvm.aarch64.neon.ld1x4.v4f32.p0(ptr %addr)
@@ -3773,24 +3954,29 @@ define %struct.__neon_float32x4x4_t @ld1_x4_v4f32(ptr %addr) #0 {
define %struct.__neon_int64x2x4_t @ld1_x4_v2i64(ptr %addr) #0 {
; CHECK-LABEL: define %struct.__neon_int64x2x4_t @ld1_x4_v2i64(
; CHECK-SAME: ptr [[ADDR:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ADDR]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP13:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0(ptr [[ADDR]])
+; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP13]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } { <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1) }, <2 x i64> [[TMP15]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_INT64X2X4_T:%.*]] poison, <2 x i64> [[TMP5]], 0
+; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP13]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP17]], <2 x i64> [[TMP19]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_INT64X2X4_T]] [[TMP6]], <2 x i64> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP13]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], <2 x i64> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_INT64X2X4_T]] [[TMP8]], <2 x i64> [[TMP9]], 2
+; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP13]], 3
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP4]], 3
+; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP16]], <2 x i64> [[TMP18]], 3
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue [[STRUCT___NEON_INT64X2X4_T]] [[TMP10]], <2 x i64> [[TMP11]], 3
-; CHECK-NEXT: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP20]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_INT64X2X4_T]] [[TMP12]]
;
%val = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld1x4.v2i64.p0(ptr %addr)
@@ -3800,24 +3986,29 @@ define %struct.__neon_int64x2x4_t @ld1_x4_v2i64(ptr %addr) #0 {
define %struct.__neon_float64x2x4_t @ld1_x4_v2f64(ptr %addr) #0 {
; CHECK-LABEL: define %struct.__neon_float64x2x4_t @ld1_x4_v2f64(
; CHECK-SAME: ptr [[ADDR:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[ADDR]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = xor i64 [[TMP1]], 193514046488576
+; CHECK-NEXT: [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to ptr
+; CHECK-NEXT: [[TMP13:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0(ptr [[TMP3]])
; CHECK-NEXT: [[TMP4:%.*]] = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0(ptr [[ADDR]])
+; CHECK-NEXT: [[TMP15:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP13]], 0
; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[TMP4]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = insertvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } { <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1), <2 x i64> splat (i64 -1) }, <2 x i64> [[TMP15]], 0
; CHECK-NEXT: [[TMP6:%.*]] = insertvalue [[STRUCT___NEON_FLOAT64X2X4_T:%.*]] poison, <2 x double> [[TMP5]], 0
+; CHECK-NEXT: [[TMP19:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP13]], 1
; CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[TMP4]], 1
+; CHECK-NEXT: [[TMP21:%.*]] = insertvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP17]], <2 x i64> [[TMP19]], 1
; CHECK-NEXT: [[TMP8:%.*]] = insertvalue [[STRUCT___NEON_FLOAT64X2X4_T]] [[TMP6]], <2 x double> [[TMP7]], 1
+; CHECK-NEXT: [[TMP14:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP13]], 2
; CHECK-NEXT: [[TMP9:%.*]] = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[TMP4]], 2
+; CHECK-NEXT: [[TMP16:%.*]] = insertvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP21]], <2 x i64> [[TMP14]], 2
; CHECK-NEXT: [[TMP10:%.*]] = insertvalue [[STRUCT___NEON_FLOAT64X2X4_T]] [[TMP8]], <2 x double> [[TMP9]], 2
+; CHECK-NEXT: [[TMP18:%.*]] = extractvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP13]], 3
; CHECK-NEXT: [[TMP11:%.*]] = extractvalue { <2 x double>, <2 x double>, <2 x double>, <2 x double> } [[TMP4]], 3
+; CHECK-NEXT: [[TMP20:%.*]] = insertvalue { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP16]], <2 x i64> [[TMP18]], 3
; CHECK-NEXT: [[TMP12:%.*]] = insertvalue [[STRUCT___NEON_FLOAT64X2X4_T]] [[TMP10]], <2 x double> [[TMP11]], 3
-; CHECK-NEXT: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[TMP20]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret [[STRUCT___NEON_FLOAT64X2X4_T]] [[TMP12]]
;
%val = call %struct.__neon_float64x2x4_t @llvm.aarch64.neon.ld1x4.v2f64.p0(ptr %addr)
>From 75654804da84085de2bbbab1748a36af4b4e6866 Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Sun, 9 Mar 2025 03:33:24 +0000
Subject: [PATCH 2/5] Minimize diff
---
llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 51e4f26d33d41..9ce29ba7618a2 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -4071,12 +4071,12 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
ShadowArgs.append(1, SrcShadowPtr);
ShadowArgs.append(1, Mask);
- CallInst *CI;
+ CallInst *CI =
+ IRB.CreateIntrinsic(I.getType(), I.getIntrinsicID(), ShadowArgs);
// The AVX masked load intrinsics do not have integer variants. We use the
// floating-point variants, and assume that the intrinsic will happily copy
// the shadows even if they are interpreted as "invalid" floating-point
// values (NaN etc.).
- CI = IRB.CreateIntrinsic(I.getType(), I.getIntrinsicID(), ShadowArgs);
setShadow(&I, IRB.CreateBitCast(CI, getShadowTy(&I)));
if (!MS.TrackOrigins)
>From 3a09dcb1cb5648fb20ea65c7ca1bff7fdb2c7515 Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Mon, 17 Mar 2025 23:49:22 +0000
Subject: [PATCH 3/5] Address Florian1 feedback
---
.../Instrumentation/MemorySanitizer.cpp | 18 +++++++-----------
1 file changed, 7 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 9ce29ba7618a2..be93452c3af80 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -4074,9 +4074,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
CallInst *CI =
IRB.CreateIntrinsic(I.getType(), I.getIntrinsicID(), ShadowArgs);
// The AVX masked load intrinsics do not have integer variants. We use the
- // floating-point variants, and assume that the intrinsic will happily copy
- // the shadows even if they are interpreted as "invalid" floating-point
- // values (NaN etc.).
+ // floating-point variants, which will happily copy the shadows even if
+ // they are interpreted as "invalid" floating-point values (NaN etc.).
setShadow(&I, IRB.CreateBitCast(CI, getShadowTy(&I)));
if (!MS.TrackOrigins)
@@ -4260,7 +4259,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
assert(I.getType()->isStructTy());
[[maybe_unused]] StructType *RetTy = cast<StructType>(I.getType());
assert(RetTy->getNumElements() > 0);
- assert(isa<FixedVectorType>(RetTy->getElementType(0)));
assert(RetTy->getElementType(0)->isIntOrIntVectorTy() ||
RetTy->getElementType(0)->isFPOrFPVectorTy());
for (unsigned int i = 0; i < RetTy->getNumElements(); i++)
@@ -4268,15 +4266,15 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (WithLane) {
// 2, 3 or 4 vectors, plus lane number, plus input pointer
- assert(numArgs >= 4);
- assert(numArgs <= 6);
+ assert(4 <= numArgs && numArgs <= 6);
// Return type is a struct of the input vectors
assert(RetTy->getNumElements() + 2 == numArgs);
for (unsigned int i = 0; i < RetTy->getNumElements(); i++)
assert(I.getArgOperand(i)->getType() == RetTy->getElementType(0));
- } else
+ } else {
assert(numArgs == 1);
+ }
IRBuilder<> IRB(&I);
@@ -4299,16 +4297,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
const Align Alignment = Align(1);
Type *SrcShadowTy = getShadowTy(Src);
- Value *SrcShadowPtr, *SrcOriginPtr;
- std::tie(SrcShadowPtr, SrcOriginPtr) =
+ auto [SrcShadowPtr, SrcOriginPtr] =
getShadowOriginPtr(Src, IRB, SrcShadowTy, Alignment, /*isStore*/ false);
ShadowArgs.push_back(SrcShadowPtr);
- CallInst *CI;
// The NEON vector load instructions handled by this function all have
// integer variants. It is easier to use those rather than trying to cast
// a struct of vectors of floats into a struct of vectors of integers.
- CI = IRB.CreateIntrinsic(getShadowTy(&I), I.getIntrinsicID(), ShadowArgs);
+ CallInst *CI = IRB.CreateIntrinsic(getShadowTy(&I), I.getIntrinsicID(), ShadowArgs);
setShadow(&I, CI);
if (!MS.TrackOrigins)
>From 324be7dc827f0f50774025e2ec9b2c2bc0a25d03 Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Tue, 18 Mar 2025 00:11:50 +0000
Subject: [PATCH 4/5] clang-format
---
llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index be93452c3af80..b15339fc41739 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -4304,7 +4304,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// The NEON vector load instructions handled by this function all have
// integer variants. It is easier to use those rather than trying to cast
// a struct of vectors of floats into a struct of vectors of integers.
- CallInst *CI = IRB.CreateIntrinsic(getShadowTy(&I), I.getIntrinsicID(), ShadowArgs);
+ CallInst *CI =
+ IRB.CreateIntrinsic(getShadowTy(&I), I.getIntrinsicID(), ShadowArgs);
setShadow(&I, CI);
if (!MS.TrackOrigins)
>From a74f1f5acd39b6b68c469226bed574d503fdc052 Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Wed, 19 Mar 2025 20:41:41 +0000
Subject: [PATCH 5/5] Increase alignment with Florian's feedback
---
llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index b15339fc41739..fc15991b9435e 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -4294,11 +4294,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *Src = I.getArgOperand(numArgs - 1);
assert(Src->getType()->isPointerTy() && "Source is not a pointer!");
- const Align Alignment = Align(1);
-
Type *SrcShadowTy = getShadowTy(Src);
auto [SrcShadowPtr, SrcOriginPtr] =
- getShadowOriginPtr(Src, IRB, SrcShadowTy, Alignment, /*isStore*/ false);
+ getShadowOriginPtr(Src, IRB, SrcShadowTy, Align(1), /*isStore*/ false);
ShadowArgs.push_back(SrcShadowPtr);
// The NEON vector load instructions handled by this function all have
More information about the llvm-commits mailing list