[llvm] 5d0a12d - [msan] Precommit tests for Arm NEON vector shift (#101420)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 7 16:36:21 PDT 2024
Author: Thurston Dang
Date: 2024-08-07T16:36:15-07:00
New Revision: 5d0a12d3e9b1606c36430cf908da20d19d101e04
URL: https://github.com/llvm/llvm-project/commit/5d0a12d3e9b1606c36430cf908da20d19d101e04
DIFF: https://github.com/llvm/llvm-project/commit/5d0a12d3e9b1606c36430cf908da20d19d101e04.diff
LOG: [msan] Precommit tests for Arm NEON vector shift (#101420)
MSan currently does not correctly instrument most (all?) Arm NEON vector
shift instructions. This patch shows the current state of
instrumentation.
A followup patch will apply handleVectorShiftIntrinsic to most of the
vector shift instructions and update this test accordingly.
Test forked from llvm/test/CodeGen/AArch64/arm64-vshift.ll.
Added:
llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vshift.ll
Modified:
Removed:
################################################################################
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vshift.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vshift.ll
new file mode 100644
index 0000000000000..a755562d683fb
--- /dev/null
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vshift.ll
@@ -0,0 +1,9546 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool opt --version 4
+;
+; RUN: opt < %s -passes=msan -S | FileCheck %s
+;
+; Forked from llvm/test/CodeGen/AArch64/arm64-vshift.ll
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-android9001"
+
+define <8 x i8> @sqshl8b(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <8 x i8> @sqshl8b(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0:![0-9]+]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5:[0-9]+]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
+; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i8> [[TMP3]]
+; Input IR: loads <8 x i8> operands from %A and %B and calls llvm.aarch64.neon.sqshl.v8i8; the checks above expect the shadow stored to @__msan_retval_tls (_MSPROP) to be the OR of the two loaded shadows (_MSLD, _MSLD1).
+ %tmp1 = load <8 x i8>, ptr %A
+ %tmp2 = load <8 x i8>, ptr %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @sqshl4h(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <4 x i16> @sqshl4h(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
+; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i16> [[TMP3]]
+; Input IR: loads <4 x i16> operands from %A and %B and calls llvm.aarch64.neon.sqshl.v4i16; the checks above expect the shadow stored to @__msan_retval_tls (_MSPROP) to be the OR of the two loaded shadows (_MSLD, _MSLD1).
+ %tmp1 = load <4 x i16>, ptr %A
+ %tmp2 = load <4 x i16>, ptr %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @sqshl2s(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <2 x i32> @sqshl2s(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]])
+; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i32> [[TMP3]]
+; Input IR: loads <2 x i32> operands from %A and %B and calls llvm.aarch64.neon.sqshl.v2i32; the checks above expect the shadow stored to @__msan_retval_tls (_MSPROP) to be the OR of the two loaded shadows (_MSLD, _MSLD1).
+ %tmp1 = load <2 x i32>, ptr %A
+ %tmp2 = load <2 x i32>, ptr %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @sqshl1d(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <1 x i64> @sqshl1d(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <1 x i64>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <1 x i64>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> [[TMP2]])
+; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <1 x i64> [[TMP3]]
+; Input IR: loads <1 x i64> operands from %A and %B and calls llvm.aarch64.neon.sqshl.v1i64; the checks above expect the shadow stored to @__msan_retval_tls (_MSPROP) to be the OR of the two loaded shadows (_MSLD, _MSLD1).
+ %tmp1 = load <1 x i64>, ptr %A
+ %tmp2 = load <1 x i64>, ptr %B
+ %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2)
+ ret <1 x i64> %tmp3
+}
+
+define <1 x i64> @sqshl1d_constant(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <1 x i64> @sqshl1d_constant(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> <i64 1>)
+; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <1 x i64> [[TMP3]]
+; Input IR: loads a <1 x i64> from %A and calls llvm.aarch64.neon.sqshl.v1i64 with the constant second operand <i64 1>; the checks above expect the shadow stored to @__msan_retval_tls (_MSPROP) to be the loaded shadow (_MSLD) ORed with zeroinitializer.
+ %tmp1 = load <1 x i64>, ptr %A
+ %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
+ ret <1 x i64> %tmp3
+}
+
+define i64 @sqshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define i64 @sqshl_scalar(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load i64, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 [[TMP1]], i64 [[TMP2]])
+; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP3]]
+; Input IR: loads scalar i64 operands from %A and %B and calls llvm.aarch64.neon.sqshl.i64; the checks above expect the shadow stored to @__msan_retval_tls (_MSPROP) to be the OR of the two loaded shadows (_MSLD, _MSLD1).
+ %tmp1 = load i64, ptr %A
+ %tmp2 = load i64, ptr %B
+ %tmp3 = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %tmp1, i64 %tmp2)
+ ret i64 %tmp3
+}
+
+define i64 @sqshl_scalar_constant(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define i64 @sqshl_scalar_constant(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[_MSLD]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 [[TMP1]], i64 1)
+; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP3]]
+; Input IR: loads a scalar i64 from %A and calls llvm.aarch64.neon.sqshl.i64 with the constant second operand 1; the checks above expect the shadow stored to @__msan_retval_tls (_MSPROP) to be the loaded shadow (_MSLD) ORed with 0.
+ %tmp1 = load i64, ptr %A
+ %tmp3 = call i64 @llvm.aarch64.neon.sqshl.i64(i64 %tmp1, i64 1)
+ ret i64 %tmp3
+}
+
+define <8 x i8> @uqshl8b(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <8 x i8> @uqshl8b(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
+; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i8> [[TMP3]]
+; Input IR: loads <8 x i8> operands from %A and %B and calls llvm.aarch64.neon.uqshl.v8i8; the checks above expect the shadow stored to @__msan_retval_tls (_MSPROP) to be the OR of the two loaded shadows (_MSLD, _MSLD1).
+ %tmp1 = load <8 x i8>, ptr %A
+ %tmp2 = load <8 x i8>, ptr %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @uqshl4h(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <4 x i16> @uqshl4h(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
+; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i16> [[TMP3]]
+; Input IR: loads <4 x i16> operands from %A and %B and calls llvm.aarch64.neon.uqshl.v4i16; the checks above expect the shadow stored to @__msan_retval_tls (_MSPROP) to be the OR of the two loaded shadows (_MSLD, _MSLD1).
+ %tmp1 = load <4 x i16>, ptr %A
+ %tmp2 = load <4 x i16>, ptr %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @uqshl2s(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <2 x i32> @uqshl2s(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]])
+; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i32> [[TMP3]]
+; Input IR: loads <2 x i32> operands from %A and %B and calls llvm.aarch64.neon.uqshl.v2i32; the checks above expect the shadow stored to @__msan_retval_tls (_MSPROP) to be the OR of the two loaded shadows (_MSLD, _MSLD1).
+ %tmp1 = load <2 x i32>, ptr %A
+ %tmp2 = load <2 x i32>, ptr %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+ ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @sqshl16b(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <16 x i8> @sqshl16b(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[TMP3]]
+; Input IR: loads <16 x i8> operands from %A and %B and calls llvm.aarch64.neon.sqshl.v16i8; the checks above expect the shadow stored to @__msan_retval_tls (_MSPROP) to be the OR of the two loaded shadows (_MSLD, _MSLD1).
+ %tmp1 = load <16 x i8>, ptr %A
+ %tmp2 = load <16 x i8>, ptr %B
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @sqshl8h(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <8 x i16> @sqshl8h(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
+; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i16> [[TMP3]]
+; Input IR: loads <8 x i16> operands from %A and %B and calls llvm.aarch64.neon.sqshl.v8i16; the checks above expect the shadow stored to @__msan_retval_tls (_MSPROP) to be the OR of the two loaded shadows (_MSLD, _MSLD1).
+ %tmp1 = load <8 x i16>, ptr %A
+ %tmp2 = load <8 x i16>, ptr %B
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @sqshl4s(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <4 x i32> @sqshl4s(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
+; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+; Input IR: loads <4 x i32> operands from %A and %B and calls llvm.aarch64.neon.sqshl.v4i32; the checks above expect the shadow stored to @__msan_retval_tls (_MSPROP) to be the OR of the two loaded shadows (_MSLD, _MSLD1).
+ %tmp1 = load <4 x i32>, ptr %A
+ %tmp2 = load <4 x i32>, ptr %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @sqshl2d(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <2 x i64> @sqshl2d(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
+; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i64> [[TMP3]]
+; Input IR: loads <2 x i64> operands from %A and %B and calls llvm.aarch64.neon.sqshl.v2i64; the checks above expect the shadow stored to @__msan_retval_tls (_MSPROP) to be the OR of the two loaded shadows (_MSLD, _MSLD1).
+ %tmp1 = load <2 x i64>, ptr %A
+ %tmp2 = load <2 x i64>, ptr %B
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2)
+ ret <2 x i64> %tmp3
+}
+
+define <16 x i8> @uqshl16b(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <16 x i8> @uqshl16b(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[TMP3]]
+; Input IR: loads <16 x i8> operands from %A and %B and calls llvm.aarch64.neon.uqshl.v16i8; the checks above expect the shadow stored to @__msan_retval_tls (_MSPROP) to be the OR of the two loaded shadows (_MSLD, _MSLD1).
+ %tmp1 = load <16 x i8>, ptr %A
+ %tmp2 = load <16 x i8>, ptr %B
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+ ret <16 x i8> %tmp3
+}
+
+define <8 x i16> @uqshl8h(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <8 x i16> @uqshl8h(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
+; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i16> [[TMP3]]
+; Input IR: loads <8 x i16> operands from %A and %B and calls llvm.aarch64.neon.uqshl.v8i16; the checks above expect the shadow stored to @__msan_retval_tls (_MSPROP) to be the OR of the two loaded shadows (_MSLD, _MSLD1).
+ %tmp1 = load <8 x i16>, ptr %A
+ %tmp2 = load <8 x i16>, ptr %B
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @uqshl4s(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan baseline for llvm.aarch64.neon.uqshl on <4 x i32>; both operands are loaded through %A and %B
+; CHECK-LABEL: define <4 x i32> @uqshl4s(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
+; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+;
+ %tmp1 = load <4 x i32>, ptr %A
+ %tmp2 = load <4 x i32>, ptr %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ; per the assertions above, the shadow stored to @__msan_retval_tls is the OR of the two loaded operand shadows
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @uqshl2d(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan baseline for llvm.aarch64.neon.uqshl on <2 x i64>; both operands are loaded through %A and %B
+; CHECK-LABEL: define <2 x i64> @uqshl2d(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
+; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i64> [[TMP3]]
+;
+ %tmp1 = load <2 x i64>, ptr %A
+ %tmp2 = load <2 x i64>, ptr %B
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ; per the assertions above, the shadow stored to @__msan_retval_tls is the OR of the two loaded operand shadows
+ ret <2 x i64> %tmp3
+}
+
+define <1 x i64> @uqshl1d(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan baseline for llvm.aarch64.neon.uqshl on <1 x i64>; both operands are loaded through %A and %B
+; CHECK-LABEL: define <1 x i64> @uqshl1d(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <1 x i64>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <1 x i64>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> [[TMP2]])
+; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <1 x i64> [[TMP3]]
+;
+ %tmp1 = load <1 x i64>, ptr %A
+ %tmp2 = load <1 x i64>, ptr %B
+ %tmp3 = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) ; per the assertions above, the shadow stored to @__msan_retval_tls is the OR of the two loaded operand shadows
+ ret <1 x i64> %tmp3
+}
+
+define <1 x i64> @uqshl1d_constant(ptr %A) nounwind sanitize_memory { ; MSan baseline for llvm.aarch64.neon.uqshl on <1 x i64> where the second operand is the constant <i64 1>
+; CHECK-LABEL: define <1 x i64> @uqshl1d_constant(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> <i64 1>)
+; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <1 x i64> [[TMP3]]
+;
+ %tmp1 = load <1 x i64>, ptr %A
+ %tmp3 = call <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>) ; per the assertions above, the constant operand contributes zeroinitializer, so the stored return shadow is the loaded shadow of %tmp1 OR-ed with zero
+ ret <1 x i64> %tmp3
+}
+
+define i64 @uqshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan baseline for the scalar i64 form of llvm.aarch64.neon.uqshl; both operands are loaded through %A and %B
+; CHECK-LABEL: define i64 @uqshl_scalar(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load i64, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 [[TMP1]], i64 [[TMP2]])
+; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP3]]
+;
+ %tmp1 = load i64, ptr %A
+ %tmp2 = load i64, ptr %B
+ %tmp3 = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %tmp1, i64 %tmp2) ; per the assertions above, the shadow stored to @__msan_retval_tls is the OR of the two loaded operand shadows
+ ret i64 %tmp3
+}
+
+define i64 @uqshl_scalar_constant(ptr %A) nounwind sanitize_memory { ; MSan baseline for the scalar i64 form of llvm.aarch64.neon.uqshl where the second operand is the constant 1
+; CHECK-LABEL: define i64 @uqshl_scalar_constant(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[_MSLD]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.uqshl.i64(i64 [[TMP1]], i64 1)
+; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP3]]
+;
+ %tmp1 = load i64, ptr %A
+ %tmp3 = call i64 @llvm.aarch64.neon.uqshl.i64(i64 %tmp1, i64 1) ; per the assertions above, the constant operand contributes 0, so the stored return shadow is the loaded shadow of %tmp1 OR-ed with zero
+ ret i64 %tmp3
+}
+
+declare <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone ; sqshl overloads on 64-bit-wide vectors, followed by the scalar i64 form
+declare <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+declare i64 @llvm.aarch64.neon.sqshl.i64(i64, i64) nounwind readnone
+
+
+declare <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone ; uqshl overloads on 64-bit-wide vectors, followed by the scalar i64 form
+declare <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.aarch64.neon.uqshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+declare i64 @llvm.aarch64.neon.uqshl.i64(i64, i64) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone ; sqshl overloads on 128-bit-wide vectors
+declare <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone ; uqshl overloads on 128-bit-wide vectors
+declare <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i8> @srshl8b(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan baseline for llvm.aarch64.neon.srshl on <8 x i8>; both operands are loaded through %A and %B
+; CHECK-LABEL: define <8 x i8> @srshl8b(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
+; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i8> [[TMP3]]
+;
+ %tmp1 = load <8 x i8>, ptr %A
+ %tmp2 = load <8 x i8>, ptr %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ; per the assertions above, the shadow stored to @__msan_retval_tls is the OR of the two loaded operand shadows
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @srshl4h(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan baseline for llvm.aarch64.neon.srshl on <4 x i16>; both operands are loaded through %A and %B
+; CHECK-LABEL: define <4 x i16> @srshl4h(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
+; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i16> [[TMP3]]
+;
+ %tmp1 = load <4 x i16>, ptr %A
+ %tmp2 = load <4 x i16>, ptr %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ; per the assertions above, the shadow stored to @__msan_retval_tls is the OR of the two loaded operand shadows
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @srshl2s(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan baseline for llvm.aarch64.neon.srshl on <2 x i32>; both operands are loaded through %A and %B
+; CHECK-LABEL: define <2 x i32> @srshl2s(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]])
+; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i32> [[TMP3]]
+;
+ %tmp1 = load <2 x i32>, ptr %A
+ %tmp2 = load <2 x i32>, ptr %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ; per the assertions above, the shadow stored to @__msan_retval_tls is the OR of the two loaded operand shadows
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @srshl1d(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan baseline for llvm.aarch64.neon.srshl on <1 x i64>; both operands are loaded through %A and %B
+; CHECK-LABEL: define <1 x i64> @srshl1d(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <1 x i64>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <1 x i64>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> [[TMP2]])
+; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <1 x i64> [[TMP3]]
+;
+ %tmp1 = load <1 x i64>, ptr %A
+ %tmp2 = load <1 x i64>, ptr %B
+ %tmp3 = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) ; per the assertions above, the shadow stored to @__msan_retval_tls is the OR of the two loaded operand shadows
+ ret <1 x i64> %tmp3
+}
+
+define <1 x i64> @srshl1d_constant(ptr %A) nounwind sanitize_memory { ; MSan baseline for llvm.aarch64.neon.srshl on <1 x i64> where the second operand is the constant <i64 1>
+; CHECK-LABEL: define <1 x i64> @srshl1d_constant(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> <i64 1>)
+; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <1 x i64> [[TMP3]]
+;
+ %tmp1 = load <1 x i64>, ptr %A
+ %tmp3 = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>) ; per the assertions above, the constant operand contributes zeroinitializer, so the stored return shadow is the loaded shadow of %tmp1 OR-ed with zero
+ ret <1 x i64> %tmp3
+}
+
+define i64 @srshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan baseline for the scalar i64 form of llvm.aarch64.neon.srshl; both operands are loaded through %A and %B
+; CHECK-LABEL: define i64 @srshl_scalar(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load i64, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 [[TMP1]], i64 [[TMP2]])
+; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP3]]
+;
+ %tmp1 = load i64, ptr %A
+ %tmp2 = load i64, ptr %B
+ %tmp3 = call i64 @llvm.aarch64.neon.srshl.i64(i64 %tmp1, i64 %tmp2) ; per the assertions above, the shadow stored to @__msan_retval_tls is the OR of the two loaded operand shadows
+ ret i64 %tmp3
+}
+
+define i64 @srshl_scalar_constant(ptr %A) nounwind sanitize_memory { ; MSan baseline for the scalar i64 form of llvm.aarch64.neon.srshl where the second operand is the constant 1
+; CHECK-LABEL: define i64 @srshl_scalar_constant(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[_MSLD]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 [[TMP1]], i64 1)
+; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP3]]
+;
+ %tmp1 = load i64, ptr %A
+ %tmp3 = call i64 @llvm.aarch64.neon.srshl.i64(i64 %tmp1, i64 1) ; per the assertions above, the constant operand contributes 0, so the stored return shadow is the loaded shadow of %tmp1 OR-ed with zero
+ ret i64 %tmp3
+}
+
+define <8 x i8> @urshl8b(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan baseline for llvm.aarch64.neon.urshl on <8 x i8>; both operands are loaded through %A and %B
+; CHECK-LABEL: define <8 x i8> @urshl8b(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
+; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i8> [[TMP3]]
+;
+ %tmp1 = load <8 x i8>, ptr %A
+ %tmp2 = load <8 x i8>, ptr %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ; per the assertions above, the shadow stored to @__msan_retval_tls is the OR of the two loaded operand shadows
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @urshl4h(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan baseline for llvm.aarch64.neon.urshl on <4 x i16>; both operands are loaded through %A and %B
+; CHECK-LABEL: define <4 x i16> @urshl4h(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
+; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i16> [[TMP3]]
+;
+ %tmp1 = load <4 x i16>, ptr %A
+ %tmp2 = load <4 x i16>, ptr %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ; per the assertions above, the shadow stored to @__msan_retval_tls is the OR of the two loaded operand shadows
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @urshl2s(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan baseline for llvm.aarch64.neon.urshl on <2 x i32>; both operands are loaded through %A and %B
+; CHECK-LABEL: define <2 x i32> @urshl2s(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]])
+; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i32> [[TMP3]]
+;
+ %tmp1 = load <2 x i32>, ptr %A
+ %tmp2 = load <2 x i32>, ptr %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ; per the assertions above, the shadow stored to @__msan_retval_tls is the OR of the two loaded operand shadows
+ ret <2 x i32> %tmp3
+}
+
+define <1 x i64> @urshl1d(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan baseline for llvm.aarch64.neon.urshl on <1 x i64>; both operands are loaded through %A and %B
+; CHECK-LABEL: define <1 x i64> @urshl1d(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <1 x i64>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <1 x i64>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> [[TMP2]])
+; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <1 x i64> [[TMP3]]
+;
+ %tmp1 = load <1 x i64>, ptr %A
+ %tmp2 = load <1 x i64>, ptr %B
+ %tmp3 = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) ; per the assertions above, the shadow stored to @__msan_retval_tls is the OR of the two loaded operand shadows
+ ret <1 x i64> %tmp3
+}
+
+define <1 x i64> @urshl1d_constant(ptr %A) nounwind sanitize_memory { ; MSan test: llvm.aarch64.neon.urshl.v1i64 with a constant <i64 1> second operand
+; CHECK-LABEL: define <1 x i64> @urshl1d_constant(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> <i64 1>)
+; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <1 x i64> [[TMP3]]
+;
+ %tmp1 = load <1 x i64>, ptr %A ; only loaded operand; expected output first tests %A's own shadow from @__msan_param_tls
+ %tmp3 = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>) ; intrinsic under test; second operand is a constant, so no second shadow is loaded
+ ret <1 x i64> %tmp3 ; expected return shadow: the loaded shadow or'ed with zeroinitializer, stored to @__msan_retval_tls
+}
+
+define i64 @urshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan test: scalar llvm.aarch64.neon.urshl.i64 with both operands loaded from memory
+; CHECK-LABEL: define i64 @urshl_scalar(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load i64, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 [[TMP1]], i64 [[TMP2]])
+; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP3]]
+;
+ %tmp1 = load i64, ptr %A ; first operand; expected output first tests %A's own shadow from @__msan_param_tls
+ %tmp2 = load i64, ptr %B ; second operand; %B's shadow is read from @__msan_param_tls + 8 and tested the same way
+ %tmp3 = call i64 @llvm.aarch64.neon.urshl.i64(i64 %tmp1, i64 %tmp2) ; intrinsic under test; expected output keeps the call as-is
+ ret i64 %tmp3 ; expected return shadow: or of both operands' loaded shadows, stored to @__msan_retval_tls
+}
+
+define i64 @urshl_scalar_constant(ptr %A) nounwind sanitize_memory { ; MSan test: scalar llvm.aarch64.neon.urshl.i64 with a constant second operand (i64 1)
+; CHECK-LABEL: define i64 @urshl_scalar_constant(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[_MSLD]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 [[TMP1]], i64 1)
+; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP3]]
+;
+ %tmp1 = load i64, ptr %A ; only loaded operand; expected output first tests %A's own shadow from @__msan_param_tls
+ %tmp3 = call i64 @llvm.aarch64.neon.urshl.i64(i64 %tmp1, i64 1) ; intrinsic under test; second operand is a constant, so no second shadow is loaded
+ ret i64 %tmp3 ; expected return shadow: the loaded shadow or'ed with 0, stored to @__msan_retval_tls
+}
+
+define <16 x i8> @srshl16b(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan test: llvm.aarch64.neon.srshl.v16i8 with both operands loaded from memory
+; CHECK-LABEL: define <16 x i8> @srshl16b(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[TMP3]]
+;
+ %tmp1 = load <16 x i8>, ptr %A ; first operand; expected output first tests %A's own shadow from @__msan_param_tls
+ %tmp2 = load <16 x i8>, ptr %B ; second operand; %B's shadow is read from @__msan_param_tls + 8 and tested the same way
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ; intrinsic under test; expected output keeps the call as-is
+ ret <16 x i8> %tmp3 ; expected return shadow: or of both operands' loaded shadows, stored to @__msan_retval_tls
+}
+
+define <8 x i16> @srshl8h(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan test: llvm.aarch64.neon.srshl.v8i16 with both operands loaded from memory
+; CHECK-LABEL: define <8 x i16> @srshl8h(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
+; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i16> [[TMP3]]
+;
+ %tmp1 = load <8 x i16>, ptr %A ; first operand; expected output first tests %A's own shadow from @__msan_param_tls
+ %tmp2 = load <8 x i16>, ptr %B ; second operand; %B's shadow is read from @__msan_param_tls + 8 and tested the same way
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ; intrinsic under test; expected output keeps the call as-is
+ ret <8 x i16> %tmp3 ; expected return shadow: or of both operands' loaded shadows, stored to @__msan_retval_tls
+}
+
+define <4 x i32> @srshl4s(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan test: llvm.aarch64.neon.srshl.v4i32 with both operands loaded from memory
+; CHECK-LABEL: define <4 x i32> @srshl4s(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
+; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+;
+ %tmp1 = load <4 x i32>, ptr %A ; first operand; expected output first tests %A's own shadow from @__msan_param_tls
+ %tmp2 = load <4 x i32>, ptr %B ; second operand; %B's shadow is read from @__msan_param_tls + 8 and tested the same way
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ; intrinsic under test; expected output keeps the call as-is
+ ret <4 x i32> %tmp3 ; expected return shadow: or of both operands' loaded shadows, stored to @__msan_retval_tls
+}
+
+define <2 x i64> @srshl2d(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan test: llvm.aarch64.neon.srshl.v2i64 with both operands loaded from memory
+; CHECK-LABEL: define <2 x i64> @srshl2d(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
+; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i64> [[TMP3]]
+;
+ %tmp1 = load <2 x i64>, ptr %A ; first operand; expected output first tests %A's own shadow from @__msan_param_tls
+ %tmp2 = load <2 x i64>, ptr %B ; second operand; %B's shadow is read from @__msan_param_tls + 8 and tested the same way
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ; intrinsic under test; expected output keeps the call as-is
+ ret <2 x i64> %tmp3 ; expected return shadow: or of both operands' loaded shadows, stored to @__msan_retval_tls
+}
+
+define <16 x i8> @urshl16b(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan test: llvm.aarch64.neon.urshl.v16i8 with both operands loaded from memory
+; CHECK-LABEL: define <16 x i8> @urshl16b(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[TMP3]]
+;
+ %tmp1 = load <16 x i8>, ptr %A ; first operand; expected output first tests %A's own shadow from @__msan_param_tls
+ %tmp2 = load <16 x i8>, ptr %B ; second operand; %B's shadow is read from @__msan_param_tls + 8 and tested the same way
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ; intrinsic under test; expected output keeps the call as-is
+ ret <16 x i8> %tmp3 ; expected return shadow: or of both operands' loaded shadows, stored to @__msan_retval_tls
+}
+
+define <8 x i16> @urshl8h(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan test: llvm.aarch64.neon.urshl.v8i16 with both operands loaded from memory
+; CHECK-LABEL: define <8 x i16> @urshl8h(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
+; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i16> [[TMP3]]
+;
+ %tmp1 = load <8 x i16>, ptr %A ; first operand; expected output first tests %A's own shadow from @__msan_param_tls
+ %tmp2 = load <8 x i16>, ptr %B ; second operand; %B's shadow is read from @__msan_param_tls + 8 and tested the same way
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ; intrinsic under test; expected output keeps the call as-is
+ ret <8 x i16> %tmp3 ; expected return shadow: or of both operands' loaded shadows, stored to @__msan_retval_tls
+}
+
+define <4 x i32> @urshl4s(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan test: llvm.aarch64.neon.urshl.v4i32 with both operands loaded from memory
+; CHECK-LABEL: define <4 x i32> @urshl4s(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
+; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+;
+ %tmp1 = load <4 x i32>, ptr %A ; first operand; expected output first tests %A's own shadow from @__msan_param_tls
+ %tmp2 = load <4 x i32>, ptr %B ; second operand; %B's shadow is read from @__msan_param_tls + 8 and tested the same way
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ; intrinsic under test; expected output keeps the call as-is
+ ret <4 x i32> %tmp3 ; expected return shadow: or of both operands' loaded shadows, stored to @__msan_retval_tls
+}
+
+define <2 x i64> @urshl2d(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan test: llvm.aarch64.neon.urshl.v2i64 with both operands loaded from memory
+; CHECK-LABEL: define <2 x i64> @urshl2d(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
+; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i64> [[TMP3]]
+;
+ %tmp1 = load <2 x i64>, ptr %A ; first operand; expected output first tests %A's own shadow from @__msan_param_tls
+ %tmp2 = load <2 x i64>, ptr %B ; second operand; %B's shadow is read from @__msan_param_tls + 8 and tested the same way
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ; intrinsic under test; expected output keeps the call as-is
+ ret <2 x i64> %tmp3 ; expected return shadow: or of both operands' loaded shadows, stored to @__msan_retval_tls
+}
+
+declare <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone ; srshl overloads: v8i8, v4i16, v2i32, v1i64 vectors and scalar i64
+declare <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+declare i64 @llvm.aarch64.neon.srshl.i64(i64, i64) nounwind readnone
+
+declare <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone ; urshl overloads: v8i8, v4i16, v2i32, v1i64 vectors and scalar i64
+declare <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+declare i64 @llvm.aarch64.neon.urshl.i64(i64, i64) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone ; srshl overloads: v16i8, v8i16, v4i32, v2i64 vectors
+declare <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone ; urshl overloads: v16i8, v8i16, v4i32, v2i64 vectors
+declare <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+define <8 x i8> @sqrshl8b(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan test: llvm.aarch64.neon.sqrshl.v8i8 with both operands loaded from memory
+; CHECK-LABEL: define <8 x i8> @sqrshl8b(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
+; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i8> [[TMP3]]
+;
+ %tmp1 = load <8 x i8>, ptr %A ; first operand; expected output first tests %A's own shadow from @__msan_param_tls
+ %tmp2 = load <8 x i8>, ptr %B ; second operand; %B's shadow is read from @__msan_param_tls + 8 and tested the same way
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ; intrinsic under test; expected output keeps the call as-is
+ ret <8 x i8> %tmp3 ; expected return shadow: or of both operands' loaded shadows, stored to @__msan_retval_tls
+}
+
+define <4 x i16> @sqrshl4h(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan test: llvm.aarch64.neon.sqrshl.v4i16 with both operands loaded from memory
+; CHECK-LABEL: define <4 x i16> @sqrshl4h(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
+; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i16> [[TMP3]]
+;
+ %tmp1 = load <4 x i16>, ptr %A ; first operand; expected output first tests %A's own shadow from @__msan_param_tls
+ %tmp2 = load <4 x i16>, ptr %B ; second operand; %B's shadow is read from @__msan_param_tls + 8 and tested the same way
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ; intrinsic under test; expected output keeps the call as-is
+ ret <4 x i16> %tmp3 ; expected return shadow: or of both operands' loaded shadows, stored to @__msan_retval_tls
+}
+
+define <2 x i32> @sqrshl2s(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan test: llvm.aarch64.neon.sqrshl.v2i32 with both operands loaded from memory
+; CHECK-LABEL: define <2 x i32> @sqrshl2s(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]])
+; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i32> [[TMP3]]
+;
+ %tmp1 = load <2 x i32>, ptr %A ; first operand; expected output first tests %A's own shadow from @__msan_param_tls
+ %tmp2 = load <2 x i32>, ptr %B ; second operand; %B's shadow is read from @__msan_param_tls + 8 and tested the same way
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ; intrinsic under test; expected output keeps the call as-is
+ ret <2 x i32> %tmp3 ; expected return shadow: or of both operands' loaded shadows, stored to @__msan_retval_tls
+}
+
+define <8 x i8> @uqrshl8b(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan test: llvm.aarch64.neon.uqrshl.v8i8 with both operands loaded from memory
+; CHECK-LABEL: define <8 x i8> @uqrshl8b(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
+; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i8> [[TMP3]]
+;
+ %tmp1 = load <8 x i8>, ptr %A ; first operand; expected output first tests %A's own shadow from @__msan_param_tls
+ %tmp2 = load <8 x i8>, ptr %B ; second operand; %B's shadow is read from @__msan_param_tls + 8 and tested the same way
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) ; intrinsic under test; expected output keeps the call as-is
+ ret <8 x i8> %tmp3 ; expected return shadow: or of both operands' loaded shadows, stored to @__msan_retval_tls
+}
+
+define <4 x i16> @uqrshl4h(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan coverage: @llvm.aarch64.neon.uqrshl.v4i16 applied to two <4 x i16> values loaded through %A and %B
+; CHECK-LABEL: define <4 x i16> @uqrshl4h(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
+; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i16> [[TMP3]]
+;
+ %tmp1 = load <4 x i16>, ptr %A ; operand 0; expected: call @__msan_warning_noreturn if the i64 at @__msan_param_tls is nonzero, then read shadow _MSLD from (ptrtoint %A) xor 193514046488576
+ %tmp2 = load <4 x i16>, ptr %B ; operand 1; expected: call @__msan_warning_noreturn if the i64 at @__msan_param_tls+8 is nonzero, then read shadow _MSLD1 from (ptrtoint %B) xor 193514046488576
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) ; expected: call emitted unchanged, with result shadow _MSPROP = or _MSLD, _MSLD1
+ ret <4 x i16> %tmp3 ; expected: _MSPROP is stored to @__msan_retval_tls right before this ret
+}
+
+define <2 x i32> @uqrshl2s(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan coverage: @llvm.aarch64.neon.uqrshl.v2i32 applied to two <2 x i32> values loaded through %A and %B
+; CHECK-LABEL: define <2 x i32> @uqrshl2s(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]])
+; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i32> [[TMP3]]
+;
+ %tmp1 = load <2 x i32>, ptr %A ; operand 0; expected: call @__msan_warning_noreturn if the i64 at @__msan_param_tls is nonzero, then read shadow _MSLD from (ptrtoint %A) xor 193514046488576
+ %tmp2 = load <2 x i32>, ptr %B ; operand 1; expected: call @__msan_warning_noreturn if the i64 at @__msan_param_tls+8 is nonzero, then read shadow _MSLD1 from (ptrtoint %B) xor 193514046488576
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) ; expected: call emitted unchanged, with result shadow _MSPROP = or _MSLD, _MSLD1
+ ret <2 x i32> %tmp3 ; expected: _MSPROP is stored to @__msan_retval_tls right before this ret
+}
+
+define <16 x i8> @sqrshl16b(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan coverage: @llvm.aarch64.neon.sqrshl.v16i8 applied to two <16 x i8> values loaded through %A and %B
+; CHECK-LABEL: define <16 x i8> @sqrshl16b(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[TMP3]]
+;
+ %tmp1 = load <16 x i8>, ptr %A ; operand 0; expected: call @__msan_warning_noreturn if the i64 at @__msan_param_tls is nonzero, then read shadow _MSLD from (ptrtoint %A) xor 193514046488576
+ %tmp2 = load <16 x i8>, ptr %B ; operand 1; expected: call @__msan_warning_noreturn if the i64 at @__msan_param_tls+8 is nonzero, then read shadow _MSLD1 from (ptrtoint %B) xor 193514046488576
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ; expected: call emitted unchanged, with result shadow _MSPROP = or _MSLD, _MSLD1
+ ret <16 x i8> %tmp3 ; expected: _MSPROP is stored to @__msan_retval_tls right before this ret
+}
+
+define <8 x i16> @sqrshl8h(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan coverage: @llvm.aarch64.neon.sqrshl.v8i16 applied to two <8 x i16> values loaded through %A and %B
+; CHECK-LABEL: define <8 x i16> @sqrshl8h(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
+; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i16> [[TMP3]]
+;
+ %tmp1 = load <8 x i16>, ptr %A ; operand 0; expected: call @__msan_warning_noreturn if the i64 at @__msan_param_tls is nonzero, then read shadow _MSLD from (ptrtoint %A) xor 193514046488576
+ %tmp2 = load <8 x i16>, ptr %B ; operand 1; expected: call @__msan_warning_noreturn if the i64 at @__msan_param_tls+8 is nonzero, then read shadow _MSLD1 from (ptrtoint %B) xor 193514046488576
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ; expected: call emitted unchanged, with result shadow _MSPROP = or _MSLD, _MSLD1
+ ret <8 x i16> %tmp3 ; expected: _MSPROP is stored to @__msan_retval_tls right before this ret
+}
+
+define <4 x i32> @sqrshl4s(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan coverage: @llvm.aarch64.neon.sqrshl.v4i32 applied to two <4 x i32> values loaded through %A and %B
+; CHECK-LABEL: define <4 x i32> @sqrshl4s(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
+; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+;
+ %tmp1 = load <4 x i32>, ptr %A ; operand 0; expected: call @__msan_warning_noreturn if the i64 at @__msan_param_tls is nonzero, then read shadow _MSLD from (ptrtoint %A) xor 193514046488576
+ %tmp2 = load <4 x i32>, ptr %B ; operand 1; expected: call @__msan_warning_noreturn if the i64 at @__msan_param_tls+8 is nonzero, then read shadow _MSLD1 from (ptrtoint %B) xor 193514046488576
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ; expected: call emitted unchanged, with result shadow _MSPROP = or _MSLD, _MSLD1
+ ret <4 x i32> %tmp3 ; expected: _MSPROP is stored to @__msan_retval_tls right before this ret
+}
+
+define <2 x i64> @sqrshl2d(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan coverage: @llvm.aarch64.neon.sqrshl.v2i64 applied to two <2 x i64> values loaded through %A and %B
+; CHECK-LABEL: define <2 x i64> @sqrshl2d(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
+; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i64> [[TMP3]]
+;
+ %tmp1 = load <2 x i64>, ptr %A ; operand 0; expected: call @__msan_warning_noreturn if the i64 at @__msan_param_tls is nonzero, then read shadow _MSLD from (ptrtoint %A) xor 193514046488576
+ %tmp2 = load <2 x i64>, ptr %B ; operand 1; expected: call @__msan_warning_noreturn if the i64 at @__msan_param_tls+8 is nonzero, then read shadow _MSLD1 from (ptrtoint %B) xor 193514046488576
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ; expected: call emitted unchanged, with result shadow _MSPROP = or _MSLD, _MSLD1
+ ret <2 x i64> %tmp3 ; expected: _MSPROP is stored to @__msan_retval_tls right before this ret
+}
+
+define <1 x i64> @sqrshl1d(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan coverage: @llvm.aarch64.neon.sqrshl.v1i64 applied to two <1 x i64> values loaded through %A and %B
+; CHECK-LABEL: define <1 x i64> @sqrshl1d(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <1 x i64>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <1 x i64>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> [[TMP2]])
+; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <1 x i64> [[TMP3]]
+;
+ %tmp1 = load <1 x i64>, ptr %A ; operand 0; expected: call @__msan_warning_noreturn if the i64 at @__msan_param_tls is nonzero, then read shadow _MSLD from (ptrtoint %A) xor 193514046488576
+ %tmp2 = load <1 x i64>, ptr %B ; operand 1; expected: call @__msan_warning_noreturn if the i64 at @__msan_param_tls+8 is nonzero, then read shadow _MSLD1 from (ptrtoint %B) xor 193514046488576
+ %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) ; expected: call emitted unchanged, with result shadow _MSPROP = or _MSLD, _MSLD1
+ ret <1 x i64> %tmp3 ; expected: _MSPROP is stored to @__msan_retval_tls right before this ret
+}
+
+define <1 x i64> @sqrshl1d_constant(ptr %A) nounwind sanitize_memory { ; MSan coverage: @llvm.aarch64.neon.sqrshl.v1i64 with operand 0 loaded through %A and a constant operand 1
+; CHECK-LABEL: define <1 x i64> @sqrshl1d_constant(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> <i64 1>)
+; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <1 x i64> [[TMP3]]
+;
+ %tmp1 = load <1 x i64>, ptr %A ; operand 0; expected: call @__msan_warning_noreturn if the i64 at @__msan_param_tls is nonzero, then read shadow _MSLD from (ptrtoint %A) xor 193514046488576
+ %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>) ; operand 1 is the constant <i64 1>; expected result shadow _MSPROP = or _MSLD, zeroinitializer
+ ret <1 x i64> %tmp3 ; expected: _MSPROP is stored to @__msan_retval_tls right before this ret
+}
+
+define i64 @sqrshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan coverage: @llvm.aarch64.neon.sqrshl.i64 applied to two scalar i64 values loaded through %A and %B
+; CHECK-LABEL: define i64 @sqrshl_scalar(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load i64, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 [[TMP1]], i64 [[TMP2]])
+; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP3]]
+;
+ %tmp1 = load i64, ptr %A ; operand 0; expected: call @__msan_warning_noreturn if the i64 at @__msan_param_tls is nonzero, then read shadow _MSLD from (ptrtoint %A) xor 193514046488576
+ %tmp2 = load i64, ptr %B ; operand 1; expected: call @__msan_warning_noreturn if the i64 at @__msan_param_tls+8 is nonzero, then read shadow _MSLD1 from (ptrtoint %B) xor 193514046488576
+ %tmp3 = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %tmp1, i64 %tmp2) ; expected: call emitted unchanged, with result shadow _MSPROP = or _MSLD, _MSLD1
+ ret i64 %tmp3 ; expected: _MSPROP is stored to @__msan_retval_tls right before this ret
+}
+
+define i64 @sqrshl_scalar_constant(ptr %A) nounwind sanitize_memory { ; MSan coverage: @llvm.aarch64.neon.sqrshl.i64 with operand 0 loaded through %A and a constant operand 1
+; CHECK-LABEL: define i64 @sqrshl_scalar_constant(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[_MSLD]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 [[TMP1]], i64 1)
+; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP3]]
+;
+ %tmp1 = load i64, ptr %A ; operand 0; expected: call @__msan_warning_noreturn if the i64 at @__msan_param_tls is nonzero, then read shadow _MSLD from (ptrtoint %A) xor 193514046488576
+ %tmp3 = call i64 @llvm.aarch64.neon.sqrshl.i64(i64 %tmp1, i64 1) ; operand 1 is the constant 1; expected result shadow _MSPROP = or _MSLD, 0
+ ret i64 %tmp3 ; expected: _MSPROP is stored to @__msan_retval_tls right before this ret
+}
+
+define <16 x i8> @uqrshl16b(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan coverage: @llvm.aarch64.neon.uqrshl.v16i8 applied to two <16 x i8> values loaded through %A and %B
+; CHECK-LABEL: define <16 x i8> @uqrshl16b(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[TMP3]]
+;
+ %tmp1 = load <16 x i8>, ptr %A ; operand 0; expected: call @__msan_warning_noreturn if the i64 at @__msan_param_tls is nonzero, then read shadow _MSLD from (ptrtoint %A) xor 193514046488576
+ %tmp2 = load <16 x i8>, ptr %B ; operand 1; expected: call @__msan_warning_noreturn if the i64 at @__msan_param_tls+8 is nonzero, then read shadow _MSLD1 from (ptrtoint %B) xor 193514046488576
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) ; expected: call emitted unchanged, with result shadow _MSPROP = or _MSLD, _MSLD1
+ ret <16 x i8> %tmp3 ; expected: _MSPROP is stored to @__msan_retval_tls right before this ret
+}
+
+define <8 x i16> @uqrshl8h(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan coverage: @llvm.aarch64.neon.uqrshl.v8i16 applied to two <8 x i16> values loaded through %A and %B
+; CHECK-LABEL: define <8 x i16> @uqrshl8h(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
+; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i16> [[TMP3]]
+;
+ %tmp1 = load <8 x i16>, ptr %A ; operand 0; expected: call @__msan_warning_noreturn if the i64 at @__msan_param_tls is nonzero, then read shadow _MSLD from (ptrtoint %A) xor 193514046488576
+ %tmp2 = load <8 x i16>, ptr %B ; operand 1; expected: call @__msan_warning_noreturn if the i64 at @__msan_param_tls+8 is nonzero, then read shadow _MSLD1 from (ptrtoint %B) xor 193514046488576
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) ; expected: call emitted unchanged, with result shadow _MSPROP = or _MSLD, _MSLD1
+ ret <8 x i16> %tmp3 ; expected: _MSPROP is stored to @__msan_retval_tls right before this ret
+}
+
+define <4 x i32> @uqrshl4s(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan coverage: @llvm.aarch64.neon.uqrshl.v4i32 applied to two <4 x i32> values loaded through %A and %B
+; CHECK-LABEL: define <4 x i32> @uqrshl4s(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
+; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+;
+ %tmp1 = load <4 x i32>, ptr %A ; operand 0; expected: call @__msan_warning_noreturn if the i64 at @__msan_param_tls is nonzero, then read shadow _MSLD from (ptrtoint %A) xor 193514046488576
+ %tmp2 = load <4 x i32>, ptr %B ; operand 1; expected: call @__msan_warning_noreturn if the i64 at @__msan_param_tls+8 is nonzero, then read shadow _MSLD1 from (ptrtoint %B) xor 193514046488576
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) ; expected: call emitted unchanged, with result shadow _MSPROP = or _MSLD, _MSLD1
+ ret <4 x i32> %tmp3 ; expected: _MSPROP is stored to @__msan_retval_tls right before this ret
+}
+
+define <2 x i64> @uqrshl2d(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan coverage: @llvm.aarch64.neon.uqrshl.v2i64 applied to two <2 x i64> values loaded through %A and %B
+; CHECK-LABEL: define <2 x i64> @uqrshl2d(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
+; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i64> [[TMP3]]
+;
+ %tmp1 = load <2 x i64>, ptr %A ; operand 0; expected: call @__msan_warning_noreturn if the i64 at @__msan_param_tls is nonzero, then read shadow _MSLD from (ptrtoint %A) xor 193514046488576
+ %tmp2 = load <2 x i64>, ptr %B ; operand 1; expected: call @__msan_warning_noreturn if the i64 at @__msan_param_tls+8 is nonzero, then read shadow _MSLD1 from (ptrtoint %B) xor 193514046488576
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2) ; expected: call emitted unchanged, with result shadow _MSPROP = or _MSLD, _MSLD1
+ ret <2 x i64> %tmp3 ; expected: _MSPROP is stored to @__msan_retval_tls right before this ret
+}
+
+define <1 x i64> @uqrshl1d(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan coverage: @llvm.aarch64.neon.uqrshl.v1i64 applied to two <1 x i64> values loaded through %A and %B
+; CHECK-LABEL: define <1 x i64> @uqrshl1d(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <1 x i64>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <1 x i64>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> [[TMP2]])
+; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <1 x i64> [[TMP3]]
+;
+ %tmp1 = load <1 x i64>, ptr %A ; operand 0; expected: call @__msan_warning_noreturn if the i64 at @__msan_param_tls is nonzero, then read shadow _MSLD from (ptrtoint %A) xor 193514046488576
+ %tmp2 = load <1 x i64>, ptr %B ; operand 1; expected: call @__msan_warning_noreturn if the i64 at @__msan_param_tls+8 is nonzero, then read shadow _MSLD1 from (ptrtoint %B) xor 193514046488576
+ %tmp3 = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2) ; expected: call emitted unchanged, with result shadow _MSPROP = or _MSLD, _MSLD1
+ ret <1 x i64> %tmp3 ; expected: _MSPROP is stored to @__msan_retval_tls right before this ret
+}
+
+define <1 x i64> @uqrshl1d_constant(ptr %A) nounwind sanitize_memory { ; MSan coverage: @llvm.aarch64.neon.uqrshl.v1i64 with operand 0 loaded through %A and a constant operand 1
+; CHECK-LABEL: define <1 x i64> @uqrshl1d_constant(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> <i64 1>)
+; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <1 x i64> [[TMP3]]
+;
+ %tmp1 = load <1 x i64>, ptr %A ; operand 0; expected: call @__msan_warning_noreturn if the i64 at @__msan_param_tls is nonzero, then read shadow _MSLD from (ptrtoint %A) xor 193514046488576
+ %tmp3 = call <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>) ; operand 1 is the constant <i64 1>; expected result shadow _MSPROP = or _MSLD, zeroinitializer
+ ret <1 x i64> %tmp3 ; expected: _MSPROP is stored to @__msan_retval_tls right before this ret
+}
+
+define i64 @uqrshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan coverage: @llvm.aarch64.neon.uqrshl.i64 applied to two scalar i64 values loaded through %A and %B
+; CHECK-LABEL: define i64 @uqrshl_scalar(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load i64, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 [[TMP1]], i64 [[TMP2]])
+; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP3]]
+;
+ %tmp1 = load i64, ptr %A ; operand 0; expected: call @__msan_warning_noreturn if the i64 at @__msan_param_tls is nonzero, then read shadow _MSLD from (ptrtoint %A) xor 193514046488576
+ %tmp2 = load i64, ptr %B ; operand 1; expected: call @__msan_warning_noreturn if the i64 at @__msan_param_tls+8 is nonzero, then read shadow _MSLD1 from (ptrtoint %B) xor 193514046488576
+ %tmp3 = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %tmp1, i64 %tmp2) ; expected: call emitted unchanged, with result shadow _MSPROP = or _MSLD, _MSLD1
+ ret i64 %tmp3 ; expected: _MSPROP is stored to @__msan_retval_tls right before this ret
+}
+
+; uqrshl_scalar_constant: scalar i64 llvm.aarch64.neon.uqrshl with a constant
+; shift amount of 1. The CHECK lines expect a shadow check on pointer %A before
+; the load, and a return shadow equal to the loaded shadow OR'd with 0.
+define i64 @uqrshl_scalar_constant(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define i64 @uqrshl_scalar_constant(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[_MSLD]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 [[TMP1]], i64 1)
+; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP3]]
+;
+ %tmp1 = load i64, ptr %A
+ %tmp3 = call i64 @llvm.aarch64.neon.uqrshl.i64(i64 %tmp1, i64 1)
+ ret i64 %tmp3
+}
+
+; Declarations for the llvm.aarch64.neon.sqrshl / uqrshl intrinsics: first the
+; 64-bit vector and scalar i64 forms, then the 128-bit vector forms. Each takes
+; two operands of the result type.
+declare <8 x i8> @llvm.aarch64.neon.sqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.sqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.sqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.aarch64.neon.sqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+declare i64 @llvm.aarch64.neon.sqrshl.i64(i64, i64) nounwind readnone
+
+declare <8 x i8> @llvm.aarch64.neon.uqrshl.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.uqrshl.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.uqrshl.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.aarch64.neon.uqrshl.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+declare i64 @llvm.aarch64.neon.uqrshl.i64(i64, i64) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.sqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.sqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.sqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.sqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.uqrshl.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.uqrshl.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.uqrshl.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.uqrshl.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+; urshr8b: <8 x i8> llvm.aarch64.neon.urshl with a constant shift vector of all
+; -1 (presumably the IR form of URSHR #1, going by the test name -- confirm
+; against the Arm URSHL description). Expected instrumentation: the shadow of
+; pointer %A is checked before the load, and the return shadow is the loaded
+; shadow OR'd with zeroinitializer.
+define <8 x i8> @urshr8b(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <8 x i8> @urshr8b(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i8> [[TMP3]]
+;
+ %tmp1 = load <8 x i8>, ptr %A
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ ret <8 x i8> %tmp3
+}
+
+; urshr4h: <4 x i16> llvm.aarch64.neon.urshl whose shift operand is the constant
+; splat of -1. The CHECK lines expect a shadow check on pointer %A and a return
+; shadow equal to the loaded shadow OR'd with zeroinitializer.
+define <4 x i16> @urshr4h(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <4 x i16> @urshr4h(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
+; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i16> [[TMP3]]
+;
+ %tmp1 = load <4 x i16>, ptr %A
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
+ ret <4 x i16> %tmp3
+}
+
+; urshr2s: <2 x i32> llvm.aarch64.neon.urshl whose shift operand is the constant
+; splat of -1. The CHECK lines expect a shadow check on pointer %A and a return
+; shadow equal to the loaded shadow OR'd with zeroinitializer.
+define <2 x i32> @urshr2s(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <2 x i32> @urshr2s(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> <i32 -1, i32 -1>)
+; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i32> [[TMP3]]
+;
+ %tmp1 = load <2 x i32>, ptr %A
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
+ ret <2 x i32> %tmp3
+}
+
+; urshr16b: 128-bit <16 x i8> llvm.aarch64.neon.urshl with a constant shift
+; vector of all -1. The CHECK lines expect a shadow check on pointer %A, a
+; 16-byte-aligned shadow load, and a return shadow equal to the loaded shadow
+; OR'd with zeroinitializer.
+define <16 x i8> @urshr16b(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <16 x i8> @urshr16b(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[TMP3]]
+;
+ %tmp1 = load <16 x i8>, ptr %A
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ ret <16 x i8> %tmp3
+}
+
+; urshr8h: 128-bit <8 x i16> llvm.aarch64.neon.urshl with a constant shift
+; vector of all -1. The CHECK lines expect a shadow check on pointer %A and a
+; return shadow equal to the loaded shadow OR'd with zeroinitializer.
+define <8 x i16> @urshr8h(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <8 x i16> @urshr8h(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
+; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i16> [[TMP3]]
+;
+ %tmp1 = load <8 x i16>, ptr %A
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
+ ret <8 x i16> %tmp3
+}
+
+; urshr4s: 128-bit <4 x i32> llvm.aarch64.neon.urshl with a constant shift
+; vector of all -1. The CHECK lines expect a shadow check on pointer %A and a
+; return shadow equal to the loaded shadow OR'd with zeroinitializer.
+define <4 x i32> @urshr4s(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <4 x i32> @urshr4s(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
+; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+;
+ %tmp1 = load <4 x i32>, ptr %A
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
+ ret <4 x i32> %tmp3
+}
+
+; urshr2d: 128-bit <2 x i64> llvm.aarch64.neon.urshl with a constant shift
+; vector of all -1. The CHECK lines expect a shadow check on pointer %A and a
+; return shadow equal to the loaded shadow OR'd with zeroinitializer.
+define <2 x i64> @urshr2d(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <2 x i64> @urshr2d(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> <i64 -1, i64 -1>)
+; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i64> [[TMP3]]
+;
+ %tmp1 = load <2 x i64>, ptr %A
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
+ ret <2 x i64> %tmp3
+}
+
+; urshr1d: single-element <1 x i64> llvm.aarch64.neon.urshl with the constant
+; shift operand <i64 -1>. The CHECK lines expect a shadow check on pointer %A
+; and a return shadow equal to the loaded shadow OR'd with zeroinitializer.
+define <1 x i64> @urshr1d(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <1 x i64> @urshr1d(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> <i64 -1>)
+; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <1 x i64> [[TMP3]]
+;
+ %tmp1 = load <1 x i64>, ptr %A
+ %tmp3 = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 -1>)
+ ret <1 x i64> %tmp3
+}
+
+; urshr_scalar: scalar i64 llvm.aarch64.neon.urshl with a constant shift amount
+; of -1. The CHECK lines expect a shadow check on pointer %A and a return
+; shadow equal to the loaded shadow OR'd with 0.
+define i64 @urshr_scalar(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define i64 @urshr_scalar(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[_MSLD]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 [[TMP1]], i64 -1)
+; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP3]]
+;
+ %tmp1 = load i64, ptr %A
+ %tmp3 = call i64 @llvm.aarch64.neon.urshl.i64(i64 %tmp1, i64 -1)
+ ret i64 %tmp3
+}
+
+; srshr8b: <8 x i8> llvm.aarch64.neon.srshl with a constant shift vector of all
+; -1 (presumably the IR form of SRSHR #1, going by the test name -- confirm
+; against the Arm SRSHL description). Expected instrumentation: the shadow of
+; pointer %A is checked before the load, and the return shadow is the loaded
+; shadow OR'd with zeroinitializer.
+define <8 x i8> @srshr8b(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <8 x i8> @srshr8b(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i8> [[TMP3]]
+;
+ %tmp1 = load <8 x i8>, ptr %A
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ ret <8 x i8> %tmp3
+}
+
+; srshr4h: <4 x i16> llvm.aarch64.neon.srshl whose shift operand is the constant
+; splat of -1. The CHECK lines expect a shadow check on pointer %A and a return
+; shadow equal to the loaded shadow OR'd with zeroinitializer.
+define <4 x i16> @srshr4h(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <4 x i16> @srshr4h(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
+; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i16> [[TMP3]]
+;
+ %tmp1 = load <4 x i16>, ptr %A
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
+ ret <4 x i16> %tmp3
+}
+
+; srshr2s: <2 x i32> llvm.aarch64.neon.srshl whose shift operand is the constant
+; splat of -1. The CHECK lines expect a shadow check on pointer %A and a return
+; shadow equal to the loaded shadow OR'd with zeroinitializer.
+define <2 x i32> @srshr2s(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <2 x i32> @srshr2s(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> <i32 -1, i32 -1>)
+; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i32> [[TMP3]]
+;
+ %tmp1 = load <2 x i32>, ptr %A
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
+ ret <2 x i32> %tmp3
+}
+
+; srshr16b: 128-bit <16 x i8> llvm.aarch64.neon.srshl with a constant shift
+; vector of all -1. The CHECK lines expect a shadow check on pointer %A, a
+; 16-byte-aligned shadow load, and a return shadow equal to the loaded shadow
+; OR'd with zeroinitializer.
+define <16 x i8> @srshr16b(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <16 x i8> @srshr16b(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[TMP3]]
+;
+ %tmp1 = load <16 x i8>, ptr %A
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ ret <16 x i8> %tmp3
+}
+
+; srshr8h: 128-bit <8 x i16> llvm.aarch64.neon.srshl with a constant shift
+; vector of all -1. The CHECK lines expect a shadow check on pointer %A and a
+; return shadow equal to the loaded shadow OR'd with zeroinitializer.
+define <8 x i16> @srshr8h(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <8 x i16> @srshr8h(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
+; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i16> [[TMP3]]
+;
+ %tmp1 = load <8 x i16>, ptr %A
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
+ ret <8 x i16> %tmp3
+}
+
+; srshr4s: 128-bit <4 x i32> llvm.aarch64.neon.srshl with a constant shift
+; vector of all -1. The CHECK lines expect a shadow check on pointer %A and a
+; return shadow equal to the loaded shadow OR'd with zeroinitializer.
+define <4 x i32> @srshr4s(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <4 x i32> @srshr4s(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
+; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+;
+ %tmp1 = load <4 x i32>, ptr %A
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
+ ret <4 x i32> %tmp3
+}
+
+; srshr2d: 128-bit <2 x i64> llvm.aarch64.neon.srshl with a constant shift
+; vector of all -1. The CHECK lines expect a shadow check on pointer %A and a
+; return shadow equal to the loaded shadow OR'd with zeroinitializer.
+define <2 x i64> @srshr2d(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <2 x i64> @srshr2d(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> <i64 -1, i64 -1>)
+; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i64> [[TMP3]]
+;
+ %tmp1 = load <2 x i64>, ptr %A
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
+ ret <2 x i64> %tmp3
+}
+
+; srshr1d: single-element <1 x i64> llvm.aarch64.neon.srshl with the constant
+; shift operand <i64 -1>. The CHECK lines expect a shadow check on pointer %A
+; and a return shadow equal to the loaded shadow OR'd with zeroinitializer.
+define <1 x i64> @srshr1d(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <1 x i64> @srshr1d(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> <i64 -1>)
+; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <1 x i64> [[TMP3]]
+;
+ %tmp1 = load <1 x i64>, ptr %A
+ %tmp3 = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 -1>)
+ ret <1 x i64> %tmp3
+}
+
+; srshr_scalar: scalar i64 llvm.aarch64.neon.srshl with a constant shift amount
+; of -1. The CHECK lines expect a shadow check on pointer %A and a return
+; shadow equal to the loaded shadow OR'd with 0.
+define i64 @srshr_scalar(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define i64 @srshr_scalar(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[_MSLD]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 [[TMP1]], i64 -1)
+; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP3]]
+;
+ %tmp1 = load i64, ptr %A
+ %tmp3 = call i64 @llvm.aarch64.neon.srshl.i64(i64 %tmp1, i64 -1)
+ ret i64 %tmp3
+}
+
+; sqshlu8b: <8 x i8> llvm.aarch64.neon.sqshlu with a constant shift vector of
+; all 1. Expected instrumentation: the shadow of pointer %A is checked before
+; the load (branching to __msan_warning_noreturn if non-zero), and the return
+; shadow is the loaded shadow OR'd with zeroinitializer.
+define <8 x i8> @sqshlu8b(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <8 x i8> @sqshlu8b(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> [[TMP1]], <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i8> [[TMP3]]
+;
+ %tmp1 = load <8 x i8>, ptr %A
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+ ret <8 x i8> %tmp3
+}
+
+; sqshlu4h: <4 x i16> llvm.aarch64.neon.sqshlu whose shift operand is the
+; constant splat of 1. The CHECK lines expect a shadow check on pointer %A and
+; a return shadow equal to the loaded shadow OR'd with zeroinitializer.
+define <4 x i16> @sqshlu4h(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <4 x i16> @sqshlu4h(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> [[TMP1]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
+; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i16> [[TMP3]]
+;
+ %tmp1 = load <4 x i16>, ptr %A
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
+ ret <4 x i16> %tmp3
+}
+
+; sqshlu2s: <2 x i32> llvm.aarch64.neon.sqshlu whose shift operand is the
+; constant splat of 1. The CHECK lines expect a shadow check on pointer %A and
+; a return shadow equal to the loaded shadow OR'd with zeroinitializer.
+define <2 x i32> @sqshlu2s(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <2 x i32> @sqshlu2s(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>)
+; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i32> [[TMP3]]
+;
+ %tmp1 = load <2 x i32>, ptr %A
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
+ ret <2 x i32> %tmp3
+}
+
+; sqshlu on <16 x i8> (128-bit vector) with a constant shift vector. As pinned
+; by the CHECK lines, the loaded operand's shadow is OR'ed with zeroinitializer
+; and stored as the return shadow; the intrinsic call itself is unchanged.
+define <16 x i8> @sqshlu16b(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <16 x i8> @sqshlu16b(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> [[TMP1]], <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[TMP3]]
+;
+ %tmp1 = load <16 x i8>, ptr %A
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+ ret <16 x i8> %tmp3
+}
+
+; sqshlu on <8 x i16> with a constant shift vector. The CHECK lines show the
+; loaded operand's shadow OR'ed with zeroinitializer and stored as the return
+; shadow, with the intrinsic call left as written.
+define <8 x i16> @sqshlu8h(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <8 x i16> @sqshlu8h(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> [[TMP1]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i16> [[TMP3]]
+;
+ %tmp1 = load <8 x i16>, ptr %A
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+ ret <8 x i16> %tmp3
+}
+
+; sqshlu on <4 x i32> with a constant shift vector. The CHECK lines show the
+; loaded operand's shadow OR'ed with zeroinitializer and stored as the return
+; shadow, with the intrinsic call left as written.
+define <4 x i32> @sqshlu4s(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <4 x i32> @sqshlu4s(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+;
+ %tmp1 = load <4 x i32>, ptr %A
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+ ret <4 x i32> %tmp3
+}
+
+; sqshlu on <2 x i64> with a constant shift vector. The CHECK lines show the
+; loaded operand's shadow OR'ed with zeroinitializer and stored as the return
+; shadow, with the intrinsic call left as written.
+define <2 x i64> @sqshlu2d(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <2 x i64> @sqshlu2d(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[TMP1]], <2 x i64> <i64 1, i64 1>)
+; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i64> [[TMP3]]
+;
+ %tmp1 = load <2 x i64>, ptr %A
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
+ ret <2 x i64> %tmp3
+}
+
+; sqshlu on a single-element <1 x i64> vector with the constant shift <i64 1>.
+; The CHECK lines show the loaded operand's shadow OR'ed with zeroinitializer
+; and stored as the return shadow; the intrinsic call is emitted unchanged.
+define <1 x i64> @sqshlu1d_constant(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <1 x i64> @sqshlu1d_constant(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> [[TMP1]], <1 x i64> <i64 1>)
+; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <1 x i64> [[TMP3]]
+;
+ %tmp1 = load <1 x i64>, ptr %A
+ %tmp3 = call <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 1>)
+ ret <1 x i64> %tmp3
+}
+
+; Scalar i64 form of sqshlu with the constant shift amount 1. The CHECK lines
+; show the loaded operand's shadow OR'ed with the constant 0 and stored as the
+; return shadow; the intrinsic call is emitted unchanged.
+define i64 @sqshlu_i64_constant(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define i64 @sqshlu_i64_constant(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[_MSLD]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.sqshlu.i64(i64 [[TMP1]], i64 1)
+; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP3]]
+;
+ %tmp1 = load i64, ptr %A
+ %tmp3 = call i64 @llvm.aarch64.neon.sqshlu.i64(i64 %tmp1, i64 1)
+ ret i64 %tmp3
+}
+
+; Scalar i32 form of sqshlu with the constant shift amount 1. The CHECK lines
+; show the loaded operand's shadow OR'ed with the constant 0 and stored as the
+; return shadow; the intrinsic call is emitted unchanged.
+define i32 @sqshlu_i32_constant(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define i32 @sqshlu_i32_constant(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[_MSPROP:%.*]] = or i32 [[_MSLD]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.aarch64.neon.sqshlu.i32(i32 [[TMP1]], i32 1)
+; CHECK-NEXT: store i32 [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %tmp1 = load i32, ptr %A
+ %tmp3 = call i32 @llvm.aarch64.neon.sqshlu.i32(i32 %tmp1, i32 1)
+ ret i32 %tmp3
+}
+
+; Declarations of the llvm.aarch64.neon.sqshlu intrinsic overloads: first the
+; 64-bit vector and scalar (i64, i32) forms, then the 128-bit vector forms.
+; Each takes two operands of the same type as its result.
+declare <8 x i8> @llvm.aarch64.neon.sqshlu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.sqshlu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.sqshlu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <1 x i64> @llvm.aarch64.neon.sqshlu.v1i64(<1 x i64>, <1 x i64>) nounwind readnone
+declare i64 @llvm.aarch64.neon.sqshlu.i64(i64, i64) nounwind readnone
+declare i32 @llvm.aarch64.neon.sqshlu.i32(i32, i32) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.sqshlu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.sqshlu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.sqshlu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64>, <2 x i64>) nounwind readnone
+
+; rshrn narrowing <8 x i16> to <8 x i8> with the i32 operand 1. The CHECK lines
+; pin the current instrumentation: the loaded operand's shadow is bitcast to
+; i128 and compared against 0, a nonzero shadow branches to
+; __msan_warning_noreturn, and the return shadow is stored as zeroinitializer.
+define <8 x i8> @rshrn8b(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <8 x i8> @rshrn8b(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[TMP1]], i32 1)
+; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i8> [[TMP3]]
+;
+ %tmp1 = load <8 x i16>, ptr %A
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1)
+ ret <8 x i8> %tmp3
+}
+
+; rshrn narrowing <4 x i32> to <4 x i16> with the i32 operand 1. Per the CHECK
+; lines, the loaded operand's shadow is bitcast to i128 and compared against 0
+; (nonzero branches to __msan_warning_noreturn), and the return shadow is
+; stored as zeroinitializer.
+define <4 x i16> @rshrn4h(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <4 x i16> @rshrn4h(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[TMP1]], i32 1)
+; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i16> [[TMP3]]
+;
+ %tmp1 = load <4 x i32>, ptr %A
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1)
+ ret <4 x i16> %tmp3
+}
+
+; rshrn narrowing <2 x i64> to <2 x i32> with the i32 operand 1. Per the CHECK
+; lines, the loaded operand's shadow is bitcast to i128 and compared against 0
+; (nonzero branches to __msan_warning_noreturn), and the return shadow is
+; stored as zeroinitializer.
+define <2 x i32> @rshrn2s(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <2 x i32> @rshrn2s(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[TMP1]], i32 1)
+; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i32> [[TMP3]]
+;
+ %tmp1 = load <2 x i64>, ptr %A
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1)
+ ret <2 x i32> %tmp3
+}
+
+; rshrn of <8 x i16> whose <8 x i8> result is concatenated (via shufflevector)
+; after the <8 x i8> value loaded from %ret. Per the CHECK lines, the parameter
+; shadows of %ret and %A are each checked before the corresponding load, the
+; shadow loaded for %A is bitcast to i128 and compared against 0 before the
+; intrinsic call, and the return shadow is the shadow loaded for %ret shuffled
+; with zeroinitializer using the same mask as the value shuffle.
+define <16 x i8> @rshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <16 x i8> @rshrn16b(
+; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[OUT:%.*]] = load <8 x i8>, ptr [[RET]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x i16> [[_MSLD1]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]]
+; CHECK: 14:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 15:
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> [[TMP1]], i32 1)
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i8> [[OUT]], <8 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[TMP4]]
+;
+ %out = load <8 x i8>, ptr %ret
+ %tmp1 = load <8 x i16>, ptr %A
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16> %tmp1, i32 1)
+ %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i8> %tmp4
+}
+
+; rshrn of <4 x i32> whose <4 x i16> result is concatenated (via shufflevector)
+; after the <4 x i16> value loaded from %ret. Per the CHECK lines, the shadow
+; loaded for %A is bitcast to i128 and compared against 0 before the intrinsic
+; call, and the return shadow is the shadow loaded for %ret shuffled with
+; zeroinitializer using the same mask as the value shuffle.
+define <8 x i16> @rshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <8 x i16> @rshrn8h(
+; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[OUT:%.*]] = load <4 x i16>, ptr [[RET]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> [[_MSLD1]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]]
+; CHECK: 14:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 15:
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[TMP1]], i32 1)
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[OUT]], <4 x i16> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i16> [[TMP4]]
+;
+ %out = load <4 x i16>, ptr %ret
+ %tmp1 = load <4 x i32>, ptr %A
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %tmp1, i32 1)
+ %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %tmp4
+}
+
+; rshrn of <2 x i64> whose <2 x i32> result is concatenated (via shufflevector)
+; after the <2 x i32> value loaded from %ret. Per the CHECK lines, the shadow
+; loaded for %A is bitcast to i128 and compared against 0 before the intrinsic
+; call, and the return shadow is the shadow loaded for %ret shuffled with
+; zeroinitializer using the same mask as the value shuffle.
+define <4 x i32> @rshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <4 x i32> @rshrn4s(
+; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[OUT:%.*]] = load <2 x i32>, ptr [[RET]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <2 x i64> [[_MSLD1]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]]
+; CHECK: 14:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 15:
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> [[TMP1]], i32 1)
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[OUT]], <2 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[TMP4]]
+;
+ %out = load <2 x i32>, ptr %ret
+ %tmp1 = load <2 x i64>, ptr %A
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64> %tmp1, i32 1)
+ %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x i32> %tmp4
+}
+
+; Declarations of the llvm.aarch64.neon.rshrn intrinsic overloads. Each takes
+; a 128-bit vector plus an i32 operand and returns a vector with the same
+; element count and half-width elements.
+declare <8 x i8> @llvm.aarch64.neon.rshrn.v8i8(<8 x i16>, i32) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32>, i32) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.rshrn.v2i32(<2 x i64>, i32) nounwind readnone
+
+; Narrowing shift written as plain IR (lshr by 1, then trunc) rather than an
+; intrinsic. Per the CHECK lines, the shadow follows the value: the loaded
+; shadow is lshr'ed by the same constant, OR'ed with zeroinitializer, truncated
+; to <8 x i8>, and stored as the return shadow.
+define <8 x i8> @shrn8b(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <8 x i8> @shrn8b(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[TMP7:%.*]] = lshr <8 x i16> [[_MSLD]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <8 x i16> [[TMP8]] to <8 x i8>
+; CHECK-NEXT: [[TMP3:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
+; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i8> [[TMP3]]
+;
+ %tmp1 = load <8 x i16>, ptr %A
+ %tmp2 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %tmp3 = trunc <8 x i16> %tmp2 to <8 x i8>
+ ret <8 x i8> %tmp3
+}
+
+; Narrowing shift written as plain IR (lshr by 1, then trunc) on <4 x i32>.
+; Per the CHECK lines, the loaded shadow is lshr'ed by the same constant, OR'ed
+; with zeroinitializer, truncated to <4 x i16>, and stored as the return shadow.
+define <4 x i16> @shrn4h(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <4 x i16> @shrn4h(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[TMP7:%.*]] = lshr <4 x i32> [[_MSLD]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <4 x i32> [[TMP8]] to <4 x i16>
+; CHECK-NEXT: [[TMP3:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
+; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i16> [[TMP3]]
+;
+ %tmp1 = load <4 x i32>, ptr %A
+ %tmp2 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
+ %tmp3 = trunc <4 x i32> %tmp2 to <4 x i16>
+ ret <4 x i16> %tmp3
+}
+
+; Narrowing shift written as plain IR (lshr by 1, then trunc) on <2 x i64>.
+; Per the CHECK lines, the loaded shadow is lshr'ed by the same constant, OR'ed
+; with zeroinitializer, truncated to <2 x i32>, and stored as the return shadow.
+define <2 x i32> @shrn2s(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <2 x i32> @shrn2s(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[TMP7:%.*]] = lshr <2 x i64> [[_MSLD]], <i64 1, i64 1>
+; CHECK-NEXT: [[TMP8:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 1, i64 1>
+; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <2 x i64> [[TMP8]] to <2 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
+; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i32> [[TMP3]]
+;
+ %tmp1 = load <2 x i64>, ptr %A
+ %tmp2 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
+ %tmp3 = trunc <2 x i64> %tmp2 to <2 x i32>
+ ret <2 x i32> %tmp3
+}
+
+; Plain-IR narrowing shift (lshr by 1, then trunc) of <8 x i16>, concatenated
+; via shufflevector after the <8 x i8> value loaded from %ret. Per the CHECK
+; lines, the shadow loaded for %A is lshr'ed, OR'ed with zeroinitializer and
+; truncated, then shuffled together with the shadow loaded for %ret using the
+; same mask as the value shuffle; that result is the return shadow.
+define <16 x i8> @shrn16b(ptr %ret, ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <16 x i8> @shrn16b(
+; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[OUT:%.*]] = load <8 x i8>, ptr [[RET]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[TMP13:%.*]] = lshr <8 x i16> [[_MSLD1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i16> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <8 x i16> [[TMP14]] to <8 x i8>
+; CHECK-NEXT: [[TMP3:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
+; CHECK-NEXT: [[_MSPROP2:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> [[_MSPROP]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i8> [[OUT]], <8 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: store <16 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[TMP4]]
+;
+ %out = load <8 x i8>, ptr %ret
+ %tmp1 = load <8 x i16>, ptr %A
+ %tmp2 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %tmp3 = trunc <8 x i16> %tmp2 to <8 x i8>
+ %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i8> %tmp4
+}
+
+; Plain-IR narrowing shift (lshr by 1, then trunc) of <4 x i32>, concatenated
+; via shufflevector after the <4 x i16> value loaded from %ret. Per the CHECK
+; lines, the shadow loaded for %A is lshr'ed, OR'ed with zeroinitializer and
+; truncated, then shuffled together with the shadow loaded for %ret using the
+; same mask as the value shuffle; that result is the return shadow.
+define <8 x i16> @shrn8h(ptr %ret, ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <8 x i16> @shrn8h(
+; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[OUT:%.*]] = load <4 x i16>, ptr [[RET]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[TMP13:%.*]] = lshr <4 x i32> [[_MSLD1]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: [[TMP14:%.*]] = or <4 x i32> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <4 x i32> [[TMP14]] to <4 x i16>
+; CHECK-NEXT: [[TMP3:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
+; CHECK-NEXT: [[_MSPROP2:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> [[_MSPROP]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[OUT]], <4 x i16> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: store <8 x i16> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i16> [[TMP4]]
+;
+ %out = load <4 x i16>, ptr %ret
+ %tmp1 = load <4 x i32>, ptr %A
+ %tmp2 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
+ %tmp3 = trunc <4 x i32> %tmp2 to <4 x i16>
+ %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @shrn4s(ptr %ret, ptr %A) nounwind sanitize_memory { ; shift-right-then-narrow written as plain lshr + trunc (no intrinsic call)
+; CHECK-LABEL: define <4 x i32> @shrn4s(
+; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[OUT:%.*]] = load <2 x i32>, ptr [[RET]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[TMP13:%.*]] = lshr <2 x i64> [[_MSLD1]], <i64 1, i64 1>
+; CHECK-NEXT: [[TMP14:%.*]] = or <2 x i64> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 1, i64 1>
+; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <2 x i64> [[TMP14]] to <2 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
+; CHECK-NEXT: [[_MSPROP2:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> [[_MSPROP]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[OUT]], <2 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: store <4 x i32> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[TMP4]]
+;
+ %out = load <2 x i32>, ptr %ret ; supplies lanes 0-1 of the result
+ %tmp1 = load <2 x i64>, ptr %A ; wide input vector
+ %tmp2 = lshr <2 x i64> %tmp1, <i64 1, i64 1> ; the assertions above expect the same lshr applied to %tmp1's shadow
+ %tmp3 = trunc <2 x i64> %tmp2 to <2 x i32> ; the shadow is truncated the same way
+ %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; concatenation: %out then %tmp3; the return shadow is built with the identical mask
+ ret <4 x i32> %tmp4
+}
+
+declare <8 x i8> @llvm.aarch64.neon.shrn.v8i8(<8 x i16>, i32) nounwind readnone ; NOTE(review): the shrn* tests visible above use lshr + trunc and do not call these three declarations - possibly leftovers from the forked test, confirm
+declare <4 x i16> @llvm.aarch64.neon.shrn.v4i16(<4 x i32>, i32) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.shrn.v2i32(<2 x i64>, i32) nounwind readnone
+
+define i32 @sqshrn1s(i64 %A) nounwind sanitize_memory { ; scalar (i64 -> i32) form of the sqshrn intrinsic
+; CHECK-LABEL: define i32 @sqshrn1s(
+; CHECK-SAME: i64 [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP:%.*]] = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 [[A]], i32 1)
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i32 [[TMP]]
+;
+ %tmp = call i32 @llvm.aarch64.neon.sqshrn.i32(i64 %A, i32 1) ; per the assertions above: a nonzero parameter shadow for %A branches to __msan_warning_noreturn, and the return shadow is the constant 0
+ ret i32 %tmp
+}
+
+define <8 x i8> @sqshrn8b(ptr %A) nounwind sanitize_memory { ; <8 x i16> -> <8 x i8> through the sqshrn.v8i8 intrinsic
+; CHECK-LABEL: define <8 x i8> @sqshrn8b(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[TMP1]], i32 1)
+; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i8> [[TMP3]]
+;
+ %tmp1 = load <8 x i16>, ptr %A ; wide input vector
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1) ; per the assertions above: %tmp1's whole shadow (as i128) must be zero before the call, and the result shadow is zeroinitializer
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @sqshrn4h(ptr %A) nounwind sanitize_memory { ; <4 x i32> -> <4 x i16> through the sqshrn.v4i16 intrinsic
+; CHECK-LABEL: define <4 x i16> @sqshrn4h(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[TMP1]], i32 1)
+; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i16> [[TMP3]]
+;
+ %tmp1 = load <4 x i32>, ptr %A ; wide input vector
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1) ; per the assertions above: %tmp1's whole shadow (as i128) must be zero before the call, and the result shadow is zeroinitializer
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @sqshrn2s(ptr %A) nounwind sanitize_memory { ; <2 x i64> -> <2 x i32> through the sqshrn.v2i32 intrinsic
+; CHECK-LABEL: define <2 x i32> @sqshrn2s(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[TMP1]], i32 1)
+; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i32> [[TMP3]]
+;
+ %tmp1 = load <2 x i64>, ptr %A ; wide input vector
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1) ; per the assertions above: %tmp1's whole shadow (as i128) must be zero before the call, and the result shadow is zeroinitializer
+ ret <2 x i32> %tmp3
+}
+
+
+define <16 x i8> @sqshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory { ; sqshrn.v8i8 result appended after the <8 x i8> loaded from %ret
+; CHECK-LABEL: define <16 x i8> @sqshrn16b(
+; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[OUT:%.*]] = load <8 x i8>, ptr [[RET]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x i16> [[_MSLD1]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]]
+; CHECK: 14:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 15:
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> [[TMP1]], i32 1)
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i8> [[OUT]], <8 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[TMP4]]
+;
+ %out = load <8 x i8>, ptr %ret ; supplies lanes 0-7 of the result
+ %tmp1 = load <8 x i16>, ptr %A ; wide input vector
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16> %tmp1, i32 1) ; per the assertions above: %tmp1's whole shadow (as i128) must be zero before the call
+ %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; concatenation: %out then %tmp3; the return shadow pairs %out's shadow with zeroinitializer
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @sqshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory { ; sqshrn.v4i16 result appended after the <4 x i16> loaded from %ret
+; CHECK-LABEL: define <8 x i16> @sqshrn8h(
+; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[OUT:%.*]] = load <4 x i16>, ptr [[RET]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> [[_MSLD1]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]]
+; CHECK: 14:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 15:
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> [[TMP1]], i32 1)
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[OUT]], <4 x i16> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i16> [[TMP4]]
+;
+ %out = load <4 x i16>, ptr %ret ; supplies lanes 0-3 of the result
+ %tmp1 = load <4 x i32>, ptr %A ; wide input vector
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32> %tmp1, i32 1) ; per the assertions above: %tmp1's whole shadow (as i128) must be zero before the call
+ %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; concatenation: %out then %tmp3; the return shadow pairs %out's shadow with zeroinitializer
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @sqshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory { ; sqshrn.v2i32 result appended after the <2 x i32> loaded from %ret
+; CHECK-LABEL: define <4 x i32> @sqshrn4s(
+; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[OUT:%.*]] = load <2 x i32>, ptr [[RET]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <2 x i64> [[_MSLD1]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]]
+; CHECK: 14:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 15:
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> [[TMP1]], i32 1)
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[OUT]], <2 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[TMP4]]
+;
+ %out = load <2 x i32>, ptr %ret ; supplies lanes 0-1 of the result
+ %tmp1 = load <2 x i64>, ptr %A ; wide input vector
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64> %tmp1, i32 1) ; per the assertions above: %tmp1's whole shadow (as i128) must be zero before the call
+ %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; concatenation: %out then %tmp3; the return shadow pairs %out's shadow with zeroinitializer
+ ret <4 x i32> %tmp4
+}
+
+declare i32 @llvm.aarch64.neon.sqshrn.i32(i64, i32) nounwind readnone ; scalar form, called by @sqshrn1s
+declare <8 x i8> @llvm.aarch64.neon.sqshrn.v8i8(<8 x i16>, i32) nounwind readnone ; called by @sqshrn8b and @sqshrn16b
+declare <4 x i16> @llvm.aarch64.neon.sqshrn.v4i16(<4 x i32>, i32) nounwind readnone ; called by @sqshrn4h and @sqshrn8h
+declare <2 x i32> @llvm.aarch64.neon.sqshrn.v2i32(<2 x i64>, i32) nounwind readnone ; called by @sqshrn2s and @sqshrn4s
+
+define i32 @sqshrun1s(i64 %A) nounwind sanitize_memory { ; scalar (i64 -> i32) form of the sqshrun intrinsic
+; CHECK-LABEL: define i32 @sqshrun1s(
+; CHECK-SAME: i64 [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP:%.*]] = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 [[A]], i32 1)
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i32 [[TMP]]
+;
+ %tmp = call i32 @llvm.aarch64.neon.sqshrun.i32(i64 %A, i32 1) ; per the assertions above: a nonzero parameter shadow for %A branches to __msan_warning_noreturn, and the return shadow is the constant 0
+ ret i32 %tmp
+}
+
+define <8 x i8> @sqshrun8b(ptr %A) nounwind sanitize_memory { ; <8 x i16> -> <8 x i8> through the sqshrun.v8i8 intrinsic
+; CHECK-LABEL: define <8 x i8> @sqshrun8b(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[TMP1]], i32 1)
+; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i8> [[TMP3]]
+;
+ %tmp1 = load <8 x i16>, ptr %A ; wide input vector
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1) ; per the assertions above: %tmp1's whole shadow (as i128) must be zero before the call, and the result shadow is zeroinitializer
+ ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @sqshrun4h(ptr %A) nounwind sanitize_memory { ; <4 x i32> -> <4 x i16> through the sqshrun.v4i16 intrinsic
+; CHECK-LABEL: define <4 x i16> @sqshrun4h(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[TMP1]], i32 1)
+; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i16> [[TMP3]]
+;
+ %tmp1 = load <4 x i32>, ptr %A ; wide input vector
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1) ; per the assertions above: %tmp1's whole shadow (as i128) must be zero before the call, and the result shadow is zeroinitializer
+ ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @sqshrun2s(ptr %A) nounwind sanitize_memory { ; <2 x i64> -> <2 x i32> through the sqshrun.v2i32 intrinsic
+; CHECK-LABEL: define <2 x i32> @sqshrun2s(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[TMP1]], i32 1)
+; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i32> [[TMP3]]
+;
+ %tmp1 = load <2 x i64>, ptr %A ; wide input vector
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1) ; per the assertions above: %tmp1's whole shadow (as i128) must be zero before the call, and the result shadow is zeroinitializer
+ ret <2 x i32> %tmp3
+}
+
+define <16 x i8> @sqshrun16b(ptr %ret, ptr %A) nounwind sanitize_memory { ; sqshrun.v8i8 result appended after the <8 x i8> loaded from %ret
+; CHECK-LABEL: define <16 x i8> @sqshrun16b(
+; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[OUT:%.*]] = load <8 x i8>, ptr [[RET]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x i16> [[_MSLD1]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]]
+; CHECK: 14:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 15:
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> [[TMP1]], i32 1)
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i8> [[OUT]], <8 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[TMP4]]
+;
+ %out = load <8 x i8>, ptr %ret ; supplies lanes 0-7 of the result
+ %tmp1 = load <8 x i16>, ptr %A ; wide input vector
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16> %tmp1, i32 1) ; per the assertions above: %tmp1's whole shadow (as i128) must be zero before the call
+ %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; concatenation: %out then %tmp3; the return shadow pairs %out's shadow with zeroinitializer
+ ret <16 x i8> %tmp4
+}
+
+define <8 x i16> @sqshrun8h(ptr %ret, ptr %A) nounwind sanitize_memory { ; sqshrun.v4i16 result appended after the <4 x i16> loaded from %ret
+; CHECK-LABEL: define <8 x i16> @sqshrun8h(
+; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[OUT:%.*]] = load <4 x i16>, ptr [[RET]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> [[_MSLD1]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]]
+; CHECK: 14:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 15:
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> [[TMP1]], i32 1)
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[OUT]], <4 x i16> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i16> [[TMP4]]
+;
+ %out = load <4 x i16>, ptr %ret ; supplies lanes 0-3 of the result
+ %tmp1 = load <4 x i32>, ptr %A ; wide input vector
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32> %tmp1, i32 1) ; per the assertions above: %tmp1's whole shadow (as i128) must be zero before the call
+ %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; concatenation: %out then %tmp3; the return shadow pairs %out's shadow with zeroinitializer
+ ret <8 x i16> %tmp4
+}
+
+define <4 x i32> @sqshrun4s(ptr %ret, ptr %A) nounwind sanitize_memory { ; sqshrun.v2i32 result appended after the <2 x i32> loaded from %ret
+; CHECK-LABEL: define <4 x i32> @sqshrun4s(
+; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[OUT:%.*]] = load <2 x i32>, ptr [[RET]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <2 x i64> [[_MSLD1]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]]
+; CHECK: 14:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 15:
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> [[TMP1]], i32 1)
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[OUT]], <2 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[TMP4]]
+;
+ %out = load <2 x i32>, ptr %ret ; supplies lanes 0-1 of the result
+ %tmp1 = load <2 x i64>, ptr %A ; wide input vector
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64> %tmp1, i32 1) ; per the assertions above: %tmp1's whole shadow (as i128) must be zero before the call
+ %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; concatenation: %out then %tmp3; the return shadow pairs %out's shadow with zeroinitializer
+ ret <4 x i32> %tmp4
+}
+
+declare i32 @llvm.aarch64.neon.sqshrun.i32(i64, i32) nounwind readnone ; scalar overload of sqshrun, i64 input and i32 result
+declare <8 x i8> @llvm.aarch64.neon.sqshrun.v8i8(<8 x i16>, i32) nounwind readnone ; vector overloads follow, each result element is half as wide as the input element
+declare <4 x i16> @llvm.aarch64.neon.sqshrun.v4i16(<4 x i32>, i32) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.sqshrun.v2i32(<2 x i64>, i32) nounwind readnone
+
+define i32 @sqrshrn1s(i64 %A) nounwind sanitize_memory { ; Scalar case, i64 %A goes straight to the .i32 overload of sqrshrn with shift amount 1.
+; CHECK-LABEL: define i32 @sqrshrn1s(
+; CHECK-SAME: i64 [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP:%.*]] = call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 [[A]], i32 1)
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i32 [[TMP]]
+;
+ %tmp = call i32 @llvm.aarch64.neon.sqrshrn.i32(i64 %A, i32 1) ; Expected IR compares the parameter shadow of %A with 0 and branches to __msan_warning_noreturn when it is non-zero, before this call.
+ ret i32 %tmp ; Expected IR writes the constant 0 to __msan_retval_tls as the return shadow.
+}
+
+define <8 x i8> @sqrshrn8b(ptr %A) nounwind sanitize_memory { ; Narrowing case, <8 x i16> loaded from %A becomes <8 x i8> via sqrshrn with shift amount 1.
+; CHECK-LABEL: define <8 x i8> @sqrshrn8b(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[TMP1]], i32 1)
+; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i8> [[TMP3]]
+;
+ %tmp1 = load <8 x i16>, ptr %A ; Expected IR checks the parameter shadow of %A first, then loads the data shadow from the address %A xor 193514046488576.
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1) ; Expected IR bitcasts the data shadow to i128 and branches to __msan_warning_noreturn when it is non-zero, before this call.
+ ret <8 x i8> %tmp3 ; Expected IR writes zeroinitializer to __msan_retval_tls as the return shadow.
+}
+
+define <4 x i16> @sqrshrn4h(ptr %A) nounwind sanitize_memory { ; Narrowing case, <4 x i32> loaded from %A becomes <4 x i16> via sqrshrn with shift amount 1.
+; CHECK-LABEL: define <4 x i16> @sqrshrn4h(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[TMP1]], i32 1)
+; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i16> [[TMP3]]
+;
+ %tmp1 = load <4 x i32>, ptr %A ; Expected IR checks the parameter shadow of %A first, then loads the data shadow from the address %A xor 193514046488576.
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1) ; Expected IR bitcasts the data shadow to i128 and branches to __msan_warning_noreturn when it is non-zero, before this call.
+ ret <4 x i16> %tmp3 ; Expected IR writes zeroinitializer to __msan_retval_tls as the return shadow.
+}
+
+define <2 x i32> @sqrshrn2s(ptr %A) nounwind sanitize_memory { ; Narrowing case, <2 x i64> loaded from %A becomes <2 x i32> via sqrshrn with shift amount 1.
+; CHECK-LABEL: define <2 x i32> @sqrshrn2s(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[TMP1]], i32 1)
+; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i32> [[TMP3]]
+;
+ %tmp1 = load <2 x i64>, ptr %A ; Expected IR checks the parameter shadow of %A first, then loads the data shadow from the address %A xor 193514046488576.
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1) ; Expected IR bitcasts the data shadow to i128 and branches to __msan_warning_noreturn when it is non-zero, before this call.
+ ret <2 x i32> %tmp3 ; Expected IR writes zeroinitializer to __msan_retval_tls as the return shadow.
+}
+
+define <16 x i8> @sqrshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory { ; Two-input case, the <8 x i8> loaded from %ret is followed by the sqrshrn-narrowed <8 x i16> from %A to form <16 x i8>.
+; CHECK-LABEL: define <16 x i8> @sqrshrn16b(
+; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[OUT:%.*]] = load <8 x i8>, ptr [[RET]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x i16> [[_MSLD1]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]]
+; CHECK: 14:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 15:
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> [[TMP1]], i32 1)
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i8> [[OUT]], <8 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[TMP4]]
+;
+ %out = load <8 x i8>, ptr %ret ; Expected IR checks the parameter shadow of %ret, then loads this value's shadow (_MSLD).
+ %tmp1 = load <8 x i16>, ptr %A ; Expected IR checks the parameter shadow of %A, then loads this value's shadow (_MSLD1).
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16> %tmp1, i32 1) ; Expected IR bitcasts _MSLD1 to i128 and branches to __msan_warning_noreturn when it is non-zero, before this call.
+ %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; Expected IR builds the result shadow (_MSPROP) with the same mask from _MSLD and zeroinitializer.
+ ret <16 x i8> %tmp4 ; Expected IR stores _MSPROP to __msan_retval_tls as the return shadow.
+}
+
+define <8 x i16> @sqrshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory { ; Two-input case, the <4 x i16> loaded from %ret is followed by the sqrshrn-narrowed <4 x i32> from %A to form <8 x i16>.
+; CHECK-LABEL: define <8 x i16> @sqrshrn8h(
+; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[OUT:%.*]] = load <4 x i16>, ptr [[RET]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> [[_MSLD1]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]]
+; CHECK: 14:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 15:
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> [[TMP1]], i32 1)
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[OUT]], <4 x i16> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i16> [[TMP4]]
+;
+ %out = load <4 x i16>, ptr %ret ; Expected IR checks the parameter shadow of %ret, then loads this value's shadow (_MSLD).
+ %tmp1 = load <4 x i32>, ptr %A ; Expected IR checks the parameter shadow of %A, then loads this value's shadow (_MSLD1).
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32> %tmp1, i32 1) ; Expected IR bitcasts _MSLD1 to i128 and branches to __msan_warning_noreturn when it is non-zero, before this call.
+ %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; Expected IR builds the result shadow (_MSPROP) with the same mask from _MSLD and zeroinitializer.
+ ret <8 x i16> %tmp4 ; Expected IR stores _MSPROP to __msan_retval_tls as the return shadow.
+}
+
+define <4 x i32> @sqrshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory { ; Two-input case, the <2 x i32> loaded from %ret is followed by the sqrshrn-narrowed <2 x i64> from %A to form <4 x i32>.
+; CHECK-LABEL: define <4 x i32> @sqrshrn4s(
+; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[OUT:%.*]] = load <2 x i32>, ptr [[RET]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <2 x i64> [[_MSLD1]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]]
+; CHECK: 14:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 15:
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> [[TMP1]], i32 1)
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[OUT]], <2 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[TMP4]]
+;
+ %out = load <2 x i32>, ptr %ret ; Expected IR checks the parameter shadow of %ret, then loads this value's shadow (_MSLD).
+ %tmp1 = load <2 x i64>, ptr %A ; Expected IR checks the parameter shadow of %A, then loads this value's shadow (_MSLD1).
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64> %tmp1, i32 1) ; Expected IR bitcasts _MSLD1 to i128 and branches to __msan_warning_noreturn when it is non-zero, before this call.
+ %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; Expected IR builds the result shadow (_MSPROP) with the same mask from _MSLD and zeroinitializer.
+ ret <4 x i32> %tmp4 ; Expected IR stores _MSPROP to __msan_retval_tls as the return shadow.
+}
+
+declare i32 @llvm.aarch64.neon.sqrshrn.i32(i64, i32) nounwind readnone ; scalar overload of sqrshrn, i64 input and i32 result
+declare <8 x i8> @llvm.aarch64.neon.sqrshrn.v8i8(<8 x i16>, i32) nounwind readnone ; vector overloads follow, each result element is half as wide as the input element
+declare <4 x i16> @llvm.aarch64.neon.sqrshrn.v4i16(<4 x i32>, i32) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.sqrshrn.v2i32(<2 x i64>, i32) nounwind readnone
+
+define i32 @sqrshrun1s(i64 %A) nounwind sanitize_memory { ; Scalar case, i64 %A goes straight to the .i32 overload of sqrshrun with shift amount 1.
+; CHECK-LABEL: define i32 @sqrshrun1s(
+; CHECK-SAME: i64 [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP:%.*]] = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 [[A]], i32 1)
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i32 [[TMP]]
+;
+ %tmp = call i32 @llvm.aarch64.neon.sqrshrun.i32(i64 %A, i32 1) ; Expected IR compares the parameter shadow of %A with 0 and branches to __msan_warning_noreturn when it is non-zero, before this call.
+ ret i32 %tmp ; Expected IR writes the constant 0 to __msan_retval_tls as the return shadow.
+}
+
+define <8 x i8> @sqrshrun8b(ptr %A) nounwind sanitize_memory { ; Narrowing case, <8 x i16> loaded from %A becomes <8 x i8> via sqrshrun with shift amount 1.
+; CHECK-LABEL: define <8 x i8> @sqrshrun8b(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[TMP1]], i32 1)
+; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i8> [[TMP3]]
+;
+ %tmp1 = load <8 x i16>, ptr %A ; Expected IR checks the parameter shadow of %A first, then loads the data shadow from the address %A xor 193514046488576.
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1) ; Expected IR bitcasts the data shadow to i128 and branches to __msan_warning_noreturn when it is non-zero, before this call.
+ ret <8 x i8> %tmp3 ; Expected IR writes zeroinitializer to __msan_retval_tls as the return shadow.
+}
+
+define <4 x i16> @sqrshrun4h(ptr %A) nounwind sanitize_memory { ; Narrowing case, <4 x i32> loaded from %A becomes <4 x i16> via sqrshrun with shift amount 1.
+; CHECK-LABEL: define <4 x i16> @sqrshrun4h(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[TMP1]], i32 1)
+; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i16> [[TMP3]]
+;
+ %tmp1 = load <4 x i32>, ptr %A ; Expected IR checks the parameter shadow of %A first, then loads the data shadow from the address %A xor 193514046488576.
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1) ; Expected IR bitcasts the data shadow to i128 and branches to __msan_warning_noreturn when it is non-zero, before this call.
+ ret <4 x i16> %tmp3 ; Expected IR writes zeroinitializer to __msan_retval_tls as the return shadow.
+}
+
+define <2 x i32> @sqrshrun2s(ptr %A) nounwind sanitize_memory { ; Narrowing case, <2 x i64> loaded from %A becomes <2 x i32> via sqrshrun with shift amount 1.
+; CHECK-LABEL: define <2 x i32> @sqrshrun2s(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[TMP1]], i32 1)
+; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i32> [[TMP3]]
+;
+ %tmp1 = load <2 x i64>, ptr %A ; Expected IR checks the parameter shadow of %A first, then loads the data shadow from the address %A xor 193514046488576.
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1) ; Expected IR bitcasts the data shadow to i128 and branches to __msan_warning_noreturn when it is non-zero, before this call.
+ ret <2 x i32> %tmp3 ; Expected IR writes zeroinitializer to __msan_retval_tls as the return shadow.
+}
+
+define <16 x i8> @sqrshrun16b(ptr %ret, ptr %A) nounwind sanitize_memory { ; Two-input case, the <8 x i8> loaded from %ret is followed by the sqrshrun-narrowed <8 x i16> from %A to form <16 x i8>.
+; CHECK-LABEL: define <16 x i8> @sqrshrun16b(
+; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[OUT:%.*]] = load <8 x i8>, ptr [[RET]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x i16> [[_MSLD1]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]]
+; CHECK: 14:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 15:
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> [[TMP1]], i32 1)
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i8> [[OUT]], <8 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[TMP4]]
+;
+ %out = load <8 x i8>, ptr %ret ; Expected IR checks the parameter shadow of %ret, then loads this value's shadow (_MSLD).
+ %tmp1 = load <8 x i16>, ptr %A ; Expected IR checks the parameter shadow of %A, then loads this value's shadow (_MSLD1).
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16> %tmp1, i32 1) ; Expected IR bitcasts _MSLD1 to i128 and branches to __msan_warning_noreturn when it is non-zero, before this call.
+ %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; Expected IR builds the result shadow (_MSPROP) with the same mask from _MSLD and zeroinitializer.
+ ret <16 x i8> %tmp4 ; Expected IR stores _MSPROP to __msan_retval_tls as the return shadow.
+}
+
+define <8 x i16> @sqrshrun8h(ptr %ret, ptr %A) nounwind sanitize_memory { ; Two-input case, the <4 x i16> loaded from %ret is followed by the sqrshrun-narrowed <4 x i32> from %A to form <8 x i16>.
+; CHECK-LABEL: define <8 x i16> @sqrshrun8h(
+; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[OUT:%.*]] = load <4 x i16>, ptr [[RET]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> [[_MSLD1]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]]
+; CHECK: 14:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 15:
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> [[TMP1]], i32 1)
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[OUT]], <4 x i16> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i16> [[TMP4]]
+;
+ %out = load <4 x i16>, ptr %ret ; Expected IR checks the parameter shadow of %ret, then loads this value's shadow (_MSLD).
+ %tmp1 = load <4 x i32>, ptr %A ; Expected IR checks the parameter shadow of %A, then loads this value's shadow (_MSLD1).
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32> %tmp1, i32 1) ; Expected IR bitcasts _MSLD1 to i128 and branches to __msan_warning_noreturn when it is non-zero, before this call.
+ %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; Expected IR builds the result shadow (_MSPROP) with the same mask from _MSLD and zeroinitializer.
+ ret <8 x i16> %tmp4 ; Expected IR stores _MSPROP to __msan_retval_tls as the return shadow.
+}
+
+define <4 x i32> @sqrshrun4s(ptr %ret, ptr %A) nounwind sanitize_memory { ; Two-input case, the <2 x i32> loaded from %ret is followed by the sqrshrun-narrowed <2 x i64> from %A to form <4 x i32>.
+; CHECK-LABEL: define <4 x i32> @sqrshrun4s(
+; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[OUT:%.*]] = load <2 x i32>, ptr [[RET]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <2 x i64> [[_MSLD1]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]]
+; CHECK: 14:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 15:
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> [[TMP1]], i32 1)
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[OUT]], <2 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[TMP4]]
+;
+ %out = load <2 x i32>, ptr %ret ; Expected IR checks the parameter shadow of %ret, then loads this value's shadow (_MSLD).
+ %tmp1 = load <2 x i64>, ptr %A ; Expected IR checks the parameter shadow of %A, then loads this value's shadow (_MSLD1).
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64> %tmp1, i32 1) ; Expected IR bitcasts _MSLD1 to i128 and branches to __msan_warning_noreturn when it is non-zero, before this call.
+ %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; Expected IR builds the result shadow (_MSPROP) with the same mask from _MSLD and zeroinitializer.
+ ret <4 x i32> %tmp4 ; Expected IR stores _MSPROP to __msan_retval_tls as the return shadow.
+}
+
+declare i32 @llvm.aarch64.neon.sqrshrun.i32(i64, i32) nounwind readnone ; scalar overload of sqrshrun, i64 input and i32 result
+declare <8 x i8> @llvm.aarch64.neon.sqrshrun.v8i8(<8 x i16>, i32) nounwind readnone ; vector overloads follow, each result element is half as wide as the input element
+declare <4 x i16> @llvm.aarch64.neon.sqrshrun.v4i16(<4 x i32>, i32) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.sqrshrun.v2i32(<2 x i64>, i32) nounwind readnone
+
+define i32 @uqrshrn1s(i64 %A) nounwind sanitize_memory { ; Scalar case, i64 %A goes straight to the .i32 overload of uqrshrn with shift amount 1.
+; CHECK-LABEL: define i32 @uqrshrn1s(
+; CHECK-SAME: i64 [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP:%.*]] = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 [[A]], i32 1)
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i32 [[TMP]]
+;
+ %tmp = call i32 @llvm.aarch64.neon.uqrshrn.i32(i64 %A, i32 1) ; Expected IR compares the parameter shadow of %A with 0 and branches to __msan_warning_noreturn when it is non-zero, before this call.
+ ret i32 %tmp ; Expected IR writes the constant 0 to __msan_retval_tls as the return shadow.
+}
+
+define <8 x i8> @uqrshrn8b(ptr %A) nounwind sanitize_memory { ; Narrowing case, <8 x i16> loaded from %A becomes <8 x i8> via uqrshrn with shift amount 1.
+; CHECK-LABEL: define <8 x i8> @uqrshrn8b(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[TMP1]], i32 1)
+; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i8> [[TMP3]]
+;
+ %tmp1 = load <8 x i16>, ptr %A ; Expected IR checks the parameter shadow of %A first, then loads the data shadow from the address %A xor 193514046488576.
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1) ; Expected IR bitcasts the data shadow to i128 and branches to __msan_warning_noreturn when it is non-zero, before this call.
+ ret <8 x i8> %tmp3 ; Expected IR writes zeroinitializer to __msan_retval_tls as the return shadow.
+}
+
+define <4 x i16> @uqrshrn4h(ptr %A) nounwind sanitize_memory { ; Narrowing case, <4 x i32> loaded from %A becomes <4 x i16> via uqrshrn with shift amount 1.
+; CHECK-LABEL: define <4 x i16> @uqrshrn4h(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[TMP1]], i32 1)
+; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i16> [[TMP3]]
+;
+ %tmp1 = load <4 x i32>, ptr %A ; Expected IR checks the parameter shadow of %A first, then loads the data shadow from the address %A xor 193514046488576.
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1) ; Expected IR bitcasts the data shadow to i128 and branches to __msan_warning_noreturn when it is non-zero, before this call.
+ ret <4 x i16> %tmp3 ; Expected IR writes zeroinitializer to __msan_retval_tls as the return shadow.
+}
+
+define <2 x i32> @uqrshrn2s(ptr %A) nounwind sanitize_memory { ; MSan precommit test: uqrshrn intrinsic narrowing <2 x i64> -> <2 x i32>, input loaded from %A
+; CHECK-LABEL: define <2 x i32> @uqrshrn2s(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[TMP1]], i32 1)
+; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i32> [[TMP3]]
+;
+ %tmp1 = load <2 x i64>, ptr %A ; wide input; the assertions above expect its shadow to be loaded from (%A xor 193514046488576)
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1) ; second operand is the constant i32 1; assertions expect %tmp1's shadow (bitcast to i128) compared against 0, branching to __msan_warning_noreturn if non-zero, before this call
+ ret <2 x i32> %tmp3 ; assertions expect zeroinitializer stored to @__msan_retval_tls as the return shadow
+}
+
+define <16 x i8> @uqrshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory { ; MSan precommit test: uqrshrn narrowing <8 x i16> -> <8 x i8>, result placed after an <8 x i8> loaded from %ret
+; CHECK-LABEL: define <16 x i8> @uqrshrn16b(
+; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[OUT:%.*]] = load <8 x i8>, ptr [[RET]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x i16> [[_MSLD1]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]]
+; CHECK: 14:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 15:
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> [[TMP1]], i32 1)
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i8> [[OUT]], <8 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[TMP4]]
+;
+ %out = load <8 x i8>, ptr %ret ; existing low half; its shadow (loaded from %ret xor 193514046488576) feeds the expected return shadow
+ %tmp1 = load <8 x i16>, ptr %A ; wide input for the narrowing call
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16> %tmp1, i32 1) ; second operand is the constant i32 1; assertions expect %tmp1's shadow compared against 0 (warning branch if non-zero) before this call
+ %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; mask 0..15 concatenates %out (lanes 0-7) with %tmp3 (lanes 8-15); the expected shadow shuffle uses zeroinitializer in place of %tmp3's shadow
+ ret <16 x i8> %tmp4 ; assertions expect the shuffled shadow, not a constant, stored to @__msan_retval_tls
+}
+
+define <8 x i16> @uqrshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory { ; MSan precommit test: uqrshrn narrowing <4 x i32> -> <4 x i16>, result placed after a <4 x i16> loaded from %ret
+; CHECK-LABEL: define <8 x i16> @uqrshrn8h(
+; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[OUT:%.*]] = load <4 x i16>, ptr [[RET]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> [[_MSLD1]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]]
+; CHECK: 14:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 15:
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> [[TMP1]], i32 1)
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[OUT]], <4 x i16> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i16> [[TMP4]]
+;
+ %out = load <4 x i16>, ptr %ret ; existing low half; its shadow (loaded from %ret xor 193514046488576) feeds the expected return shadow
+ %tmp1 = load <4 x i32>, ptr %A ; wide input for the narrowing call
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32> %tmp1, i32 1) ; second operand is the constant i32 1; assertions expect %tmp1's shadow compared against 0 (warning branch if non-zero) before this call
+ %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; mask 0..7 concatenates %out (lanes 0-3) with %tmp3 (lanes 4-7); the expected shadow shuffle uses zeroinitializer in place of %tmp3's shadow
+ ret <8 x i16> %tmp4 ; assertions expect the shuffled shadow, not a constant, stored to @__msan_retval_tls
+}
+
+define <4 x i32> @uqrshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory { ; MSan precommit test: uqrshrn narrowing <2 x i64> -> <2 x i32>, result placed after a <2 x i32> loaded from %ret
+; CHECK-LABEL: define <4 x i32> @uqrshrn4s(
+; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[OUT:%.*]] = load <2 x i32>, ptr [[RET]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <2 x i64> [[_MSLD1]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]]
+; CHECK: 14:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 15:
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> [[TMP1]], i32 1)
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[OUT]], <2 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[TMP4]]
+;
+ %out = load <2 x i32>, ptr %ret ; existing low half; its shadow (loaded from %ret xor 193514046488576) feeds the expected return shadow
+ %tmp1 = load <2 x i64>, ptr %A ; wide input for the narrowing call
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64> %tmp1, i32 1) ; second operand is the constant i32 1; assertions expect %tmp1's shadow compared against 0 (warning branch if non-zero) before this call
+ %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; mask 0..3 concatenates %out (lanes 0-1) with %tmp3 (lanes 2-3); the expected shadow shuffle uses zeroinitializer in place of %tmp3's shadow
+ ret <4 x i32> %tmp4 ; assertions expect the shuffled shadow, not a constant, stored to @__msan_retval_tls
+}
+
+declare i32 @llvm.aarch64.neon.uqrshrn.i32(i64, i32) nounwind readnone ; scalar overload: i64 value and i32 operand in, i32 out
+declare <8 x i8> @llvm.aarch64.neon.uqrshrn.v8i8(<8 x i16>, i32) nounwind readnone ; <8 x i16> -> <8 x i8>; called by uqrshrn16b
+declare <4 x i16> @llvm.aarch64.neon.uqrshrn.v4i16(<4 x i32>, i32) nounwind readnone ; <4 x i32> -> <4 x i16>; called by uqrshrn4h and uqrshrn8h
+declare <2 x i32> @llvm.aarch64.neon.uqrshrn.v2i32(<2 x i64>, i32) nounwind readnone ; <2 x i64> -> <2 x i32>; called by uqrshrn2s and uqrshrn4s
+
+define i32 @uqshrn1s(i64 %A) nounwind sanitize_memory { ; MSan precommit test: scalar uqshrn intrinsic, i64 argument -> i32 result
+; CHECK-LABEL: define i32 @uqshrn1s(
+; CHECK-SAME: i64 [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP:%.*]] = call i32 @llvm.aarch64.neon.uqshrn.i32(i64 [[A]], i32 1)
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i32 [[TMP]]
+;
+ %tmp = call i32 @llvm.aarch64.neon.uqshrn.i32(i64 %A, i32 1) ; second operand is the constant i32 1; assertions expect the i64 read from @__msan_param_tls compared against 0 (warning branch if non-zero) before this call
+ ret i32 %tmp ; assertions expect the constant i32 0 stored to @__msan_retval_tls as the return shadow
+}
+
+define <8 x i8> @uqshrn8b(ptr %A) nounwind sanitize_memory { ; MSan precommit test: uqshrn intrinsic narrowing <8 x i16> -> <8 x i8>, input loaded from %A
+; CHECK-LABEL: define <8 x i8> @uqshrn8b(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[TMP1]], i32 1)
+; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i8> [[TMP3]]
+;
+ %tmp1 = load <8 x i16>, ptr %A ; wide input; the assertions above expect its shadow to be loaded from (%A xor 193514046488576)
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1) ; second operand is the constant i32 1; assertions expect %tmp1's shadow (bitcast to i128) compared against 0, branching to __msan_warning_noreturn if non-zero, before this call
+ ret <8 x i8> %tmp3 ; assertions expect zeroinitializer stored to @__msan_retval_tls as the return shadow
+}
+
+define <4 x i16> @uqshrn4h(ptr %A) nounwind sanitize_memory { ; MSan precommit test: uqshrn intrinsic narrowing <4 x i32> -> <4 x i16>, input loaded from %A
+; CHECK-LABEL: define <4 x i16> @uqshrn4h(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[TMP1]], i32 1)
+; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i16> [[TMP3]]
+;
+ %tmp1 = load <4 x i32>, ptr %A ; wide input; the assertions above expect its shadow to be loaded from (%A xor 193514046488576)
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1) ; second operand is the constant i32 1; assertions expect %tmp1's shadow (bitcast to i128) compared against 0, branching to __msan_warning_noreturn if non-zero, before this call
+ ret <4 x i16> %tmp3 ; assertions expect zeroinitializer stored to @__msan_retval_tls as the return shadow
+}
+
+define <2 x i32> @uqshrn2s(ptr %A) nounwind sanitize_memory { ; MSan precommit test: uqshrn intrinsic narrowing <2 x i64> -> <2 x i32>, input loaded from %A
+; CHECK-LABEL: define <2 x i32> @uqshrn2s(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[TMP1]], i32 1)
+; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i32> [[TMP3]]
+;
+ %tmp1 = load <2 x i64>, ptr %A ; wide input; the assertions above expect its shadow to be loaded from (%A xor 193514046488576)
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1) ; second operand is the constant i32 1; assertions expect %tmp1's shadow (bitcast to i128) compared against 0, branching to __msan_warning_noreturn if non-zero, before this call
+ ret <2 x i32> %tmp3 ; assertions expect zeroinitializer stored to @__msan_retval_tls as the return shadow
+}
+
+define <16 x i8> @uqshrn16b(ptr %ret, ptr %A) nounwind sanitize_memory { ; MSan precommit test: uqshrn narrowing <8 x i16> -> <8 x i8>, result placed after an <8 x i8> loaded from %ret
+; CHECK-LABEL: define <16 x i8> @uqshrn16b(
+; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[OUT:%.*]] = load <8 x i8>, ptr [[RET]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x i16> [[_MSLD1]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]]
+; CHECK: 14:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 15:
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> [[TMP1]], i32 1)
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i8> [[_MSLD]], <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <8 x i8> [[OUT]], <8 x i8> [[TMP3]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[TMP4]]
+;
+ %out = load <8 x i8>, ptr %ret ; existing low half; its shadow (loaded from %ret xor 193514046488576) feeds the expected return shadow
+ %tmp1 = load <8 x i16>, ptr %A ; wide input for the narrowing call
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16> %tmp1, i32 1) ; second operand is the constant i32 1; assertions expect %tmp1's shadow compared against 0 (warning branch if non-zero) before this call
+ %tmp4 = shufflevector <8 x i8> %out, <8 x i8> %tmp3, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; mask 0..15 concatenates %out (lanes 0-7) with %tmp3 (lanes 8-15); the expected shadow shuffle uses zeroinitializer in place of %tmp3's shadow
+ ret <16 x i8> %tmp4 ; assertions expect the shuffled shadow, not a constant, stored to @__msan_retval_tls
+}
+
+define <8 x i16> @uqshrn8h(ptr %ret, ptr %A) nounwind sanitize_memory { ; MSan precommit test: uqshrn narrowing <4 x i32> -> <4 x i16>, result placed after a <4 x i16> loaded from %ret
+; CHECK-LABEL: define <8 x i16> @uqshrn8h(
+; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[OUT:%.*]] = load <4 x i16>, ptr [[RET]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> [[_MSLD1]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]]
+; CHECK: 14:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 15:
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> [[TMP1]], i32 1)
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i16> [[_MSLD]], <4 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[OUT]], <4 x i16> [[TMP3]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i16> [[TMP4]]
+;
+ %out = load <4 x i16>, ptr %ret ; existing low half; its shadow (loaded from %ret xor 193514046488576) feeds the expected return shadow
+ %tmp1 = load <4 x i32>, ptr %A ; wide input for the narrowing call
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32> %tmp1, i32 1) ; second operand is the constant i32 1; assertions expect %tmp1's shadow compared against 0 (warning branch if non-zero) before this call
+ %tmp4 = shufflevector <4 x i16> %out, <4 x i16> %tmp3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; mask 0..7 concatenates %out (lanes 0-3) with %tmp3 (lanes 4-7); the expected shadow shuffle uses zeroinitializer in place of %tmp3's shadow
+ ret <8 x i16> %tmp4 ; assertions expect the shuffled shadow, not a constant, stored to @__msan_retval_tls
+}
+
+define <4 x i32> @uqshrn4s(ptr %ret, ptr %A) nounwind sanitize_memory { ; MSan precommit test: uqshrn narrowing <2 x i64> -> <2 x i32>, result placed after a <2 x i32> loaded from %ret
+; CHECK-LABEL: define <4 x i32> @uqshrn4s(
+; CHECK-SAME: ptr [[RET:%.*]], ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[OUT:%.*]] = load <2 x i32>, ptr [[RET]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[RET]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <2 x i64> [[_MSLD1]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF0]]
+; CHECK: 14:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 15:
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> [[TMP1]], i32 1)
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i32> [[_MSLD]], <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[OUT]], <2 x i32> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[TMP4]]
+;
+ %out = load <2 x i32>, ptr %ret ; existing low half; its shadow (loaded from %ret xor 193514046488576) feeds the expected return shadow
+ %tmp1 = load <2 x i64>, ptr %A ; wide input for the narrowing call
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64> %tmp1, i32 1) ; second operand is the constant i32 1; assertions expect %tmp1's shadow compared against 0 (warning branch if non-zero) before this call
+ %tmp4 = shufflevector <2 x i32> %out, <2 x i32> %tmp3, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; mask 0..3 concatenates %out (lanes 0-1) with %tmp3 (lanes 2-3); the expected shadow shuffle uses zeroinitializer in place of %tmp3's shadow
+ ret <4 x i32> %tmp4 ; assertions expect the shuffled shadow, not a constant, stored to @__msan_retval_tls
+}
+
+declare i32 @llvm.aarch64.neon.uqshrn.i32(i64, i32) nounwind readnone ; scalar overload: i64 value and i32 operand in, i32 out; called by uqshrn1s
+declare <8 x i8> @llvm.aarch64.neon.uqshrn.v8i8(<8 x i16>, i32) nounwind readnone ; <8 x i16> -> <8 x i8>; called by uqshrn8b and uqshrn16b
+declare <4 x i16> @llvm.aarch64.neon.uqshrn.v4i16(<4 x i32>, i32) nounwind readnone ; <4 x i32> -> <4 x i16>; called by uqshrn4h and uqshrn8h
+declare <2 x i32> @llvm.aarch64.neon.uqshrn.v2i32(<2 x i64>, i32) nounwind readnone ; <2 x i64> -> <2 x i32>; called by uqshrn2s and uqshrn4s
+
+define <8 x i16> @ushll8h(ptr %A) nounwind sanitize_memory { ; MSan precommit test: zext <8 x i8> -> <8 x i16> followed by shl by 1, written as plain IR with no intrinsic call
+; CHECK-LABEL: define <8 x i16> @ushll8h(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = zext <8 x i8> [[_MSLD]] to <8 x i16>
+; CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i16>
+; CHECK-NEXT: [[TMP7:%.*]] = shl <8 x i16> [[_MSPROP]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i16> [[TMP2]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT: store <8 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i16> [[TMP3]]
+;
+ %tmp1 = load <8 x i8>, ptr %A ; narrow input; the assertions above expect its shadow to be loaded from (%A xor 193514046488576)
+ %tmp2 = zext <8 x i8> %tmp1 to <8 x i16> ; the expected output applies the same zext to the shadow
+ %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> ; the expected output shifts the shadow by the same constant, then ORs it with zeroinitializer (presumably the shift-amount shadow)
+ ret <8 x i16> %tmp3 ; assertions expect the propagated shadow stored to @__msan_retval_tls, with no warning branch on the data
+}
+
+define <4 x i32> @ushll4s(ptr %A) nounwind sanitize_memory { ; MSan precommit test: zext <4 x i16> -> <4 x i32> followed by shl by 1, written as plain IR with no intrinsic call
+; CHECK-LABEL: define <4 x i32> @ushll4s(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = zext <4 x i16> [[_MSLD]] to <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[_MSPROP]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = shl <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+;
+ %tmp1 = load <4 x i16>, ptr %A ; narrow input; the assertions above expect its shadow to be loaded from (%A xor 193514046488576)
+ %tmp2 = zext <4 x i16> %tmp1 to <4 x i32> ; the expected output applies the same zext to the shadow
+ %tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1> ; the expected output shifts the shadow by the same constant, then ORs it with zeroinitializer (presumably the shift-amount shadow)
+ ret <4 x i32> %tmp3 ; assertions expect the propagated shadow stored to @__msan_retval_tls, with no warning branch on the data
+}
+
+define <2 x i64> @ushll2d(ptr %A) nounwind sanitize_memory { ; MSan precommit test: zext <2 x i32> -> <2 x i64> followed by shl by 1, written as plain IR with no intrinsic call
+; CHECK-LABEL: define <2 x i64> @ushll2d(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = zext <2 x i32> [[_MSLD]] to <2 x i64>
+; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i64> [[_MSPROP]], <i64 1, i64 1>
+; CHECK-NEXT: [[TMP8:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], <i64 1, i64 1>
+; CHECK-NEXT: store <2 x i64> [[TMP8]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i64> [[TMP3]]
+;
+ %tmp1 = load <2 x i32>, ptr %A ; narrow input; the assertions above expect its shadow to be loaded from (%A xor 193514046488576)
+ %tmp2 = zext <2 x i32> %tmp1 to <2 x i64> ; the expected output applies the same zext to the shadow
+ %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1> ; the expected output shifts the shadow by the same constant, then ORs it with zeroinitializer (presumably the shift-amount shadow)
+ ret <2 x i64> %tmp3 ; assertions expect the propagated shadow stored to @__msan_retval_tls, with no warning branch on the data
+}
+
+define <8 x i16> @ushll2_8h(ptr %A) nounwind sanitize_memory { ; MSan precommit test: upper 8 lanes of a <16 x i8> load, zext to <8 x i16>, then shl by 1 (plain IR, no intrinsic call)
+; CHECK-LABEL: define <8 x i16> @ushll2_8h(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[LOAD1:%.*]] = load <16 x i8>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[LOAD1]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = zext <8 x i8> [[_MSPROP]] to <8 x i16>
+; CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i16>
+; CHECK-NEXT: [[TMP7:%.*]] = shl <8 x i16> [[_MSPROP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i16> [[TMP2]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT: store <8 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i16> [[TMP3]]
+;
+ %load1 = load <16 x i8>, ptr %A ; full 16-lane input; the assertions above expect its shadow to be loaded from (%A xor 193514046488576)
+ %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; selects lanes 8-15 (upper half); the expected shadow shuffle substitutes an all-ones (-1) vector for the undef operand
+ %tmp2 = zext <8 x i8> %tmp1 to <8 x i16> ; the expected output applies the same zext to the shuffled shadow
+ %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> ; the expected output shifts the shadow by the same constant, then ORs it with zeroinitializer (presumably the shift-amount shadow)
+ ret <8 x i16> %tmp3 ; assertions expect the propagated shadow stored to @__msan_retval_tls, with no warning branch on the data
+}
+
+define <4 x i32> @ushll2_4s(ptr %A) nounwind sanitize_memory { ; MSan precommit test: upper 4 lanes of an <8 x i16> load, zext to <4 x i32>, then shl by 1 (plain IR, no intrinsic call)
+; CHECK-LABEL: define <4 x i32> @ushll2_4s(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[LOAD1:%.*]] = load <8 x i16>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[LOAD1]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = zext <4 x i16> [[_MSPROP]] to <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[_MSPROP1]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = shl <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+;
+ %load1 = load <8 x i16>, ptr %A ; full 8-lane input; the assertions above expect its shadow to be loaded from (%A xor 193514046488576)
+ %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7> ; selects lanes 4-7 (upper half); the expected shadow shuffle substitutes an all-ones (-1) vector for the undef operand
+ %tmp2 = zext <4 x i16> %tmp1 to <4 x i32> ; the expected output applies the same zext to the shuffled shadow
+ %tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1> ; the expected output shifts the shadow by the same constant, then ORs it with zeroinitializer (presumably the shift-amount shadow)
+ ret <4 x i32> %tmp3 ; assertions expect the propagated shadow stored to @__msan_retval_tls, with no warning branch on the data
+}
+
+define <2 x i64> @ushll2_2d(ptr %A) nounwind sanitize_memory { ; widening shift-left by 1 of the upper two i32 lanes loaded from %A, written as plain IR (no intrinsic call)
+; CHECK-LABEL: define <2 x i64> @ushll2_2d(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[LOAD1:%.*]] = load <4 x i32>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[LOAD1]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = zext <2 x i32> [[_MSPROP]] to <2 x i64>
+; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i64> [[_MSPROP1]], <i64 1, i64 1>
+; CHECK-NEXT: [[TMP8:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], <i64 1, i64 1>
+; CHECK-NEXT: store <2 x i64> [[TMP8]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i64> [[TMP3]]
+;
+ %load1 = load <4 x i32>, ptr %A ; full 4-lane source vector
+ %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3> ; keep lanes 2-3 only
+ %tmp2 = zext <2 x i32> %tmp1 to <2 x i64> ; zero-extend each lane i32 -> i64
+ %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1> ; the checks above expect the shadow to go through a matching shufflevector/zext/shl chain
+ ret <2 x i64> %tmp3
+}
+
+declare <16 x i8> @llvm.aarch64.neon.ushl.v16i8(<16 x i8>, <16 x i8>) ; overloads of the llvm.aarch64.neon.ushl intrinsic, one per operand type
+declare <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64>, <2 x i64>)
+declare <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64>, <1 x i64>) ; single-element vector form
+declare i64 @llvm.aarch64.neon.ushl.i64(i64, i64) ; scalar form
+
+define <8 x i16> @neon.ushll8h_constant_shift(ptr %A) nounwind sanitize_memory { ; zext <8 x i8> -> <8 x i16>, then ushl intrinsic with a constant splat-1 shift vector
+; CHECK-LABEL: define <8 x i16> @neon.ushll8h_constant_shift(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = zext <8 x i8> [[_MSLD]] to <8 x i16>
+; CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i16>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i16> [[_MSPROP]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[TMP2]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+; CHECK-NEXT: store <8 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i16> [[TMP3]]
+;
+ %tmp1 = load <8 x i8>, ptr %A
+ %tmp2 = zext <8 x i8> %tmp1 to <8 x i16> ; widen each lane i8 -> i16 before the shift
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %tmp2, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>) ; the checks above expect the result shadow to be the zext'd load shadow OR'ed with zeroinitializer, with no shift applied to it
+ ret <8 x i16> %tmp3
+}
+
+define <8 x i16> @neon.ushl8h_no_constant_shift(ptr %A) nounwind sanitize_memory { ; ushl intrinsic whose shift operand is a loaded value (not a constant): the zext'd vector is used as both operands
+; CHECK-LABEL: define <8 x i16> @neon.ushl8h_no_constant_shift(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = zext <8 x i8> [[_MSLD]] to <8 x i16>
+; CHECK-NEXT: [[TMP2:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i16>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i16> [[_MSPROP]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[TMP2]], <8 x i16> [[TMP2]])
+; CHECK-NEXT: store <8 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i16> [[TMP3]]
+;
+ %tmp1 = load <8 x i8>, ptr %A
+ %tmp2 = zext <8 x i8> %tmp1 to <8 x i16> ; widen each lane i8 -> i16
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %tmp2, <8 x i16> %tmp2) ; value and shift amount are the same vector; the checks above expect the shadow to be OR'ed with itself
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @neon.ushl8h_constant_shift_extend_not_2x(ptr %A) nounwind sanitize_memory { ; ushl by constant splat 1 where the zext goes i8 -> i32, i.e. widens lanes by 4x rather than 2x
+; CHECK-LABEL: define <4 x i32> @neon.ushl8h_constant_shift_extend_not_2x(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[A]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i8>, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[_MSPROP:%.*]] = zext <4 x i8> [[_MSLD]] to <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i8> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[_MSPROP]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[TMP2]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+;
+ %tmp1 = load <4 x i8>, ptr %A
+ %tmp2 = zext <4 x i8> %tmp1 to <4 x i32> ; i8 -> i32 extension (the "not 2x" in the test name)
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>) ; the checks above expect the result shadow to be the zext'd load shadow OR'ed with zeroinitializer
+ ret <4 x i32> %tmp3
+}
+
+define <8 x i16> @neon.ushl8_noext_constant_shift(ptr %A) nounwind sanitize_memory { ; ushl by constant splat 1 applied directly to the loaded <8 x i16>, with no extension step
+; CHECK-LABEL: define <8 x i16> @neon.ushl8_noext_constant_shift(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> [[TMP1]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i16> [[TMP3]]
+;
+ %tmp1 = load <8 x i16>, ptr %A
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.ushl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>) ; the checks above expect the result shadow to be the load shadow OR'ed with zeroinitializer
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @neon.ushll4s_constant_shift(ptr %A) nounwind sanitize_memory { ; zext <4 x i16> -> <4 x i32>, then ushl intrinsic with a constant splat-1 shift vector
+; CHECK-LABEL: define <4 x i32> @neon.ushll4s_constant_shift(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = zext <4 x i16> [[_MSLD]] to <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[_MSPROP]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[TMP2]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+;
+ %tmp1 = load <4 x i16>, ptr %A
+ %tmp2 = zext <4 x i16> %tmp1 to <4 x i32> ; widen each lane i16 -> i32 before the shift
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>) ; the checks above expect the result shadow to be the zext'd load shadow OR'ed with zeroinitializer
+ ret <4 x i32> %tmp3
+}
+
+; FIXME: unnecessary ushll.4s v0, v0, #0? (NOTE(review): this refers to generated assembly and was presumably carried over from the CodeGen test this file was forked from; the checks below only cover msan instrumentation.)
+define <4 x i32> @neon.ushll4s_neg_constant_shift(ptr %A) nounwind sanitize_memory { ; zext <4 x i16> -> <4 x i32>, then ushl intrinsic with a constant splat of -1 as the shift vector
+; CHECK-LABEL: define <4 x i32> @neon.ushll4s_neg_constant_shift(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = zext <4 x i16> [[_MSLD]] to <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[_MSPROP]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> [[TMP2]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
+; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+;
+ %tmp1 = load <4 x i16>, ptr %A
+ %tmp2 = zext <4 x i16> %tmp1 to <4 x i32> ; widen each lane i16 -> i32 before the shift
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>) ; negative constant shift amount; the expected shadow is the same OR-with-zeroinitializer form as for the positive-shift variant
+ ret <4 x i32> %tmp3
+}
+
+; FIXME: should be constant folded. (NOTE(review): presumably carried over from the CodeGen test this file was forked from; the msan output checked below keeps the intrinsic call unfolded.)
+define <4 x i32> @neon.ushll4s_constant_fold() nounwind sanitize_memory { ; ushl intrinsic with both operands constant; takes no arguments and performs no loads
+; CHECK-LABEL: define <4 x i32> @neon.ushll4s_constant_fold(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+;
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>) ; the checks above expect a zeroinitializer return shadow
+ ret <4 x i32> %tmp3
+}
+
+define <2 x i64> @neon.ushll2d_constant_shift(ptr %A) nounwind sanitize_memory { ; zext <2 x i32> -> <2 x i64>, then ushl intrinsic with a constant splat-1 shift vector
+; CHECK-LABEL: define <2 x i64> @neon.ushll2d_constant_shift(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = zext <2 x i32> [[_MSLD]] to <2 x i64>
+; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[_MSPROP]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> [[TMP2]], <2 x i64> <i64 1, i64 1>)
+; CHECK-NEXT: store <2 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i64> [[TMP3]]
+;
+ %tmp1 = load <2 x i32>, ptr %A
+ %tmp2 = zext <2 x i32> %tmp1 to <2 x i64> ; widen each lane i32 -> i64 before the shift
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> %tmp2, <2 x i64> <i64 1, i64 1>) ; the checks above expect the result shadow to be the zext'd load shadow OR'ed with zeroinitializer
+ ret <2 x i64> %tmp3
+}
+
+define <1 x i64> @neon.ushl_vscalar_constant_shift(ptr %A) nounwind sanitize_memory { ; single-element vector form: zext <1 x i32> -> <1 x i64>, then ushl.v1i64 with constant shift 1
+; CHECK-LABEL: define <1 x i64> @neon.ushl_vscalar_constant_shift(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i32>, ptr [[A]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i32>, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[_MSPROP:%.*]] = zext <1 x i32> [[_MSLD]] to <1 x i64>
+; CHECK-NEXT: [[TMP2:%.*]] = zext <1 x i32> [[TMP1]] to <1 x i64>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <1 x i64> [[_MSPROP]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> [[TMP2]], <1 x i64> <i64 1>)
+; CHECK-NEXT: store <1 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <1 x i64> [[TMP3]]
+;
+ %tmp1 = load <1 x i32>, ptr %A
+ %tmp2 = zext <1 x i32> %tmp1 to <1 x i64> ; widen the single lane i32 -> i64
+ %tmp3 = call <1 x i64> @llvm.aarch64.neon.ushl.v1i64(<1 x i64> %tmp2, <1 x i64> <i64 1>) ; the checks above expect the result shadow to be the zext'd load shadow OR'ed with zeroinitializer
+ ret <1 x i64> %tmp3
+}
+
+define i64 @neon.ushl_scalar_constant_shift(ptr %A) nounwind sanitize_memory { ; scalar form: zext i32 -> i64, then ushl.i64 with constant shift 1
+; CHECK-LABEL: define i64 @neon.ushl_scalar_constant_shift(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[_MSPROP:%.*]] = zext i32 [[_MSLD]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or i64 [[_MSPROP]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.ushl.i64(i64 [[TMP2]], i64 1)
+; CHECK-NEXT: store i64 [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP3]]
+;
+ %tmp1 = load i32, ptr %A
+ %tmp2 = zext i32 %tmp1 to i64 ; widen the scalar i32 -> i64
+ %tmp3 = call i64 @llvm.aarch64.neon.ushl.i64(i64 %tmp2, i64 1) ; the checks above expect the result shadow to be the zext'd load shadow OR'ed with 0
+ ret i64 %tmp3
+}
+
+define <8 x i16> @sshll8h(ptr %A) nounwind sanitize_memory { ; sign-extend <8 x i8> -> <8 x i16>, then plain IR shl by 1 (no intrinsic call)
+; CHECK-LABEL: define <8 x i16> @sshll8h(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = sext <8 x i8> [[_MSLD]] to <8 x i16>
+; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i8> [[TMP1]] to <8 x i16>
+; CHECK-NEXT: [[TMP7:%.*]] = shl <8 x i16> [[_MSPROP]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i16> [[TMP2]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT: store <8 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i16> [[TMP3]]
+;
+ %tmp1 = load <8 x i8>, ptr %A
+ %tmp2 = sext <8 x i8> %tmp1 to <8 x i16> ; sign-extend each lane i8 -> i16
+ %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> ; the checks above expect the shadow to be sign-extended and shifted by the same amount
+ ret <8 x i16> %tmp3
+}
+
+define <2 x i64> @sshll2d(ptr %A) nounwind sanitize_memory { ; sign-extend <2 x i32> -> <2 x i64>, then plain IR shl by 1 (no intrinsic call)
+; CHECK-LABEL: define <2 x i64> @sshll2d(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = sext <2 x i32> [[_MSLD]] to <2 x i64>
+; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i64> [[_MSPROP]], <i64 1, i64 1>
+; CHECK-NEXT: [[TMP8:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], <i64 1, i64 1>
+; CHECK-NEXT: store <2 x i64> [[TMP8]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i64> [[TMP3]]
+;
+ %tmp1 = load <2 x i32>, ptr %A
+ %tmp2 = sext <2 x i32> %tmp1 to <2 x i64> ; sign-extend each lane i32 -> i64
+ %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1> ; the checks above expect the shadow to be sign-extended and shifted by the same amount
+ ret <2 x i64> %tmp3
+}
+
+declare <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8>, <16 x i8>) ; overloads of the llvm.aarch64.neon.sshl intrinsic, one per operand type
+declare <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64>, <2 x i64>)
+declare <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64>, <1 x i64>) ; single-element vector form
+declare i64 @llvm.aarch64.neon.sshl.i64(i64, i64) ; scalar form
+
+define <16 x i8> @neon.sshl16b_constant_shift(ptr %A) nounwind sanitize_memory { ; sshl intrinsic on a loaded <16 x i8> with a constant splat-1 shift vector
+; CHECK-LABEL: define <16 x i8> @neon.sshl16b_constant_shift(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[TMP2]]
+;
+ %tmp1 = load <16 x i8>, ptr %A
+ %tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>) ; the checks above expect the result shadow to be the load shadow OR'ed with zeroinitializer
+ ret <16 x i8> %tmp2
+}
+
+define <16 x i8> @neon.sshl16b_non_splat_constant_shift(ptr %A) nounwind sanitize_memory { ; sshl intrinsic with a constant shift vector that is not a splat: lane 0 is 6, every other lane is 1
+; CHECK-LABEL: define <16 x i8> @neon.sshl16b_non_splat_constant_shift(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> <i8 6, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[TMP2]]
+;
+ %tmp1 = load <16 x i8>, ptr %A
+ %tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 6, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>) ; per-lane shift amounts differ; the expected shadow is still the load shadow OR'ed with zeroinitializer
+ ret <16 x i8> %tmp2
+}
+
+define <16 x i8> @neon.sshl16b_neg_constant_shift(ptr %A) nounwind sanitize_memory { ; sshl intrinsic on a loaded <16 x i8> with a constant splat of -2 as the shift vector
+; CHECK-LABEL: define <16 x i8> @neon.sshl16b_neg_constant_shift(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> <i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2>)
+; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[TMP2]]
+;
+ %tmp1 = load <16 x i8>, ptr %A
+ %tmp2 = call <16 x i8> @llvm.aarch64.neon.sshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2, i8 -2>) ; negative constant shift amount; the expected shadow is still the load shadow OR'ed with zeroinitializer
+ ret <16 x i8> %tmp2
+}
+
+define <8 x i16> @neon.sshll8h_constant_shift(ptr %A) nounwind sanitize_memory { ; sext <8 x i8> -> <8 x i16>, then sshl intrinsic with a constant splat-1 shift vector
+; CHECK-LABEL: define <8 x i16> @neon.sshll8h_constant_shift(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = sext <8 x i8> [[_MSLD]] to <8 x i16>
+; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i8> [[TMP1]] to <8 x i16>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i16> [[_MSPROP]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> [[TMP2]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+; CHECK-NEXT: store <8 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i16> [[TMP3]]
+;
+ %tmp1 = load <8 x i8>, ptr %A
+ %tmp2 = sext <8 x i8> %tmp1 to <8 x i16> ; sign-extend each lane i8 -> i16 before the shift
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.sshl.v8i16(<8 x i16> %tmp2, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>) ; the checks above expect the result shadow to be the sext'd load shadow OR'ed with zeroinitializer
+ ret <8 x i16> %tmp3
+}
+
+define <4 x i32> @neon.sshl4s_wrong_ext_constant_shift(ptr %A) nounwind sanitize_memory { ; sshl by constant splat 1 where the sext goes i8 -> i32, i.e. widens lanes by 4x rather than 2x
+; CHECK-LABEL: define <4 x i32> @neon.sshl4s_wrong_ext_constant_shift(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[A]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i8>, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[_MSPROP:%.*]] = sext <4 x i8> [[_MSLD]] to <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i8> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[_MSPROP]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[TMP2]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+;
+ %tmp1 = load <4 x i8>, ptr %A
+ %tmp2 = sext <4 x i8> %tmp1 to <4 x i32> ; i8 -> i32 extension (presumably the "wrong_ext" in the test name)
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>) ; the checks above expect the result shadow to be the sext'd load shadow OR'ed with zeroinitializer
+ ret <4 x i32> %tmp3
+}
+
+define <4 x i32> @neon.sshll4s_constant_shift(ptr %A) nounwind sanitize_memory { ; sext <4 x i16> -> <4 x i32>, then sshl intrinsic with a constant splat-1 shift vector
+; CHECK-LABEL: define <4 x i32> @neon.sshll4s_constant_shift(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = sext <4 x i16> [[_MSLD]] to <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[_MSPROP]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[TMP2]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+;
+ %tmp1 = load <4 x i16>, ptr %A
+ %tmp2 = sext <4 x i16> %tmp1 to <4 x i32> ; sign-extend each lane i16 -> i32 before the shift
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>) ; the checks above expect the result shadow to be the sext'd load shadow OR'ed with zeroinitializer
+ ret <4 x i32> %tmp3
+}
+
+define <4 x i32> @neon.sshll4s_neg_constant_shift(ptr %A) nounwind sanitize_memory { ; sext <4 x i16> -> <4 x i32>, then sshl intrinsic with a constant splat of -1 as the shift vector
+; CHECK-LABEL: define <4 x i32> @neon.sshll4s_neg_constant_shift(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = sext <4 x i16> [[_MSLD]] to <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <4 x i32> [[_MSPROP]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[TMP2]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
+; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+;
+ %tmp1 = load <4 x i16>, ptr %A
+ %tmp2 = sext <4 x i16> %tmp1 to <4 x i32> ; sign-extend each lane i16 -> i32 before the shift
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>) ; negative constant shift amount; the expected shadow is the same OR-with-zeroinitializer form as for the positive-shift variant
+ ret <4 x i32> %tmp3
+}
+
+; FIXME: the sshl call below has only constant operands and should be constant folded.
+; sshl.v4i32 with two constant operands and no function arguments. The
+; expected output loads no shadow, keeps the intrinsic call as written, and
+; stores an all-zero return shadow.
+define <4 x i32> @neon.sshl4s_constant_fold() nounwind sanitize_memory {
+; CHECK-LABEL: define <4 x i32> @neon.sshl4s_constant_fold(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 2, i32 2, i32 2, i32 2>)
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+;
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> <i32 0, i32 1, i32 2, i32 3>, <4 x i32> <i32 2, i32 2, i32 2, i32 2>)
+ ret <4 x i32> %tmp3
+}
+
+; sshl.v4i32 applied directly to a loaded <4 x i32> with a constant splat shift
+; of 1. In the expected output the loaded shadow is ORed with the all-zero
+; shadow of the constant shift operand and stored as the return shadow; the
+; shift is not applied to the shadow.
+define <4 x i32> @neon.sshl4s_no_fold(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <4 x i32> @neon.sshl4s_no_fold(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+;
+ %tmp1 = load <4 x i32>, ptr %A
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+ ret <4 x i32> %tmp3
+}
+
+; sshl.v2i64 applied to a sign-extended <2 x i32> load with a constant splat
+; shift of 1. In the expected output the loaded shadow is sign-extended, ORed
+; with the all-zero shadow of the constant shift operand, and stored as the
+; return shadow; the shift is not applied to the shadow.
+define <2 x i64> @neon.sshll2d_constant_shift(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <2 x i64> @neon.sshll2d_constant_shift(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = sext <2 x i32> [[_MSLD]] to <2 x i64>
+; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[_MSPROP]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[TMP2]], <2 x i64> <i64 1, i64 1>)
+; CHECK-NEXT: store <2 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i64> [[TMP3]]
+;
+ %tmp1 = load <2 x i32>, ptr %A
+ %tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %tmp2, <2 x i64> <i64 1, i64 1>)
+ ret <2 x i64> %tmp3
+}
+
+; sshl.v1i64 applied to a widened <1 x i32> load with a constant shift of 1.
+; Note that the input is widened with zext (not sext), and the expected output
+; widens the loaded shadow with zext as well. The widened shadow is ORed with
+; the all-zero shadow of the constant shift operand and stored as the return
+; shadow; the shift is not applied to the shadow.
+define <1 x i64> @neon.sshll_vscalar_constant_shift(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <1 x i64> @neon.sshll_vscalar_constant_shift(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i32>, ptr [[A]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i32>, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[_MSPROP:%.*]] = zext <1 x i32> [[_MSLD]] to <1 x i64>
+; CHECK-NEXT: [[TMP2:%.*]] = zext <1 x i32> [[TMP1]] to <1 x i64>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <1 x i64> [[_MSPROP]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> [[TMP2]], <1 x i64> <i64 1>)
+; CHECK-NEXT: store <1 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <1 x i64> [[TMP3]]
+;
+ %tmp1 = load <1 x i32>, ptr %A
+ %tmp2 = zext <1 x i32> %tmp1 to <1 x i64>
+ %tmp3 = call <1 x i64> @llvm.aarch64.neon.sshl.v1i64(<1 x i64> %tmp2, <1 x i64> <i64 1>)
+ ret <1 x i64> %tmp3
+}
+
+; Scalar sshl.i64 applied to a zero-extended i32 load with a constant shift of
+; 1. In the expected output the loaded shadow is zero-extended, ORed with the
+; zero shadow of the constant shift operand, and stored as the return shadow;
+; the shift is not applied to the shadow.
+define i64 @neon.sshll_scalar_constant_shift(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define i64 @neon.sshll_scalar_constant_shift(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[_MSPROP:%.*]] = zext i32 [[_MSLD]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or i64 [[_MSPROP]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.sshl.i64(i64 [[TMP2]], i64 1)
+; CHECK-NEXT: store i64 [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP3]]
+;
+ %tmp1 = load i32, ptr %A
+ %tmp2 = zext i32 %tmp1 to i64
+ %tmp3 = call i64 @llvm.aarch64.neon.sshl.i64(i64 %tmp2, i64 1)
+ ret i64 %tmp3
+}
+
+; Scalar sshl.i64 applied to a zero-extended i32 load with a constant shift of
+; -1. The expected shadow computation is the same as for a shift of 1: the
+; loaded shadow is zero-extended, ORed with the zero shadow of the constant
+; shift operand, and stored as the return shadow without being shifted.
+define i64 @neon.sshll_scalar_constant_shift_m1(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define i64 @neon.sshll_scalar_constant_shift_m1(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[A]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[_MSPROP:%.*]] = zext i32 [[_MSLD]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or i64 [[_MSPROP]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.sshl.i64(i64 [[TMP2]], i64 -1)
+; CHECK-NEXT: store i64 [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP3]]
+;
+ %tmp1 = load i32, ptr %A
+ %tmp2 = zext i32 %tmp1 to i64
+ %tmp3 = call i64 @llvm.aarch64.neon.sshl.i64(i64 %tmp2, i64 -1)
+ ret i64 %tmp3
+}
+
+; FIXME: the sshl call below has only constant operands and should be constant folded.
+; sshl.v2i64 with two constant operands and no function arguments. The
+; expected output loads no shadow, keeps the intrinsic call as written, and
+; stores an all-zero return shadow.
+define <2 x i64> @neon.sshl2d_constant_fold() nounwind sanitize_memory {
+; CHECK-LABEL: define <2 x i64> @neon.sshl2d_constant_fold(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> <i64 99, i64 1000>, <2 x i64> <i64 1, i64 1>)
+; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i64> [[TMP3]]
+;
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> <i64 99, i64 1000>, <2 x i64> <i64 1, i64 1>)
+ ret <2 x i64> %tmp3
+}
+
+; sshl.v2i64 applied directly to a loaded <2 x i64> with a constant splat shift
+; of 2. In the expected output the loaded shadow is ORed with the all-zero
+; shadow of the constant shift operand and stored as the return shadow; the
+; shift is not applied to the shadow.
+define <2 x i64> @neon.sshl2d_no_fold(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <2 x i64> @neon.sshl2d_no_fold(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[TMP2]], <2 x i64> <i64 2, i64 2>)
+; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i64> [[TMP3]]
+;
+ %tmp2 = load <2 x i64>, ptr %A
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %tmp2, <2 x i64> <i64 2, i64 2>)
+ ret <2 x i64> %tmp3
+}
+
+; Plain-IR widening shift with no intrinsic: lanes 8-15 of a loaded <16 x i8>
+; are extracted with shufflevector, sign-extended to <8 x i16>, and shifted
+; left by 1 with shl. In the expected output the shadow follows the same steps
+; (shuffle with an all-ones second operand in place of undef, sext, shl by the
+; same constant) before being stored as the return shadow, so here the shift
+; is applied to the shadow.
+define <8 x i16> @sshll2_8h(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <8 x i16> @sshll2_8h(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[LOAD1:%.*]] = load <16 x i8>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <16 x i8> [[_MSLD]], <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[LOAD1]], <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = sext <8 x i8> [[_MSPROP]] to <8 x i16>
+; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i8> [[TMP1]] to <8 x i16>
+; CHECK-NEXT: [[TMP7:%.*]] = shl <8 x i16> [[_MSPROP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i16> [[TMP7]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i16> [[TMP2]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT: store <8 x i16> [[TMP8]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i16> [[TMP3]]
+;
+ %load1 = load <16 x i8>, ptr %A
+ %tmp1 = shufflevector <16 x i8> %load1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %tmp2 = sext <8 x i8> %tmp1 to <8 x i16>
+ %tmp3 = shl <8 x i16> %tmp2, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ ret <8 x i16> %tmp3
+}
+
+; Plain-IR widening shift with no intrinsic: lanes 4-7 of a loaded <8 x i16>
+; are extracted with shufflevector, sign-extended to <4 x i32>, and shifted
+; left by 1 with shl. In the expected output the shadow follows the same steps
+; (shuffle with an all-ones second operand in place of undef, sext, shl by the
+; same constant) before being stored as the return shadow.
+define <4 x i32> @sshll2_4s(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <4 x i32> @sshll2_4s(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[LOAD1:%.*]] = load <8 x i16>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i16> [[_MSLD]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[LOAD1]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = sext <4 x i16> [[_MSPROP]] to <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[_MSPROP1]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i32> [[TMP7]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = shl <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+;
+ %load1 = load <8 x i16>, ptr %A
+ %tmp1 = shufflevector <8 x i16> %load1, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %tmp2 = sext <4 x i16> %tmp1 to <4 x i32>
+ %tmp3 = shl <4 x i32> %tmp2, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %tmp3
+}
+
+; Plain-IR widening shift with no intrinsic: lanes 2-3 of a loaded <4 x i32>
+; are extracted with shufflevector, sign-extended to <2 x i64>, and shifted
+; left by 1 with shl. In the expected output the shadow follows the same steps
+; (shuffle with an all-ones second operand in place of undef, sext, shl by the
+; same constant) before being stored as the return shadow.
+define <2 x i64> @sshll2_2d(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <2 x i64> @sshll2_2d(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[LOAD1:%.*]] = load <4 x i32>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[_MSLD]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[LOAD1]], <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = sext <2 x i32> [[_MSPROP]] to <2 x i64>
+; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = shl <2 x i64> [[_MSPROP1]], <i64 1, i64 1>
+; CHECK-NEXT: [[TMP8:%.*]] = or <2 x i64> [[TMP7]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[TMP2]], <i64 1, i64 1>
+; CHECK-NEXT: store <2 x i64> [[TMP8]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i64> [[TMP3]]
+;
+ %load1 = load <4 x i32>, ptr %A
+ %tmp1 = shufflevector <4 x i32> %load1, <4 x i32> undef, <2 x i32> <i32 2, i32 3>
+ %tmp2 = sext <2 x i32> %tmp1 to <2 x i64>
+ %tmp3 = shl <2 x i64> %tmp2, <i64 1, i64 1>
+ ret <2 x i64> %tmp3
+}
+
+; sqshl.v8i8 applied to a loaded <8 x i8> with a constant splat shift of 1. In
+; the expected output the loaded shadow is ORed with the all-zero shadow of the
+; constant shift operand and stored as the return shadow; the shift is not
+; applied to the shadow.
+define <8 x i8> @sqshli8b(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <8 x i8> @sqshli8b(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i8> [[TMP3]]
+;
+ %tmp1 = load <8 x i8>, ptr %A
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+ ret <8 x i8> %tmp3
+}
+
+; sqshl.v4i16 applied to a loaded <4 x i16> with a constant splat shift of 1.
+; In the expected output the loaded shadow is ORed with the all-zero shadow of
+; the constant shift operand and stored as the return shadow; the shift is not
+; applied to the shadow.
+define <4 x i16> @sqshli4h(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <4 x i16> @sqshli4h(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
+; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i16> [[TMP3]]
+;
+ %tmp1 = load <4 x i16>, ptr %A
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
+ ret <4 x i16> %tmp3
+}
+
+; sqshl.v2i32 applied to a loaded <2 x i32> with a constant splat shift of 1.
+; In the expected output the loaded shadow is ORed with the all-zero shadow of
+; the constant shift operand and stored as the return shadow; the shift is not
+; applied to the shadow.
+define <2 x i32> @sqshli2s(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <2 x i32> @sqshli2s(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>)
+; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i32> [[TMP3]]
+;
+ %tmp1 = load <2 x i32>, ptr %A
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
+ ret <2 x i32> %tmp3
+}
+
+; sqshl.v16i8 applied to a loaded <16 x i8> with a constant splat shift of 1.
+; In the expected output the loaded shadow is ORed with the all-zero shadow of
+; the constant shift operand and stored as the return shadow; the shift is not
+; applied to the shadow.
+define <16 x i8> @sqshli16b(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <16 x i8> @sqshli16b(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[TMP3]]
+;
+ %tmp1 = load <16 x i8>, ptr %A
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+ ret <16 x i8> %tmp3
+}
+
+; sqshl.v8i16 applied to a loaded <8 x i16> with a constant splat shift of 1.
+; In the expected output the loaded shadow is ORed with the all-zero shadow of
+; the constant shift operand and stored as the return shadow; the shift is not
+; applied to the shadow.
+define <8 x i16> @sqshli8h(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <8 x i16> @sqshli8h(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i16> [[TMP3]]
+;
+ %tmp1 = load <8 x i16>, ptr %A
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+ ret <8 x i16> %tmp3
+}
+
+; sqshl.v4i32 applied to a loaded <4 x i32> with a constant splat shift of 1.
+; In the expected output the loaded shadow is ORed with the all-zero shadow of
+; the constant shift operand and stored as the return shadow; the shift is not
+; applied to the shadow.
+define <4 x i32> @sqshli4s(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <4 x i32> @sqshli4s(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+;
+ %tmp1 = load <4 x i32>, ptr %A
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+ ret <4 x i32> %tmp3
+}
+
+; sqshl.v2i64 applied to a loaded <2 x i64> with a constant splat shift of 1.
+; In the expected output the loaded shadow is ORed with the all-zero shadow of
+; the constant shift operand and stored as the return shadow; the shift is not
+; applied to the shadow.
+define <2 x i64> @sqshli2d(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <2 x i64> @sqshli2d(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> <i64 1, i64 1>)
+; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i64> [[TMP3]]
+;
+ %tmp1 = load <2 x i64>, ptr %A
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
+ ret <2 x i64> %tmp3
+}
+
+; uqshl.v8i8 applied to a loaded <8 x i8> with a constant splat shift of 1. In
+; the expected output the loaded shadow is ORed with the all-zero shadow of the
+; constant shift operand and stored as the return shadow; the shift is not
+; applied to the shadow.
+define <8 x i8> @uqshli8b(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <8 x i8> @uqshli8b(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i8> [[TMP3]]
+;
+ %tmp1 = load <8 x i8>, ptr %A
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+ ret <8 x i8> %tmp3
+}
+
+; uqshl.v8i8 applied to a loaded <8 x i8> with a constant splat shift of 8,
+; i.e. a shift amount equal to the i8 element width. The expected shadow
+; computation is the same as for a shift of 1: the loaded shadow is ORed with
+; the all-zero shadow of the constant shift operand and stored as the return
+; shadow without being shifted.
+define <8 x i8> @uqshli8b_1(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <8 x i8> @uqshli8b_1(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>)
+; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i8> [[TMP3]]
+;
+ %tmp1 = load <8 x i8>, ptr %A
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>)
+ ret <8 x i8> %tmp3
+}
+
+; uqshl.v4i16 applied to a loaded <4 x i16> with a constant splat shift of 1.
+; In the expected output the loaded shadow is ORed with the all-zero shadow of
+; the constant shift operand and stored as the return shadow; the shift is not
+; applied to the shadow.
+define <4 x i16> @uqshli4h(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <4 x i16> @uqshli4h(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
+; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i16> [[TMP3]]
+;
+ %tmp1 = load <4 x i16>, ptr %A
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
+ ret <4 x i16> %tmp3
+}
+
+; uqshl.v2i32 applied to a loaded <2 x i32> with a constant splat shift of 1.
+; In the expected output the loaded shadow is ORed with the all-zero shadow of
+; the constant shift operand and stored as the return shadow; the shift is not
+; applied to the shadow.
+define <2 x i32> @uqshli2s(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <2 x i32> @uqshli2s(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP6]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>)
+; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i32> [[TMP3]]
+;
+ %tmp1 = load <2 x i32>, ptr %A
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 1, i32 1>)
+ ret <2 x i32> %tmp3
+}
+
+; uqshl.v16i8 applied to a loaded <16 x i8> with a constant splat shift of 1.
+; In the expected output the loaded shadow is ORed with the all-zero shadow of
+; the constant shift operand and stored as the return shadow; the shift is not
+; applied to the shadow.
+define <16 x i8> @uqshli16b(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <16 x i8> @uqshli16b(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[TMP3]]
+;
+ %tmp1 = load <16 x i8>, ptr %A
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+ ret <16 x i8> %tmp3
+}
+
+; uqshli8h: loads an <8 x i16> from %A and passes it to
+; @llvm.aarch64.neon.uqshl.v8i16 with a constant second operand (i16 1 in every
+; lane), returning the intrinsic's result.
+; The expected output below records the current MSan instrumentation: the
+; shadow of the pointer argument is tested (with a warning call on the nonzero
+; path) before the load, and the shadow stored to @__msan_retval_tls is the
+; shadow loaded for %A's value or'ed with zeroinitializer.
+define <8 x i16> @uqshli8h(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <8 x i16> @uqshli8h(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i16> [[TMP3]]
+;
+ %tmp1 = load <8 x i16>, ptr %A
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+ ret <8 x i16> %tmp3
+}
+
+; uqshli4s: loads a <4 x i32> from %A and passes it to
+; @llvm.aarch64.neon.uqshl.v4i32 with a constant second operand (i32 1 in every
+; lane), returning the intrinsic's result.
+; The expected output below records the current MSan instrumentation: the
+; shadow of the pointer argument is tested (with a warning call on the nonzero
+; path) before the load, and the shadow stored to @__msan_retval_tls is the
+; shadow loaded for %A's value or'ed with zeroinitializer.
+define <4 x i32> @uqshli4s(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <4 x i32> @uqshli4s(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+;
+ %tmp1 = load <4 x i32>, ptr %A
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+ ret <4 x i32> %tmp3
+}
+
+; uqshli2d: loads a <2 x i64> from %A and passes it to
+; @llvm.aarch64.neon.uqshl.v2i64 with a constant second operand (i64 1 in both
+; lanes), returning the intrinsic's result.
+; The expected output below records the current MSan instrumentation: the
+; shadow of the pointer argument is tested (with a warning call on the nonzero
+; path) before the load, and the shadow stored to @__msan_retval_tls is the
+; shadow loaded for %A's value or'ed with zeroinitializer.
+define <2 x i64> @uqshli2d(ptr %A) nounwind sanitize_memory {
+; CHECK-LABEL: define <2 x i64> @uqshli2d(
+; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> <i64 1, i64 1>)
+; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i64> [[TMP3]]
+;
+ %tmp1 = load <2 x i64>, ptr %A
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 1, i64 1>)
+ ret <2 x i64> %tmp3
+}
+
+; ursra8b: loads <8 x i8> values from %A and %B, applies
+; @llvm.aarch64.neon.urshl.v8i8 to the %A value with a constant second operand
+; (i8 -1 in every lane), and returns the sum of that result and the %B value.
+; The expected output below records the current MSan instrumentation: each
+; pointer argument's shadow is tested (with a warning call on the nonzero path)
+; before its load, and the shadow stored to @__msan_retval_tls is
+; (shadow loaded for %A's value | zeroinitializer) | shadow loaded for %B's value.
+define <8 x i8> @ursra8b(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <8 x i8> @ursra8b(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[_MSPROP2:%.*]] = or <8 x i8> [[_MSPROP]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <8 x i8> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <8 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i8> [[TMP5]]
+;
+ %tmp1 = load <8 x i8>, ptr %A
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.urshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %tmp4 = load <8 x i8>, ptr %B
+ %tmp5 = add <8 x i8> %tmp3, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+; ursra4h: loads <4 x i16> values from %A and %B, applies
+; @llvm.aarch64.neon.urshl.v4i16 to the %A value with a constant second operand
+; (i16 -1 in every lane), and returns the sum of that result and the %B value.
+; The expected output below records the current MSan instrumentation: each
+; pointer argument's shadow is tested (with a warning call on the nonzero path)
+; before its load, and the shadow stored to @__msan_retval_tls is
+; (shadow loaded for %A's value | zeroinitializer) | shadow loaded for %B's value.
+define <4 x i16> @ursra4h(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <4 x i16> @ursra4h(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i16>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[_MSPROP2:%.*]] = or <4 x i16> [[_MSPROP]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i16> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <4 x i16> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i16> [[TMP5]]
+;
+ %tmp1 = load <4 x i16>, ptr %A
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.urshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
+ %tmp4 = load <4 x i16>, ptr %B
+ %tmp5 = add <4 x i16> %tmp3, %tmp4
+ ret <4 x i16> %tmp5
+}
+
+; ursra2s: loads <2 x i32> values from %A and %B, applies
+; @llvm.aarch64.neon.urshl.v2i32 to the %A value with a constant second operand
+; (i32 -1 in both lanes), and returns the sum of that result and the %B value.
+; The expected output below records the current MSan instrumentation: each
+; pointer argument's shadow is tested (with a warning call on the nonzero path)
+; before its load, and the shadow stored to @__msan_retval_tls is
+; (shadow loaded for %A's value | zeroinitializer) | shadow loaded for %B's value.
+define <2 x i32> @ursra2s(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <2 x i32> @ursra2s(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> <i32 -1, i32 -1>)
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[_MSPROP2:%.*]] = or <2 x i32> [[_MSPROP]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i32> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <2 x i32> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i32> [[TMP5]]
+;
+ %tmp1 = load <2 x i32>, ptr %A
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.urshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
+ %tmp4 = load <2 x i32>, ptr %B
+ %tmp5 = add <2 x i32> %tmp3, %tmp4
+ ret <2 x i32> %tmp5
+}
+
+; ursra16b: loads <16 x i8> values from %A and %B, applies
+; @llvm.aarch64.neon.urshl.v16i8 to the %A value with a constant second operand
+; (i8 -1 in every lane), and returns the sum of that result and the %B value.
+; The expected output below records the current MSan instrumentation: each
+; pointer argument's shadow is tested (with a warning call on the nonzero path)
+; before its load, and the shadow stored to @__msan_retval_tls is
+; (shadow loaded for %A's value | zeroinitializer) | shadow loaded for %B's value.
+define <16 x i8> @ursra16b(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <16 x i8> @ursra16b(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i8> [[_MSPROP]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <16 x i8> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <16 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[TMP5]]
+;
+ %tmp1 = load <16 x i8>, ptr %A
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.urshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %tmp4 = load <16 x i8>, ptr %B
+ %tmp5 = add <16 x i8> %tmp3, %tmp4
+ ret <16 x i8> %tmp5
+}
+
+; ursra8h: loads <8 x i16> values from %A and %B, applies
+; @llvm.aarch64.neon.urshl.v8i16 to the %A value with a constant second operand
+; (i16 -1 in every lane), and returns the sum of that result and the %B value.
+; The expected output below records the current MSan instrumentation: each
+; pointer argument's shadow is tested (with a warning call on the nonzero path)
+; before its load, and the shadow stored to @__msan_retval_tls is
+; (shadow loaded for %A's value | zeroinitializer) | shadow loaded for %B's value.
+define <8 x i16> @ursra8h(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <8 x i16> @ursra8h(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[_MSPROP2:%.*]] = or <8 x i16> [[_MSPROP]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <8 x i16> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <8 x i16> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i16> [[TMP5]]
+;
+ %tmp1 = load <8 x i16>, ptr %A
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.urshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
+ %tmp4 = load <8 x i16>, ptr %B
+ %tmp5 = add <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
+
+; ursra4s: loads <4 x i32> values from %A and %B, applies
+; @llvm.aarch64.neon.urshl.v4i32 to the %A value with a constant second operand
+; (i32 -1 in every lane), and returns the sum of that result and the %B value.
+; The expected output below records the current MSan instrumentation: each
+; pointer argument's shadow is tested (with a warning call on the nonzero path)
+; before its load, and the shadow stored to @__msan_retval_tls is
+; (shadow loaded for %A's value | zeroinitializer) | shadow loaded for %B's value.
+define <4 x i32> @ursra4s(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <4 x i32> @ursra4s(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[_MSPROP2:%.*]] = or <4 x i32> [[_MSPROP]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <4 x i32> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[TMP5]]
+;
+ %tmp1 = load <4 x i32>, ptr %A
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.urshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
+ %tmp4 = load <4 x i32>, ptr %B
+ %tmp5 = add <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+; ursra2d: loads <2 x i64> values from %A and %B, applies
+; @llvm.aarch64.neon.urshl.v2i64 to the %A value with a constant second operand
+; (i64 -1 in both lanes), and returns the sum of that result and the %B value.
+; The expected output below records the current MSan instrumentation: each
+; pointer argument's shadow is tested (with a warning call on the nonzero path)
+; before its load, and the shadow stored to @__msan_retval_tls is
+; (shadow loaded for %A's value | zeroinitializer) | shadow loaded for %B's value.
+define <2 x i64> @ursra2d(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <2 x i64> @ursra2d(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> <i64 -1, i64 -1>)
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[_MSPROP2:%.*]] = or <2 x i64> [[_MSPROP]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <2 x i64> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i64> [[TMP5]]
+;
+ %tmp1 = load <2 x i64>, ptr %A
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
+ %tmp4 = load <2 x i64>, ptr %B
+ %tmp5 = add <2 x i64> %tmp3, %tmp4
+ ret <2 x i64> %tmp5
+}
+
+; ursra1d: loads <1 x i64> values from %A and %B, applies
+; @llvm.aarch64.neon.urshl.v1i64 to the %A value with a constant second operand
+; (<i64 -1>), and returns the sum of that result and the %B value.
+; The expected output below records the current MSan instrumentation: each
+; pointer argument's shadow is tested (with a warning call on the nonzero path)
+; before its load, and the shadow stored to @__msan_retval_tls is
+; (shadow loaded for %A's value | zeroinitializer) | shadow loaded for %B's value.
+define <1 x i64> @ursra1d(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <1 x i64> @ursra1d(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> <i64 -1>)
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP4:%.*]] = load <1 x i64>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <1 x i64>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[_MSPROP2:%.*]] = or <1 x i64> [[_MSPROP]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <1 x i64> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <1 x i64> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <1 x i64> [[TMP5]]
+;
+ %tmp1 = load <1 x i64>, ptr %A
+ %tmp3 = call <1 x i64> @llvm.aarch64.neon.urshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 -1>)
+ %tmp4 = load <1 x i64>, ptr %B
+ %tmp5 = add <1 x i64> %tmp3, %tmp4
+ ret <1 x i64> %tmp5
+}
+
+; ursra_scalar: scalar variant. Loads i64 values from %A and %B, applies
+; @llvm.aarch64.neon.urshl.i64 to the %A value with the constant second operand
+; i64 -1, and returns the sum of that result and the %B value.
+; The expected output below records the current MSan instrumentation: each
+; pointer argument's shadow is tested (with a warning call on the nonzero path)
+; before its load, and the shadow stored to @__msan_retval_tls is
+; (shadow loaded for %A's value | 0) | shadow loaded for %B's value.
+define i64 @ursra_scalar(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define i64 @ursra_scalar(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[_MSLD]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.urshl.i64(i64 [[TMP1]], i64 -1)
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load i64, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[_MSPROP2:%.*]] = or i64 [[_MSPROP]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store i64 [[_MSPROP2]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP5]]
+;
+ %tmp1 = load i64, ptr %A
+ %tmp3 = call i64 @llvm.aarch64.neon.urshl.i64(i64 %tmp1, i64 -1)
+ %tmp4 = load i64, ptr %B
+ %tmp5 = add i64 %tmp3, %tmp4
+ ret i64 %tmp5
+}
+
+; srsra8b: loads <8 x i8> values from %A and %B, applies
+; @llvm.aarch64.neon.srshl.v8i8 to the %A value with a constant second operand
+; (i8 -1 in every lane), and returns the sum of that result and the %B value.
+; The expected output below records the current MSan instrumentation: each
+; pointer argument's shadow is tested (with a warning call on the nonzero path)
+; before its load, and the shadow stored to @__msan_retval_tls is
+; (shadow loaded for %A's value | zeroinitializer) | shadow loaded for %B's value.
+define <8 x i8> @srsra8b(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <8 x i8> @srsra8b(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[_MSPROP2:%.*]] = or <8 x i8> [[_MSPROP]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <8 x i8> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <8 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i8> [[TMP5]]
+;
+ %tmp1 = load <8 x i8>, ptr %A
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.srshl.v8i8(<8 x i8> %tmp1, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %tmp4 = load <8 x i8>, ptr %B
+ %tmp5 = add <8 x i8> %tmp3, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+; srsra4h: loads <4 x i16> values from %A and %B, applies
+; @llvm.aarch64.neon.srshl.v4i16 to the %A value with a constant second operand
+; (i16 -1 in every lane), and returns the sum of that result and the %B value.
+; The expected output below records the current MSan instrumentation: each
+; pointer argument's shadow is tested (with a warning call on the nonzero path)
+; before its load, and the shadow stored to @__msan_retval_tls is
+; (shadow loaded for %A's value | zeroinitializer) | shadow loaded for %B's value.
+define <4 x i16> @srsra4h(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <4 x i16> @srsra4h(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i16>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[_MSPROP2:%.*]] = or <4 x i16> [[_MSPROP]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i16> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <4 x i16> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i16> [[TMP5]]
+;
+ %tmp1 = load <4 x i16>, ptr %A
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.srshl.v4i16(<4 x i16> %tmp1, <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
+ %tmp4 = load <4 x i16>, ptr %B
+ %tmp5 = add <4 x i16> %tmp3, %tmp4
+ ret <4 x i16> %tmp5
+}
+
+; srsra2s: loads <2 x i32> values from %A and %B, applies
+; @llvm.aarch64.neon.srshl.v2i32 to the %A value with a constant second operand
+; (i32 -1 in both lanes), and returns the sum of that result and the %B value.
+; The expected output below records the current MSan instrumentation: each
+; pointer argument's shadow is tested (with a warning call on the nonzero path)
+; before its load, and the shadow stored to @__msan_retval_tls is
+; (shadow loaded for %A's value | zeroinitializer) | shadow loaded for %B's value.
+define <2 x i32> @srsra2s(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <2 x i32> @srsra2s(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> <i32 -1, i32 -1>)
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[_MSPROP2:%.*]] = or <2 x i32> [[_MSPROP]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i32> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <2 x i32> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i32> [[TMP5]]
+;
+ %tmp1 = load <2 x i32>, ptr %A
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.srshl.v2i32(<2 x i32> %tmp1, <2 x i32> <i32 -1, i32 -1>)
+ %tmp4 = load <2 x i32>, ptr %B
+ %tmp5 = add <2 x i32> %tmp3, %tmp4
+ ret <2 x i32> %tmp5
+}
+
+; srsra16b: load <16 x i8> from %A, apply llvm.aarch64.neon.srshl with an all -1
+; shift operand, add the <16 x i8> loaded from %B, and return the sum.
+; NOTE(review): a negative srshl amount is presumably a rounding shift right by 1
+; (hence the "srsra" name) - confirm against the Arm NEON intrinsics reference.
+; The autogenerated assertions record the current msan output: the shadow of the
+; srshl result is the loaded shadow of %A OR'd with zeroinitializer (it is not
+; shifted), which is then OR'd with the shadow of %B and stored as the return shadow.
+define <16 x i8> @srsra16b(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <16 x i8> @srsra16b(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[_MSPROP2:%.*]] = or <16 x i8> [[_MSPROP]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <16 x i8> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <16 x i8> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[TMP5]]
+;
+ %tmp1 = load <16 x i8>, ptr %A
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.srshl.v16i8(<16 x i8> %tmp1, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %tmp4 = load <16 x i8>, ptr %B
+ %tmp5 = add <16 x i8> %tmp3, %tmp4
+ ret <16 x i8> %tmp5
+}
+
+; srsra8h: load <8 x i16> from %A, apply llvm.aarch64.neon.srshl with an all -1
+; shift operand, add the <8 x i16> loaded from %B, and return the sum.
+; NOTE(review): a negative srshl amount is presumably a rounding shift right by 1
+; (hence the "srsra" name) - confirm against the Arm NEON intrinsics reference.
+; The autogenerated assertions record the current msan output: the shadow of the
+; srshl result is the loaded shadow of %A OR'd with zeroinitializer (it is not
+; shifted), which is then OR'd with the shadow of %B and stored as the return shadow.
+define <8 x i16> @srsra8h(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <8 x i16> @srsra8h(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[_MSPROP2:%.*]] = or <8 x i16> [[_MSPROP]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <8 x i16> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <8 x i16> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i16> [[TMP5]]
+;
+ %tmp1 = load <8 x i16>, ptr %A
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.srshl.v8i16(<8 x i16> %tmp1, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
+ %tmp4 = load <8 x i16>, ptr %B
+ %tmp5 = add <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
+
+; srsra4s: load <4 x i32> from %A, apply llvm.aarch64.neon.srshl with an all -1
+; shift operand, add the <4 x i32> loaded from %B, and return the sum.
+; NOTE(review): a negative srshl amount is presumably a rounding shift right by 1
+; (hence the "srsra" name) - confirm against the Arm NEON intrinsics reference.
+; The autogenerated assertions record the current msan output: the shadow of the
+; srshl result is the loaded shadow of %A OR'd with zeroinitializer (it is not
+; shifted), which is then OR'd with the shadow of %B and stored as the return shadow.
+define <4 x i32> @srsra4s(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <4 x i32> @srsra4s(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[_MSPROP2:%.*]] = or <4 x i32> [[_MSPROP]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <4 x i32> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[TMP5]]
+;
+ %tmp1 = load <4 x i32>, ptr %A
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.srshl.v4i32(<4 x i32> %tmp1, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
+ %tmp4 = load <4 x i32>, ptr %B
+ %tmp5 = add <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+; srsra2d: load <2 x i64> from %A, apply llvm.aarch64.neon.srshl with an all -1
+; shift operand, add the <2 x i64> loaded from %B, and return the sum.
+; NOTE(review): a negative srshl amount is presumably a rounding shift right by 1
+; (hence the "srsra" name) - confirm against the Arm NEON intrinsics reference.
+; The autogenerated assertions record the current msan output: the shadow of the
+; srshl result is the loaded shadow of %A OR'd with zeroinitializer (it is not
+; shifted), which is then OR'd with the shadow of %B and stored as the return shadow.
+define <2 x i64> @srsra2d(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <2 x i64> @srsra2d(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> <i64 -1, i64 -1>)
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[_MSPROP2:%.*]] = or <2 x i64> [[_MSPROP]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <2 x i64> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i64> [[TMP5]]
+;
+ %tmp1 = load <2 x i64>, ptr %A
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %tmp1, <2 x i64> <i64 -1, i64 -1>)
+ %tmp4 = load <2 x i64>, ptr %B
+ %tmp5 = add <2 x i64> %tmp3, %tmp4
+ ret <2 x i64> %tmp5
+}
+
+; srsra1d: load <1 x i64> from %A, apply llvm.aarch64.neon.srshl with a <i64 -1>
+; shift operand, add the <1 x i64> loaded from %B, and return the sum.
+; NOTE(review): a negative srshl amount is presumably a rounding shift right by 1
+; (hence the "srsra" name) - confirm against the Arm NEON intrinsics reference.
+; The autogenerated assertions record the current msan output: the shadow of the
+; srshl result is the loaded shadow of %A OR'd with zeroinitializer (it is not
+; shifted), which is then OR'd with the shadow of %B and stored as the return shadow.
+define <1 x i64> @srsra1d(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <1 x i64> @srsra1d(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> <i64 -1>)
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP4:%.*]] = load <1 x i64>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <1 x i64>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[_MSPROP2:%.*]] = or <1 x i64> [[_MSPROP]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <1 x i64> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <1 x i64> [[_MSPROP2]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <1 x i64> [[TMP5]]
+;
+ %tmp1 = load <1 x i64>, ptr %A
+ %tmp3 = call <1 x i64> @llvm.aarch64.neon.srshl.v1i64(<1 x i64> %tmp1, <1 x i64> <i64 -1>)
+ %tmp4 = load <1 x i64>, ptr %B
+ %tmp5 = add <1 x i64> %tmp3, %tmp4
+ ret <1 x i64> %tmp5
+}
+
+; srsra_scalar: scalar i64 variant. Load an i64 from %A, apply
+; llvm.aarch64.neon.srshl.i64 with shift operand -1, add the i64 loaded from %B,
+; and return the sum.
+; NOTE(review): a negative srshl amount is presumably a rounding shift right by 1
+; (hence the "srsra" name) - confirm against the Arm NEON intrinsics reference.
+; The autogenerated assertions record the current msan output: the shadow of the
+; srshl result is the loaded shadow of %A OR'd with 0 (it is not shifted), which
+; is then OR'd with the shadow of %B and stored as the return shadow.
+define i64 @srsra_scalar(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define i64 @srsra_scalar(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[_MSLD]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.srshl.i64(i64 [[TMP1]], i64 -1)
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load i64, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[_MSPROP2:%.*]] = or i64 [[_MSPROP]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store i64 [[_MSPROP2]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i64 [[TMP5]]
+;
+ %tmp1 = load i64, ptr %A
+ %tmp3 = call i64 @llvm.aarch64.neon.srshl.i64(i64 %tmp1, i64 -1)
+ %tmp4 = load i64, ptr %B
+ %tmp5 = add i64 %tmp3, %tmp4
+ ret i64 %tmp5
+}
+
+; usra8b: load <8 x i8> from %A, logical-shift every lane right by 1 (lshr), add
+; the <8 x i8> loaded from %B, and return the sum.
+; The autogenerated assertions record the current msan output: the loaded shadow
+; of %A goes through the same lshr by 1, is OR'd with zeroinitializer and then with
+; the shadow of %B, and the result is stored as the return shadow.
+define <8 x i8> @usra8b(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <8 x i8> @usra8b(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[TMP8:%.*]] = lshr <8 x i8> [[_MSLD]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i8> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = lshr <8 x i8> [[TMP1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
+; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP14]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[TMP9]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <8 x i8> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i8> [[TMP5]]
+;
+ %tmp1 = load <8 x i8>, ptr %A
+ %tmp3 = lshr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ %tmp4 = load <8 x i8>, ptr %B
+ %tmp5 = add <8 x i8> %tmp3, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+; usra4h: load <4 x i16> from %A, logical-shift every lane right by 1 (lshr), add
+; the <4 x i16> loaded from %B, and return the sum.
+; The autogenerated assertions record the current msan output: the loaded shadow
+; of %A goes through the same lshr by 1, is OR'd with zeroinitializer and then with
+; the shadow of %B, and the result is stored as the return shadow.
+define <4 x i16> @usra4h(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <4 x i16> @usra4h(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[TMP8:%.*]] = lshr <4 x i16> [[_MSLD]], <i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i16> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i16>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
+; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP14]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[TMP9]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i16> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i16> [[TMP5]]
+;
+ %tmp1 = load <4 x i16>, ptr %A
+ %tmp3 = lshr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
+ %tmp4 = load <4 x i16>, ptr %B
+ %tmp5 = add <4 x i16> %tmp3, %tmp4
+ ret <4 x i16> %tmp5
+}
+
+; usra2s: load <2 x i32> from %A, logical-shift every lane right by 1 (lshr), add
+; the <2 x i32> loaded from %B, and return the sum.
+; The autogenerated assertions record the current msan output: the loaded shadow
+; of %A goes through the same lshr by 1, is OR'd with zeroinitializer and then with
+; the shadow of %B, and the result is stored as the return shadow.
+define <2 x i32> @usra2s(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <2 x i32> @usra2s(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[TMP8:%.*]] = lshr <2 x i32> [[_MSLD]], <i32 1, i32 1>
+; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i32> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i32> [[TMP1]], <i32 1, i32 1>
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
+; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP14]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[TMP9]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i32> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i32> [[TMP5]]
+;
+ %tmp1 = load <2 x i32>, ptr %A
+ %tmp3 = lshr <2 x i32> %tmp1, <i32 1, i32 1>
+ %tmp4 = load <2 x i32>, ptr %B
+ %tmp5 = add <2 x i32> %tmp3, %tmp4
+ ret <2 x i32> %tmp5
+}
+
+; usra16b: load <16 x i8> from %A, logical-shift every lane right by 1 (lshr),
+; add the <16 x i8> loaded from %B, and return the sum.
+; The autogenerated assertions record the current msan output: the loaded shadow
+; of %A goes through the same lshr by 1, is OR'd with zeroinitializer and then with
+; the shadow of %B, and the result is stored as the return shadow.
+define <16 x i8> @usra16b(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <16 x i8> @usra16b(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[TMP8:%.*]] = lshr <16 x i8> [[_MSLD]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i8> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = lshr <16 x i8> [[TMP1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
+; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP14]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP9]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <16 x i8> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[TMP5]]
+;
+ %tmp1 = load <16 x i8>, ptr %A
+ %tmp3 = lshr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ %tmp4 = load <16 x i8>, ptr %B
+ %tmp5 = add <16 x i8> %tmp3, %tmp4
+ ret <16 x i8> %tmp5
+}
+
+; usra8h: load <8 x i16> from %A, logical-shift every lane right by 1 (lshr), add
+; the <8 x i16> loaded from %B, and return the sum.
+; The autogenerated assertions record the current msan output: the loaded shadow
+; of %A goes through the same lshr by 1, is OR'd with zeroinitializer and then with
+; the shadow of %B, and the result is stored as the return shadow.
+define <8 x i16> @usra8h(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <8 x i16> @usra8h(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[TMP8:%.*]] = lshr <8 x i16> [[_MSLD]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i16> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = lshr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
+; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP14]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP9]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <8 x i16> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i16> [[TMP5]]
+;
+ %tmp1 = load <8 x i16>, ptr %A
+ %tmp3 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %tmp4 = load <8 x i16>, ptr %B
+ %tmp5 = add <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
+
+; usra4s: load <4 x i32> from %A, logical-shift every lane right by 1 (lshr), add
+; the <4 x i32> loaded from %B, and return the sum.
+; The autogenerated assertions record the current msan output: the loaded shadow
+; of %A goes through the same lshr by 1, is OR'd with zeroinitializer and then with
+; the shadow of %B, and the result is stored as the return shadow.
+define <4 x i32> @usra4s(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <4 x i32> @usra4s(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[TMP8:%.*]] = lshr <4 x i32> [[_MSLD]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i32> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
+; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP14]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP9]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[TMP5]]
+;
+ %tmp1 = load <4 x i32>, ptr %A
+ %tmp3 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
+ %tmp4 = load <4 x i32>, ptr %B
+ %tmp5 = add <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+; usra2d: load <2 x i64> from %A, logical-shift every lane right by 1 (lshr), add
+; the <2 x i64> loaded from %B, and return the sum.
+; The autogenerated assertions record the current msan output: the loaded shadow
+; of %A goes through the same lshr by 1, is OR'd with zeroinitializer and then with
+; the shadow of %B, and the result is stored as the return shadow.
+define <2 x i64> @usra2d(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <2 x i64> @usra2d(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[TMP8:%.*]] = lshr <2 x i64> [[_MSLD]], <i64 1, i64 1>
+; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i64> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[TMP1]], <i64 1, i64 1>
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
+; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP14]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP9]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i64> [[TMP5]]
+;
+ %tmp1 = load <2 x i64>, ptr %A
+ %tmp3 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
+ %tmp4 = load <2 x i64>, ptr %B
+ %tmp5 = add <2 x i64> %tmp3, %tmp4
+ ret <2 x i64> %tmp5
+}
+
+; usra1d: load <1 x i64> from %A, logical-shift its single lane right by 1 (lshr),
+; add the <1 x i64> loaded from %B, and return the sum.
+; The autogenerated assertions record the current msan output: the loaded shadow
+; of %A goes through the same lshr by 1, is OR'd with zeroinitializer and then with
+; the shadow of %B, and the result is stored as the return shadow.
+define <1 x i64> @usra1d(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <1 x i64> @usra1d(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[TMP8:%.*]] = lshr <1 x i64> [[_MSLD]], <i64 1>
+; CHECK-NEXT: [[TMP9:%.*]] = or <1 x i64> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = lshr <1 x i64> [[TMP1]], <i64 1>
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[TMP4:%.*]] = load <1 x i64>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
+; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <1 x i64>, ptr [[TMP14]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP9]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <1 x i64> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <1 x i64> [[TMP5]]
+;
+ %tmp1 = load <1 x i64>, ptr %A
+ %tmp3 = lshr <1 x i64> %tmp1, <i64 1>
+ %tmp4 = load <1 x i64>, ptr %B
+ %tmp5 = add <1 x i64> %tmp3, %tmp4
+ ret <1 x i64> %tmp5
+}
+
+; ssra8b: load <8 x i8> from %A, arithmetic-shift every lane right by 1 (ashr),
+; add the <8 x i8> loaded from %B, and return the sum.
+; The autogenerated assertions record the current msan output: the loaded shadow
+; of %A goes through the same ashr by 1, is OR'd with zeroinitializer and then with
+; the shadow of %B, and the result is stored as the return shadow.
+define <8 x i8> @ssra8b(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <8 x i8> @ssra8b(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[TMP8:%.*]] = ashr <8 x i8> [[_MSLD]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i8> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i8> [[TMP1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
+; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP14]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[TMP9]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <8 x i8> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i8> [[TMP5]]
+;
+ %tmp1 = load <8 x i8>, ptr %A
+ %tmp3 = ashr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ %tmp4 = load <8 x i8>, ptr %B
+ %tmp5 = add <8 x i8> %tmp3, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+define <4 x i16> @ssra4h(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan expectations for: arithmetic shift right by 1 of the <4 x i16> loaded from %A, then add of the <4 x i16> loaded from %B
+; CHECK-LABEL: define <4 x i16> @ssra4h(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[TMP8:%.*]] = ashr <4 x i16> [[_MSLD]], <i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i16> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i16>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
+; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP14]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[TMP9]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i16> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i16> [[TMP5]]
+;
+ %tmp1 = load <4 x i16>, ptr %A ; asserted output first tests the i64 read from @__msan_param_tls (presumably the shadow of pointer %A) and calls __msan_warning_noreturn if non-zero; the value's shadow is then loaded from (address of %A) xor 193514046488576
+ %tmp3 = ashr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1> ; constant shift amount: the asserted shadow is the same ashr applied to the shadow loaded for %A, OR-ed with zeroinitializer
+ %tmp4 = load <4 x i16>, ptr %B ; same pattern for %B, using the i64 read at offset 8 of @__msan_param_tls
+ %tmp5 = add <4 x i16> %tmp3, %tmp4 ; asserted result shadow (_MSPROP) is the bitwise OR of the two operand shadows
+ ret <4 x i16> %tmp5 ; the asserted output stores _MSPROP to @__msan_retval_tls just before the return
+}
+
+define <2 x i32> @ssra2s(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan expectations for: arithmetic shift right by 1 of the <2 x i32> loaded from %A, then add of the <2 x i32> loaded from %B
+; CHECK-LABEL: define <2 x i32> @ssra2s(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[TMP8:%.*]] = ashr <2 x i32> [[_MSLD]], <i32 1, i32 1>
+; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i32> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = ashr <2 x i32> [[TMP1]], <i32 1, i32 1>
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
+; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP14]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[TMP9]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i32> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i32> [[TMP5]]
+;
+ %tmp1 = load <2 x i32>, ptr %A ; asserted output first tests the i64 read from @__msan_param_tls (presumably the shadow of pointer %A) and calls __msan_warning_noreturn if non-zero; the value's shadow is then loaded from (address of %A) xor 193514046488576
+ %tmp3 = ashr <2 x i32> %tmp1, <i32 1, i32 1> ; constant shift amount: the asserted shadow is the same ashr applied to the shadow loaded for %A, OR-ed with zeroinitializer
+ %tmp4 = load <2 x i32>, ptr %B ; same pattern for %B, using the i64 read at offset 8 of @__msan_param_tls
+ %tmp5 = add <2 x i32> %tmp3, %tmp4 ; asserted result shadow (_MSPROP) is the bitwise OR of the two operand shadows
+ ret <2 x i32> %tmp5 ; the asserted output stores _MSPROP to @__msan_retval_tls just before the return
+}
+
+define <16 x i8> @ssra16b(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan expectations for: arithmetic shift right by 1 of the <16 x i8> loaded from %A, then add of the <16 x i8> loaded from %B
+; CHECK-LABEL: define <16 x i8> @ssra16b(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[TMP8:%.*]] = ashr <16 x i8> [[_MSLD]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i8> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = ashr <16 x i8> [[TMP1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
+; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP14]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP9]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <16 x i8> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[TMP5]]
+;
+ %tmp1 = load <16 x i8>, ptr %A ; asserted output first tests the i64 read from @__msan_param_tls (presumably the shadow of pointer %A) and calls __msan_warning_noreturn if non-zero; the value's shadow is then loaded from (address of %A) xor 193514046488576
+ %tmp3 = ashr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> ; constant shift amount: the asserted shadow is the same ashr applied to the shadow loaded for %A, OR-ed with zeroinitializer
+ %tmp4 = load <16 x i8>, ptr %B ; same pattern for %B, using the i64 read at offset 8 of @__msan_param_tls
+ %tmp5 = add <16 x i8> %tmp3, %tmp4 ; asserted result shadow (_MSPROP) is the bitwise OR of the two operand shadows
+ ret <16 x i8> %tmp5 ; the asserted output stores _MSPROP to @__msan_retval_tls (with align 8) just before the return
+}
+
+define <8 x i16> @ssra8h(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan expectations for: arithmetic shift right by 1 of the <8 x i16> loaded from %A, then add of the <8 x i16> loaded from %B
+; CHECK-LABEL: define <8 x i16> @ssra8h(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[TMP8:%.*]] = ashr <8 x i16> [[_MSLD]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i16> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = ashr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
+; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP14]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP9]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <8 x i16> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i16> [[TMP5]]
+;
+ %tmp1 = load <8 x i16>, ptr %A ; asserted output first tests the i64 read from @__msan_param_tls (presumably the shadow of pointer %A) and calls __msan_warning_noreturn if non-zero; the value's shadow is then loaded from (address of %A) xor 193514046488576
+ %tmp3 = ashr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> ; constant shift amount: the asserted shadow is the same ashr applied to the shadow loaded for %A, OR-ed with zeroinitializer
+ %tmp4 = load <8 x i16>, ptr %B ; same pattern for %B, using the i64 read at offset 8 of @__msan_param_tls
+ %tmp5 = add <8 x i16> %tmp3, %tmp4 ; asserted result shadow (_MSPROP) is the bitwise OR of the two operand shadows
+ ret <8 x i16> %tmp5 ; the asserted output stores _MSPROP to @__msan_retval_tls (with align 8) just before the return
+}
+
+define <4 x i32> @ssra4s(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan expectations for: arithmetic shift right by 1 of the <4 x i32> loaded from %A, then add of the <4 x i32> loaded from %B
+; CHECK-LABEL: define <4 x i32> @ssra4s(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[TMP8:%.*]] = ashr <4 x i32> [[_MSLD]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i32> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
+; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP14]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP9]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[TMP5]]
+;
+ %tmp1 = load <4 x i32>, ptr %A ; asserted output first tests the i64 read from @__msan_param_tls (presumably the shadow of pointer %A) and calls __msan_warning_noreturn if non-zero; the value's shadow is then loaded from (address of %A) xor 193514046488576
+ %tmp3 = ashr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1> ; constant shift amount: the asserted shadow is the same ashr applied to the shadow loaded for %A, OR-ed with zeroinitializer
+ %tmp4 = load <4 x i32>, ptr %B ; same pattern for %B, using the i64 read at offset 8 of @__msan_param_tls
+ %tmp5 = add <4 x i32> %tmp3, %tmp4 ; asserted result shadow (_MSPROP) is the bitwise OR of the two operand shadows
+ ret <4 x i32> %tmp5 ; the asserted output stores _MSPROP to @__msan_retval_tls (with align 8) just before the return
+}
+
+define <2 x i64> @ssra2d(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan expectations for: arithmetic shift right by 1 of the <2 x i64> loaded from %A, then add of the <2 x i64> loaded from %B
+; CHECK-LABEL: define <2 x i64> @ssra2d(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[TMP8:%.*]] = ashr <2 x i64> [[_MSLD]], <i64 1, i64 1>
+; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i64> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = ashr <2 x i64> [[TMP1]], <i64 1, i64 1>
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF0]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 193514046488576
+; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP14]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP9]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP5:%.*]] = add <2 x i64> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i64> [[TMP5]]
+;
+ %tmp1 = load <2 x i64>, ptr %A ; asserted output first tests the i64 read from @__msan_param_tls (presumably the shadow of pointer %A) and calls __msan_warning_noreturn if non-zero; the value's shadow is then loaded from (address of %A) xor 193514046488576
+ %tmp3 = ashr <2 x i64> %tmp1, <i64 1, i64 1> ; constant shift amount: the asserted shadow is the same ashr applied to the shadow loaded for %A, OR-ed with zeroinitializer
+ %tmp4 = load <2 x i64>, ptr %B ; same pattern for %B, using the i64 read at offset 8 of @__msan_param_tls
+ %tmp5 = add <2 x i64> %tmp3, %tmp4 ; asserted result shadow (_MSPROP) is the bitwise OR of the two operand shadows
+ ret <2 x i64> %tmp5 ; the asserted output stores _MSPROP to @__msan_retval_tls (with align 8) just before the return
+}
+
+define <8 x i8> @shr_orr8b(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan expectations for: logical shift right by 1 of the <8 x i8> loaded from %A, then bitwise OR with the <8 x i8> loaded from %B
+; CHECK-LABEL: define <8 x i8> @shr_orr8b(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[TMP13:%.*]] = lshr <8 x i8> [[_MSLD]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i8> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = lshr <8 x i8> [[TMP1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <8 x i8> [[TMP3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT: [[TMP16:%.*]] = xor <8 x i8> [[TMP4]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT: [[TMP17:%.*]] = and <8 x i8> [[TMP14]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP18:%.*]] = and <8 x i8> [[TMP15]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP19:%.*]] = and <8 x i8> [[TMP14]], [[TMP16]]
+; CHECK-NEXT: [[TMP20:%.*]] = or <8 x i8> [[TMP17]], [[TMP18]]
+; CHECK-NEXT: [[TMP21:%.*]] = or <8 x i8> [[TMP20]], [[TMP19]]
+; CHECK-NEXT: [[TMP5:%.*]] = or <8 x i8> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <8 x i8> [[TMP21]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i8> [[TMP5]]
+;
+ %tmp1 = load <8 x i8>, ptr %A ; asserted output first tests the i64 read from @__msan_param_tls (presumably the shadow of pointer %A) and calls __msan_warning_noreturn if non-zero; the value's shadow is then loaded from (address of %A) xor 193514046488576
+ %tmp4 = load <8 x i8>, ptr %B ; same pattern for %B, using the i64 read at offset 8 of @__msan_param_tls
+ %tmp3 = lshr <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> ; constant shift amount: the asserted shadow is the same lshr applied to the shadow loaded for %A, OR-ed with zeroinitializer
+ %tmp5 = or <8 x i8> %tmp3, %tmp4 ; asserted result shadow: (Sa & Sb) | (~Va & Sb) | (Sa & ~Vb), where Sa/Va are shadow/value of %tmp3 and Sb/Vb are shadow/value of %tmp4
+ ret <8 x i8> %tmp5 ; the asserted output stores that shadow to @__msan_retval_tls just before the return
+}
+
+define <4 x i16> @shr_orr4h(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan expectations for: logical shift right by 1 of the <4 x i16> loaded from %A, then bitwise OR with the <4 x i16> loaded from %B
+; CHECK-LABEL: define <4 x i16> @shr_orr4h(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i16>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[TMP13:%.*]] = lshr <4 x i16> [[_MSLD]], <i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT: [[TMP14:%.*]] = or <4 x i16> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i16> [[TMP3]], <i16 -1, i16 -1, i16 -1, i16 -1>
+; CHECK-NEXT: [[TMP16:%.*]] = xor <4 x i16> [[TMP4]], <i16 -1, i16 -1, i16 -1, i16 -1>
+; CHECK-NEXT: [[TMP17:%.*]] = and <4 x i16> [[TMP14]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP18:%.*]] = and <4 x i16> [[TMP15]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP19:%.*]] = and <4 x i16> [[TMP14]], [[TMP16]]
+; CHECK-NEXT: [[TMP20:%.*]] = or <4 x i16> [[TMP17]], [[TMP18]]
+; CHECK-NEXT: [[TMP21:%.*]] = or <4 x i16> [[TMP20]], [[TMP19]]
+; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i16> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <4 x i16> [[TMP21]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i16> [[TMP5]]
+;
+ %tmp1 = load <4 x i16>, ptr %A ; asserted output first tests the i64 read from @__msan_param_tls (presumably the shadow of pointer %A) and calls __msan_warning_noreturn if non-zero; the value's shadow is then loaded from (address of %A) xor 193514046488576
+ %tmp4 = load <4 x i16>, ptr %B ; same pattern for %B, using the i64 read at offset 8 of @__msan_param_tls
+ %tmp3 = lshr <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1> ; constant shift amount: the asserted shadow is the same lshr applied to the shadow loaded for %A, OR-ed with zeroinitializer
+ %tmp5 = or <4 x i16> %tmp3, %tmp4 ; asserted result shadow: (Sa & Sb) | (~Va & Sb) | (Sa & ~Vb), where Sa/Va are shadow/value of %tmp3 and Sb/Vb are shadow/value of %tmp4
+ ret <4 x i16> %tmp5 ; the asserted output stores that shadow to @__msan_retval_tls just before the return
+}
+
+define <2 x i32> @shr_orr2s(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan expectations for: logical shift right by 1 of the <2 x i32> loaded from %A, then bitwise OR with the <2 x i32> loaded from %B
+; CHECK-LABEL: define <2 x i32> @shr_orr2s(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[TMP13:%.*]] = lshr <2 x i32> [[_MSLD]], <i32 1, i32 1>
+; CHECK-NEXT: [[TMP14:%.*]] = or <2 x i32> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i32> [[TMP1]], <i32 1, i32 1>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i32> [[TMP3]], <i32 -1, i32 -1>
+; CHECK-NEXT: [[TMP16:%.*]] = xor <2 x i32> [[TMP4]], <i32 -1, i32 -1>
+; CHECK-NEXT: [[TMP17:%.*]] = and <2 x i32> [[TMP14]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP18:%.*]] = and <2 x i32> [[TMP15]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP19:%.*]] = and <2 x i32> [[TMP14]], [[TMP16]]
+; CHECK-NEXT: [[TMP20:%.*]] = or <2 x i32> [[TMP17]], [[TMP18]]
+; CHECK-NEXT: [[TMP21:%.*]] = or <2 x i32> [[TMP20]], [[TMP19]]
+; CHECK-NEXT: [[TMP5:%.*]] = or <2 x i32> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <2 x i32> [[TMP21]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i32> [[TMP5]]
+;
+ %tmp1 = load <2 x i32>, ptr %A ; asserted output first tests the i64 read from @__msan_param_tls (presumably the shadow of pointer %A) and calls __msan_warning_noreturn if non-zero; the value's shadow is then loaded from (address of %A) xor 193514046488576
+ %tmp4 = load <2 x i32>, ptr %B ; same pattern for %B, using the i64 read at offset 8 of @__msan_param_tls
+ %tmp3 = lshr <2 x i32> %tmp1, <i32 1, i32 1> ; constant shift amount: the asserted shadow is the same lshr applied to the shadow loaded for %A, OR-ed with zeroinitializer
+ %tmp5 = or <2 x i32> %tmp3, %tmp4 ; asserted result shadow: (Sa & Sb) | (~Va & Sb) | (Sa & ~Vb), where Sa/Va are shadow/value of %tmp3 and Sb/Vb are shadow/value of %tmp4
+ ret <2 x i32> %tmp5 ; the asserted output stores that shadow to @__msan_retval_tls just before the return
+}
+
+define <16 x i8> @shr_orr16b(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan expectations for: logical shift right by 1 of the <16 x i8> loaded from %A, then bitwise OR with the <16 x i8> loaded from %B
+; CHECK-LABEL: define <16 x i8> @shr_orr16b(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[TMP13:%.*]] = lshr <16 x i8> [[_MSLD]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i8> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = lshr <16 x i8> [[TMP1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i8> [[TMP3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT: [[TMP16:%.*]] = xor <16 x i8> [[TMP4]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT: [[TMP17:%.*]] = and <16 x i8> [[TMP14]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP18:%.*]] = and <16 x i8> [[TMP15]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP19:%.*]] = and <16 x i8> [[TMP14]], [[TMP16]]
+; CHECK-NEXT: [[TMP20:%.*]] = or <16 x i8> [[TMP17]], [[TMP18]]
+; CHECK-NEXT: [[TMP21:%.*]] = or <16 x i8> [[TMP20]], [[TMP19]]
+; CHECK-NEXT: [[TMP5:%.*]] = or <16 x i8> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <16 x i8> [[TMP21]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[TMP5]]
+;
+ %tmp1 = load <16 x i8>, ptr %A ; asserted output first tests the i64 read from @__msan_param_tls (presumably the shadow of pointer %A) and calls __msan_warning_noreturn if non-zero; the value's shadow is then loaded from (address of %A) xor 193514046488576
+ %tmp4 = load <16 x i8>, ptr %B ; same pattern for %B, using the i64 read at offset 8 of @__msan_param_tls
+ %tmp3 = lshr <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> ; constant shift amount: the asserted shadow is the same lshr applied to the shadow loaded for %A, OR-ed with zeroinitializer
+ %tmp5 = or <16 x i8> %tmp3, %tmp4 ; asserted result shadow: (Sa & Sb) | (~Va & Sb) | (Sa & ~Vb), where Sa/Va are shadow/value of %tmp3 and Sb/Vb are shadow/value of %tmp4
+ ret <16 x i8> %tmp5 ; the asserted output stores that shadow to @__msan_retval_tls (with align 8) just before the return
+}
+
+define <8 x i16> @shr_orr8h(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan expectations for: logical shift right by 1 of the <8 x i16> loaded from %A, then bitwise OR with the <8 x i16> loaded from %B
+; CHECK-LABEL: define <8 x i16> @shr_orr8h(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[TMP13:%.*]] = lshr <8 x i16> [[_MSLD]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i16> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = lshr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <8 x i16> [[TMP3]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+; CHECK-NEXT: [[TMP16:%.*]] = xor <8 x i16> [[TMP4]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+; CHECK-NEXT: [[TMP17:%.*]] = and <8 x i16> [[TMP14]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP18:%.*]] = and <8 x i16> [[TMP15]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP19:%.*]] = and <8 x i16> [[TMP14]], [[TMP16]]
+; CHECK-NEXT: [[TMP20:%.*]] = or <8 x i16> [[TMP17]], [[TMP18]]
+; CHECK-NEXT: [[TMP21:%.*]] = or <8 x i16> [[TMP20]], [[TMP19]]
+; CHECK-NEXT: [[TMP5:%.*]] = or <8 x i16> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <8 x i16> [[TMP21]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i16> [[TMP5]]
+;
+ %tmp1 = load <8 x i16>, ptr %A ; asserted output first tests the i64 read from @__msan_param_tls (presumably the shadow of pointer %A) and calls __msan_warning_noreturn if non-zero; the value's shadow is then loaded from (address of %A) xor 193514046488576
+ %tmp4 = load <8 x i16>, ptr %B ; same pattern for %B, using the i64 read at offset 8 of @__msan_param_tls
+ %tmp3 = lshr <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> ; constant shift amount: the asserted shadow is the same lshr applied to the shadow loaded for %A, OR-ed with zeroinitializer
+ %tmp5 = or <8 x i16> %tmp3, %tmp4 ; asserted result shadow: (Sa & Sb) | (~Va & Sb) | (Sa & ~Vb), where Sa/Va are shadow/value of %tmp3 and Sb/Vb are shadow/value of %tmp4
+ ret <8 x i16> %tmp5 ; the asserted output stores that shadow to @__msan_retval_tls (with align 8) just before the return
+}
+
+define <4 x i32> @shr_orr4s(ptr %A, ptr %B) nounwind sanitize_memory { ; MSan expectations for: logical shift right by 1 of the <4 x i32> loaded from %A, then bitwise OR with the <4 x i32> loaded from %B
+; CHECK-LABEL: define <4 x i32> @shr_orr4s(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[TMP13:%.*]] = lshr <4 x i32> [[_MSLD]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: [[TMP14:%.*]] = or <4 x i32> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = lshr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i32> [[TMP3]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT: [[TMP16:%.*]] = xor <4 x i32> [[TMP4]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT: [[TMP17:%.*]] = and <4 x i32> [[TMP14]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP18:%.*]] = and <4 x i32> [[TMP15]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP19:%.*]] = and <4 x i32> [[TMP14]], [[TMP16]]
+; CHECK-NEXT: [[TMP20:%.*]] = or <4 x i32> [[TMP17]], [[TMP18]]
+; CHECK-NEXT: [[TMP21:%.*]] = or <4 x i32> [[TMP20]], [[TMP19]]
+; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i32> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <4 x i32> [[TMP21]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[TMP5]]
+;
+ %tmp1 = load <4 x i32>, ptr %A ; asserted output first tests the i64 read from @__msan_param_tls (presumably the shadow of pointer %A) and calls __msan_warning_noreturn if non-zero; the value's shadow is then loaded from (address of %A) xor 193514046488576
+ %tmp4 = load <4 x i32>, ptr %B ; same pattern for %B, using the i64 read at offset 8 of @__msan_param_tls
+ %tmp3 = lshr <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1> ; constant shift amount: the asserted shadow is the same lshr applied to the shadow loaded for %A, OR-ed with zeroinitializer
+ %tmp5 = or <4 x i32> %tmp3, %tmp4 ; asserted result shadow: (Sa & Sb) | (~Va & Sb) | (Sa & ~Vb), where Sa/Va are shadow/value of %tmp3 and Sb/Vb are shadow/value of %tmp4
+ ret <4 x i32> %tmp5 ; the asserted output stores that shadow to @__msan_retval_tls (with align 8) just before the return
+}
+
+; shr_orr2d: load <2 x i64> from %A and %B, logically shift the %A value right
+; by 1 in every lane, and OR the result with the %B value.
+; The assertions below record the instrumented IR: the shadow of each pointer
+; argument is read from @__msan_param_tls and a non-zero shadow branches to
+; @__msan_warning_noreturn; the shift's shadow is the same lshr applied to the
+; shadow loaded for %A; the OR's shadow is combined from both operand shadows
+; and the complemented operand values, then stored to @__msan_retval_tls.
+define <2 x i64> @shr_orr2d(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <2 x i64> @shr_orr2d(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[TMP13:%.*]] = lshr <2 x i64> [[_MSLD]], <i64 1, i64 1>
+; CHECK-NEXT: [[TMP14:%.*]] = or <2 x i64> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = lshr <2 x i64> [[TMP1]], <i64 1, i64 1>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i64> [[TMP3]], <i64 -1, i64 -1>
+; CHECK-NEXT: [[TMP16:%.*]] = xor <2 x i64> [[TMP4]], <i64 -1, i64 -1>
+; CHECK-NEXT: [[TMP17:%.*]] = and <2 x i64> [[TMP14]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP18:%.*]] = and <2 x i64> [[TMP15]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP19:%.*]] = and <2 x i64> [[TMP14]], [[TMP16]]
+; CHECK-NEXT: [[TMP20:%.*]] = or <2 x i64> [[TMP17]], [[TMP18]]
+; CHECK-NEXT: [[TMP21:%.*]] = or <2 x i64> [[TMP20]], [[TMP19]]
+; CHECK-NEXT: [[TMP5:%.*]] = or <2 x i64> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <2 x i64> [[TMP21]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i64> [[TMP5]]
+;
+ %tmp1 = load <2 x i64>, ptr %A
+ %tmp4 = load <2 x i64>, ptr %B
+ %tmp3 = lshr <2 x i64> %tmp1, <i64 1, i64 1>
+ %tmp5 = or <2 x i64> %tmp3, %tmp4
+ ret <2 x i64> %tmp5
+}
+
+; shl_orr8b: load <8 x i8> from %A and %B, shift the %A value left by 1 in
+; every lane, and OR the result with the %B value.
+; The assertions below record the instrumented IR: a non-zero shadow for either
+; pointer argument branches to @__msan_warning_noreturn; the shift's shadow is
+; the same shl applied to the shadow loaded for %A; the OR's shadow is combined
+; from both operand shadows and the complemented operand values, then stored
+; to @__msan_retval_tls.
+define <8 x i8> @shl_orr8b(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <8 x i8> @shl_orr8b(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i8>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[TMP13:%.*]] = shl <8 x i8> [[_MSLD]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i8> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i8> [[TMP1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <8 x i8> [[TMP3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT: [[TMP16:%.*]] = xor <8 x i8> [[TMP4]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT: [[TMP17:%.*]] = and <8 x i8> [[TMP14]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP18:%.*]] = and <8 x i8> [[TMP15]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP19:%.*]] = and <8 x i8> [[TMP14]], [[TMP16]]
+; CHECK-NEXT: [[TMP20:%.*]] = or <8 x i8> [[TMP17]], [[TMP18]]
+; CHECK-NEXT: [[TMP21:%.*]] = or <8 x i8> [[TMP20]], [[TMP19]]
+; CHECK-NEXT: [[TMP5:%.*]] = or <8 x i8> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <8 x i8> [[TMP21]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i8> [[TMP5]]
+;
+ %tmp1 = load <8 x i8>, ptr %A
+ %tmp4 = load <8 x i8>, ptr %B
+ %tmp3 = shl <8 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ %tmp5 = or <8 x i8> %tmp3, %tmp4
+ ret <8 x i8> %tmp5
+}
+
+; shl_orr4h: load <4 x i16> from %A and %B, shift the %A value left by 1 in
+; every lane, and OR the result with the %B value.
+; The assertions below record the instrumented IR: a non-zero shadow for either
+; pointer argument branches to @__msan_warning_noreturn; the shift's shadow is
+; the same shl applied to the shadow loaded for %A; the OR's shadow is combined
+; from both operand shadows and the complemented operand values, then stored
+; to @__msan_retval_tls.
+define <4 x i16> @shl_orr4h(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <4 x i16> @shl_orr4h(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i16>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[TMP13:%.*]] = shl <4 x i16> [[_MSLD]], <i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT: [[TMP14:%.*]] = or <4 x i16> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = shl <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i16> [[TMP3]], <i16 -1, i16 -1, i16 -1, i16 -1>
+; CHECK-NEXT: [[TMP16:%.*]] = xor <4 x i16> [[TMP4]], <i16 -1, i16 -1, i16 -1, i16 -1>
+; CHECK-NEXT: [[TMP17:%.*]] = and <4 x i16> [[TMP14]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP18:%.*]] = and <4 x i16> [[TMP15]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP19:%.*]] = and <4 x i16> [[TMP14]], [[TMP16]]
+; CHECK-NEXT: [[TMP20:%.*]] = or <4 x i16> [[TMP17]], [[TMP18]]
+; CHECK-NEXT: [[TMP21:%.*]] = or <4 x i16> [[TMP20]], [[TMP19]]
+; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i16> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <4 x i16> [[TMP21]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i16> [[TMP5]]
+;
+ %tmp1 = load <4 x i16>, ptr %A
+ %tmp4 = load <4 x i16>, ptr %B
+ %tmp3 = shl <4 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1>
+ %tmp5 = or <4 x i16> %tmp3, %tmp4
+ ret <4 x i16> %tmp5
+}
+
+; shl_orr2s: load <2 x i32> from %A and %B, shift the %A value left by 1 in
+; every lane, and OR the result with the %B value.
+; The assertions below record the instrumented IR: a non-zero shadow for either
+; pointer argument branches to @__msan_warning_noreturn; the shift's shadow is
+; the same shl applied to the shadow loaded for %A; the OR's shadow is combined
+; from both operand shadows and the complemented operand values, then stored
+; to @__msan_retval_tls.
+define <2 x i32> @shl_orr2s(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <2 x i32> @shl_orr2s(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[TMP13:%.*]] = shl <2 x i32> [[_MSLD]], <i32 1, i32 1>
+; CHECK-NEXT: [[TMP14:%.*]] = or <2 x i32> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i32> [[TMP1]], <i32 1, i32 1>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i32> [[TMP3]], <i32 -1, i32 -1>
+; CHECK-NEXT: [[TMP16:%.*]] = xor <2 x i32> [[TMP4]], <i32 -1, i32 -1>
+; CHECK-NEXT: [[TMP17:%.*]] = and <2 x i32> [[TMP14]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP18:%.*]] = and <2 x i32> [[TMP15]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP19:%.*]] = and <2 x i32> [[TMP14]], [[TMP16]]
+; CHECK-NEXT: [[TMP20:%.*]] = or <2 x i32> [[TMP17]], [[TMP18]]
+; CHECK-NEXT: [[TMP21:%.*]] = or <2 x i32> [[TMP20]], [[TMP19]]
+; CHECK-NEXT: [[TMP5:%.*]] = or <2 x i32> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <2 x i32> [[TMP21]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i32> [[TMP5]]
+;
+ %tmp1 = load <2 x i32>, ptr %A
+ %tmp4 = load <2 x i32>, ptr %B
+ %tmp3 = shl <2 x i32> %tmp1, <i32 1, i32 1>
+ %tmp5 = or <2 x i32> %tmp3, %tmp4
+ ret <2 x i32> %tmp5
+}
+
+; shl_orr16b: load <16 x i8> from %A and %B, shift the %A value left by 1 in
+; every lane, and OR the result with the %B value.
+; The assertions below record the instrumented IR: a non-zero shadow for either
+; pointer argument branches to @__msan_warning_noreturn; the shift's shadow is
+; the same shl applied to the shadow loaded for %A; the OR's shadow is combined
+; from both operand shadows and the complemented operand values, then stored
+; to @__msan_retval_tls.
+define <16 x i8> @shl_orr16b(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <16 x i8> @shl_orr16b(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i8>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[TMP13:%.*]] = shl <16 x i8> [[_MSLD]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i8> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = shl <16 x i8> [[TMP1]], <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i8> [[TMP3]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT: [[TMP16:%.*]] = xor <16 x i8> [[TMP4]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-NEXT: [[TMP17:%.*]] = and <16 x i8> [[TMP14]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP18:%.*]] = and <16 x i8> [[TMP15]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP19:%.*]] = and <16 x i8> [[TMP14]], [[TMP16]]
+; CHECK-NEXT: [[TMP20:%.*]] = or <16 x i8> [[TMP17]], [[TMP18]]
+; CHECK-NEXT: [[TMP21:%.*]] = or <16 x i8> [[TMP20]], [[TMP19]]
+; CHECK-NEXT: [[TMP5:%.*]] = or <16 x i8> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <16 x i8> [[TMP21]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[TMP5]]
+;
+ %tmp1 = load <16 x i8>, ptr %A
+ %tmp4 = load <16 x i8>, ptr %B
+ %tmp3 = shl <16 x i8> %tmp1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ %tmp5 = or <16 x i8> %tmp3, %tmp4
+ ret <16 x i8> %tmp5
+}
+
+; shl_orr8h: load <8 x i16> from %A and %B, shift the %A value left by 1 in
+; every lane, and OR the result with the %B value.
+; The assertions below record the instrumented IR: a non-zero shadow for either
+; pointer argument branches to @__msan_warning_noreturn; the shift's shadow is
+; the same shl applied to the shadow loaded for %A; the OR's shadow is combined
+; from both operand shadows and the complemented operand values, then stored
+; to @__msan_retval_tls.
+define <8 x i16> @shl_orr8h(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <8 x i16> @shl_orr8h(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i16>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[TMP13:%.*]] = shl <8 x i16> [[_MSLD]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i16> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = shl <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <8 x i16> [[TMP3]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+; CHECK-NEXT: [[TMP16:%.*]] = xor <8 x i16> [[TMP4]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+; CHECK-NEXT: [[TMP17:%.*]] = and <8 x i16> [[TMP14]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP18:%.*]] = and <8 x i16> [[TMP15]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP19:%.*]] = and <8 x i16> [[TMP14]], [[TMP16]]
+; CHECK-NEXT: [[TMP20:%.*]] = or <8 x i16> [[TMP17]], [[TMP18]]
+; CHECK-NEXT: [[TMP21:%.*]] = or <8 x i16> [[TMP20]], [[TMP19]]
+; CHECK-NEXT: [[TMP5:%.*]] = or <8 x i16> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <8 x i16> [[TMP21]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i16> [[TMP5]]
+;
+ %tmp1 = load <8 x i16>, ptr %A
+ %tmp4 = load <8 x i16>, ptr %B
+ %tmp3 = shl <8 x i16> %tmp1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %tmp5 = or <8 x i16> %tmp3, %tmp4
+ ret <8 x i16> %tmp5
+}
+
+; shl_orr4s: load <4 x i32> from %A and %B, shift the %A value left by 1 in
+; every lane, and OR the result with the %B value.
+; The assertions below record the instrumented IR: a non-zero shadow for either
+; pointer argument branches to @__msan_warning_noreturn; the shift's shadow is
+; the same shl applied to the shadow loaded for %A; the OR's shadow is combined
+; from both operand shadows and the complemented operand values, then stored
+; to @__msan_retval_tls.
+define <4 x i32> @shl_orr4s(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <4 x i32> @shl_orr4s(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[TMP13:%.*]] = shl <4 x i32> [[_MSLD]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: [[TMP14:%.*]] = or <4 x i32> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = shl <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i32> [[TMP3]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT: [[TMP16:%.*]] = xor <4 x i32> [[TMP4]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT: [[TMP17:%.*]] = and <4 x i32> [[TMP14]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP18:%.*]] = and <4 x i32> [[TMP15]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP19:%.*]] = and <4 x i32> [[TMP14]], [[TMP16]]
+; CHECK-NEXT: [[TMP20:%.*]] = or <4 x i32> [[TMP17]], [[TMP18]]
+; CHECK-NEXT: [[TMP21:%.*]] = or <4 x i32> [[TMP20]], [[TMP19]]
+; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i32> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <4 x i32> [[TMP21]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[TMP5]]
+;
+ %tmp1 = load <4 x i32>, ptr %A
+ %tmp4 = load <4 x i32>, ptr %B
+ %tmp3 = shl <4 x i32> %tmp1, <i32 1, i32 1, i32 1, i32 1>
+ %tmp5 = or <4 x i32> %tmp3, %tmp4
+ ret <4 x i32> %tmp5
+}
+
+; shl_orr2d: load <2 x i64> from %A and %B, shift the %A value left by 1 in
+; every lane, and OR the result with the %B value.
+; The assertions below record the instrumented IR: a non-zero shadow for either
+; pointer argument branches to @__msan_warning_noreturn; the shift's shadow is
+; the same shl applied to the shadow loaded for %A; the OR's shadow is combined
+; from both operand shadows and the complemented operand values, then stored
+; to @__msan_retval_tls.
+define <2 x i64> @shl_orr2d(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <2 x i64> @shl_orr2d(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[TMP13:%.*]] = shl <2 x i64> [[_MSLD]], <i64 1, i64 1>
+; CHECK-NEXT: [[TMP14:%.*]] = or <2 x i64> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = shl <2 x i64> [[TMP1]], <i64 1, i64 1>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <2 x i64> [[TMP3]], <i64 -1, i64 -1>
+; CHECK-NEXT: [[TMP16:%.*]] = xor <2 x i64> [[TMP4]], <i64 -1, i64 -1>
+; CHECK-NEXT: [[TMP17:%.*]] = and <2 x i64> [[TMP14]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP18:%.*]] = and <2 x i64> [[TMP15]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP19:%.*]] = and <2 x i64> [[TMP14]], [[TMP16]]
+; CHECK-NEXT: [[TMP20:%.*]] = or <2 x i64> [[TMP17]], [[TMP18]]
+; CHECK-NEXT: [[TMP21:%.*]] = or <2 x i64> [[TMP20]], [[TMP19]]
+; CHECK-NEXT: [[TMP5:%.*]] = or <2 x i64> [[TMP3]], [[TMP4]]
+; CHECK-NEXT: store <2 x i64> [[TMP21]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i64> [[TMP5]]
+;
+ %tmp1 = load <2 x i64>, ptr %A
+ %tmp4 = load <2 x i64>, ptr %B
+ %tmp3 = shl <2 x i64> %tmp1, <i64 1, i64 1>
+ %tmp5 = or <2 x i64> %tmp3, %tmp4
+ ret <2 x i64> %tmp5
+}
+
+; shll: zero-extend the <8 x i8> argument to <8 x i16> and shift every lane
+; left by 8 (the source element width).
+; The assertions below record the instrumented IR: the argument's shadow is
+; loaded as <8 x i8> from @__msan_param_tls, zero-extended and shifted by the
+; same amount as the value, then stored to @__msan_retval_tls. No warning
+; branch is emitted for this function.
+define <8 x i16> @shll(<8 x i8> %in) sanitize_memory {
+; CHECK-LABEL: define <8 x i16> @shll(
+; CHECK-SAME: <8 x i8> [[IN:%.*]]) #[[ATTR3:[0-9]+]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = zext <8 x i8> [[TMP1]] to <8 x i16>
+; CHECK-NEXT: [[EXT:%.*]] = zext <8 x i8> [[IN]] to <8 x i16>
+; CHECK-NEXT: [[TMP2:%.*]] = shl <8 x i16> [[_MSPROP]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i16> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[RES:%.*]] = shl <8 x i16> [[EXT]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+; CHECK-NEXT: store <8 x i16> [[TMP3]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i16> [[RES]]
+;
+ %ext = zext <8 x i8> %in to <8 x i16>
+ %res = shl <8 x i16> %ext, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+ ret <8 x i16> %res
+}
+
+; shll_high: extract lanes 4..7 of the <8 x i16> argument with a
+; shufflevector, zero-extend them to <4 x i32>, and shift every lane left by
+; 16 (the source element width).
+; The assertions below record the instrumented IR: the argument's shadow goes
+; through the same shuffle (with an all-ones vector as the second shuffle
+; operand), the same zext and the same shl, and the result is stored to
+; @__msan_retval_tls. No warning branch is emitted for this function.
+define <4 x i32> @shll_high(<8 x i16> %in) sanitize_memory {
+; CHECK-LABEL: define <4 x i32> @shll_high(
+; CHECK-SAME: <8 x i16> [[IN:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i16> [[IN]], <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = zext <4 x i16> [[_MSPROP]] to <4 x i32>
+; CHECK-NEXT: [[EXT:%.*]] = zext <4 x i16> [[EXTRACT]] to <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[_MSPROP1]], <i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT: [[TMP3:%.*]] = or <4 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[RES:%.*]] = shl <4 x i32> [[EXT]], <i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[RES]]
+;
+ %extract = shufflevector <8 x i16> %in, <8 x i16> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+ %ext = zext <4 x i16> %extract to <4 x i32>
+ %res = shl <4 x i32> %ext, <i32 16, i32 16, i32 16, i32 16>
+ ret <4 x i32> %res
+}
+
+; sli8b: load <8 x i8> from %A and %B and pass both to
+; @llvm.aarch64.neon.vsli.v8i8 with an immediate of 1.
+; The assertions below record the instrumented IR as it currently stands: the
+; shadows of both loaded vectors are bitcast to i64 and compared against zero,
+; any non-zero shadow branches to @__msan_warning_noreturn, and the shadow
+; stored to @__msan_retval_tls is zeroinitializer (no shadow is propagated
+; through the intrinsic).
+define <8 x i8> @sli8b(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <8 x i8> @sli8b(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i8>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i8>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x i8> [[_MSLD]] to i64
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP13]], 0
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x i8> [[_MSLD1]] to i64
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP14]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]]
+; CHECK: 15:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 16:
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]], i32 1)
+; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i8> [[TMP3]]
+;
+ %tmp1 = load <8 x i8>, ptr %A
+ %tmp2 = load <8 x i8>, ptr %B
+ %tmp3 = call <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2, i32 1)
+ ret <8 x i8> %tmp3
+}
+
+; sli4h: load <4 x i16> from %A and %B and pass both to
+; @llvm.aarch64.neon.vsli.v4i16 with an immediate of 1.
+; The assertions below record the instrumented IR as it currently stands: the
+; shadows of both loaded vectors are bitcast to i64 and compared against zero,
+; any non-zero shadow branches to @__msan_warning_noreturn, and the shadow
+; stored to @__msan_retval_tls is zeroinitializer (no shadow is propagated
+; through the intrinsic).
+define <4 x i16> @sli4h(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <4 x i16> @sli4h(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i16>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i16>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i16> [[_MSLD]] to i64
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP13]], 0
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[_MSLD1]] to i64
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP14]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]]
+; CHECK: 15:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 16:
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]], i32 1)
+; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i16> [[TMP3]]
+;
+ %tmp1 = load <4 x i16>, ptr %A
+ %tmp2 = load <4 x i16>, ptr %B
+ %tmp3 = call <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2, i32 1)
+ ret <4 x i16> %tmp3
+}
+
+; sli2s: load <2 x i32> from %A and %B and pass both to
+; @llvm.aarch64.neon.vsli.v2i32 with an immediate of 1.
+; The assertions below record the instrumented IR as it currently stands: the
+; shadows of both loaded vectors are bitcast to i64 and compared against zero,
+; any non-zero shadow branches to @__msan_warning_noreturn, and the shadow
+; stored to @__msan_retval_tls is zeroinitializer (no shadow is propagated
+; through the intrinsic).
+define <2 x i32> @sli2s(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <2 x i32> @sli2s(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <2 x i32> [[_MSLD]] to i64
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP13]], 0
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <2 x i32> [[_MSLD1]] to i64
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP14]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]]
+; CHECK: 15:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 16:
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]], i32 1)
+; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i32> [[TMP3]]
+;
+ %tmp1 = load <2 x i32>, ptr %A
+ %tmp2 = load <2 x i32>, ptr %B
+ %tmp3 = call <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2, i32 1)
+ ret <2 x i32> %tmp3
+}
+
+; vsli on <1 x i64> operands loaded from %A and %B, with shift immediate 1.
+; Expected MSan output: the shadows of both pointer arguments and of both
+; loaded vectors are tested, any non-zero shadow branches to
+; __msan_warning_noreturn, and the return shadow is stored as zero.
+define <1 x i64> @sli1d(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <1 x i64> @sli1d(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr [[A]], align 8
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP7]], align 8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <1 x i64>, ptr [[B]], align 8
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <1 x i64>, ptr [[TMP12]], align 8
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[_MSLD]] to i64
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i64 [[TMP13]], 0
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[_MSLD1]] to i64
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i64 [[TMP14]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]]
+; CHECK: 15:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 16:
+; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> [[TMP1]], <1 x i64> [[TMP2]], i32 1)
+; CHECK-NEXT: store <1 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <1 x i64> [[TMP3]]
+;
+ %tmp1 = load <1 x i64>, ptr %A
+ %tmp2 = load <1 x i64>, ptr %B
+ %tmp3 = call <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64> %tmp1, <1 x i64> %tmp2, i32 1)
+ ret <1 x i64> %tmp3
+}
+
+; vsli on <16 x i8> operands loaded from %A and %B, with shift immediate 1.
+; Expected MSan output: the shadows of both pointer arguments and of both
+; loaded vectors (compared as i128) are tested, any non-zero shadow branches
+; to __msan_warning_noreturn, and the return shadow is stored as zero.
+define <16 x i8> @sli16b(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <16 x i8> @sli16b(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i8>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x i8> [[_MSLD]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x i8> [[_MSLD1]] to i128
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP14]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]]
+; CHECK: 15:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 16:
+; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]], i32 1)
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[TMP3]]
+;
+ %tmp1 = load <16 x i8>, ptr %A
+ %tmp2 = load <16 x i8>, ptr %B
+ %tmp3 = call <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2, i32 1)
+ ret <16 x i8> %tmp3
+}
+
+; vsli on <8 x i16> operands loaded from %A and %B, with shift immediate 1.
+; Expected MSan output: the shadows of both pointer arguments and of both
+; loaded vectors (compared as i128) are tested, any non-zero shadow branches
+; to __msan_warning_noreturn, and the return shadow is stored as zero.
+define <8 x i16> @sli8h(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <8 x i16> @sli8h(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i16>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x i16> [[_MSLD]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x i16> [[_MSLD1]] to i128
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP14]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]]
+; CHECK: 15:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 16:
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]], i32 1)
+; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i16> [[TMP3]]
+;
+ %tmp1 = load <8 x i16>, ptr %A
+ %tmp2 = load <8 x i16>, ptr %B
+ %tmp3 = call <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2, i32 1)
+ ret <8 x i16> %tmp3
+}
+
+; vsli on <4 x i32> operands loaded from %A and %B, with shift immediate 1.
+; Expected MSan output: the shadows of both pointer arguments and of both
+; loaded vectors (compared as i128) are tested, any non-zero shadow branches
+; to __msan_warning_noreturn, and the return shadow is stored as zero.
+define <4 x i32> @sli4s(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <4 x i32> @sli4s(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i32> [[_MSLD1]] to i128
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP14]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]]
+; CHECK: 15:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 16:
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]], i32 1)
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[TMP3]]
+;
+ %tmp1 = load <4 x i32>, ptr %A
+ %tmp2 = load <4 x i32>, ptr %B
+ %tmp3 = call <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2, i32 1)
+ ret <4 x i32> %tmp3
+}
+
+; vsli on <2 x i64> operands loaded from %A and %B, with shift immediate 1.
+; Expected MSan output: the shadows of both pointer arguments and of both
+; loaded vectors (compared as i128) are tested, any non-zero shadow branches
+; to __msan_warning_noreturn, and the return shadow is stored as zero.
+define <2 x i64> @sli2d(ptr %A, ptr %B) nounwind sanitize_memory {
+; CHECK-LABEL: define <2 x i64> @sli2d(
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr [[A]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF0]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr [[B]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
+; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
+; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP13]], 0
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <2 x i64> [[_MSLD1]] to i128
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP14]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF0]]
+; CHECK: 15:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 16:
+; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]], i32 1)
+; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i64> [[TMP3]]
+;
+ %tmp1 = load <2 x i64>, ptr %A
+ %tmp2 = load <2 x i64>, ptr %B
+ %tmp3 = call <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64> %tmp1, <2 x i64> %tmp2, i32 1)
+ ret <2 x i64> %tmp3
+}
+
+; Declarations of the llvm.aarch64.neon.vsli.* intrinsics, one per vector
+; type; each takes two vectors of that type plus an i32 operand.
+declare <8 x i8> @llvm.aarch64.neon.vsli.v8i8(<8 x i8>, <8 x i8>, i32) nounwind readnone
+declare <4 x i16> @llvm.aarch64.neon.vsli.v4i16(<4 x i16>, <4 x i16>, i32) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.vsli.v2i32(<2 x i32>, <2 x i32>, i32) nounwind readnone
+declare <1 x i64> @llvm.aarch64.neon.vsli.v1i64(<1 x i64>, <1 x i64>, i32) nounwind readnone
+
+declare <16 x i8> @llvm.aarch64.neon.vsli.v16i8(<16 x i8>, <16 x i8>, i32) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.vsli.v8i16(<8 x i16>, <8 x i16>, i32) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.vsli.v4i32(<4 x i32>, <4 x i32>, i32) nounwind readnone
+declare <2 x i64> @llvm.aarch64.neon.vsli.v2i64(<2 x i64>, <2 x i64>, i32) nounwind readnone
+
+; Plain IR ashr on <1 x i64> arguments (no intrinsic call).
+; Expected MSan output: the shadow of %a is shifted right by %b, OR-ed with a
+; sign-extended "shadow of %b is non-zero" mask, and stored as the return
+; shadow; no warning branch is emitted.
+define <1 x i64> @ashr_v1i64(<1 x i64> %a, <1 x i64> %b) sanitize_memory {
+; CHECK-LABEL: define <1 x i64> @ashr_v1i64(
+; CHECK-SAME: <1 x i64> [[A:%.*]], <1 x i64> [[B:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <1 x i64> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = sext <1 x i1> [[TMP3]] to <1 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = ashr <1 x i64> [[TMP1]], [[B]]
+; CHECK-NEXT: [[TMP6:%.*]] = or <1 x i64> [[TMP5]], [[TMP4]]
+; CHECK-NEXT: [[C:%.*]] = ashr <1 x i64> [[A]], [[B]]
+; CHECK-NEXT: store <1 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <1 x i64> [[C]]
+;
+ %c = ashr <1 x i64> %a, %b
+ ret <1 x i64> %c
+}
+
+; addp of %a and %b feeding sqshl with an all-zero second operand; the result
+; is stored to %dst.
+; Expected MSan output: the two argument shadows are OR-ed together and
+; written to the shadow address of %dst; only the shadow of the %dst pointer
+; is tested before the store.
+define void @sqshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanitize_memory {
+; CHECK-LABEL: define void @sqshl_zero_shift_amount(
+; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], ptr [[DST:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[VPADDQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[A]], <2 x i64> [[B]])
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[_MSPROP]], zeroinitializer
+; CHECK-NEXT: [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer)
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[DST]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: store <2 x i64> [[_MSPROP1]], ptr [[TMP7]], align 8
+; CHECK-NEXT: store <2 x i64> [[VSHLQ_V2_I_I]], ptr [[DST]], align 8
+; CHECK-NEXT: ret void
+;
+entry:
+ %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
+ %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
+ store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
+ ret void
+}
+
+; addp of %a and %b feeding uqshl with an all-zero second operand; the result
+; is stored to %dst.
+; Expected MSan output: the two argument shadows are OR-ed together and
+; written to the shadow address of %dst; only the shadow of the %dst pointer
+; is tested before the store.
+define void @uqshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanitize_memory {
+; CHECK-LABEL: define void @uqshl_zero_shift_amount(
+; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], ptr [[DST:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[VPADDQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[A]], <2 x i64> [[B]])
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[_MSPROP]], zeroinitializer
+; CHECK-NEXT: [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer)
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[DST]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: store <2 x i64> [[_MSPROP1]], ptr [[TMP7]], align 8
+; CHECK-NEXT: store <2 x i64> [[VSHLQ_V2_I_I]], ptr [[DST]], align 8
+; CHECK-NEXT: ret void
+;
+entry:
+ %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
+ %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
+ store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
+ ret void
+}
+
+; addp of %a and %b feeding srshl with an all-zero second operand; the result
+; is stored to %dst.
+; Expected MSan output: the two argument shadows are OR-ed together and
+; written to the shadow address of %dst; only the shadow of the %dst pointer
+; is tested before the store.
+define void @srshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanitize_memory {
+; CHECK-LABEL: define void @srshl_zero_shift_amount(
+; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], ptr [[DST:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[VPADDQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[A]], <2 x i64> [[B]])
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[_MSPROP]], zeroinitializer
+; CHECK-NEXT: [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer)
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[DST]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: store <2 x i64> [[_MSPROP1]], ptr [[TMP7]], align 8
+; CHECK-NEXT: store <2 x i64> [[VSHLQ_V2_I_I]], ptr [[DST]], align 8
+; CHECK-NEXT: ret void
+;
+entry:
+ %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
+ %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.srshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
+ store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
+ ret void
+}
+
+; addp of %a and %b feeding urshl with an all-zero second operand; the result
+; is stored to %dst.
+; Expected MSan output: the two argument shadows are OR-ed together and
+; written to the shadow address of %dst; only the shadow of the %dst pointer
+; is tested before the store.
+define void @urshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanitize_memory {
+; CHECK-LABEL: define void @urshl_zero_shift_amount(
+; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], ptr [[DST:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[VPADDQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[A]], <2 x i64> [[B]])
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[_MSPROP]], zeroinitializer
+; CHECK-NEXT: [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer)
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[DST]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: store <2 x i64> [[_MSPROP1]], ptr [[TMP7]], align 8
+; CHECK-NEXT: store <2 x i64> [[VSHLQ_V2_I_I]], ptr [[DST]], align 8
+; CHECK-NEXT: ret void
+;
+entry:
+ %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
+ %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.urshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
+ store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
+ ret void
+}
+
+; addp of %a and %b feeding sqshlu with an all-zero second operand; the result
+; is stored to %dst.
+; Expected MSan output: the two argument shadows are OR-ed together and
+; written to the shadow address of %dst; only the shadow of the %dst pointer
+; is tested before the store.
+define void @sqshlu_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanitize_memory {
+; CHECK-LABEL: define void @sqshlu_zero_shift_amount(
+; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], ptr [[DST:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[VPADDQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[A]], <2 x i64> [[B]])
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[_MSPROP]], zeroinitializer
+; CHECK-NEXT: [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer)
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[DST]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: store <2 x i64> [[_MSPROP1]], ptr [[TMP7]], align 8
+; CHECK-NEXT: store <2 x i64> [[VSHLQ_V2_I_I]], ptr [[DST]], align 8
+; CHECK-NEXT: ret void
+;
+entry:
+ %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
+ %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sqshlu.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
+ store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
+ ret void
+}
+
+; addp of %a and %b feeding sshl with an all-zero second operand; the result
+; is stored to %dst.
+; Expected MSan output: the two argument shadows are OR-ed together and
+; written to the shadow address of %dst; only the shadow of the %dst pointer
+; is tested before the store.
+define void @sshl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanitize_memory {
+; CHECK-LABEL: define void @sshl_zero_shift_amount(
+; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], ptr [[DST:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[VPADDQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[A]], <2 x i64> [[B]])
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[_MSPROP]], zeroinitializer
+; CHECK-NEXT: [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer)
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[DST]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: store <2 x i64> [[_MSPROP1]], ptr [[TMP7]], align 8
+; CHECK-NEXT: store <2 x i64> [[VSHLQ_V2_I_I]], ptr [[DST]], align 8
+; CHECK-NEXT: ret void
+;
+entry:
+ %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
+ %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.sshl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
+ store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
+ ret void
+}
+
+; addp of %a and %b feeding ushl with an all-zero second operand; the result
+; is stored to %dst.
+; Expected MSan output: the two argument shadows are OR-ed together and
+; written to the shadow address of %dst; only the shadow of the %dst pointer
+; is tested before the store.
+define void @ushl_zero_shift_amount(<2 x i64> %a, <2 x i64> %b, ptr %dst) sanitize_memory {
+; CHECK-LABEL: define void @ushl_zero_shift_amount(
+; CHECK-SAME: <2 x i64> [[A:%.*]], <2 x i64> [[B:%.*]], ptr [[DST:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[VPADDQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> [[A]], <2 x i64> [[B]])
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <2 x i64> [[_MSPROP]], zeroinitializer
+; CHECK-NEXT: [[VSHLQ_V2_I_I:%.*]] = tail call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> [[VPADDQ_V2_I_I]], <2 x i64> zeroinitializer)
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF0]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[DST]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 193514046488576
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: store <2 x i64> [[_MSPROP1]], ptr [[TMP7]], align 8
+; CHECK-NEXT: store <2 x i64> [[VSHLQ_V2_I_I]], ptr [[DST]], align 8
+; CHECK-NEXT: ret void
+;
+entry:
+ %vpaddq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64> %a, <2 x i64> %b)
+ %vshlq_v2.i.i = tail call <2 x i64> @llvm.aarch64.neon.ushl.v2i64(<2 x i64> %vpaddq_v2.i.i, <2 x i64> zeroinitializer)
+ store <2 x i64> %vshlq_v2.i.i, ptr %dst, align 8
+ ret void
+}
+
+; rshrn (i32 operand 13) of a noundef <4 x i32> argument, narrowing to
+; <4 x i16>, followed by sext back to <4 x i32>.
+; Expected MSan output: the argument shadow is tested before the intrinsic
+; call (non-zero branches to __msan_warning_noreturn) and the return shadow
+; is stored as zero.
+define <4 x i32> @sext_rshrn(<4 x i32> noundef %a) sanitize_memory {
+; CHECK-LABEL: define <4 x i32> @sext_rshrn(
+; CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[VRSHRN_N1:%.*]] = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[A]], i32 13)
+; CHECK-NEXT: [[VMOVL_I:%.*]] = sext <4 x i16> [[VRSHRN_N1]] to <4 x i32>
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[VMOVL_I]]
+;
+entry:
+ %vrshrn_n1 = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %a, i32 13)
+ %vmovl.i = sext <4 x i16> %vrshrn_n1 to <4 x i32>
+ ret <4 x i32> %vmovl.i
+}
+
+; rshrn (i32 operand 13) of a noundef <4 x i32> argument, narrowing to
+; <4 x i16>, followed by zext back to <4 x i32>.
+; Expected MSan output: the argument shadow is tested before the intrinsic
+; call (non-zero branches to __msan_warning_noreturn) and the return shadow
+; is stored as zero.
+define <4 x i32> @zext_rshrn(<4 x i32> noundef %a) sanitize_memory {
+; CHECK-LABEL: define <4 x i32> @zext_rshrn(
+; CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[TMP0]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[VRSHRN_N1:%.*]] = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[A]], i32 13)
+; CHECK-NEXT: [[VMOVL_I:%.*]] = zext <4 x i16> [[VRSHRN_N1]] to <4 x i32>
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i32> [[VMOVL_I]]
+;
+entry:
+ %vrshrn_n1 = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %a, i32 13)
+ %vmovl.i = zext <4 x i16> %vrshrn_n1 to <4 x i32>
+ ret <4 x i32> %vmovl.i
+}
+
+; Adds 3 to every lane of the noundef <4 x i32> argument and passes the sum to
+; rshrn (i32 operand 13), returning the <4 x i16> result. The body uses add
+; even though the function is named mul_rshrn.
+; Expected MSan output: the argument shadow (OR-ed with zero for the constant
+; addend) is tested before the intrinsic call, and the return shadow is
+; stored as zero.
+define <4 x i16> @mul_rshrn(<4 x i32> noundef %a) sanitize_memory {
+; CHECK-LABEL: define <4 x i16> @mul_rshrn(
+; CHECK-SAME: <4 x i32> noundef [[A:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP0]], zeroinitializer
+; CHECK-NEXT: [[B:%.*]] = add <4 x i32> [[A]], <i32 3, i32 3, i32 3, i32 3>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[_MSPROP]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF0]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[VRSHRN_N1:%.*]] = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> [[B]], i32 13)
+; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x i16> [[VRSHRN_N1]]
+;
+entry:
+ %b = add <4 x i32> %a, <i32 3, i32 3, i32 3, i32 3>
+ %vrshrn_n1 = tail call <4 x i16> @llvm.aarch64.neon.rshrn.v4i16(<4 x i32> %b, i32 13)
+ ret <4 x i16> %vrshrn_n1
+}
+
+; Declaration of the addp intrinsic called by the *_zero_shift_amount functions.
+declare <2 x i64> @llvm.aarch64.neon.addp.v2i64(<2 x i64>, <2 x i64>)
+;.
+; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 1048575}
+;.
More information about the llvm-commits
mailing list