[llvm] [msan] Add test for Arm NEON tbl intrinsics (PR #114462)
Thurston Dang via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 31 15:35:06 PDT 2024
https://github.com/thurstond updated https://github.com/llvm/llvm-project/pull/114462
>From 4d47f0b42f5112e111c93395488b0afa7d5027e5 Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Thu, 31 Oct 2024 20:33:16 +0000
Subject: [PATCH 1/2] [msan] Add test for Arm NEON tbl intrinsics
Arm NEON tbl intrinsics are currently not instrumented properly by MSan.
This test will help track progress.
---
.../MemorySanitizer/AArch64/neon_tbl.ll | 509 ++++++++++++++++++
1 file changed, 509 insertions(+)
create mode 100644 llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl.ll
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl.ll
new file mode 100644
index 00000000000000..42f79fbf027483
--- /dev/null
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl.ll
@@ -0,0 +1,509 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool build/bin/opt --version 2
+; Test memory sanitizer instrumentation for Arm NEON tbl instructions.
+;
+; RUN: opt < %s -passes=msan -S | FileCheck %s
+;
+; Forked from llvm/test/CodeGen/AArch64/arm64-tbl.ll
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-android9001"
+
+; -----------------------------------------------------------------------------------------------------------------------------------------------
+
+define <8 x i8> @tbl1_8b(<16 x i8> %A, <8 x i8> %B) nounwind {
+; CHECK-LABEL: define <8 x i8> @tbl1_8b
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <8 x i8> [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[A]], <8 x i8> [[B]])
+; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i8> [[OUT]]
+;
+ %out = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %A, <8 x i8> %B)
+ ret <8 x i8> %out
+}
+
+define <16 x i8> @tbl1_16b(<16 x i8> %A, <16 x i8> %B) nounwind {
+; CHECK-LABEL: define <16 x i8> @tbl1_16b
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[OUT]]
+;
+ %out = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %A, <16 x i8> %B)
+ ret <16 x i8> %out
+}
+
+define <8 x i8> @tbl2_8b(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) {
+; CHECK-LABEL: define <8 x i8> @tbl2_8b
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <8 x i8> [[C:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[A]], <16 x i8> [[B]], <8 x i8> [[C]])
+; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i8> [[OUT]]
+;
+ %out = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C)
+ ret <8 x i8> %out
+}
+
+define <16 x i8> @tbl2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) {
+; CHECK-LABEL: define <16 x i8> @tbl2_16b
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]])
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[OUT]]
+;
+ %out = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C)
+ ret <16 x i8> %out
+}
+
+define <8 x i8> @tbl3_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) {
+; CHECK-LABEL: define <8 x i8> @tbl3_8b
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <8 x i8> [[D:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <8 x i8> [[D]])
+; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i8> [[OUT]]
+;
+ %out = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D)
+ ret <8 x i8> %out
+}
+
+define <16 x i8> @tbl3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) {
+; CHECK-LABEL: define <16 x i8> @tbl3_16b
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]])
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[OUT]]
+;
+ %out = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D)
+ ret <16 x i8> %out
+}
+
+define <8 x i8> @tbl4_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) {
+; CHECK-LABEL: define <8 x i8> @tbl4_8b
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <8 x i8> [[E:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <8 x i8> [[E]])
+; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i8> [[OUT]]
+;
+ %out = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E)
+ ret <8 x i8> %out
+}
+
+define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) {
+; CHECK-LABEL: define <16 x i8> @tbl4_16b
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> [[E]])
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[OUT]]
+;
+ %out = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E)
+ ret <16 x i8> %out
+}
+
+
+
+define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
+; CHECK-LABEL: define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[T1:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[A]], <16 x i8> [[B]], <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: [[T2:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[C]], <16 x i8> [[D]], <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: [[S:%.*]] = shufflevector <8 x i8> [[T1]], <8 x i8> [[T2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i8> [[S]]
+;
+ %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %t2 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %c, <16 x i8> %d, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %s = shufflevector <8 x i8> %t1, <8 x i8> %t2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
+ ret <8 x i8> %s
+}
+
+
+
+define <16 x i8> @shuffled_tbl2_to_tbl4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
+; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[S]]
+;
+ %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+ ret <16 x i8> %s
+}
+
+
+define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) {
+; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 [[V]], i32 0
+; CHECK-NEXT: [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 [[V]], i32 1
+; CHECK-NEXT: [[INS_2:%.*]] = insertelement <16 x i8> [[INS_1]], i8 [[V]], i32 2
+; CHECK-NEXT: [[INS_3:%.*]] = insertelement <16 x i8> [[INS_2]], i8 [[V]], i32 3
+; CHECK-NEXT: [[INS_4:%.*]] = insertelement <16 x i8> [[INS_3]], i8 [[V]], i32 4
+; CHECK-NEXT: [[INS_5:%.*]] = insertelement <16 x i8> [[INS_4]], i8 [[V]], i32 5
+; CHECK-NEXT: [[INS_6:%.*]] = insertelement <16 x i8> [[INS_5]], i8 [[V]], i32 6
+; CHECK-NEXT: [[INS_7:%.*]] = insertelement <16 x i8> [[INS_6]], i8 [[V]], i32 7
+; CHECK-NEXT: [[INS_8:%.*]] = insertelement <16 x i8> [[INS_7]], i8 -1, i32 8
+; CHECK-NEXT: [[INS_9:%.*]] = insertelement <16 x i8> [[INS_8]], i8 -1, i32 9
+; CHECK-NEXT: [[INS_10:%.*]] = insertelement <16 x i8> [[INS_9]], i8 -1, i32 10
+; CHECK-NEXT: [[INS_11:%.*]] = insertelement <16 x i8> [[INS_10]], i8 -1, i32 11
+; CHECK-NEXT: [[INS_12:%.*]] = insertelement <16 x i8> [[INS_11]], i8 -1, i32 12
+; CHECK-NEXT: [[INS_13:%.*]] = insertelement <16 x i8> [[INS_12]], i8 -1, i32 13
+; CHECK-NEXT: [[INS_14:%.*]] = insertelement <16 x i8> [[INS_13]], i8 -1, i32 14
+; CHECK-NEXT: [[INS_15:%.*]] = insertelement <16 x i8> [[INS_14]], i8 -1, i32 15
+; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[INS_15]])
+; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[S]]
+;
+ %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0
+ %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1
+ %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2
+ %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3
+ %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4
+ %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5
+ %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6
+ %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7
+ %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8
+ %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9
+ %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10
+ %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11
+ %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12
+ %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13
+ %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14
+ %ins.15 = insertelement <16 x i8> %ins.14, i8 -1, i32 15
+ %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15)
+ %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+ ret <16 x i8> %s
+}
+
+
+define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) {
+; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 1, i32 0
+; CHECK-NEXT: [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 1, i32 1
+; CHECK-NEXT: [[INS_2:%.*]] = insertelement <16 x i8> [[INS_1]], i8 1, i32 2
+; CHECK-NEXT: [[INS_3:%.*]] = insertelement <16 x i8> [[INS_2]], i8 1, i32 3
+; CHECK-NEXT: [[INS_4:%.*]] = insertelement <16 x i8> [[INS_3]], i8 1, i32 4
+; CHECK-NEXT: [[INS_5:%.*]] = insertelement <16 x i8> [[INS_4]], i8 1, i32 5
+; CHECK-NEXT: [[INS_6:%.*]] = insertelement <16 x i8> [[INS_5]], i8 1, i32 6
+; CHECK-NEXT: [[INS_7:%.*]] = insertelement <16 x i8> [[INS_6]], i8 1, i32 7
+; CHECK-NEXT: [[INS_8:%.*]] = insertelement <16 x i8> [[INS_7]], i8 -1, i32 8
+; CHECK-NEXT: [[INS_9:%.*]] = insertelement <16 x i8> [[INS_8]], i8 -1, i32 9
+; CHECK-NEXT: [[INS_10:%.*]] = insertelement <16 x i8> [[INS_9]], i8 -1, i32 10
+; CHECK-NEXT: [[INS_11:%.*]] = insertelement <16 x i8> [[INS_10]], i8 -1, i32 11
+; CHECK-NEXT: [[INS_12:%.*]] = insertelement <16 x i8> [[INS_11]], i8 [[V]], i32 12
+; CHECK-NEXT: [[INS_13:%.*]] = insertelement <16 x i8> [[INS_12]], i8 [[V]], i32 13
+; CHECK-NEXT: [[INS_14:%.*]] = insertelement <16 x i8> [[INS_13]], i8 -1, i32 14
+; CHECK-NEXT: [[INS_15:%.*]] = insertelement <16 x i8> [[INS_14]], i8 [[V]], i32 15
+; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[INS_15]])
+; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 31>
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[S]]
+;
+ %ins.0 = insertelement <16 x i8> poison, i8 1, i32 0
+ %ins.1 = insertelement <16 x i8> %ins.0, i8 1, i32 1
+ %ins.2 = insertelement <16 x i8> %ins.1, i8 1, i32 2
+ %ins.3 = insertelement <16 x i8> %ins.2, i8 1, i32 3
+ %ins.4 = insertelement <16 x i8> %ins.3, i8 1, i32 4
+ %ins.5 = insertelement <16 x i8> %ins.4, i8 1, i32 5
+ %ins.6 = insertelement <16 x i8> %ins.5, i8 1, i32 6
+ %ins.7 = insertelement <16 x i8> %ins.6, i8 1, i32 7
+ %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8
+ %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9
+ %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10
+ %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11
+ %ins.12 = insertelement <16 x i8> %ins.11, i8 %v, i32 12
+ %ins.13 = insertelement <16 x i8> %ins.12, i8 %v, i32 13
+ %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14
+ %ins.15 = insertelement <16 x i8> %ins.14, i8 %v, i32 15
+ %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15)
+ %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 31>
+ ret <16 x i8> %s
+}
+
+
+
+define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) {
+; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 [[V]], i32 0
+; CHECK-NEXT: [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 [[V]], i32 1
+; CHECK-NEXT: [[INS_2:%.*]] = insertelement <16 x i8> [[INS_1]], i8 [[V]], i32 2
+; CHECK-NEXT: [[INS_3:%.*]] = insertelement <16 x i8> [[INS_2]], i8 [[V]], i32 3
+; CHECK-NEXT: [[INS_4:%.*]] = insertelement <16 x i8> [[INS_3]], i8 [[V]], i32 4
+; CHECK-NEXT: [[INS_5:%.*]] = insertelement <16 x i8> [[INS_4]], i8 [[V]], i32 5
+; CHECK-NEXT: [[INS_6:%.*]] = insertelement <16 x i8> [[INS_5]], i8 [[V]], i32 6
+; CHECK-NEXT: [[INS_7:%.*]] = insertelement <16 x i8> [[INS_6]], i8 [[V]], i32 7
+; CHECK-NEXT: [[INS_8:%.*]] = insertelement <16 x i8> [[INS_7]], i8 -1, i32 8
+; CHECK-NEXT: [[INS_9:%.*]] = insertelement <16 x i8> [[INS_8]], i8 -1, i32 9
+; CHECK-NEXT: [[INS_10:%.*]] = insertelement <16 x i8> [[INS_9]], i8 -1, i32 10
+; CHECK-NEXT: [[INS_11:%.*]] = insertelement <16 x i8> [[INS_10]], i8 -1, i32 11
+; CHECK-NEXT: [[INS_12:%.*]] = insertelement <16 x i8> [[INS_11]], i8 -1, i32 12
+; CHECK-NEXT: [[INS_13:%.*]] = insertelement <16 x i8> [[INS_12]], i8 -1, i32 13
+; CHECK-NEXT: [[INS_14:%.*]] = insertelement <16 x i8> [[INS_13]], i8 -1, i32 14
+; CHECK-NEXT: [[INS_15:%.*]] = insertelement <16 x i8> [[INS_14]], i8 -1, i32 15
+; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[INS_15]])
+; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[S]]
+;
+ %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0
+ %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1
+ %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2
+ %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3
+ %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4
+ %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5
+ %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6
+ %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7
+ %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8
+ %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9
+ %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10
+ %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11
+ %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12
+ %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13
+ %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14
+ %ins.15 = insertelement <16 x i8> %ins.14, i8 -1, i32 15
+ %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15)
+ %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+ ret <16 x i8> %s
+}
+
+
+
+define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) {
+; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 [[V]], i32 0
+; CHECK-NEXT: [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 [[V]], i32 1
+; CHECK-NEXT: [[INS_2:%.*]] = insertelement <16 x i8> [[INS_1]], i8 [[V]], i32 2
+; CHECK-NEXT: [[INS_3:%.*]] = insertelement <16 x i8> [[INS_2]], i8 [[V]], i32 3
+; CHECK-NEXT: [[INS_4:%.*]] = insertelement <16 x i8> [[INS_3]], i8 [[V]], i32 4
+; CHECK-NEXT: [[INS_5:%.*]] = insertelement <16 x i8> [[INS_4]], i8 [[V]], i32 5
+; CHECK-NEXT: [[INS_6:%.*]] = insertelement <16 x i8> [[INS_5]], i8 [[V]], i32 6
+; CHECK-NEXT: [[INS_7:%.*]] = insertelement <16 x i8> [[INS_6]], i8 [[V]], i32 7
+; CHECK-NEXT: [[INS_8:%.*]] = insertelement <16 x i8> [[INS_7]], i8 -1, i32 8
+; CHECK-NEXT: [[INS_9:%.*]] = insertelement <16 x i8> [[INS_8]], i8 -1, i32 9
+; CHECK-NEXT: [[INS_10:%.*]] = insertelement <16 x i8> [[INS_9]], i8 -1, i32 10
+; CHECK-NEXT: [[INS_11:%.*]] = insertelement <16 x i8> [[INS_10]], i8 -1, i32 11
+; CHECK-NEXT: [[INS_12:%.*]] = insertelement <16 x i8> [[INS_11]], i8 -1, i32 12
+; CHECK-NEXT: [[INS_13:%.*]] = insertelement <16 x i8> [[INS_12]], i8 -1, i32 13
+; CHECK-NEXT: [[INS_14:%.*]] = insertelement <16 x i8> [[INS_13]], i8 [[V]], i32 14
+; CHECK-NEXT: [[INS_15:%.*]] = insertelement <16 x i8> [[INS_14]], i8 [[V]], i32 15
+; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[INS_15]])
+; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 30, i32 31>
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[S]]
+;
+ %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0
+ %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1
+ %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2
+ %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3
+ %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4
+ %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5
+ %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6
+ %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7
+ %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8
+ %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9
+ %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10
+ %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11
+ %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12
+ %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13
+ %ins.14 = insertelement <16 x i8> %ins.13, i8 %v, i32 14
+ %ins.15 = insertelement <16 x i8> %ins.14, i8 %v, i32 15
+ %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15)
+ %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 30, i32 31>
+ ret <16 x i8> %s
+}
+
+
+
+define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
+; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[S]]
+;
+ %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+ ret <16 x i8> %s
+}
+
+
+
+define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
+; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[S]]
+;
+ %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+ ret <16 x i8> %s
+}
+
+
+
+define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
+; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask2
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[S]]
+;
+ %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+ ret <16 x i8> %s
+}
+
+declare <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+
+define <8 x i8> @tbx1_8b(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C) nounwind {
+; CHECK-LABEL: define <8 x i8> @tbx1_8b
+; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <8 x i8> [[C:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <8 x i8> [[C]])
+; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i8> [[OUT]]
+;
+ %out = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C)
+ ret <8 x i8> %out
+}
+
+define <16 x i8> @tbx1_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) nounwind {
+; CHECK-LABEL: define <16 x i8> @tbx1_16b
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]])
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[OUT]]
+;
+ %out = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C)
+ ret <16 x i8> %out
+}
+
+define <8 x i8> @tbx2_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) {
+; CHECK-LABEL: define <8 x i8> @tbx2_8b
+; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <8 x i8> [[D:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <8 x i8> [[D]])
+; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i8> [[OUT]]
+;
+ %out = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D)
+ ret <8 x i8> %out
+}
+
+define <16 x i8> @tbx2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) {
+; CHECK-LABEL: define <16 x i8> @tbx2_16b
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]])
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[OUT]]
+;
+ %out = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D)
+ ret <16 x i8> %out
+}
+
+define <8 x i8> @tbx3_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) {
+; CHECK-LABEL: define <8 x i8> @tbx3_8b
+; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <8 x i8> [[E:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <8 x i8> [[E]])
+; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i8> [[OUT]]
+;
+ %out = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(< 8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E)
+ ret <8 x i8> %out
+}
+
+define <16 x i8> @tbx3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) {
+; CHECK-LABEL: define <16 x i8> @tbx3_16b
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> [[E]])
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[OUT]]
+;
+ %out = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E)
+ ret <16 x i8> %out
+}
+
+define <8 x i8> @tbx4_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) {
+; CHECK-LABEL: define <8 x i8> @tbx4_8b
+; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]], <8 x i8> [[F:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> [[E]], <8 x i8> [[F]])
+; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i8> [[OUT]]
+;
+ %out = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F)
+ ret <8 x i8> %out
+}
+
+define <16 x i8> @tbx4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) {
+; CHECK-LABEL: define <16 x i8> @tbx4_16b
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]], <16 x i8> [[F:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> [[E]], <16 x i8> [[F]])
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[OUT]]
+;
+ %out = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F)
+ ret <16 x i8> %out
+}
+
+declare <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
>From 1cb3f14fa11c217b3b6cddcb3a94d42a73d69daf Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Thu, 31 Oct 2024 22:34:42 +0000
Subject: [PATCH 2/2] Add origins-tracking test
---
.../AArch64/neon_tbl_origins.ll | 534 ++++++++++++++++++
1 file changed, 534 insertions(+)
create mode 100644 llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl_origins.ll
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl_origins.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl_origins.ll
new file mode 100644
index 00000000000000..44c0a278de96f3
--- /dev/null
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/neon_tbl_origins.ll
@@ -0,0 +1,534 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --tool build/bin/opt --version 2
+; Test memory sanitizer instrumentation for Arm NEON tbl instructions.
+;
+; RUN: build/bin/opt < %s -passes=msan -msan-track-origins=2 -S | FileCheck %s
+;
+; Forked from llvm/test/CodeGen/AArch64/arm64-tbl.ll
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-android9001"
+
+; -----------------------------------------------------------------------------------------------------------------------------------------------
+
+define <8 x i8> @tbl1_8b(<16 x i8> %A, <8 x i8> %B) nounwind {
+; CHECK-LABEL: define <8 x i8> @tbl1_8b
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <8 x i8> [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> [[A]], <8 x i8> [[B]])
+; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4
+; CHECK-NEXT: ret <8 x i8> [[OUT]]
+;
+ %out = call <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8> %A, <8 x i8> %B)
+ ret <8 x i8> %out
+}
+
+define <16 x i8> @tbl1_16b(<16 x i8> %A, <16 x i8> %B) nounwind {
+; CHECK-LABEL: define <16 x i8> @tbl1_16b
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]])
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4
+; CHECK-NEXT: ret <16 x i8> [[OUT]]
+;
+ %out = call <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8> %A, <16 x i8> %B)
+ ret <16 x i8> %out
+}
+
+define <8 x i8> @tbl2_8b(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C) {
+; CHECK-LABEL: define <8 x i8> @tbl2_8b
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <8 x i8> [[C:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[A]], <16 x i8> [[B]], <8 x i8> [[C]])
+; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4
+; CHECK-NEXT: ret <8 x i8> [[OUT]]
+;
+ %out = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %A, <16 x i8> %B, <8 x i8> %C)
+ ret <8 x i8> %out
+}
+
+define <16 x i8> @tbl2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) {
+; CHECK-LABEL: define <16 x i8> @tbl2_16b
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]])
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4
+; CHECK-NEXT: ret <16 x i8> [[OUT]]
+;
+ %out = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C)
+ ret <16 x i8> %out
+}
+
+define <8 x i8> @tbl3_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) {
+; CHECK-LABEL: define <8 x i8> @tbl3_8b
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <8 x i8> [[D:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <8 x i8> [[D]])
+; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4
+; CHECK-NEXT: ret <8 x i8> [[OUT]]
+;
+ %out = call <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D)
+ ret <8 x i8> %out
+}
+
+define <16 x i8> @tbl3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) {
+; CHECK-LABEL: define <16 x i8> @tbl3_16b
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]])
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4
+; CHECK-NEXT: ret <16 x i8> [[OUT]]
+;
+ %out = call <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D)
+ ret <16 x i8> %out
+}
+
+define <8 x i8> @tbl4_8b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) {
+; CHECK-LABEL: define <8 x i8> @tbl4_8b
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <8 x i8> [[E:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <8 x i8> [[E]])
+; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4
+; CHECK-NEXT: ret <8 x i8> [[OUT]]
+;
+ %out = call <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E)
+ ret <8 x i8> %out
+}
+
+define <16 x i8> @tbl4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) {
+; CHECK-LABEL: define <16 x i8> @tbl4_16b
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> [[E]])
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4
+; CHECK-NEXT: ret <16 x i8> [[OUT]]
+;
+ %out = call <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E)
+ ret <16 x i8> %out
+}
+
+
+
+define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
+; CHECK-LABEL: define <8 x i8> @shuffled_tbl2_to_tbl4_v8i8
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[T1:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[A]], <16 x i8> [[B]], <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: [[T2:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> [[C]], <16 x i8> [[D]], <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: [[S:%.*]] = shufflevector <8 x i8> [[T1]], <8 x i8> [[T2]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4
+; CHECK-NEXT: ret <8 x i8> [[S]]
+;
+ %t1 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a, <16 x i8> %b, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %t2 = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %c, <16 x i8> %d, <8 x i8> <i8 0, i8 4, i8 8, i8 12, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %s = shufflevector <8 x i8> %t1, <8 x i8> %t2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
+ ret <8 x i8> %s
+}
+
+
+
+define <16 x i8> @shuffled_tbl2_to_tbl4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
+; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4
+; CHECK-NEXT: ret <16 x i8> [[S]]
+;
+ %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+ ret <16 x i8> %s
+}
+
+
+define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) {
+; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 [[V]], i32 0
+; CHECK-NEXT: [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 [[V]], i32 1
+; CHECK-NEXT: [[INS_2:%.*]] = insertelement <16 x i8> [[INS_1]], i8 [[V]], i32 2
+; CHECK-NEXT: [[INS_3:%.*]] = insertelement <16 x i8> [[INS_2]], i8 [[V]], i32 3
+; CHECK-NEXT: [[INS_4:%.*]] = insertelement <16 x i8> [[INS_3]], i8 [[V]], i32 4
+; CHECK-NEXT: [[INS_5:%.*]] = insertelement <16 x i8> [[INS_4]], i8 [[V]], i32 5
+; CHECK-NEXT: [[INS_6:%.*]] = insertelement <16 x i8> [[INS_5]], i8 [[V]], i32 6
+; CHECK-NEXT: [[INS_7:%.*]] = insertelement <16 x i8> [[INS_6]], i8 [[V]], i32 7
+; CHECK-NEXT: [[INS_8:%.*]] = insertelement <16 x i8> [[INS_7]], i8 -1, i32 8
+; CHECK-NEXT: [[INS_9:%.*]] = insertelement <16 x i8> [[INS_8]], i8 -1, i32 9
+; CHECK-NEXT: [[INS_10:%.*]] = insertelement <16 x i8> [[INS_9]], i8 -1, i32 10
+; CHECK-NEXT: [[INS_11:%.*]] = insertelement <16 x i8> [[INS_10]], i8 -1, i32 11
+; CHECK-NEXT: [[INS_12:%.*]] = insertelement <16 x i8> [[INS_11]], i8 -1, i32 12
+; CHECK-NEXT: [[INS_13:%.*]] = insertelement <16 x i8> [[INS_12]], i8 -1, i32 13
+; CHECK-NEXT: [[INS_14:%.*]] = insertelement <16 x i8> [[INS_13]], i8 -1, i32 14
+; CHECK-NEXT: [[INS_15:%.*]] = insertelement <16 x i8> [[INS_14]], i8 -1, i32 15
+; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[INS_15]])
+; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4
+; CHECK-NEXT: ret <16 x i8> [[S]]
+;
+ %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0
+ %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1
+ %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2
+ %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3
+ %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4
+ %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5
+ %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6
+ %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7
+ %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8
+ %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9
+ %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10
+ %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11
+ %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12
+ %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13
+ %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14
+ %ins.15 = insertelement <16 x i8> %ins.14, i8 -1, i32 15
+ %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15)
+ %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+ ret <16 x i8> %s
+}
+
+
+define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) {
+; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 1, i32 0
+; CHECK-NEXT: [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 1, i32 1
+; CHECK-NEXT: [[INS_2:%.*]] = insertelement <16 x i8> [[INS_1]], i8 1, i32 2
+; CHECK-NEXT: [[INS_3:%.*]] = insertelement <16 x i8> [[INS_2]], i8 1, i32 3
+; CHECK-NEXT: [[INS_4:%.*]] = insertelement <16 x i8> [[INS_3]], i8 1, i32 4
+; CHECK-NEXT: [[INS_5:%.*]] = insertelement <16 x i8> [[INS_4]], i8 1, i32 5
+; CHECK-NEXT: [[INS_6:%.*]] = insertelement <16 x i8> [[INS_5]], i8 1, i32 6
+; CHECK-NEXT: [[INS_7:%.*]] = insertelement <16 x i8> [[INS_6]], i8 1, i32 7
+; CHECK-NEXT: [[INS_8:%.*]] = insertelement <16 x i8> [[INS_7]], i8 -1, i32 8
+; CHECK-NEXT: [[INS_9:%.*]] = insertelement <16 x i8> [[INS_8]], i8 -1, i32 9
+; CHECK-NEXT: [[INS_10:%.*]] = insertelement <16 x i8> [[INS_9]], i8 -1, i32 10
+; CHECK-NEXT: [[INS_11:%.*]] = insertelement <16 x i8> [[INS_10]], i8 -1, i32 11
+; CHECK-NEXT: [[INS_12:%.*]] = insertelement <16 x i8> [[INS_11]], i8 [[V]], i32 12
+; CHECK-NEXT: [[INS_13:%.*]] = insertelement <16 x i8> [[INS_12]], i8 [[V]], i32 13
+; CHECK-NEXT: [[INS_14:%.*]] = insertelement <16 x i8> [[INS_13]], i8 -1, i32 14
+; CHECK-NEXT: [[INS_15:%.*]] = insertelement <16 x i8> [[INS_14]], i8 [[V]], i32 15
+; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[INS_15]])
+; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 31>
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4
+; CHECK-NEXT: ret <16 x i8> [[S]]
+;
+ %ins.0 = insertelement <16 x i8> poison, i8 1, i32 0
+ %ins.1 = insertelement <16 x i8> %ins.0, i8 1, i32 1
+ %ins.2 = insertelement <16 x i8> %ins.1, i8 1, i32 2
+ %ins.3 = insertelement <16 x i8> %ins.2, i8 1, i32 3
+ %ins.4 = insertelement <16 x i8> %ins.3, i8 1, i32 4
+ %ins.5 = insertelement <16 x i8> %ins.4, i8 1, i32 5
+ %ins.6 = insertelement <16 x i8> %ins.5, i8 1, i32 6
+ %ins.7 = insertelement <16 x i8> %ins.6, i8 1, i32 7
+ %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8
+ %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9
+ %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10
+ %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11
+ %ins.12 = insertelement <16 x i8> %ins.11, i8 %v, i32 12
+ %ins.13 = insertelement <16 x i8> %ins.12, i8 %v, i32 13
+ %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14
+ %ins.15 = insertelement <16 x i8> %ins.14, i8 %v, i32 15
+ %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15)
+ %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 31>
+ ret <16 x i8> %s
+}
+
+
+
+define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) {
+; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 [[V]], i32 0
+; CHECK-NEXT: [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 [[V]], i32 1
+; CHECK-NEXT: [[INS_2:%.*]] = insertelement <16 x i8> [[INS_1]], i8 [[V]], i32 2
+; CHECK-NEXT: [[INS_3:%.*]] = insertelement <16 x i8> [[INS_2]], i8 [[V]], i32 3
+; CHECK-NEXT: [[INS_4:%.*]] = insertelement <16 x i8> [[INS_3]], i8 [[V]], i32 4
+; CHECK-NEXT: [[INS_5:%.*]] = insertelement <16 x i8> [[INS_4]], i8 [[V]], i32 5
+; CHECK-NEXT: [[INS_6:%.*]] = insertelement <16 x i8> [[INS_5]], i8 [[V]], i32 6
+; CHECK-NEXT: [[INS_7:%.*]] = insertelement <16 x i8> [[INS_6]], i8 [[V]], i32 7
+; CHECK-NEXT: [[INS_8:%.*]] = insertelement <16 x i8> [[INS_7]], i8 -1, i32 8
+; CHECK-NEXT: [[INS_9:%.*]] = insertelement <16 x i8> [[INS_8]], i8 -1, i32 9
+; CHECK-NEXT: [[INS_10:%.*]] = insertelement <16 x i8> [[INS_9]], i8 -1, i32 10
+; CHECK-NEXT: [[INS_11:%.*]] = insertelement <16 x i8> [[INS_10]], i8 -1, i32 11
+; CHECK-NEXT: [[INS_12:%.*]] = insertelement <16 x i8> [[INS_11]], i8 -1, i32 12
+; CHECK-NEXT: [[INS_13:%.*]] = insertelement <16 x i8> [[INS_12]], i8 -1, i32 13
+; CHECK-NEXT: [[INS_14:%.*]] = insertelement <16 x i8> [[INS_13]], i8 -1, i32 14
+; CHECK-NEXT: [[INS_15:%.*]] = insertelement <16 x i8> [[INS_14]], i8 -1, i32 15
+; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[INS_15]])
+; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4
+; CHECK-NEXT: ret <16 x i8> [[S]]
+;
+ %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0
+ %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1
+ %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2
+ %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3
+ %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4
+ %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5
+ %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6
+ %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7
+ %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8
+ %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9
+ %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10
+ %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11
+ %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12
+ %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13
+ %ins.14 = insertelement <16 x i8> %ins.13, i8 -1, i32 14
+ %ins.15 = insertelement <16 x i8> %ins.14, i8 -1, i32 15
+ %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15)
+ %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+ ret <16 x i8> %s
+}
+
+
+
+define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) {
+; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], i8 [[V:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[INS_0:%.*]] = insertelement <16 x i8> poison, i8 [[V]], i32 0
+; CHECK-NEXT: [[INS_1:%.*]] = insertelement <16 x i8> [[INS_0]], i8 [[V]], i32 1
+; CHECK-NEXT: [[INS_2:%.*]] = insertelement <16 x i8> [[INS_1]], i8 [[V]], i32 2
+; CHECK-NEXT: [[INS_3:%.*]] = insertelement <16 x i8> [[INS_2]], i8 [[V]], i32 3
+; CHECK-NEXT: [[INS_4:%.*]] = insertelement <16 x i8> [[INS_3]], i8 [[V]], i32 4
+; CHECK-NEXT: [[INS_5:%.*]] = insertelement <16 x i8> [[INS_4]], i8 [[V]], i32 5
+; CHECK-NEXT: [[INS_6:%.*]] = insertelement <16 x i8> [[INS_5]], i8 [[V]], i32 6
+; CHECK-NEXT: [[INS_7:%.*]] = insertelement <16 x i8> [[INS_6]], i8 [[V]], i32 7
+; CHECK-NEXT: [[INS_8:%.*]] = insertelement <16 x i8> [[INS_7]], i8 -1, i32 8
+; CHECK-NEXT: [[INS_9:%.*]] = insertelement <16 x i8> [[INS_8]], i8 -1, i32 9
+; CHECK-NEXT: [[INS_10:%.*]] = insertelement <16 x i8> [[INS_9]], i8 -1, i32 10
+; CHECK-NEXT: [[INS_11:%.*]] = insertelement <16 x i8> [[INS_10]], i8 -1, i32 11
+; CHECK-NEXT: [[INS_12:%.*]] = insertelement <16 x i8> [[INS_11]], i8 -1, i32 12
+; CHECK-NEXT: [[INS_13:%.*]] = insertelement <16 x i8> [[INS_12]], i8 -1, i32 13
+; CHECK-NEXT: [[INS_14:%.*]] = insertelement <16 x i8> [[INS_13]], i8 [[V]], i32 14
+; CHECK-NEXT: [[INS_15:%.*]] = insertelement <16 x i8> [[INS_14]], i8 [[V]], i32 15
+; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[INS_15]])
+; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 30, i32 31>
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4
+; CHECK-NEXT: ret <16 x i8> [[S]]
+;
+ %ins.0 = insertelement <16 x i8> poison, i8 %v, i32 0
+ %ins.1 = insertelement <16 x i8> %ins.0, i8 %v, i32 1
+ %ins.2 = insertelement <16 x i8> %ins.1, i8 %v, i32 2
+ %ins.3 = insertelement <16 x i8> %ins.2, i8 %v, i32 3
+ %ins.4 = insertelement <16 x i8> %ins.3, i8 %v, i32 4
+ %ins.5 = insertelement <16 x i8> %ins.4, i8 %v, i32 5
+ %ins.6 = insertelement <16 x i8> %ins.5, i8 %v, i32 6
+ %ins.7 = insertelement <16 x i8> %ins.6, i8 %v, i32 7
+ %ins.8 = insertelement <16 x i8> %ins.7, i8 -1, i32 8
+ %ins.9 = insertelement <16 x i8> %ins.8, i8 -1, i32 9
+ %ins.10 = insertelement <16 x i8> %ins.9, i8 -1, i32 10
+ %ins.11 = insertelement <16 x i8> %ins.10, i8 -1, i32 11
+ %ins.12 = insertelement <16 x i8> %ins.11, i8 -1, i32 12
+ %ins.13 = insertelement <16 x i8> %ins.12, i8 -1, i32 13
+ %ins.14 = insertelement <16 x i8> %ins.13, i8 %v, i32 14
+ %ins.15 = insertelement <16 x i8> %ins.14, i8 %v, i32 15
+ %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %ins.15)
+ %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 30, i32 31>
+ ret <16 x i8> %s
+}
+
+
+
+define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
+; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4
+; CHECK-NEXT: ret <16 x i8> [[S]]
+;
+ %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+ ret <16 x i8> %s
+}
+
+
+
+define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
+; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4
+; CHECK-NEXT: ret <16 x i8> [[S]]
+;
+ %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+ ret <16 x i8> %s
+}
+
+
+
+define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) {
+; CHECK-LABEL: define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask2
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[T1:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: [[T2:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+; CHECK-NEXT: [[S:%.*]] = shufflevector <16 x i8> [[T1]], <16 x i8> [[T2]], <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4
+; CHECK-NEXT: ret <16 x i8> [[S]]
+;
+ %t1 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %t2 = call <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8> %c, <16 x i8> %d, <16 x i8> <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
+ %s = shufflevector <16 x i8> %t1, <16 x i8> %t2, <16 x i32> <i32 0, i32 1, i32 21, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+ ret <16 x i8> %s
+}
+
+declare <8 x i8> @llvm.aarch64.neon.tbl1.v8i8(<16 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.tbl1.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.tbl2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i8> @llvm.aarch64.neon.tbl3.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.tbl3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i8> @llvm.aarch64.neon.tbl4.v8i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.tbl4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+
+define <8 x i8> @tbx1_8b(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C) nounwind {
+; CHECK-LABEL: define <8 x i8> @tbx1_8b
+; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <8 x i8> [[C:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <8 x i8> [[C]])
+; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4
+; CHECK-NEXT: ret <8 x i8> [[OUT]]
+;
+ %out = call <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8> %A, <16 x i8> %B, <8 x i8> %C)
+ ret <8 x i8> %out
+}
+
+define <16 x i8> @tbx1_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) nounwind {
+; CHECK-LABEL: define <16 x i8> @tbx1_16b
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]])
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4
+; CHECK-NEXT: ret <16 x i8> [[OUT]]
+;
+ %out = call <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C)
+ ret <16 x i8> %out
+}
+
+define <8 x i8> @tbx2_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D) {
+; CHECK-LABEL: define <8 x i8> @tbx2_8b
+; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <8 x i8> [[D:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <8 x i8> [[D]])
+; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4
+; CHECK-NEXT: ret <8 x i8> [[OUT]]
+;
+ %out = call <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <8 x i8> %D)
+ ret <8 x i8> %out
+}
+
+define <16 x i8> @tbx2_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) {
+; CHECK-LABEL: define <16 x i8> @tbx2_16b
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]])
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4
+; CHECK-NEXT: ret <16 x i8> [[OUT]]
+;
+ %out = call <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D)
+ ret <16 x i8> %out
+}
+
+define <8 x i8> @tbx3_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E) {
+; CHECK-LABEL: define <8 x i8> @tbx3_8b
+; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <8 x i8> [[E:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <8 x i8> [[E]])
+; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4
+; CHECK-NEXT: ret <8 x i8> [[OUT]]
+;
+ %out = call <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(< 8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <8 x i8> %E)
+ ret <8 x i8> %out
+}
+
+define <16 x i8> @tbx3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) {
+; CHECK-LABEL: define <16 x i8> @tbx3_16b
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> [[E]])
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4
+; CHECK-NEXT: ret <16 x i8> [[OUT]]
+;
+ %out = call <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E)
+ ret <16 x i8> %out
+}
+
+define <8 x i8> @tbx4_8b(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F) {
+; CHECK-LABEL: define <8 x i8> @tbx4_8b
+; CHECK-SAME: (<8 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]], <8 x i8> [[F:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[OUT:%.*]] = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> [[E]], <8 x i8> [[F]])
+; CHECK-NEXT: store <8 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4
+; CHECK-NEXT: ret <8 x i8> [[OUT]]
+;
+ %out = call <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <8 x i8> %F)
+ ret <8 x i8> %out
+}
+
+define <16 x i8> @tbx4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F) {
+; CHECK-LABEL: define <16 x i8> @tbx4_16b
+; CHECK-SAME: (<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], <16 x i8> [[C:%.*]], <16 x i8> [[D:%.*]], <16 x i8> [[E:%.*]], <16 x i8> [[F:%.*]]) {
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[OUT:%.*]] = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> [[A]], <16 x i8> [[B]], <16 x i8> [[C]], <16 x i8> [[D]], <16 x i8> [[E]], <16 x i8> [[F]])
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_origin_tls, align 4
+; CHECK-NEXT: ret <16 x i8> [[OUT]]
+;
+ %out = call <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, <16 x i8> %F)
+ ret <16 x i8> %out
+}
+
+declare <8 x i8> @llvm.aarch64.neon.tbx1.v8i8(<8 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.tbx1.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i8> @llvm.aarch64.neon.tbx2.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.tbx2.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i8> @llvm.aarch64.neon.tbx3.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.tbx3.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
+declare <8 x i8> @llvm.aarch64.neon.tbx4.v8i8(<8 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <8 x i8>) nounwind readnone
+declare <16 x i8> @llvm.aarch64.neon.tbx4.v16i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
More information about the llvm-commits
mailing list