[clang] [Clang][AArch64] Add mfloat8_t variants of Neon load intrinsics (PR #145666)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Jun 27 07:54:01 PDT 2025
https://github.com/amilendra updated https://github.com/llvm/llvm-project/pull/145666
>From 157ae6bb475451a0d3ad48266e21a4bc55722fa8 Mon Sep 17 00:00:00 2001
From: Amilendra Kodithuwakku <amilendra.kodithuwakku at arm.com>
Date: Wed, 25 Jun 2025 09:49:46 +0100
Subject: [PATCH 1/2] [Clang][AArch64] Add mfloat8_t variants of Neon load
intrinsics
Add mfloat8_t support for the following Neon load intrinsics.
- VLD1
- VLD1_X2
- VLD1_X3
- VLD1_X4
- VLD1_LANE
- VLD1_DUP
- VLD2
- VLD3
- VLD4
- VLD2_DUP
- VLD3_DUP
- VLD4_DUP
- VLD2_LANE
- VLD3_LANE
- VLD4_LANE
---
clang/include/clang/Basic/arm_neon.td | 30 +-
.../neon-fp8-intrinsics/acle_neon_loads.c | 825 ++++++++++++++++++
2 files changed, 840 insertions(+), 15 deletions(-)
create mode 100644 clang/test/CodeGen/AArch64/neon-fp8-intrinsics/acle_neon_loads.c
diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td
index 7251cc2d1759a..a21773bd315cd 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -453,18 +453,18 @@ def VSLI_N : WInst<"vsli_n", "...I",
////////////////////////////////////////////////////////////////////////////////
// E.3.14 Loads and stores of a single vector
def VLD1 : WInst<"vld1", ".(c*!)",
- "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">;
+ "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPsmQm">;
def VLD1_X2 : WInst<"vld1_x2", "2(c*!)",
- "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">;
+ "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPsmQm">;
def VLD1_X3 : WInst<"vld1_x3", "3(c*!)",
- "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">;
+ "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPsmQm">;
def VLD1_X4 : WInst<"vld1_x4", "4(c*!)",
- "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPs">;
+ "cfilsUcUiUlUsQcQfQiQlQsQUcQUiQUlQUsPcPsQPcQPsmQm">;
def VLD1_LANE : WInst<"vld1_lane", ".(c*!).I",
- "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs",
+ "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPsmQm",
[ImmCheck<2, ImmCheckLaneIndex, 1>]>;
def VLD1_DUP : WInst<"vld1_dup", ".(c*!)",
- "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">;
+ "QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPsmQm">;
def VST1 : WInst<"vst1", "v*(.!)",
"QUcQUsQUiQUlQcQsQiQlQfQPcQPsUcUsUiUlcsilfPcPs">;
def VST1_X2 : WInst<"vst1_x2", "v*(2!)",
@@ -495,20 +495,20 @@ def VST1_LANE_F16 : WInst<"vst1_lane", "v*(.!)I", "hQh",
////////////////////////////////////////////////////////////////////////////////
// E.3.15 Loads and stores of an N-element structure
-def VLD2 : WInst<"vld2", "2(c*!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">;
-def VLD3 : WInst<"vld3", "3(c*!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">;
-def VLD4 : WInst<"vld4", "4(c*!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">;
+def VLD2 : WInst<"vld2", "2(c*!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPsmQm">;
+def VLD3 : WInst<"vld3", "3(c*!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPsmQm">;
+def VLD4 : WInst<"vld4", "4(c*!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPsmQm">;
def VLD2_DUP : WInst<"vld2_dup", "2(c*!)",
- "UcUsUiUlcsilfPcPsQcQfQiQlQsQPcQPsQUcQUiQUlQUs">;
+ "UcUsUiUlcsilfPcPsQcQfQiQlQsQPcQPsQUcQUiQUlQUsmQm">;
def VLD3_DUP : WInst<"vld3_dup", "3(c*!)",
- "UcUsUiUlcsilfPcPsQcQfQiQlQsQPcQPsQUcQUiQUlQUs">;
+ "UcUsUiUlcsilfPcPsQcQfQiQlQsQPcQPsQUcQUiQUlQUsmQm">;
def VLD4_DUP : WInst<"vld4_dup", "4(c*!)",
- "UcUsUiUlcsilfPcPsQcQfQiQlQsQPcQPsQUcQUiQUlQUs">;
-def VLD2_LANE : WInst<"vld2_lane", "2(c*!)2I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs",
+ "UcUsUiUlcsilfPcPsQcQfQiQlQsQPcQPsQUcQUiQUlQUsmQm">;
+def VLD2_LANE : WInst<"vld2_lane", "2(c*!)2I", "QUsQUiQsQiQfQPsUcUsUicsifPcPsmQm",
[ImmCheck<4, ImmCheckLaneIndex, 1>]>;
-def VLD3_LANE : WInst<"vld3_lane", "3(c*!)3I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs",
+def VLD3_LANE : WInst<"vld3_lane", "3(c*!)3I", "QUsQUiQsQiQfQPsUcUsUicsifPcPsmQm",
[ImmCheck<5, ImmCheckLaneIndex, 1>]>;
-def VLD4_LANE : WInst<"vld4_lane", "4(c*!)4I", "QUsQUiQsQiQfQPsUcUsUicsifPcPs",
+def VLD4_LANE : WInst<"vld4_lane", "4(c*!)4I", "QUsQUiQsQiQfQPsUcUsUicsifPcPsmQm",
[ImmCheck<6, ImmCheckLaneIndex, 1>]>;
def VST2 : WInst<"vst2", "v*(2!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">;
def VST3 : WInst<"vst3", "v*(3!)", "QUcQUsQUiQcQsQiQfQPcQPsUcUsUiUlcsilfPcPs">;
diff --git a/clang/test/CodeGen/AArch64/neon-fp8-intrinsics/acle_neon_loads.c b/clang/test/CodeGen/AArch64/neon-fp8-intrinsics/acle_neon_loads.c
new file mode 100644
index 0000000000000..7947494a9dbb8
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/neon-fp8-intrinsics/acle_neon_loads.c
@@ -0,0 +1,825 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,sroa | FileCheck %s
+// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +neon -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,sroa | FileCheck %s -check-prefix CHECK-CXX
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -Werror -Wall -S -O3 -o /dev/null %s
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_neon.h>
+
+// CHECK-LABEL: define dso_local <8 x i8> @test_vld1_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[A]], align 1
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
+// CHECK-CXX-LABEL: define dso_local <8 x i8> @_Z13test_vld1_mf8PKu6__mfp8(
+// CHECK-CXX-SAME: ptr noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[A]], align 1
+// CHECK-CXX-NEXT: ret <8 x i8> [[TMP0]]
+//
+mfloat8x8_t test_vld1_mf8(mfloat8_t const *a) {
+ return vld1_mf8(a);
+}
+
+// CHECK-LABEL: define dso_local %struct.mfloat8x8x2_t @test_vld1_mf8_x2(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD1XN]], 0
+// CHECK-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD1XN]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T:%.*]] poison, <8 x i8> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
+// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x8x2_t @_Z16test_vld1_mf8_x2PKu6__mfp8(
+// CHECK-CXX-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0(ptr [[A]])
+// CHECK-CXX-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD1XN]], 0
+// CHECK-CXX-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD1XN]], 1
+// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T:%.*]] poison, <8 x i8> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
+// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
+// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
+mfloat8x8x2_t test_vld1_mf8_x2(mfloat8_t const *a) {
+ return vld1_mf8_x2(a);
+}
+
+// CHECK-LABEL: define dso_local %struct.mfloat8x8x3_t @test_vld1_mf8_x3(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], 0
+// CHECK-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], 1
+// CHECK-NEXT: [[VLD1XN_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T:%.*]] poison, <8 x i8> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD1XN_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_2_INSERT]]
+//
+// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x8x3_t @_Z16test_vld1_mf8_x3PKu6__mfp8(
+// CHECK-CXX-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0(ptr [[A]])
+// CHECK-CXX-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], 0
+// CHECK-CXX-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], 1
+// CHECK-CXX-NEXT: [[VLD1XN_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], 2
+// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T:%.*]] poison, <8 x i8> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
+// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
+// CHECK-CXX-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD1XN_FCA_2_EXTRACT]], 0, 2
+// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_2_INSERT]]
+//
+mfloat8x8x3_t test_vld1_mf8_x3(mfloat8_t const *a) {
+ return vld1_mf8_x3(a);
+}
+
+// CHECK-LABEL: define dso_local %struct.mfloat8x8x4_t @test_vld1_mf8_x4(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], 0
+// CHECK-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], 1
+// CHECK-NEXT: [[VLD1XN_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], 2
+// CHECK-NEXT: [[VLD1XN_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T:%.*]] poison, <8 x i8> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD1XN_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_2_INSERT]], <8 x i8> [[VLD1XN_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_3_INSERT]]
+//
+// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x8x4_t @_Z16test_vld1_mf8_x4PKu6__mfp8(
+// CHECK-CXX-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0(ptr [[A]])
+// CHECK-CXX-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], 0
+// CHECK-CXX-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], 1
+// CHECK-CXX-NEXT: [[VLD1XN_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], 2
+// CHECK-CXX-NEXT: [[VLD1XN_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], 3
+// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T:%.*]] poison, <8 x i8> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
+// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
+// CHECK-CXX-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD1XN_FCA_2_EXTRACT]], 0, 2
+// CHECK-CXX-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_2_INSERT]], <8 x i8> [[VLD1XN_FCA_3_EXTRACT]], 0, 3
+// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_3_INSERT]]
+//
+mfloat8x8x4_t test_vld1_mf8_x4(mfloat8_t const *a) {
+ return vld1_mf8_x4(a);
+}
+
+// CHECK-LABEL: define dso_local <8 x i8> @test_vld1_dup_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
+// CHECK-NEXT: ret <8 x i8> [[LANE]]
+//
+// CHECK-CXX-LABEL: define dso_local <8 x i8> @_Z17test_vld1_dup_mf8Pu6__mfp8(
+// CHECK-CXX-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-CXX-NEXT: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 [[TMP0]], i32 0
+// CHECK-CXX-NEXT: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
+// CHECK-CXX-NEXT: ret <8 x i8> [[LANE]]
+//
+mfloat8x8_t test_vld1_dup_mf8(mfloat8_t *a) {
+ return vld1_dup_mf8(a);
+}
+
+// CHECK-LABEL: define dso_local <8 x i8> @test_vld1_lane_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x i8> [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <8 x i8> [[B]], i8 [[TMP0]], i32 7
+// CHECK-NEXT: ret <8 x i8> [[VLD1_LANE]]
+//
+// CHECK-CXX-LABEL: define dso_local <8 x i8> @_Z18test_vld1_lane_mf8Pu6__mfp813__Mfloat8x8_t(
+// CHECK-CXX-SAME: ptr noundef [[A:%.*]], <8 x i8> [[B:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-CXX-NEXT: [[VLD1_LANE:%.*]] = insertelement <8 x i8> [[B]], i8 [[TMP0]], i32 7
+// CHECK-CXX-NEXT: ret <8 x i8> [[VLD1_LANE]]
+//
+mfloat8x8_t test_vld1_lane_mf8(mfloat8_t *a, mfloat8x8_t b) {
+ return vld1_lane_mf8(a, b, 7);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vld1q_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[A]], align 1
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+// CHECK-CXX-LABEL: define dso_local <16 x i8> @_Z14test_vld1q_mf8PKu6__mfp8(
+// CHECK-CXX-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[A]], align 1
+// CHECK-CXX-NEXT: ret <16 x i8> [[TMP0]]
+//
+mfloat8x16_t test_vld1q_mf8(mfloat8_t const *a) {
+ return vld1q_mf8(a);
+}
+
+// CHECK-LABEL: define dso_local %struct.mfloat8x16x2_t @test_vld1q_mf8_x2(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD1XN]], 0
+// CHECK-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD1XN]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T:%.*]] poison, <16 x i8> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_1_INSERT]]
+//
+// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x16x2_t @_Z17test_vld1q_mf8_x2PKu6__mfp8(
+// CHECK-CXX-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0(ptr [[A]])
+// CHECK-CXX-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD1XN]], 0
+// CHECK-CXX-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD1XN]], 1
+// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T:%.*]] poison, <16 x i8> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
+// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
+// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_1_INSERT]]
+//
+mfloat8x16x2_t test_vld1q_mf8_x2(mfloat8_t const *a) {
+ return vld1q_mf8_x2(a);
+}
+
+// CHECK-LABEL: define dso_local %struct.mfloat8x16x3_t @test_vld1q_mf8_x3(
+// CHECK-SAME: ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0(ptr [[PTR]])
+// CHECK-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], 0
+// CHECK-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], 1
+// CHECK-NEXT: [[VLD1XN_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T:%.*]] poison, <16 x i8> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD1XN_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_2_INSERT]]
+//
+// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x16x3_t @_Z17test_vld1q_mf8_x3PKu6__mfp8(
+// CHECK-CXX-SAME: ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0(ptr [[PTR]])
+// CHECK-CXX-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], 0
+// CHECK-CXX-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], 1
+// CHECK-CXX-NEXT: [[VLD1XN_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], 2
+// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T:%.*]] poison, <16 x i8> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
+// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
+// CHECK-CXX-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD1XN_FCA_2_EXTRACT]], 0, 2
+// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_2_INSERT]]
+//
+mfloat8x16x3_t test_vld1q_mf8_x3(mfloat8_t const *ptr) {
+ return vld1q_mf8_x3(ptr);
+}
+// CHECK-LABEL: define dso_local %struct.mfloat8x16x4_t @test_vld1q_mf8_x4(
+// CHECK-SAME: ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0(ptr [[PTR]])
+// CHECK-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], 0
+// CHECK-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], 1
+// CHECK-NEXT: [[VLD1XN_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], 2
+// CHECK-NEXT: [[VLD1XN_FCA_3_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T:%.*]] poison, <16 x i8> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD1XN_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_2_INSERT]], <16 x i8> [[VLD1XN_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_3_INSERT]]
+//
+// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x16x4_t @_Z17test_vld1q_mf8_x4PKu6__mfp8(
+// CHECK-CXX-SAME: ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0(ptr [[PTR]])
+// CHECK-CXX-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], 0
+// CHECK-CXX-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], 1
+// CHECK-CXX-NEXT: [[VLD1XN_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], 2
+// CHECK-CXX-NEXT: [[VLD1XN_FCA_3_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], 3
+// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T:%.*]] poison, <16 x i8> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
+// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
+// CHECK-CXX-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD1XN_FCA_2_EXTRACT]], 0, 2
+// CHECK-CXX-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_2_INSERT]], <16 x i8> [[VLD1XN_FCA_3_EXTRACT]], 0, 3
+// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_3_INSERT]]
+//
+mfloat8x16x4_t test_vld1q_mf8_x4(mfloat8_t const *ptr) {
+ return vld1q_mf8_x4(ptr);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vld1q_dup_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer
+// CHECK-NEXT: ret <16 x i8> [[LANE]]
+//
+// CHECK-CXX-LABEL: define dso_local <16 x i8> @_Z18test_vld1q_dup_mf8Pu6__mfp8(
+// CHECK-CXX-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-CXX-NEXT: [[TMP1:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i32 0
+// CHECK-CXX-NEXT: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer
+// CHECK-CXX-NEXT: ret <16 x i8> [[LANE]]
+//
+mfloat8x16_t test_vld1q_dup_mf8(mfloat8_t *a) {
+ return vld1q_dup_mf8(a);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_vld1q_lane_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]], <16 x i8> [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <16 x i8> [[B]], i8 [[TMP0]], i32 15
+// CHECK-NEXT: ret <16 x i8> [[VLD1_LANE]]
+//
+// CHECK-CXX-LABEL: define dso_local <16 x i8> @_Z19test_vld1q_lane_mf8Pu6__mfp814__Mfloat8x16_t(
+// CHECK-CXX-SAME: ptr noundef [[A:%.*]], <16 x i8> [[B:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-CXX-NEXT: [[VLD1_LANE:%.*]] = insertelement <16 x i8> [[B]], i8 [[TMP0]], i32 15
+// CHECK-CXX-NEXT: ret <16 x i8> [[VLD1_LANE]]
+//
+mfloat8x16_t test_vld1q_lane_mf8(mfloat8_t *a, mfloat8x16_t b) {
+ return vld1q_lane_mf8(a, b, 15);
+}
+
+// CHECK-LABEL: define dso_local %struct.mfloat8x8x2_t @test_vld2_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T:%.*]] poison, <8 x i8> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
+// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x8x2_t @_Z13test_vld2_mf8PKu6__mfp8(
+// CHECK-CXX-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr [[A]])
+// CHECK-CXX-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2]], 0
+// CHECK-CXX-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2]], 1
+// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T:%.*]] poison, <8 x i8> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
+mfloat8x8x2_t test_vld2_mf8(mfloat8_t const *a) {
+ return vld2_mf8(a);
+}
+
+// CHECK-LABEL: define dso_local %struct.mfloat8x8x2_t @test_vld2_dup_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T:%.*]] poison, <8 x i8> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
+// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x8x2_t @_Z17test_vld2_dup_mf8Pu6__mfp8(
+// CHECK-CXX-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0(ptr [[A]])
+// CHECK-CXX-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2]], 0
+// CHECK-CXX-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2]], 1
+// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T:%.*]] poison, <8 x i8> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
+mfloat8x8x2_t test_vld2_dup_mf8(mfloat8_t *a) {
+ return vld2_dup_mf8(a);
+}
+
+// CHECK-LABEL: define dso_local %struct.mfloat8x8x2_t @test_vld2_lane_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[VLD2_LANE:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], i64 7, ptr [[A]])
+// CHECK-NEXT: [[VLD2_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2_LANE]], 0
+// CHECK-NEXT: [[VLD2_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2_LANE]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T:%.*]] poison, <8 x i8> [[VLD2_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD2_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
+// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x8x2_t @_Z18test_vld2_lane_mf8Pu6__mfp813mfloat8x8x2_t(
+// CHECK-CXX-SAME: ptr noundef [[A:%.*]], [2 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-CXX-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-CXX-NEXT: [[VLD2_LANE:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], i64 7, ptr [[A]])
+// CHECK-CXX-NEXT: [[VLD2_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2_LANE]], 0
+// CHECK-CXX-NEXT: [[VLD2_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2_LANE]], 1
+// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T:%.*]] poison, <8 x i8> [[VLD2_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD2_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
+mfloat8x8x2_t test_vld2_lane_mf8(mfloat8_t *a, mfloat8x8x2_t b) {
+ return vld2_lane_mf8(a, b, 7);
+}
+
+
+// CHECK-LABEL: define dso_local %struct.mfloat8x16x2_t @test_vld2q_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T:%.*]] poison, <16 x i8> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_1_INSERT]]
+//
+// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x16x2_t @_Z14test_vld2q_mf8PKu6__mfp8(
+// CHECK-CXX-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr [[A]])
+// CHECK-CXX-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2]], 0
+// CHECK-CXX-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2]], 1
+// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T:%.*]] poison, <16 x i8> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_1_INSERT]]
+//
+mfloat8x16x2_t test_vld2q_mf8(mfloat8_t const *a) {
+ return vld2q_mf8(a);
+}
+
+// CHECK-LABEL: define dso_local %struct.mfloat8x16x2_t @test_vld2q_dup_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T:%.*]] poison, <16 x i8> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_1_INSERT]]
+//
+// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x16x2_t @_Z18test_vld2q_dup_mf8Pu6__mfp8(
+// CHECK-CXX-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0(ptr [[A]])
+// CHECK-CXX-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2]], 0
+// CHECK-CXX-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2]], 1
+// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T:%.*]] poison, <16 x i8> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_1_INSERT]]
+//
+mfloat8x16x2_t test_vld2q_dup_mf8(mfloat8_t *a) {
+ return vld2q_dup_mf8(a);
+}
+
+// CHECK-LABEL: define dso_local %struct.mfloat8x16x2_t @test_vld2q_lane_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[VLD2_LANE:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], i64 15, ptr [[A]])
+// CHECK-NEXT: [[VLD2_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2_LANE]], 0
+// CHECK-NEXT: [[VLD2_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2_LANE]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T:%.*]] poison, <16 x i8> [[VLD2_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD2_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_1_INSERT]]
+//
+// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x16x2_t @_Z19test_vld2q_lane_mf8Pu6__mfp814mfloat8x16x2_t(
+// CHECK-CXX-SAME: ptr noundef [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[B_COERCE]], 0
+// CHECK-CXX-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[B_COERCE]], 1
+// CHECK-CXX-NEXT: [[VLD2_LANE:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], i64 15, ptr [[A]])
+// CHECK-CXX-NEXT: [[VLD2_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2_LANE]], 0
+// CHECK-CXX-NEXT: [[VLD2_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2_LANE]], 1
+// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T:%.*]] poison, <16 x i8> [[VLD2_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD2_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_1_INSERT]]
+//
+mfloat8x16x2_t test_vld2q_lane_mf8(mfloat8_t *a, mfloat8x16x2_t b) {
+ return vld2q_lane_mf8(a, b, 15);
+}
+
+// CHECK-LABEL: define dso_local %struct.mfloat8x8x3_t @test_vld3_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T:%.*]] poison, <8 x i8> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_2_INSERT]]
+//
+// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x8x3_t @_Z13test_vld3_mf8PKu6__mfp8(
+// CHECK-CXX-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0(ptr [[A]])
+// CHECK-CXX-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], 0
+// CHECK-CXX-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], 1
+// CHECK-CXX-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], 2
+// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T:%.*]] poison, <8 x i8> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-CXX-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_2_INSERT]]
+//
+mfloat8x8x3_t test_vld3_mf8(mfloat8_t const *a) {
+ return vld3_mf8(a);
+}
+
+// CHECK-LABEL: define dso_local %struct.mfloat8x8x3_t @test_vld3_dup_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T:%.*]] poison, <8 x i8> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_2_INSERT]]
+//
+// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x8x3_t @_Z17test_vld3_dup_mf8Pu6__mfp8(
+// CHECK-CXX-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0(ptr [[A]])
+// CHECK-CXX-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], 0
+// CHECK-CXX-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], 1
+// CHECK-CXX-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], 2
+// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T:%.*]] poison, <8 x i8> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-CXX-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_2_INSERT]]
+//
+mfloat8x8x3_t test_vld3_dup_mf8(mfloat8_t *a) {
+ return vld3_dup_mf8(a);
+}
+
+// CHECK-LABEL: define dso_local %struct.mfloat8x8x3_t @test_vld3_lane_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: [[VLD3_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], <8 x i8> [[B_COERCE_FCA_2_EXTRACT]], i64 7, ptr [[A]])
+// CHECK-NEXT: [[VLD3_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], 0
+// CHECK-NEXT: [[VLD3_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], 1
+// CHECK-NEXT: [[VLD3_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T:%.*]] poison, <8 x i8> [[VLD3_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD3_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD3_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_2_INSERT]]
+//
+// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x8x3_t @_Z18test_vld3_lane_mf8Pu6__mfp813mfloat8x8x3_t(
+// CHECK-CXX-SAME: ptr noundef [[A:%.*]], [3 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-CXX-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-CXX-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 2
+// CHECK-CXX-NEXT: [[VLD3_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], <8 x i8> [[B_COERCE_FCA_2_EXTRACT]], i64 7, ptr [[A]])
+// CHECK-CXX-NEXT: [[VLD3_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], 0
+// CHECK-CXX-NEXT: [[VLD3_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], 1
+// CHECK-CXX-NEXT: [[VLD3_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], 2
+// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T:%.*]] poison, <8 x i8> [[VLD3_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD3_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-CXX-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD3_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_2_INSERT]]
+//
+mfloat8x8x3_t test_vld3_lane_mf8(mfloat8_t *a, mfloat8x8x3_t b) {
+ return vld3_lane_mf8(a, b, 7);
+}
+
+// CHECK-LABEL: define dso_local %struct.mfloat8x16x3_t @test_vld3q_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T:%.*]] poison, <16 x i8> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_2_INSERT]]
+//
+// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x16x3_t @_Z14test_vld3q_mf8PKu6__mfp8(
+// CHECK-CXX-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0(ptr [[A]])
+// CHECK-CXX-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], 0
+// CHECK-CXX-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], 1
+// CHECK-CXX-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], 2
+// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T:%.*]] poison, <16 x i8> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-CXX-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_2_INSERT]]
+//
+mfloat8x16x3_t test_vld3q_mf8(mfloat8_t const *a) {
+ return vld3q_mf8(a);
+}
+
+// CHECK-LABEL: define dso_local %struct.mfloat8x16x3_t @test_vld3q_dup_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T:%.*]] poison, <16 x i8> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_2_INSERT]]
+//
+// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x16x3_t @_Z18test_vld3q_dup_mf8Pu6__mfp8(
+// CHECK-CXX-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0(ptr [[A]])
+// CHECK-CXX-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], 0
+// CHECK-CXX-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], 1
+// CHECK-CXX-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], 2
+// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T:%.*]] poison, <16 x i8> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-CXX-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_2_INSERT]]
+//
+mfloat8x16x3_t test_vld3q_dup_mf8(mfloat8_t *a) {
+ return vld3q_dup_mf8(a);
+}
+
+// CHECK-LABEL: define dso_local %struct.mfloat8x16x3_t @test_vld3q_lane_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: [[VLD3_LANE:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], <16 x i8> [[B_COERCE_FCA_2_EXTRACT]], i64 15, ptr [[A]])
+// CHECK-NEXT: [[VLD3_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], 0
+// CHECK-NEXT: [[VLD3_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], 1
+// CHECK-NEXT: [[VLD3_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T:%.*]] poison, <16 x i8> [[VLD3_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD3_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD3_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_2_INSERT]]
+//
+// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x16x3_t @_Z19test_vld3q_lane_mf8Pu6__mfp814mfloat8x16x3_t(
+// CHECK-CXX-SAME: ptr noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 0
+// CHECK-CXX-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 1
+// CHECK-CXX-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 2
+// CHECK-CXX-NEXT: [[VLD3_LANE:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], <16 x i8> [[B_COERCE_FCA_2_EXTRACT]], i64 15, ptr [[A]])
+// CHECK-CXX-NEXT: [[VLD3_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], 0
+// CHECK-CXX-NEXT: [[VLD3_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], 1
+// CHECK-CXX-NEXT: [[VLD3_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], 2
+// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T:%.*]] poison, <16 x i8> [[VLD3_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD3_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-CXX-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD3_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_2_INSERT]]
+//
+mfloat8x16x3_t test_vld3q_lane_mf8(mfloat8_t *a, mfloat8x16x3_t b) {
+ return vld3q_lane_mf8(a, b, 15);
+}
+
+// CHECK-LABEL: define dso_local %struct.mfloat8x8x4_t @test_vld4_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T:%.*]] poison, <8 x i8> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_2_INSERT]], <8 x i8> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_3_INSERT]]
+//
+// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x8x4_t @_Z13test_vld4_mf8PKu6__mfp8(
+// CHECK-CXX-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0(ptr [[A]])
+// CHECK-CXX-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 0
+// CHECK-CXX-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 1
+// CHECK-CXX-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 2
+// CHECK-CXX-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 3
+// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T:%.*]] poison, <8 x i8> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-CXX-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-CXX-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_2_INSERT]], <8 x i8> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_3_INSERT]]
+//
+mfloat8x8x4_t test_vld4_mf8(mfloat8_t const *a) {
+ return vld4_mf8(a);
+}
+
+// CHECK-LABEL: define dso_local %struct.mfloat8x8x4_t @test_vld4_dup_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T:%.*]] poison, <8 x i8> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_2_INSERT]], <8 x i8> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_3_INSERT]]
+//
+// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x8x4_t @_Z17test_vld4_dup_mf8Pu6__mfp8(
+// CHECK-CXX-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0(ptr [[A]])
+// CHECK-CXX-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 0
+// CHECK-CXX-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 1
+// CHECK-CXX-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 2
+// CHECK-CXX-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 3
+// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T:%.*]] poison, <8 x i8> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-CXX-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-CXX-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_2_INSERT]], <8 x i8> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_3_INSERT]]
+//
+mfloat8x8x4_t test_vld4_dup_mf8(mfloat8_t *a) {
+ return vld4_dup_mf8(a);
+}
+
+// CHECK-LABEL: define dso_local %struct.mfloat8x8x4_t @test_vld4_lane_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 3
+// CHECK-NEXT: [[VLD4_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], <8 x i8> [[B_COERCE_FCA_2_EXTRACT]], <8 x i8> [[B_COERCE_FCA_3_EXTRACT]], i64 7, ptr [[A]])
+// CHECK-NEXT: [[VLD4_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], 0
+// CHECK-NEXT: [[VLD4_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], 1
+// CHECK-NEXT: [[VLD4_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], 2
+// CHECK-NEXT: [[VLD4_LANE_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T:%.*]] poison, <8 x i8> [[VLD4_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD4_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD4_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_2_INSERT]], <8 x i8> [[VLD4_LANE_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_3_INSERT]]
+//
+// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x8x4_t @_Z18test_vld4_lane_mf8Pu6__mfp813mfloat8x8x4_t(
+// CHECK-CXX-SAME: ptr noundef [[A:%.*]], [4 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-CXX-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-CXX-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 2
+// CHECK-CXX-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 3
+// CHECK-CXX-NEXT: [[VLD4_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], <8 x i8> [[B_COERCE_FCA_2_EXTRACT]], <8 x i8> [[B_COERCE_FCA_3_EXTRACT]], i64 7, ptr [[A]])
+// CHECK-CXX-NEXT: [[VLD4_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], 0
+// CHECK-CXX-NEXT: [[VLD4_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], 1
+// CHECK-CXX-NEXT: [[VLD4_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], 2
+// CHECK-CXX-NEXT: [[VLD4_LANE_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], 3
+// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T:%.*]] poison, <8 x i8> [[VLD4_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD4_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-CXX-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD4_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-CXX-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_2_INSERT]], <8 x i8> [[VLD4_LANE_FCA_3_EXTRACT]], 0, 3
+// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_3_INSERT]]
+//
+mfloat8x8x4_t test_vld4_lane_mf8(mfloat8_t *a, mfloat8x8x4_t b) {
+ return vld4_lane_mf8(a, b, 7);
+}
+
+// CHECK-LABEL: define dso_local %struct.mfloat8x16x4_t @test_vld4q_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T:%.*]] poison, <16 x i8> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_2_INSERT]], <16 x i8> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_3_INSERT]]
+//
+// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x16x4_t @_Z14test_vld4q_mf8PKu6__mfp8(
+// CHECK-CXX-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr [[A]])
+// CHECK-CXX-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 0
+// CHECK-CXX-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 1
+// CHECK-CXX-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 2
+// CHECK-CXX-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 3
+// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T:%.*]] poison, <16 x i8> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-CXX-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-CXX-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_2_INSERT]], <16 x i8> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_3_INSERT]]
+//
+mfloat8x16x4_t test_vld4q_mf8(mfloat8_t const *a) {
+ return vld4q_mf8(a);
+}
+
+// CHECK-LABEL: define dso_local %struct.mfloat8x16x4_t @test_vld4q_dup_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T:%.*]] poison, <16 x i8> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_2_INSERT]], <16 x i8> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_3_INSERT]]
+//
+// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x16x4_t @_Z18test_vld4q_dup_mf8Pu6__mfp8(
+// CHECK-CXX-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0(ptr [[A]])
+// CHECK-CXX-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 0
+// CHECK-CXX-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 1
+// CHECK-CXX-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 2
+// CHECK-CXX-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 3
+// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T:%.*]] poison, <16 x i8> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-CXX-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-CXX-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_2_INSERT]], <16 x i8> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_3_INSERT]]
+//
+mfloat8x16x4_t test_vld4q_dup_mf8(mfloat8_t *a) {
+ return vld4q_dup_mf8(a);
+}
+
+// CHECK-LABEL: define dso_local %struct.mfloat8x16x4_t @test_vld4q_lane_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 3
+// CHECK-NEXT: [[VLD4_LANE:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], <16 x i8> [[B_COERCE_FCA_2_EXTRACT]], <16 x i8> [[B_COERCE_FCA_3_EXTRACT]], i64 15, ptr [[A]])
+// CHECK-NEXT: [[VLD4_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], 0
+// CHECK-NEXT: [[VLD4_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], 1
+// CHECK-NEXT: [[VLD4_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], 2
+// CHECK-NEXT: [[VLD4_LANE_FCA_3_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T:%.*]] poison, <16 x i8> [[VLD4_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD4_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD4_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_2_INSERT]], <16 x i8> [[VLD4_LANE_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_3_INSERT]]
+//
+// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x16x4_t @_Z19test_vld4q_lane_mf8Pu6__mfp814mfloat8x16x4_t(
+// CHECK-CXX-SAME: ptr noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT: [[ENTRY:.*:]]
+// CHECK-CXX-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 0
+// CHECK-CXX-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 1
+// CHECK-CXX-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 2
+// CHECK-CXX-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 3
+// CHECK-CXX-NEXT: [[VLD4_LANE:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], <16 x i8> [[B_COERCE_FCA_2_EXTRACT]], <16 x i8> [[B_COERCE_FCA_3_EXTRACT]], i64 15, ptr [[A]])
+// CHECK-CXX-NEXT: [[VLD4_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], 0
+// CHECK-CXX-NEXT: [[VLD4_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], 1
+// CHECK-CXX-NEXT: [[VLD4_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], 2
+// CHECK-CXX-NEXT: [[VLD4_LANE_FCA_3_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], 3
+// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T:%.*]] poison, <16 x i8> [[VLD4_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD4_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-CXX-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD4_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-CXX-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_2_INSERT]], <16 x i8> [[VLD4_LANE_FCA_3_EXTRACT]], 0, 3
+// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_3_INSERT]]
+//
+mfloat8x16x4_t test_vld4q_lane_mf8(mfloat8_t *a, mfloat8x16x4_t b) {
+ return vld4q_lane_mf8(a, b, 15);
+}
>From 932d2c7874040060b9abcd9fb1f799092ab33c89 Mon Sep 17 00:00:00 2001
From: Amilendra Kodithuwakku <amilendra.kodithuwakku at arm.com>
Date: Fri, 27 Jun 2025 15:46:53 +0100
Subject: [PATCH 2/2] [Clang][AArch64] Add mfloat8_t variants of Neon load
intrinsics
Move tests to existing files
- neon-intrinsics.c
- neon-ldst-one.c
---
.../neon-fp8-intrinsics/acle_neon_loads.c | 825 ------------------
clang/test/CodeGen/AArch64/neon-intrinsics.c | 213 +++++
clang/test/CodeGen/AArch64/neon-ldst-one.c | 257 ++++++
3 files changed, 470 insertions(+), 825 deletions(-)
delete mode 100644 clang/test/CodeGen/AArch64/neon-fp8-intrinsics/acle_neon_loads.c
diff --git a/clang/test/CodeGen/AArch64/neon-fp8-intrinsics/acle_neon_loads.c b/clang/test/CodeGen/AArch64/neon-fp8-intrinsics/acle_neon_loads.c
deleted file mode 100644
index 7947494a9dbb8..0000000000000
--- a/clang/test/CodeGen/AArch64/neon-fp8-intrinsics/acle_neon_loads.c
+++ /dev/null
@@ -1,825 +0,0 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,sroa | FileCheck %s
-// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +neon -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,sroa | FileCheck %s -check-prefix CHECK-CXX
-
-// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -Werror -Wall -S -O3 -o /dev/null %s
-
-// REQUIRES: aarch64-registered-target
-
-#include <arm_neon.h>
-
-// CHECK-LABEL: define dso_local <8 x i8> @test_vld1_mf8(
-// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[A]], align 1
-// CHECK-NEXT: ret <8 x i8> [[TMP0]]
-//
-// CHECK-CXX-LABEL: define dso_local <8 x i8> @_Z13test_vld1_mf8PKu6__mfp8(
-// CHECK-CXX-SAME: ptr noundef [[A:%.*]]) #[[ATTR0:[0-9]+]] {
-// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[A]], align 1
-// CHECK-CXX-NEXT: ret <8 x i8> [[TMP0]]
-//
-mfloat8x8_t test_vld1_mf8(mfloat8_t const *a) {
- return vld1_mf8(a);
-}
-
-// CHECK-LABEL: define dso_local %struct.mfloat8x8x2_t @test_vld1_mf8_x2(
-// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0(ptr [[A]])
-// CHECK-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD1XN]], 0
-// CHECK-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD1XN]], 1
-// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T:%.*]] poison, <8 x i8> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
-// CHECK-NEXT: ret [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_1_INSERT]]
-//
-// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x8x2_t @_Z16test_vld1_mf8_x2PKu6__mfp8(
-// CHECK-CXX-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0(ptr [[A]])
-// CHECK-CXX-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD1XN]], 0
-// CHECK-CXX-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD1XN]], 1
-// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T:%.*]] poison, <8 x i8> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
-// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
-// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_1_INSERT]]
-//
-mfloat8x8x2_t test_vld1_mf8_x2(mfloat8_t const *a) {
- return vld1_mf8_x2(a);
-}
-
-// CHECK-LABEL: define dso_local %struct.mfloat8x8x3_t @test_vld1_mf8_x3(
-// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0(ptr [[A]])
-// CHECK-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], 0
-// CHECK-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], 1
-// CHECK-NEXT: [[VLD1XN_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], 2
-// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T:%.*]] poison, <8 x i8> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
-// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD1XN_FCA_2_EXTRACT]], 0, 2
-// CHECK-NEXT: ret [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_2_INSERT]]
-//
-// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x8x3_t @_Z16test_vld1_mf8_x3PKu6__mfp8(
-// CHECK-CXX-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0(ptr [[A]])
-// CHECK-CXX-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], 0
-// CHECK-CXX-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], 1
-// CHECK-CXX-NEXT: [[VLD1XN_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], 2
-// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T:%.*]] poison, <8 x i8> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
-// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
-// CHECK-CXX-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD1XN_FCA_2_EXTRACT]], 0, 2
-// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_2_INSERT]]
-//
-mfloat8x8x3_t test_vld1_mf8_x3(mfloat8_t const *a) {
- return vld1_mf8_x3(a);
-}
-
-// CHECK-LABEL: define dso_local %struct.mfloat8x8x4_t @test_vld1_mf8_x4(
-// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0(ptr [[A]])
-// CHECK-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], 0
-// CHECK-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], 1
-// CHECK-NEXT: [[VLD1XN_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], 2
-// CHECK-NEXT: [[VLD1XN_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], 3
-// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T:%.*]] poison, <8 x i8> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
-// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD1XN_FCA_2_EXTRACT]], 0, 2
-// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_2_INSERT]], <8 x i8> [[VLD1XN_FCA_3_EXTRACT]], 0, 3
-// CHECK-NEXT: ret [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_3_INSERT]]
-//
-// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x8x4_t @_Z16test_vld1_mf8_x4PKu6__mfp8(
-// CHECK-CXX-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0(ptr [[A]])
-// CHECK-CXX-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], 0
-// CHECK-CXX-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], 1
-// CHECK-CXX-NEXT: [[VLD1XN_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], 2
-// CHECK-CXX-NEXT: [[VLD1XN_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], 3
-// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T:%.*]] poison, <8 x i8> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
-// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
-// CHECK-CXX-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD1XN_FCA_2_EXTRACT]], 0, 2
-// CHECK-CXX-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_2_INSERT]], <8 x i8> [[VLD1XN_FCA_3_EXTRACT]], 0, 3
-// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_3_INSERT]]
-//
-mfloat8x8x4_t test_vld1_mf8_x4(mfloat8_t const *a) {
- return vld1_mf8_x4(a);
-}
-
-// CHECK-LABEL: define dso_local <8 x i8> @test_vld1_dup_mf8(
-// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
-// CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 [[TMP0]], i32 0
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
-// CHECK-NEXT: ret <8 x i8> [[LANE]]
-//
-// CHECK-CXX-LABEL: define dso_local <8 x i8> @_Z17test_vld1_dup_mf8Pu6__mfp8(
-// CHECK-CXX-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
-// CHECK-CXX-NEXT: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 [[TMP0]], i32 0
-// CHECK-CXX-NEXT: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
-// CHECK-CXX-NEXT: ret <8 x i8> [[LANE]]
-//
-mfloat8x8_t test_vld1_dup_mf8(mfloat8_t *a) {
- return vld1_dup_mf8(a);
-}
-
-// CHECK-LABEL: define dso_local <8 x i8> @test_vld1_lane_mf8(
-// CHECK-SAME: ptr noundef [[A:%.*]], <8 x i8> [[B:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
-// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <8 x i8> [[B]], i8 [[TMP0]], i32 7
-// CHECK-NEXT: ret <8 x i8> [[VLD1_LANE]]
-//
-// CHECK-CXX-LABEL: define dso_local <8 x i8> @_Z18test_vld1_lane_mf8Pu6__mfp813__Mfloat8x8_t(
-// CHECK-CXX-SAME: ptr noundef [[A:%.*]], <8 x i8> [[B:%.*]]) #[[ATTR0]] {
-// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
-// CHECK-CXX-NEXT: [[VLD1_LANE:%.*]] = insertelement <8 x i8> [[B]], i8 [[TMP0]], i32 7
-// CHECK-CXX-NEXT: ret <8 x i8> [[VLD1_LANE]]
-//
-mfloat8x8_t test_vld1_lane_mf8(mfloat8_t *a, mfloat8x8_t b) {
- return vld1_lane_mf8(a, b, 7);
-}
-
-// CHECK-LABEL: define dso_local <16 x i8> @test_vld1q_mf8(
-// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[A]], align 1
-// CHECK-NEXT: ret <16 x i8> [[TMP0]]
-//
-// CHECK-CXX-LABEL: define dso_local <16 x i8> @_Z14test_vld1q_mf8PKu6__mfp8(
-// CHECK-CXX-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[A]], align 1
-// CHECK-CXX-NEXT: ret <16 x i8> [[TMP0]]
-//
-mfloat8x16_t test_vld1q_mf8(mfloat8_t const *a) {
- return vld1q_mf8(a);
-}
-
-// CHECK-LABEL: define dso_local %struct.mfloat8x16x2_t @test_vld1q_mf8_x2(
-// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0(ptr [[A]])
-// CHECK-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD1XN]], 0
-// CHECK-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD1XN]], 1
-// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T:%.*]] poison, <16 x i8> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
-// CHECK-NEXT: ret [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_1_INSERT]]
-//
-// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x16x2_t @_Z17test_vld1q_mf8_x2PKu6__mfp8(
-// CHECK-CXX-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0(ptr [[A]])
-// CHECK-CXX-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD1XN]], 0
-// CHECK-CXX-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD1XN]], 1
-// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T:%.*]] poison, <16 x i8> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
-// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
-// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_1_INSERT]]
-//
-mfloat8x16x2_t test_vld1q_mf8_x2(mfloat8_t const *a) {
- return vld1q_mf8_x2(a);
-}
-
-// CHECK-LABEL: define dso_local %struct.mfloat8x16x3_t @test_vld1q_mf8_x3(
-// CHECK-SAME: ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0(ptr [[PTR]])
-// CHECK-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], 0
-// CHECK-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], 1
-// CHECK-NEXT: [[VLD1XN_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], 2
-// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T:%.*]] poison, <16 x i8> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
-// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD1XN_FCA_2_EXTRACT]], 0, 2
-// CHECK-NEXT: ret [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_2_INSERT]]
-//
-// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x16x3_t @_Z17test_vld1q_mf8_x3PKu6__mfp8(
-// CHECK-CXX-SAME: ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
-// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0(ptr [[PTR]])
-// CHECK-CXX-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], 0
-// CHECK-CXX-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], 1
-// CHECK-CXX-NEXT: [[VLD1XN_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], 2
-// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T:%.*]] poison, <16 x i8> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
-// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
-// CHECK-CXX-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD1XN_FCA_2_EXTRACT]], 0, 2
-// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_2_INSERT]]
-//
-mfloat8x16x3_t test_vld1q_mf8_x3(mfloat8_t const *ptr) {
- return vld1q_mf8_x3(ptr);
-}
-// CHECK-LABEL: define dso_local %struct.mfloat8x16x4_t @test_vld1q_mf8_x4(
-// CHECK-SAME: ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0(ptr [[PTR]])
-// CHECK-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], 0
-// CHECK-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], 1
-// CHECK-NEXT: [[VLD1XN_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], 2
-// CHECK-NEXT: [[VLD1XN_FCA_3_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], 3
-// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T:%.*]] poison, <16 x i8> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
-// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD1XN_FCA_2_EXTRACT]], 0, 2
-// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_2_INSERT]], <16 x i8> [[VLD1XN_FCA_3_EXTRACT]], 0, 3
-// CHECK-NEXT: ret [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_3_INSERT]]
-//
-// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x16x4_t @_Z17test_vld1q_mf8_x4PKu6__mfp8(
-// CHECK-CXX-SAME: ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
-// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0(ptr [[PTR]])
-// CHECK-CXX-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], 0
-// CHECK-CXX-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], 1
-// CHECK-CXX-NEXT: [[VLD1XN_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], 2
-// CHECK-CXX-NEXT: [[VLD1XN_FCA_3_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], 3
-// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T:%.*]] poison, <16 x i8> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
-// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
-// CHECK-CXX-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD1XN_FCA_2_EXTRACT]], 0, 2
-// CHECK-CXX-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_2_INSERT]], <16 x i8> [[VLD1XN_FCA_3_EXTRACT]], 0, 3
-// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_3_INSERT]]
-//
-mfloat8x16x4_t test_vld1q_mf8_x4(mfloat8_t const *ptr) {
- return vld1q_mf8_x4(ptr);
-}
-
-// CHECK-LABEL: define dso_local <16 x i8> @test_vld1q_dup_mf8(
-// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
-// CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i32 0
-// CHECK-NEXT: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer
-// CHECK-NEXT: ret <16 x i8> [[LANE]]
-//
-// CHECK-CXX-LABEL: define dso_local <16 x i8> @_Z18test_vld1q_dup_mf8Pu6__mfp8(
-// CHECK-CXX-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
-// CHECK-CXX-NEXT: [[TMP1:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i32 0
-// CHECK-CXX-NEXT: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer
-// CHECK-CXX-NEXT: ret <16 x i8> [[LANE]]
-//
-mfloat8x16_t test_vld1q_dup_mf8(mfloat8_t *a) {
- return vld1q_dup_mf8(a);
-}
-
-// CHECK-LABEL: define dso_local <16 x i8> @test_vld1q_lane_mf8(
-// CHECK-SAME: ptr noundef [[A:%.*]], <16 x i8> [[B:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
-// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <16 x i8> [[B]], i8 [[TMP0]], i32 15
-// CHECK-NEXT: ret <16 x i8> [[VLD1_LANE]]
-//
-// CHECK-CXX-LABEL: define dso_local <16 x i8> @_Z19test_vld1q_lane_mf8Pu6__mfp814__Mfloat8x16_t(
-// CHECK-CXX-SAME: ptr noundef [[A:%.*]], <16 x i8> [[B:%.*]]) #[[ATTR0]] {
-// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
-// CHECK-CXX-NEXT: [[VLD1_LANE:%.*]] = insertelement <16 x i8> [[B]], i8 [[TMP0]], i32 15
-// CHECK-CXX-NEXT: ret <16 x i8> [[VLD1_LANE]]
-//
-mfloat8x16_t test_vld1q_lane_mf8(mfloat8_t *a, mfloat8x16_t b) {
- return vld1q_lane_mf8(a, b, 15);
-}
-
-// CHECK-LABEL: define dso_local %struct.mfloat8x8x2_t @test_vld2_mf8(
-// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr [[A]])
-// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2]], 0
-// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2]], 1
-// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T:%.*]] poison, <8 x i8> [[VLD2_FCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD2_FCA_1_EXTRACT]], 0, 1
-// CHECK-NEXT: ret [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_1_INSERT]]
-//
-// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x8x2_t @_Z13test_vld2_mf8PKu6__mfp8(
-// CHECK-CXX-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr [[A]])
-// CHECK-CXX-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2]], 0
-// CHECK-CXX-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2]], 1
-// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T:%.*]] poison, <8 x i8> [[VLD2_FCA_0_EXTRACT]], 0, 0
-// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD2_FCA_1_EXTRACT]], 0, 1
-// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_1_INSERT]]
-//
-mfloat8x8x2_t test_vld2_mf8(mfloat8_t const *a) {
- return vld2_mf8(a);
-}
-
-// CHECK-LABEL: define dso_local %struct.mfloat8x8x2_t @test_vld2_dup_mf8(
-// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0(ptr [[A]])
-// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2]], 0
-// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2]], 1
-// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T:%.*]] poison, <8 x i8> [[VLD2_FCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD2_FCA_1_EXTRACT]], 0, 1
-// CHECK-NEXT: ret [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_1_INSERT]]
-//
-// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x8x2_t @_Z17test_vld2_dup_mf8Pu6__mfp8(
-// CHECK-CXX-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0(ptr [[A]])
-// CHECK-CXX-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2]], 0
-// CHECK-CXX-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2]], 1
-// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T:%.*]] poison, <8 x i8> [[VLD2_FCA_0_EXTRACT]], 0, 0
-// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD2_FCA_1_EXTRACT]], 0, 1
-// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_1_INSERT]]
-//
-mfloat8x8x2_t test_vld2_dup_mf8(mfloat8_t *a) {
- return vld2_dup_mf8(a);
-}
-
-// CHECK-LABEL: define dso_local %struct.mfloat8x8x2_t @test_vld2_lane_mf8(
-// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[B_COERCE]], 0
-// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[B_COERCE]], 1
-// CHECK-NEXT: [[VLD2_LANE:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], i64 7, ptr [[A]])
-// CHECK-NEXT: [[VLD2_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2_LANE]], 0
-// CHECK-NEXT: [[VLD2_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2_LANE]], 1
-// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T:%.*]] poison, <8 x i8> [[VLD2_LANE_FCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD2_LANE_FCA_1_EXTRACT]], 0, 1
-// CHECK-NEXT: ret [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_1_INSERT]]
-//
-// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x8x2_t @_Z18test_vld2_lane_mf8Pu6__mfp813mfloat8x8x2_t(
-// CHECK-CXX-SAME: ptr noundef [[A:%.*]], [2 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
-// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[B_COERCE]], 0
-// CHECK-CXX-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[B_COERCE]], 1
-// CHECK-CXX-NEXT: [[VLD2_LANE:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], i64 7, ptr [[A]])
-// CHECK-CXX-NEXT: [[VLD2_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2_LANE]], 0
-// CHECK-CXX-NEXT: [[VLD2_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2_LANE]], 1
-// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T:%.*]] poison, <8 x i8> [[VLD2_LANE_FCA_0_EXTRACT]], 0, 0
-// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD2_LANE_FCA_1_EXTRACT]], 0, 1
-// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_1_INSERT]]
-//
-mfloat8x8x2_t test_vld2_lane_mf8(mfloat8_t *a, mfloat8x8x2_t b) {
- return vld2_lane_mf8(a, b, 7);
-}
-
-
-// CHECK-LABEL: define dso_local %struct.mfloat8x16x2_t @test_vld2q_mf8(
-// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr [[A]])
-// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2]], 0
-// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2]], 1
-// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T:%.*]] poison, <16 x i8> [[VLD2_FCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD2_FCA_1_EXTRACT]], 0, 1
-// CHECK-NEXT: ret [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_1_INSERT]]
-//
-// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x16x2_t @_Z14test_vld2q_mf8PKu6__mfp8(
-// CHECK-CXX-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr [[A]])
-// CHECK-CXX-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2]], 0
-// CHECK-CXX-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2]], 1
-// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T:%.*]] poison, <16 x i8> [[VLD2_FCA_0_EXTRACT]], 0, 0
-// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD2_FCA_1_EXTRACT]], 0, 1
-// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_1_INSERT]]
-//
-mfloat8x16x2_t test_vld2q_mf8(mfloat8_t const *a) {
- return vld2q_mf8(a);
-}
-
-// CHECK-LABEL: define dso_local %struct.mfloat8x16x2_t @test_vld2q_dup_mf8(
-// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0(ptr [[A]])
-// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2]], 0
-// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2]], 1
-// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T:%.*]] poison, <16 x i8> [[VLD2_FCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD2_FCA_1_EXTRACT]], 0, 1
-// CHECK-NEXT: ret [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_1_INSERT]]
-//
-// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x16x2_t @_Z18test_vld2q_dup_mf8Pu6__mfp8(
-// CHECK-CXX-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0(ptr [[A]])
-// CHECK-CXX-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2]], 0
-// CHECK-CXX-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2]], 1
-// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T:%.*]] poison, <16 x i8> [[VLD2_FCA_0_EXTRACT]], 0, 0
-// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD2_FCA_1_EXTRACT]], 0, 1
-// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_1_INSERT]]
-//
-mfloat8x16x2_t test_vld2q_dup_mf8(mfloat8_t *a) {
- return vld2q_dup_mf8(a);
-}
-
-// CHECK-LABEL: define dso_local %struct.mfloat8x16x2_t @test_vld2q_lane_mf8(
-// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[B_COERCE]], 0
-// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[B_COERCE]], 1
-// CHECK-NEXT: [[VLD2_LANE:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], i64 15, ptr [[A]])
-// CHECK-NEXT: [[VLD2_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2_LANE]], 0
-// CHECK-NEXT: [[VLD2_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2_LANE]], 1
-// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T:%.*]] poison, <16 x i8> [[VLD2_LANE_FCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD2_LANE_FCA_1_EXTRACT]], 0, 1
-// CHECK-NEXT: ret [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_1_INSERT]]
-//
-// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x16x2_t @_Z19test_vld2q_lane_mf8Pu6__mfp814mfloat8x16x2_t(
-// CHECK-CXX-SAME: ptr noundef [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
-// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[B_COERCE]], 0
-// CHECK-CXX-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[B_COERCE]], 1
-// CHECK-CXX-NEXT: [[VLD2_LANE:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], i64 15, ptr [[A]])
-// CHECK-CXX-NEXT: [[VLD2_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2_LANE]], 0
-// CHECK-CXX-NEXT: [[VLD2_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2_LANE]], 1
-// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T:%.*]] poison, <16 x i8> [[VLD2_LANE_FCA_0_EXTRACT]], 0, 0
-// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD2_LANE_FCA_1_EXTRACT]], 0, 1
-// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_1_INSERT]]
-//
-mfloat8x16x2_t test_vld2q_lane_mf8(mfloat8_t *a, mfloat8x16x2_t b) {
- return vld2q_lane_mf8(a, b, 15);
-}
-
-// CHECK-LABEL: define dso_local %struct.mfloat8x8x3_t @test_vld3_mf8(
-// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0(ptr [[A]])
-// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], 0
-// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], 1
-// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], 2
-// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T:%.*]] poison, <8 x i8> [[VLD3_FCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD3_FCA_1_EXTRACT]], 0, 1
-// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD3_FCA_2_EXTRACT]], 0, 2
-// CHECK-NEXT: ret [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_2_INSERT]]
-//
-// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x8x3_t @_Z13test_vld3_mf8PKu6__mfp8(
-// CHECK-CXX-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0(ptr [[A]])
-// CHECK-CXX-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], 0
-// CHECK-CXX-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], 1
-// CHECK-CXX-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], 2
-// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T:%.*]] poison, <8 x i8> [[VLD3_FCA_0_EXTRACT]], 0, 0
-// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD3_FCA_1_EXTRACT]], 0, 1
-// CHECK-CXX-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD3_FCA_2_EXTRACT]], 0, 2
-// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_2_INSERT]]
-//
-mfloat8x8x3_t test_vld3_mf8(mfloat8_t const *a) {
- return vld3_mf8(a);
-}
-
-// CHECK-LABEL: define dso_local %struct.mfloat8x8x3_t @test_vld3_dup_mf8(
-// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0(ptr [[A]])
-// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], 0
-// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], 1
-// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], 2
-// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T:%.*]] poison, <8 x i8> [[VLD3_FCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD3_FCA_1_EXTRACT]], 0, 1
-// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD3_FCA_2_EXTRACT]], 0, 2
-// CHECK-NEXT: ret [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_2_INSERT]]
-//
-// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x8x3_t @_Z17test_vld3_dup_mf8Pu6__mfp8(
-// CHECK-CXX-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0(ptr [[A]])
-// CHECK-CXX-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], 0
-// CHECK-CXX-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], 1
-// CHECK-CXX-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], 2
-// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T:%.*]] poison, <8 x i8> [[VLD3_FCA_0_EXTRACT]], 0, 0
-// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD3_FCA_1_EXTRACT]], 0, 1
-// CHECK-CXX-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD3_FCA_2_EXTRACT]], 0, 2
-// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_2_INSERT]]
-//
-mfloat8x8x3_t test_vld3_dup_mf8(mfloat8_t *a) {
- return vld3_dup_mf8(a);
-}
-
-// CHECK-LABEL: define dso_local %struct.mfloat8x8x3_t @test_vld3_lane_mf8(
-// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 0
-// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 1
-// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 2
-// CHECK-NEXT: [[VLD3_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], <8 x i8> [[B_COERCE_FCA_2_EXTRACT]], i64 7, ptr [[A]])
-// CHECK-NEXT: [[VLD3_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], 0
-// CHECK-NEXT: [[VLD3_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], 1
-// CHECK-NEXT: [[VLD3_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], 2
-// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T:%.*]] poison, <8 x i8> [[VLD3_LANE_FCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD3_LANE_FCA_1_EXTRACT]], 0, 1
-// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD3_LANE_FCA_2_EXTRACT]], 0, 2
-// CHECK-NEXT: ret [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_2_INSERT]]
-//
-// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x8x3_t @_Z18test_vld3_lane_mf8Pu6__mfp813mfloat8x8x3_t(
-// CHECK-CXX-SAME: ptr noundef [[A:%.*]], [3 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
-// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 0
-// CHECK-CXX-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 1
-// CHECK-CXX-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 2
-// CHECK-CXX-NEXT: [[VLD3_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], <8 x i8> [[B_COERCE_FCA_2_EXTRACT]], i64 7, ptr [[A]])
-// CHECK-CXX-NEXT: [[VLD3_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], 0
-// CHECK-CXX-NEXT: [[VLD3_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], 1
-// CHECK-CXX-NEXT: [[VLD3_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], 2
-// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T:%.*]] poison, <8 x i8> [[VLD3_LANE_FCA_0_EXTRACT]], 0, 0
-// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD3_LANE_FCA_1_EXTRACT]], 0, 1
-// CHECK-CXX-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD3_LANE_FCA_2_EXTRACT]], 0, 2
-// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_2_INSERT]]
-//
-mfloat8x8x3_t test_vld3_lane_mf8(mfloat8_t *a, mfloat8x8x3_t b) {
- return vld3_lane_mf8(a, b, 7);
-}
-
-// CHECK-LABEL: define dso_local %struct.mfloat8x16x3_t @test_vld3q_mf8(
-// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0(ptr [[A]])
-// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], 0
-// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], 1
-// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], 2
-// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T:%.*]] poison, <16 x i8> [[VLD3_FCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD3_FCA_1_EXTRACT]], 0, 1
-// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD3_FCA_2_EXTRACT]], 0, 2
-// CHECK-NEXT: ret [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_2_INSERT]]
-//
-// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x16x3_t @_Z14test_vld3q_mf8PKu6__mfp8(
-// CHECK-CXX-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0(ptr [[A]])
-// CHECK-CXX-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], 0
-// CHECK-CXX-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], 1
-// CHECK-CXX-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], 2
-// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T:%.*]] poison, <16 x i8> [[VLD3_FCA_0_EXTRACT]], 0, 0
-// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD3_FCA_1_EXTRACT]], 0, 1
-// CHECK-CXX-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD3_FCA_2_EXTRACT]], 0, 2
-// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_2_INSERT]]
-//
-mfloat8x16x3_t test_vld3q_mf8(mfloat8_t const *a) {
- return vld3q_mf8(a);
-}
-
-// CHECK-LABEL: define dso_local %struct.mfloat8x16x3_t @test_vld3q_dup_mf8(
-// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0(ptr [[A]])
-// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], 0
-// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], 1
-// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], 2
-// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T:%.*]] poison, <16 x i8> [[VLD3_FCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD3_FCA_1_EXTRACT]], 0, 1
-// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD3_FCA_2_EXTRACT]], 0, 2
-// CHECK-NEXT: ret [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_2_INSERT]]
-//
-// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x16x3_t @_Z18test_vld3q_dup_mf8Pu6__mfp8(
-// CHECK-CXX-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0(ptr [[A]])
-// CHECK-CXX-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], 0
-// CHECK-CXX-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], 1
-// CHECK-CXX-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], 2
-// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T:%.*]] poison, <16 x i8> [[VLD3_FCA_0_EXTRACT]], 0, 0
-// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD3_FCA_1_EXTRACT]], 0, 1
-// CHECK-CXX-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD3_FCA_2_EXTRACT]], 0, 2
-// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_2_INSERT]]
-//
-mfloat8x16x3_t test_vld3q_dup_mf8(mfloat8_t *a) {
- return vld3q_dup_mf8(a);
-}
-
-// CHECK-LABEL: define dso_local %struct.mfloat8x16x3_t @test_vld3q_lane_mf8(
-// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 0
-// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 1
-// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 2
-// CHECK-NEXT: [[VLD3_LANE:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], <16 x i8> [[B_COERCE_FCA_2_EXTRACT]], i64 15, ptr [[A]])
-// CHECK-NEXT: [[VLD3_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], 0
-// CHECK-NEXT: [[VLD3_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], 1
-// CHECK-NEXT: [[VLD3_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], 2
-// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T:%.*]] poison, <16 x i8> [[VLD3_LANE_FCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD3_LANE_FCA_1_EXTRACT]], 0, 1
-// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD3_LANE_FCA_2_EXTRACT]], 0, 2
-// CHECK-NEXT: ret [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_2_INSERT]]
-//
-// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x16x3_t @_Z19test_vld3q_lane_mf8Pu6__mfp814mfloat8x16x3_t(
-// CHECK-CXX-SAME: ptr noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
-// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 0
-// CHECK-CXX-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 1
-// CHECK-CXX-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 2
-// CHECK-CXX-NEXT: [[VLD3_LANE:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], <16 x i8> [[B_COERCE_FCA_2_EXTRACT]], i64 15, ptr [[A]])
-// CHECK-CXX-NEXT: [[VLD3_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], 0
-// CHECK-CXX-NEXT: [[VLD3_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], 1
-// CHECK-CXX-NEXT: [[VLD3_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], 2
-// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T:%.*]] poison, <16 x i8> [[VLD3_LANE_FCA_0_EXTRACT]], 0, 0
-// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD3_LANE_FCA_1_EXTRACT]], 0, 1
-// CHECK-CXX-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD3_LANE_FCA_2_EXTRACT]], 0, 2
-// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_2_INSERT]]
-//
-mfloat8x16x3_t test_vld3q_lane_mf8(mfloat8_t *a, mfloat8x16x3_t b) {
- return vld3q_lane_mf8(a, b, 15);
-}
-
-// CHECK-LABEL: define dso_local %struct.mfloat8x8x4_t @test_vld4_mf8(
-// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0(ptr [[A]])
-// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 0
-// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 1
-// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 2
-// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 3
-// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T:%.*]] poison, <8 x i8> [[VLD4_FCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD4_FCA_1_EXTRACT]], 0, 1
-// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD4_FCA_2_EXTRACT]], 0, 2
-// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_2_INSERT]], <8 x i8> [[VLD4_FCA_3_EXTRACT]], 0, 3
-// CHECK-NEXT: ret [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_3_INSERT]]
-//
-// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x8x4_t @_Z13test_vld4_mf8PKu6__mfp8(
-// CHECK-CXX-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0(ptr [[A]])
-// CHECK-CXX-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 0
-// CHECK-CXX-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 1
-// CHECK-CXX-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 2
-// CHECK-CXX-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 3
-// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T:%.*]] poison, <8 x i8> [[VLD4_FCA_0_EXTRACT]], 0, 0
-// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD4_FCA_1_EXTRACT]], 0, 1
-// CHECK-CXX-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD4_FCA_2_EXTRACT]], 0, 2
-// CHECK-CXX-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_2_INSERT]], <8 x i8> [[VLD4_FCA_3_EXTRACT]], 0, 3
-// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_3_INSERT]]
-//
-mfloat8x8x4_t test_vld4_mf8(mfloat8_t const *a) {
- return vld4_mf8(a);
-}
-
-// CHECK-LABEL: define dso_local %struct.mfloat8x8x4_t @test_vld4_dup_mf8(
-// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0(ptr [[A]])
-// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 0
-// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 1
-// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 2
-// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 3
-// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T:%.*]] poison, <8 x i8> [[VLD4_FCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD4_FCA_1_EXTRACT]], 0, 1
-// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD4_FCA_2_EXTRACT]], 0, 2
-// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_2_INSERT]], <8 x i8> [[VLD4_FCA_3_EXTRACT]], 0, 3
-// CHECK-NEXT: ret [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_3_INSERT]]
-//
-// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x8x4_t @_Z17test_vld4_dup_mf8Pu6__mfp8(
-// CHECK-CXX-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0(ptr [[A]])
-// CHECK-CXX-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 0
-// CHECK-CXX-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 1
-// CHECK-CXX-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 2
-// CHECK-CXX-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 3
-// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T:%.*]] poison, <8 x i8> [[VLD4_FCA_0_EXTRACT]], 0, 0
-// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD4_FCA_1_EXTRACT]], 0, 1
-// CHECK-CXX-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD4_FCA_2_EXTRACT]], 0, 2
-// CHECK-CXX-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_2_INSERT]], <8 x i8> [[VLD4_FCA_3_EXTRACT]], 0, 3
-// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_3_INSERT]]
-//
-mfloat8x8x4_t test_vld4_dup_mf8(mfloat8_t *a) {
- return vld4_dup_mf8(a);
-}
-
-// CHECK-LABEL: define dso_local %struct.mfloat8x8x4_t @test_vld4_lane_mf8(
-// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 0
-// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 1
-// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 2
-// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 3
-// CHECK-NEXT: [[VLD4_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], <8 x i8> [[B_COERCE_FCA_2_EXTRACT]], <8 x i8> [[B_COERCE_FCA_3_EXTRACT]], i64 7, ptr [[A]])
-// CHECK-NEXT: [[VLD4_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], 0
-// CHECK-NEXT: [[VLD4_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], 1
-// CHECK-NEXT: [[VLD4_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], 2
-// CHECK-NEXT: [[VLD4_LANE_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], 3
-// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T:%.*]] poison, <8 x i8> [[VLD4_LANE_FCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD4_LANE_FCA_1_EXTRACT]], 0, 1
-// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD4_LANE_FCA_2_EXTRACT]], 0, 2
-// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_2_INSERT]], <8 x i8> [[VLD4_LANE_FCA_3_EXTRACT]], 0, 3
-// CHECK-NEXT: ret [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_3_INSERT]]
-//
-// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x8x4_t @_Z18test_vld4_lane_mf8Pu6__mfp813mfloat8x8x4_t(
-// CHECK-CXX-SAME: ptr noundef [[A:%.*]], [4 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
-// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 0
-// CHECK-CXX-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 1
-// CHECK-CXX-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 2
-// CHECK-CXX-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 3
-// CHECK-CXX-NEXT: [[VLD4_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], <8 x i8> [[B_COERCE_FCA_2_EXTRACT]], <8 x i8> [[B_COERCE_FCA_3_EXTRACT]], i64 7, ptr [[A]])
-// CHECK-CXX-NEXT: [[VLD4_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], 0
-// CHECK-CXX-NEXT: [[VLD4_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], 1
-// CHECK-CXX-NEXT: [[VLD4_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], 2
-// CHECK-CXX-NEXT: [[VLD4_LANE_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], 3
-// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T:%.*]] poison, <8 x i8> [[VLD4_LANE_FCA_0_EXTRACT]], 0, 0
-// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD4_LANE_FCA_1_EXTRACT]], 0, 1
-// CHECK-CXX-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD4_LANE_FCA_2_EXTRACT]], 0, 2
-// CHECK-CXX-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_2_INSERT]], <8 x i8> [[VLD4_LANE_FCA_3_EXTRACT]], 0, 3
-// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_3_INSERT]]
-//
-mfloat8x8x4_t test_vld4_lane_mf8(mfloat8_t *a, mfloat8x8x4_t b) {
- return vld4_lane_mf8(a, b, 7);
-}
-
-// CHECK-LABEL: define dso_local %struct.mfloat8x16x4_t @test_vld4q_mf8(
-// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr [[A]])
-// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 0
-// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 1
-// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 2
-// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 3
-// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T:%.*]] poison, <16 x i8> [[VLD4_FCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD4_FCA_1_EXTRACT]], 0, 1
-// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD4_FCA_2_EXTRACT]], 0, 2
-// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_2_INSERT]], <16 x i8> [[VLD4_FCA_3_EXTRACT]], 0, 3
-// CHECK-NEXT: ret [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_3_INSERT]]
-//
-// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x16x4_t @_Z14test_vld4q_mf8PKu6__mfp8(
-// CHECK-CXX-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr [[A]])
-// CHECK-CXX-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 0
-// CHECK-CXX-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 1
-// CHECK-CXX-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 2
-// CHECK-CXX-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 3
-// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T:%.*]] poison, <16 x i8> [[VLD4_FCA_0_EXTRACT]], 0, 0
-// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD4_FCA_1_EXTRACT]], 0, 1
-// CHECK-CXX-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD4_FCA_2_EXTRACT]], 0, 2
-// CHECK-CXX-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_2_INSERT]], <16 x i8> [[VLD4_FCA_3_EXTRACT]], 0, 3
-// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_3_INSERT]]
-//
-mfloat8x16x4_t test_vld4q_mf8(mfloat8_t const *a) {
- return vld4q_mf8(a);
-}
-
-// CHECK-LABEL: define dso_local %struct.mfloat8x16x4_t @test_vld4q_dup_mf8(
-// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0(ptr [[A]])
-// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 0
-// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 1
-// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 2
-// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 3
-// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T:%.*]] poison, <16 x i8> [[VLD4_FCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD4_FCA_1_EXTRACT]], 0, 1
-// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD4_FCA_2_EXTRACT]], 0, 2
-// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_2_INSERT]], <16 x i8> [[VLD4_FCA_3_EXTRACT]], 0, 3
-// CHECK-NEXT: ret [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_3_INSERT]]
-//
-// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x16x4_t @_Z18test_vld4q_dup_mf8Pu6__mfp8(
-// CHECK-CXX-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
-// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0(ptr [[A]])
-// CHECK-CXX-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 0
-// CHECK-CXX-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 1
-// CHECK-CXX-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 2
-// CHECK-CXX-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 3
-// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T:%.*]] poison, <16 x i8> [[VLD4_FCA_0_EXTRACT]], 0, 0
-// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD4_FCA_1_EXTRACT]], 0, 1
-// CHECK-CXX-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD4_FCA_2_EXTRACT]], 0, 2
-// CHECK-CXX-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_2_INSERT]], <16 x i8> [[VLD4_FCA_3_EXTRACT]], 0, 3
-// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_3_INSERT]]
-//
-mfloat8x16x4_t test_vld4q_dup_mf8(mfloat8_t *a) {
- return vld4q_dup_mf8(a);
-}
-
-// CHECK-LABEL: define dso_local %struct.mfloat8x16x4_t @test_vld4q_lane_mf8(
-// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 0
-// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 1
-// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 2
-// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 3
-// CHECK-NEXT: [[VLD4_LANE:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], <16 x i8> [[B_COERCE_FCA_2_EXTRACT]], <16 x i8> [[B_COERCE_FCA_3_EXTRACT]], i64 15, ptr [[A]])
-// CHECK-NEXT: [[VLD4_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], 0
-// CHECK-NEXT: [[VLD4_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], 1
-// CHECK-NEXT: [[VLD4_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], 2
-// CHECK-NEXT: [[VLD4_LANE_FCA_3_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], 3
-// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T:%.*]] poison, <16 x i8> [[VLD4_LANE_FCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD4_LANE_FCA_1_EXTRACT]], 0, 1
-// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD4_LANE_FCA_2_EXTRACT]], 0, 2
-// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_2_INSERT]], <16 x i8> [[VLD4_LANE_FCA_3_EXTRACT]], 0, 3
-// CHECK-NEXT: ret [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_3_INSERT]]
-//
-// CHECK-CXX-LABEL: define dso_local %struct.mfloat8x16x4_t @_Z19test_vld4q_lane_mf8Pu6__mfp814mfloat8x16x4_t(
-// CHECK-CXX-SAME: ptr noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
-// CHECK-CXX-NEXT: [[ENTRY:.*:]]
-// CHECK-CXX-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 0
-// CHECK-CXX-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 1
-// CHECK-CXX-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 2
-// CHECK-CXX-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 3
-// CHECK-CXX-NEXT: [[VLD4_LANE:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], <16 x i8> [[B_COERCE_FCA_2_EXTRACT]], <16 x i8> [[B_COERCE_FCA_3_EXTRACT]], i64 15, ptr [[A]])
-// CHECK-CXX-NEXT: [[VLD4_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], 0
-// CHECK-CXX-NEXT: [[VLD4_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], 1
-// CHECK-CXX-NEXT: [[VLD4_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], 2
-// CHECK-CXX-NEXT: [[VLD4_LANE_FCA_3_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], 3
-// CHECK-CXX-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T:%.*]] poison, <16 x i8> [[VLD4_LANE_FCA_0_EXTRACT]], 0, 0
-// CHECK-CXX-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD4_LANE_FCA_1_EXTRACT]], 0, 1
-// CHECK-CXX-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD4_LANE_FCA_2_EXTRACT]], 0, 2
-// CHECK-CXX-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_2_INSERT]], <16 x i8> [[VLD4_LANE_FCA_3_EXTRACT]], 0, 3
-// CHECK-CXX-NEXT: ret [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_3_INSERT]]
-//
-mfloat8x16x4_t test_vld4q_lane_mf8(mfloat8_t *a, mfloat8x16x4_t b) {
- return vld4q_lane_mf8(a, b, 15);
-}
diff --git a/clang/test/CodeGen/AArch64/neon-intrinsics.c b/clang/test/CodeGen/AArch64/neon-intrinsics.c
index 791f0a1a29409..7d5c77f544d46 100644
--- a/clang/test/CodeGen/AArch64/neon-intrinsics.c
+++ b/clang/test/CodeGen/AArch64/neon-intrinsics.c
@@ -13112,6 +13112,16 @@ float64x2_t test_vld1q_f64(float64_t const *a) {
return vld1q_f64(a);
}
+// CHECK-LABEL: define dso_local <16 x i8> @test_vld1q_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[A]], align 1
+// CHECK-NEXT: ret <16 x i8> [[TMP0]]
+//
+mfloat8x16_t test_vld1q_mf8(mfloat8_t const *a) {
+ return vld1q_mf8(a);
+}
+
// CHECK-LABEL: define dso_local <16 x i8> @test_vld1q_p8(
// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -13242,6 +13252,16 @@ float64x1_t test_vld1_f64(float64_t const *a) {
return vld1_f64(a);
}
+// CHECK-LABEL: define dso_local <8 x i8> @test_vld1_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[A]], align 1
+// CHECK-NEXT: ret <8 x i8> [[TMP0]]
+//
+mfloat8x8_t test_vld1_mf8(mfloat8_t const *a) {
+ return vld1_mf8(a);
+}
+
// CHECK-LABEL: define dso_local <8 x i8> @test_vld1_p8(
// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -13546,6 +13566,20 @@ float64x2x2_t test_vld2q_f64(float64_t const *a) {
return vld2q_f64(a);
}
+// CHECK-LABEL: define dso_local %struct.mfloat8x16x2_t @test_vld2q_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T:%.*]] poison, <16 x i8> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_1_INSERT]]
+//
+mfloat8x16x2_t test_vld2q_mf8(mfloat8_t const *a) {
+ return vld2q_mf8(a);
+}
+
// CHECK-LABEL: define dso_local %struct.poly8x16x2_t @test_vld2q_p8(
// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -13728,6 +13762,20 @@ float64x1x2_t test_vld2_f64(float64_t const *a) {
return vld2_f64(a);
}
+// CHECK-LABEL: define dso_local %struct.mfloat8x8x2_t @test_vld2_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T:%.*]] poison, <8 x i8> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
+mfloat8x8x2_t test_vld2_mf8(mfloat8_t const *a) {
+ return vld2_mf8(a);
+}
+
// CHECK-LABEL: define dso_local %struct.poly8x8x2_t @test_vld2_p8(
// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -13932,6 +13980,22 @@ float64x2x3_t test_vld3q_f64(float64_t const *a) {
return vld3q_f64(a);
}
+// CHECK-LABEL: define dso_local %struct.mfloat8x16x3_t @test_vld3q_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T:%.*]] poison, <16 x i8> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_2_INSERT]]
+//
+mfloat8x16x3_t test_vld3q_mf8(mfloat8_t const *a) {
+ return vld3q_mf8(a);
+}
+
// CHECK-LABEL: define dso_local %struct.poly8x16x3_t @test_vld3q_p8(
// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -14140,6 +14204,22 @@ float64x1x3_t test_vld3_f64(float64_t const *a) {
return vld3_f64(a);
}
+// CHECK-LABEL: define dso_local %struct.mfloat8x8x3_t @test_vld3_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T:%.*]] poison, <8 x i8> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_2_INSERT]]
+//
+mfloat8x8x3_t test_vld3_mf8(mfloat8_t const *a) {
+ return vld3_mf8(a);
+}
+
// CHECK-LABEL: define dso_local %struct.poly8x8x3_t @test_vld3_p8(
// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -14370,6 +14450,24 @@ float64x2x4_t test_vld4q_f64(float64_t const *a) {
return vld4q_f64(a);
}
+// CHECK-LABEL: define dso_local %struct.mfloat8x16x4_t @test_vld4q_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T:%.*]] poison, <16 x i8> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_2_INSERT]], <16 x i8> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_3_INSERT]]
+//
+mfloat8x16x4_t test_vld4q_mf8(mfloat8_t const *a) {
+ return vld4q_mf8(a);
+}
+
// CHECK-LABEL: define dso_local %struct.poly8x16x4_t @test_vld4q_p8(
// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -14604,6 +14702,24 @@ float64x1x4_t test_vld4_f64(float64_t const *a) {
return vld4_f64(a);
}
+// CHECK-LABEL: define dso_local %struct.mfloat8x8x4_t @test_vld4_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T:%.*]] poison, <8 x i8> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_2_INSERT]], <8 x i8> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_3_INSERT]]
+//
+mfloat8x8x4_t test_vld4_mf8(mfloat8_t const *a) {
+ return vld4_mf8(a);
+}
+
// CHECK-LABEL: define dso_local %struct.poly8x8x4_t @test_vld4_p8(
// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -16398,6 +16514,20 @@ float64x2x2_t test_vld1q_f64_x2(float64_t const *a) {
return vld1q_f64_x2(a);
}
+// CHECK-LABEL: define dso_local %struct.mfloat8x16x2_t @test_vld1q_mf8_x2(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD1XN]], 0
+// CHECK-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD1XN]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T:%.*]] poison, <16 x i8> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_1_INSERT]]
+//
+mfloat8x16x2_t test_vld1q_mf8_x2(mfloat8_t const *a) {
+ return vld1q_mf8_x2(a);
+}
+
// CHECK-LABEL: define dso_local %struct.poly64x2x2_t @test_vld1q_p64_x2(
// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -16426,6 +16556,20 @@ float64x1x2_t test_vld1_f64_x2(float64_t const *a) {
return vld1_f64_x2(a);
}
+// CHECK-LABEL: define dso_local %struct.mfloat8x8x2_t @test_vld1_mf8_x2(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD1XN]], 0
+// CHECK-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD1XN]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T:%.*]] poison, <8 x i8> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
+mfloat8x8x2_t test_vld1_mf8_x2(mfloat8_t const *a) {
+ return vld1_mf8_x2(a);
+}
+
// CHECK-LABEL: define dso_local %struct.poly64x1x2_t @test_vld1_p64_x2(
// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -16456,6 +16600,22 @@ float64x2x3_t test_vld1q_f64_x3(float64_t const *a) {
return vld1q_f64_x3(a);
}
+// CHECK-LABEL: define dso_local %struct.mfloat8x16x3_t @test_vld1q_mf8_x3(
+// CHECK-SAME: ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0(ptr [[PTR]])
+// CHECK-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], 0
+// CHECK-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], 1
+// CHECK-NEXT: [[VLD1XN_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T:%.*]] poison, <16 x i8> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD1XN_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_2_INSERT]]
+//
+mfloat8x16x3_t test_vld1q_mf8_x3(mfloat8_t const *ptr) {
+ return vld1q_mf8_x3(ptr);
+}
+
// CHECK-LABEL: define dso_local %struct.poly64x2x3_t @test_vld1q_p64_x3(
// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -16488,6 +16648,23 @@ float64x1x3_t test_vld1_f64_x3(float64_t const *a) {
return vld1_f64_x3(a);
}
+
+// CHECK-LABEL: define dso_local %struct.mfloat8x8x3_t @test_vld1_mf8_x3(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], 0
+// CHECK-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], 1
+// CHECK-NEXT: [[VLD1XN_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T:%.*]] poison, <8 x i8> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD1XN_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_2_INSERT]]
+//
+mfloat8x8x3_t test_vld1_mf8_x3(mfloat8_t const *a) {
+ return vld1_mf8_x3(a);
+}
+
// CHECK-LABEL: define dso_local %struct.poly64x1x3_t @test_vld1_p64_x3(
// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -16522,6 +16699,24 @@ float64x2x4_t test_vld1q_f64_x4(float64_t const *a) {
return vld1q_f64_x4(a);
}
+// CHECK-LABEL: define dso_local %struct.mfloat8x16x4_t @test_vld1q_mf8_x4(
+// CHECK-SAME: ptr noundef [[PTR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1XN:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0(ptr [[PTR]])
+// CHECK-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], 0
+// CHECK-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], 1
+// CHECK-NEXT: [[VLD1XN_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], 2
+// CHECK-NEXT: [[VLD1XN_FCA_3_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD1XN]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T:%.*]] poison, <16 x i8> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD1XN_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_2_INSERT]], <16 x i8> [[VLD1XN_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_3_INSERT]]
+//
+mfloat8x16x4_t test_vld1q_mf8_x4(mfloat8_t const *ptr) {
+ return vld1q_mf8_x4(ptr);
+}
+
// CHECK-LABEL: define dso_local %struct.poly64x2x4_t @test_vld1q_p64_x4(
// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -16558,6 +16753,24 @@ float64x1x4_t test_vld1_f64_x4(float64_t const *a) {
return vld1_f64_x4(a);
}
+// CHECK-LABEL: define dso_local %struct.mfloat8x8x4_t @test_vld1_mf8_x4(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD1XN:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD1XN_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], 0
+// CHECK-NEXT: [[VLD1XN_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], 1
+// CHECK-NEXT: [[VLD1XN_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], 2
+// CHECK-NEXT: [[VLD1XN_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD1XN]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T:%.*]] poison, <8 x i8> [[VLD1XN_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD1XN_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD1XN_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_2_INSERT]], <8 x i8> [[VLD1XN_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_3_INSERT]]
+//
+mfloat8x8x4_t test_vld1_mf8_x4(mfloat8_t const *a) {
+ return vld1_mf8_x4(a);
+}
+
// CHECK-LABEL: define dso_local %struct.poly64x1x4_t @test_vld1_p64_x4(
// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
diff --git a/clang/test/CodeGen/AArch64/neon-ldst-one.c b/clang/test/CodeGen/AArch64/neon-ldst-one.c
index 2cff007826ba6..67c9b179aa773 100644
--- a/clang/test/CodeGen/AArch64/neon-ldst-one.c
+++ b/clang/test/CodeGen/AArch64/neon-ldst-one.c
@@ -139,6 +139,18 @@ float64x2_t test_vld1q_dup_f64(float64_t *a) {
return vld1q_dup_f64(a);
}
+// CHECK-LABEL: define dso_local <16 x i8> @test_vld1q_dup_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i8> poison, i8 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer
+// CHECK-NEXT: ret <16 x i8> [[LANE]]
+//
+mfloat8x16_t test_vld1q_dup_mf8(mfloat8_t *a) {
+ return vld1q_dup_mf8(a);
+}
+
// CHECK-LABEL: define dso_local <16 x i8> @test_vld1q_dup_p8(
// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -307,6 +319,18 @@ float64x1_t test_vld1_dup_f64(float64_t *a) {
return vld1_dup_f64(a);
}
+// CHECK-LABEL: define dso_local <8 x i8> @test_vld1_dup_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i8> poison, i8 [[TMP0]], i32 0
+// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
+// CHECK-NEXT: ret <8 x i8> [[LANE]]
+//
+mfloat8x8_t test_vld1_dup_mf8(mfloat8_t *a) {
+ return vld1_dup_mf8(a);
+}
+
// CHECK-LABEL: define dso_local <8 x i8> @test_vld1_dup_p8(
// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -385,6 +409,20 @@ float64x2x2_t test_vld2q_dup_f64(float64_t *a) {
return vld2q_dup_f64(a);
}
+// CHECK-LABEL: define dso_local %struct.mfloat8x16x2_t @test_vld2q_dup_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T:%.*]] poison, <16 x i8> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_1_INSERT]]
+//
+mfloat8x16x2_t test_vld2q_dup_mf8(mfloat8_t *a) {
+ return vld2q_dup_mf8(a);
+}
+
// CHECK-LABEL: define dso_local %struct.poly64x2x2_t @test_vld2q_dup_p64(
// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -413,6 +451,20 @@ float64x1x2_t test_vld2_dup_f64(float64_t *a) {
return vld2_dup_f64(a);
}
+// CHECK-LABEL: define dso_local %struct.mfloat8x8x2_t @test_vld2_dup_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD2:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD2_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2]], 0
+// CHECK-NEXT: [[VLD2_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T:%.*]] poison, <8 x i8> [[VLD2_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD2_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
+mfloat8x8x2_t test_vld2_dup_mf8(mfloat8_t *a) {
+ return vld2_dup_mf8(a);
+}
+
// CHECK-LABEL: define dso_local %struct.poly64x1x2_t @test_vld2_dup_p64(
// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -478,6 +530,22 @@ float64x2x3_t test_vld3q_dup_f64(float64_t *a) {
// [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define dso_local %struct.mfloat8x16x3_t @test_vld3q_dup_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T:%.*]] poison, <16 x i8> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_2_INSERT]]
+//
+mfloat8x16x3_t test_vld3q_dup_mf8(mfloat8_t *a) {
+ return vld3q_dup_mf8(a);
+}
+
// CHECK-LABEL: define dso_local %struct.poly64x2x3_t @test_vld3q_dup_p64(
// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -512,6 +580,22 @@ float64x1x3_t test_vld3_dup_f64(float64_t *a) {
// [{{x[0-9]+|sp}}]
}
+// CHECK-LABEL: define dso_local %struct.mfloat8x8x3_t @test_vld3_dup_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD3:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD3_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], 0
+// CHECK-NEXT: [[VLD3_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], 1
+// CHECK-NEXT: [[VLD3_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T:%.*]] poison, <8 x i8> [[VLD3_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD3_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD3_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_2_INSERT]]
+//
+mfloat8x8x3_t test_vld3_dup_mf8(mfloat8_t *a) {
+ return vld3_dup_mf8(a);
+}
+
// CHECK-LABEL: define dso_local %struct.poly64x1x3_t @test_vld3_dup_p64(
// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -583,6 +667,24 @@ float64x2x4_t test_vld4q_dup_f64(float64_t *a) {
return vld4q_dup_f64(a);
}
+// CHECK-LABEL: define dso_local %struct.mfloat8x16x4_t @test_vld4q_dup_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T:%.*]] poison, <16 x i8> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_2_INSERT]], <16 x i8> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_3_INSERT]]
+//
+mfloat8x16x4_t test_vld4q_dup_mf8(mfloat8_t *a) {
+ return vld4q_dup_mf8(a);
+}
+
// CHECK-LABEL: define dso_local %struct.poly64x2x4_t @test_vld4q_dup_p64(
// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -619,6 +721,24 @@ float64x1x4_t test_vld4_dup_f64(float64_t *a) {
return vld4_dup_f64(a);
}
+// CHECK-LABEL: define dso_local %struct.mfloat8x8x4_t @test_vld4_dup_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[VLD4:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0(ptr [[A]])
+// CHECK-NEXT: [[VLD4_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 0
+// CHECK-NEXT: [[VLD4_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 1
+// CHECK-NEXT: [[VLD4_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 2
+// CHECK-NEXT: [[VLD4_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T:%.*]] poison, <8 x i8> [[VLD4_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD4_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD4_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_2_INSERT]], <8 x i8> [[VLD4_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_3_INSERT]]
+//
+mfloat8x8x4_t test_vld4_dup_mf8(mfloat8_t *a) {
+ return vld4_dup_mf8(a);
+}
+
// CHECK-LABEL: define dso_local %struct.poly64x1x4_t @test_vld4_dup_p64(
// CHECK-SAME: ptr noundef [[A:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -779,6 +899,17 @@ float64x2_t test_vld1q_lane_f64(float64_t *a, float64x2_t b) {
return vld1q_lane_f64(a, b, 1);
}
+// CHECK-LABEL: define dso_local <16 x i8> @test_vld1q_lane_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]], <16 x i8> [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <16 x i8> [[B]], i8 [[TMP0]], i32 15
+// CHECK-NEXT: ret <16 x i8> [[VLD1_LANE]]
+//
+mfloat8x16_t test_vld1q_lane_mf8(mfloat8_t *a, mfloat8x16_t b) {
+ return vld1q_lane_mf8(a, b, 15);
+}
+
// CHECK-LABEL: define dso_local <16 x i8> @test_vld1q_lane_p8(
// CHECK-SAME: ptr noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -959,6 +1090,17 @@ float64x1_t test_vld1_lane_f64(float64_t *a, float64x1_t b) {
return vld1_lane_f64(a, b, 0);
}
+// CHECK-LABEL: define dso_local <8 x i8> @test_vld1_lane_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]], <8 x i8> [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[A]], align 1
+// CHECK-NEXT: [[VLD1_LANE:%.*]] = insertelement <8 x i8> [[B]], i8 [[TMP0]], i32 7
+// CHECK-NEXT: ret <8 x i8> [[VLD1_LANE]]
+//
+mfloat8x8_t test_vld1_lane_mf8(mfloat8_t *a, mfloat8x8_t b) {
+ return vld1_lane_mf8(a, b, 7);
+}
+
// CHECK-LABEL: define dso_local <8 x i8> @test_vld1_lane_p8(
// CHECK-SAME: ptr noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -1268,6 +1410,22 @@ float64x2x2_t test_vld2q_lane_f64(float64_t *a, float64x2x2_t b) {
return vld2q_lane_f64(a, b, 1);
}
+// CHECK-LABEL: define dso_local %struct.mfloat8x16x2_t @test_vld2q_lane_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[VLD2_LANE:%.*]] = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], i64 15, ptr [[A]])
+// CHECK-NEXT: [[VLD2_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2_LANE]], 0
+// CHECK-NEXT: [[VLD2_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8> } [[VLD2_LANE]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T:%.*]] poison, <16 x i8> [[VLD2_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD2_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_1_INSERT]]
+//
+mfloat8x16x2_t test_vld2q_lane_mf8(mfloat8_t *a, mfloat8x16x2_t b) {
+ return vld2q_lane_mf8(a, b, 15);
+}
+
// CHECK-LABEL: define dso_local %struct.poly16x8x2_t @test_vld2q_lane_p16(
// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <8 x i16>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -1528,6 +1686,22 @@ float64x1x2_t test_vld2_lane_f64(float64_t *a, float64x1x2_t b) {
return vld2_lane_f64(a, b, 0);
}
+// CHECK-LABEL: define dso_local %struct.mfloat8x8x2_t @test_vld2_lane_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[VLD2_LANE:%.*]] = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], i64 7, ptr [[A]])
+// CHECK-NEXT: [[VLD2_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2_LANE]], 0
+// CHECK-NEXT: [[VLD2_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8> } [[VLD2_LANE]], 1
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T:%.*]] poison, <8 x i8> [[VLD2_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD2_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_1_INSERT]]
+//
+mfloat8x8x2_t test_vld2_lane_mf8(mfloat8_t *a, mfloat8x8x2_t b) {
+ return vld2_lane_mf8(a, b, 7);
+}
+
// CHECK-LABEL: define dso_local %struct.poly8x8x2_t @test_vld2_lane_p8(
// CHECK-SAME: ptr noundef [[A:%.*]], [2 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -1818,6 +1992,26 @@ float64x2x3_t test_vld3q_lane_f64(float64_t *a, float64x2x3_t b) {
return vld3q_lane_f64(a, b, 1);
}
+// CHECK-LABEL: define dso_local %struct.mfloat8x16x3_t @test_vld3q_lane_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <16 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: [[VLD3_LANE:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], <16 x i8> [[B_COERCE_FCA_2_EXTRACT]], i64 15, ptr [[A]])
+// CHECK-NEXT: [[VLD3_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], 0
+// CHECK-NEXT: [[VLD3_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], 1
+// CHECK-NEXT: [[VLD3_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } [[VLD3_LANE]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T:%.*]] poison, <16 x i8> [[VLD3_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD3_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD3_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X16X3_T]] [[DOTFCA_0_2_INSERT]]
+//
+mfloat8x16x3_t test_vld3q_lane_mf8(mfloat8_t *a, mfloat8x16x3_t b) {
+ return vld3q_lane_mf8(a, b, 15);
+}
+
+
// CHECK-LABEL: define dso_local %struct.poly8x16x3_t @test_vld3q_lane_p8(
// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -2162,6 +2356,25 @@ float64x1x3_t test_vld3_lane_f64(float64_t *a, float64x1x3_t b) {
return vld3_lane_f64(a, b, 0);
}
+// CHECK-LABEL: define dso_local %struct.mfloat8x8x3_t @test_vld3_lane_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [3 x <8 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: [[VLD3_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], <8 x i8> [[B_COERCE_FCA_2_EXTRACT]], i64 7, ptr [[A]])
+// CHECK-NEXT: [[VLD3_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], 0
+// CHECK-NEXT: [[VLD3_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], 1
+// CHECK-NEXT: [[VLD3_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } [[VLD3_LANE]], 2
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T:%.*]] poison, <8 x i8> [[VLD3_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD3_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD3_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X8X3_T]] [[DOTFCA_0_2_INSERT]]
+//
+mfloat8x8x3_t test_vld3_lane_mf8(mfloat8_t *a, mfloat8x8x3_t b) {
+ return vld3_lane_mf8(a, b, 7);
+}
+
// CHECK-LABEL: define dso_local %struct.poly8x8x3_t @test_vld3_lane_p8(
// CHECK-SAME: ptr noundef [[A:%.*]], [3 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -2557,6 +2770,28 @@ float64x2x4_t test_vld4q_lane_f64(float64_t *a, float64x2x4_t b) {
return vld4q_lane_f64(a, b, 1);
}
+// CHECK-LABEL: define dso_local %struct.mfloat8x16x4_t @test_vld4q_lane_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <16 x i8>] [[B_COERCE]], 3
+// CHECK-NEXT: [[VLD4_LANE:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> [[B_COERCE_FCA_0_EXTRACT]], <16 x i8> [[B_COERCE_FCA_1_EXTRACT]], <16 x i8> [[B_COERCE_FCA_2_EXTRACT]], <16 x i8> [[B_COERCE_FCA_3_EXTRACT]], i64 15, ptr [[A]])
+// CHECK-NEXT: [[VLD4_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], 0
+// CHECK-NEXT: [[VLD4_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], 1
+// CHECK-NEXT: [[VLD4_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], 2
+// CHECK-NEXT: [[VLD4_LANE_FCA_3_EXTRACT:%.*]] = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } [[VLD4_LANE]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T:%.*]] poison, <16 x i8> [[VLD4_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[VLD4_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_1_INSERT]], <16 x i8> [[VLD4_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_2_INSERT]], <16 x i8> [[VLD4_LANE_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X16X4_T]] [[DOTFCA_0_3_INSERT]]
+//
+mfloat8x16x4_t test_vld4q_lane_mf8(mfloat8_t *a, mfloat8x16x4_t b) {
+ return vld4q_lane_mf8(a, b, 15);
+}
+
// CHECK-LABEL: define dso_local %struct.poly8x16x4_t @test_vld4q_lane_p8(
// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <16 x i8>] alignstack(16) [[B_COERCE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
@@ -2969,6 +3204,28 @@ float64x1x4_t test_vld4_lane_f64(float64_t *a, float64x1x4_t b) {
return vld4_lane_f64(a, b, 0);
}
+// CHECK-LABEL: define dso_local %struct.mfloat8x8x4_t @test_vld4_lane_mf8(
+// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[B_COERCE_FCA_0_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 0
+// CHECK-NEXT: [[B_COERCE_FCA_1_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 1
+// CHECK-NEXT: [[B_COERCE_FCA_2_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 2
+// CHECK-NEXT: [[B_COERCE_FCA_3_EXTRACT:%.*]] = extractvalue [4 x <8 x i8>] [[B_COERCE]], 3
+// CHECK-NEXT: [[VLD4_LANE:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0(<8 x i8> [[B_COERCE_FCA_0_EXTRACT]], <8 x i8> [[B_COERCE_FCA_1_EXTRACT]], <8 x i8> [[B_COERCE_FCA_2_EXTRACT]], <8 x i8> [[B_COERCE_FCA_3_EXTRACT]], i64 7, ptr [[A]])
+// CHECK-NEXT: [[VLD4_LANE_FCA_0_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], 0
+// CHECK-NEXT: [[VLD4_LANE_FCA_1_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], 1
+// CHECK-NEXT: [[VLD4_LANE_FCA_2_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], 2
+// CHECK-NEXT: [[VLD4_LANE_FCA_3_EXTRACT:%.*]] = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } [[VLD4_LANE]], 3
+// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T:%.*]] poison, <8 x i8> [[VLD4_LANE_FCA_0_EXTRACT]], 0, 0
+// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[VLD4_LANE_FCA_1_EXTRACT]], 0, 1
+// CHECK-NEXT: [[DOTFCA_0_2_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_1_INSERT]], <8 x i8> [[VLD4_LANE_FCA_2_EXTRACT]], 0, 2
+// CHECK-NEXT: [[DOTFCA_0_3_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_2_INSERT]], <8 x i8> [[VLD4_LANE_FCA_3_EXTRACT]], 0, 3
+// CHECK-NEXT: ret [[STRUCT_MFLOAT8X8X4_T]] [[DOTFCA_0_3_INSERT]]
+//
+mfloat8x8x4_t test_vld4_lane_mf8(mfloat8_t *a, mfloat8x8x4_t b) {
+ return vld4_lane_mf8(a, b, 7);
+}
+
// CHECK-LABEL: define dso_local %struct.poly8x8x4_t @test_vld4_lane_p8(
// CHECK-SAME: ptr noundef [[A:%.*]], [4 x <8 x i8>] alignstack(8) [[B_COERCE:%.*]]) #[[ATTR0]] {
// CHECK-NEXT: [[ENTRY:.*:]]
More information about the cfe-commits
mailing list