[clang] [llvm] [AArch64] Fix argument passing for SVE tuples (PR #118961)
Momchil Velikov via cfe-commits
cfe-commits at lists.llvm.org
Fri Dec 6 04:23:43 PST 2024
https://github.com/momchil-velikov created https://github.com/llvm/llvm-project/pull/118961
The fix for passing Pure Scalable Types (https://github.com/llvm/llvm-project/pull/112747) was incomplete,
it didn't correctly handle tuples of SVE vectors (e.g. `svboolx2_t`, `svfloat32x4_t`, etc.).
These types are Pure Scalable Types and should be passed either entirely in vector registers
or indirectly in memory, not split.
>From 7e2d60348850619fb7b0c8a88e92ab103f907d34 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Fri, 6 Dec 2024 11:08:21 +0000
Subject: [PATCH 1/2] Handle scalable store size in MemCpyOptimizer
The compiler crashes with an ICE when it tries to create a `memset` with
scalable size.
---
.../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 3 +-
.../CodeGen/AArch64/memset-scalable-size.ll | 56 +++++++++++++++++++
2 files changed, 58 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/AArch64/memset-scalable-size.ll
diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 0cba5d077da62b..fc5f6ff2b7f377 100644
--- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -800,8 +800,9 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// in subsequent passes.
auto *T = V->getType();
if (T->isAggregateType()) {
- uint64_t Size = DL.getTypeStoreSize(T);
IRBuilder<> Builder(SI);
+ Value *Size =
+ Builder.CreateTypeSize(Builder.getInt64Ty(), DL.getTypeStoreSize(T));
auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size,
SI->getAlign());
M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
diff --git a/llvm/test/CodeGen/AArch64/memset-scalable-size.ll b/llvm/test/CodeGen/AArch64/memset-scalable-size.ll
new file mode 100644
index 00000000000000..8ea6330f235a69
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/memset-scalable-size.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S --passes=memcpyopt < %s | FileCheck %s
+target triple = "aarch64-unknown-linux"
+
+define void @f0() {
+; CHECK-LABEL: define void @f0() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[P:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 2 [[P]], i8 0, i64 [[TMP1]], i1 false)
+; CHECK-NEXT: call void @g(ptr [[P]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %p = alloca { <vscale x 16 x i1>, <vscale x 16 x i1>}, align 2
+ store { <vscale x 16 x i1>, <vscale x 16 x i1> } zeroinitializer, ptr %p, align 2
+ call void @g(ptr %p)
+ ret void
+}
+
+define void @f1() {
+; CHECK-LABEL: define void @f1() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[P:%.*]] = alloca { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, align 16
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 48
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[P]], i8 0, i64 [[TMP1]], i1 false)
+; CHECK-NEXT: call void @g(ptr [[P]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %p = alloca {<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> }, align 16
+ store {<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } zeroinitializer, ptr %p, align 16
+ call void @g(ptr %p)
+ ret void
+}
+
+define void @f2() {
+; CHECK-LABEL: define void @f2() {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[P:%.*]] = alloca { <vscale x 8 x double>, <vscale x 8 x double>, <vscale x 8 x double> }, align 16
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 192
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 16 [[P]], i8 0, i64 [[TMP1]], i1 false)
+; CHECK-NEXT: call void @g(ptr [[P]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %p = alloca {<vscale x 8 x double>, <vscale x 8 x double>, <vscale x 8 x double> }, align 16
+ store {<vscale x 8 x double>, <vscale x 8 x double>, <vscale x 8 x double> } zeroinitializer, ptr %p, align 16
+ call void @g(ptr %p)
+ ret void
+}
+
+declare void @g(ptr)
>From 83331bbf9d083ec8cba96acc32114ed1518e91f7 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Fri, 6 Dec 2024 10:47:43 +0000
Subject: [PATCH 2/2] Fix SVE tuples
---
clang/lib/CodeGen/Targets/AArch64.cpp | 68 +++++++----
.../test/CodeGen/AArch64/pure-scalable-args.c | 19 ++++
.../CodeGenCXX/aarch64-mangle-sve-vectors.cpp | 106 ++++++++----------
3 files changed, 111 insertions(+), 82 deletions(-)
diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp
index be33e26f047841..ad7f405cc72550 100644
--- a/clang/lib/CodeGen/Targets/AArch64.cpp
+++ b/clang/lib/CodeGen/Targets/AArch64.cpp
@@ -52,6 +52,7 @@ class AArch64ABIInfo : public ABIInfo {
bool isIllegalVectorType(QualType Ty) const;
+ bool passAsAggregateType(QualType Ty) const;
bool passAsPureScalableType(QualType Ty, unsigned &NV, unsigned &NP,
SmallVectorImpl<llvm::Type *> &CoerceToSeq) const;
@@ -337,6 +338,10 @@ ABIArgInfo AArch64ABIInfo::coerceAndExpandPureScalableAggregate(
NSRN += NVec;
NPRN += NPred;
+ // Handle SVE vector tuples.
+ if (Ty->isSVESizelessBuiltinType())
+ return ABIArgInfo::getDirect();
+
llvm::Type *UnpaddedCoerceToType =
UnpaddedCoerceToSeq.size() == 1
? UnpaddedCoerceToSeq[0]
@@ -362,7 +367,7 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadicFn,
if (isIllegalVectorType(Ty))
return coerceIllegalVector(Ty, NSRN, NPRN);
- if (!isAggregateTypeForABI(Ty)) {
+ if (!passAsAggregateType(Ty)) {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = Ty->getAs<EnumType>())
Ty = EnumTy->getDecl()->getIntegerType();
@@ -417,7 +422,7 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadicFn,
// elsewhere for GNU compatibility.
uint64_t Size = getContext().getTypeSize(Ty);
bool IsEmpty = isEmptyRecord(getContext(), Ty, true);
- if (IsEmpty || Size == 0) {
+ if (!Ty->isSVESizelessBuiltinType() && (IsEmpty || Size == 0)) {
if (!getContext().getLangOpts().CPlusPlus || isDarwinPCS())
return ABIArgInfo::getIgnore();
@@ -504,7 +509,7 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy,
if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 128)
return getNaturalAlignIndirect(RetTy);
- if (!isAggregateTypeForABI(RetTy)) {
+ if (!passAsAggregateType(RetTy)) {
// Treat an enum type as its underlying type.
if (const EnumType *EnumTy = RetTy->getAs<EnumType>())
RetTy = EnumTy->getDecl()->getIntegerType();
@@ -519,7 +524,8 @@ ABIArgInfo AArch64ABIInfo::classifyReturnType(QualType RetTy,
}
uint64_t Size = getContext().getTypeSize(RetTy);
- if (isEmptyRecord(getContext(), RetTy, true) || Size == 0)
+ if (!RetTy->isSVESizelessBuiltinType() &&
+ (isEmptyRecord(getContext(), RetTy, true) || Size == 0))
return ABIArgInfo::getIgnore();
const Type *Base = nullptr;
@@ -654,6 +660,15 @@ bool AArch64ABIInfo::isZeroLengthBitfieldPermittedInHomogeneousAggregate()
return true;
}
+bool AArch64ABIInfo::passAsAggregateType(QualType Ty) const {
+ if (Kind == AArch64ABIKind::AAPCS && Ty->isSVESizelessBuiltinType()) {
+ const auto *BT = Ty->getAs<BuiltinType>();
+ return !BT->isSVECount() &&
+ getContext().getBuiltinVectorTypeInfo(BT).NumVectors > 1;
+ }
+ return isAggregateTypeForABI(Ty);
+}
+
// Check if a type needs to be passed in registers as a Pure Scalable Type (as
// defined by AAPCS64). Return the number of data vectors and the number of
// predicate vectors in the type, into `NVec` and `NPred`, respectively. Upon
@@ -719,37 +734,38 @@ bool AArch64ABIInfo::passAsPureScalableType(
return true;
}
- const auto *VT = Ty->getAs<VectorType>();
- if (!VT)
- return false;
+ if (const auto *VT = Ty->getAs<VectorType>()) {
+ if (VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) {
+ ++NPred;
+ if (CoerceToSeq.size() + 1 > 12)
+ return false;
+ CoerceToSeq.push_back(convertFixedToScalableVectorType(VT));
+ return true;
+ }
- if (VT->getVectorKind() == VectorKind::SveFixedLengthPredicate) {
- ++NPred;
- if (CoerceToSeq.size() + 1 > 12)
- return false;
- CoerceToSeq.push_back(convertFixedToScalableVectorType(VT));
- return true;
- }
+ if (VT->getVectorKind() == VectorKind::SveFixedLengthData) {
+ ++NVec;
+ if (CoerceToSeq.size() + 1 > 12)
+ return false;
+ CoerceToSeq.push_back(convertFixedToScalableVectorType(VT));
+ return true;
+ }
- if (VT->getVectorKind() == VectorKind::SveFixedLengthData) {
- ++NVec;
- if (CoerceToSeq.size() + 1 > 12)
- return false;
- CoerceToSeq.push_back(convertFixedToScalableVectorType(VT));
- return true;
+ return false;
}
- if (!VT->isBuiltinType())
+ if (!Ty->isBuiltinType())
return false;
- switch (cast<BuiltinType>(VT)->getKind()) {
+ bool isPredicate;
+ switch (Ty->getAs<BuiltinType>()->getKind()) {
#define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId) \
case BuiltinType::Id: \
- ++NVec; \
+ isPredicate = false; \
break;
#define SVE_PREDICATE_TYPE(Name, MangledName, Id, SingletonId) \
case BuiltinType::Id: \
- ++NPred; \
+ isPredicate = true; \
break;
#define SVE_TYPE(Name, Id, SingletonId)
#include "clang/Basic/AArch64SVEACLETypes.def"
@@ -761,6 +777,10 @@ bool AArch64ABIInfo::passAsPureScalableType(
getContext().getBuiltinVectorTypeInfo(cast<BuiltinType>(Ty));
assert(Info.NumVectors > 0 && Info.NumVectors <= 4 &&
"Expected 1, 2, 3 or 4 vectors!");
+ if (isPredicate)
+ NPred += Info.NumVectors;
+ else
+ NVec += Info.NumVectors;
auto VTy = llvm::ScalableVectorType::get(CGT.ConvertType(Info.ElementType),
Info.EC.getKnownMinValue());
diff --git a/clang/test/CodeGen/AArch64/pure-scalable-args.c b/clang/test/CodeGen/AArch64/pure-scalable-args.c
index f40c944335e4a4..e1dbf5f48ce0ce 100644
--- a/clang/test/CodeGen/AArch64/pure-scalable-args.c
+++ b/clang/test/CodeGen/AArch64/pure-scalable-args.c
@@ -459,3 +459,22 @@ void test_va_arg(int n, ...) {
// CHECK-DARWIN-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull %ap)
// CHECK-DARWIN-NEXT: ret void
// CHECK-DARWIN-NEXT: }
+
+// Regression test for incorrect passing of SVE vector tuples
+// The whole `y` needs to be passed indirectly.
+void test_tuple_reg_count(svfloat32_t x, svfloat32x2_t y) {
+ void test_tuple_reg_count_callee(svfloat32_t, svfloat32_t, svfloat32_t, svfloat32_t,
+ svfloat32_t, svfloat32_t, svfloat32_t, svfloat32x2_t);
+ test_tuple_reg_count_callee(x, x, x, x, x, x, x, y);
+}
+// CHECK-AAPCS: declare void @test_tuple_reg_count_callee(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, ptr noundef)
+// CHECK-DARWIN: declare void @test_tuple_reg_count_callee(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+
+// Regression test for incorrect passing of SVE vector tuples
+// The whole `y` needs to be passed indirectly.
+void test_tuple_reg_count_bool(svboolx4_t x, svboolx4_t y) {
+ void test_tuple_reg_count_bool_callee(svboolx4_t, svboolx4_t);
+ test_tuple_reg_count_bool_callee(x, y);
+}
+// CHECK-AAPCS: declare void @test_tuple_reg_count_bool_callee(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, ptr noundef)
+// CHECK-DARWIN: declare void @test_tuple_reg_count_bool_callee(<vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>)
diff --git a/clang/test/CodeGenCXX/aarch64-mangle-sve-vectors.cpp b/clang/test/CodeGenCXX/aarch64-mangle-sve-vectors.cpp
index 9f481e1f0f0857..152be26948f281 100644
--- a/clang/test/CodeGenCXX/aarch64-mangle-sve-vectors.cpp
+++ b/clang/test/CodeGenCXX/aarch64-mangle-sve-vectors.cpp
@@ -141,13 +141,13 @@ void f(__clang_svmfloat8x4_t, __clang_svmfloat8x4_t);
// CHECK-NEXT: [[COERCE72:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
// CHECK-NEXT: [[COERCE73:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
// CHECK-NEXT: [[COERCE74:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
-// CHECK-NEXT: [[COERCE75:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
+// CHECK-NEXT: [[BYVAL_TEMP:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
+// CHECK-NEXT: [[COERCE75:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// CHECK-NEXT: [[COERCE76:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
-// CHECK-NEXT: [[COERCE77:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
+// CHECK-NEXT: [[COERCE77:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// CHECK-NEXT: [[COERCE78:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
-// CHECK-NEXT: [[COERCE79:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
+// CHECK-NEXT: [[COERCE79:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// CHECK-NEXT: [[COERCE80:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
-// CHECK-NEXT: [[COERCE81:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// CHECK-NEXT: call void @_Z1fu10__SVInt8_tS_(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer)
// CHECK-NEXT: call void @_Z1fu11__SVInt16_tS_(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer)
// CHECK-NEXT: call void @_Z1fu11__SVInt16_tS_(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer)
@@ -575,46 +575,41 @@ void f(__clang_svmfloat8x4_t, __clang_svmfloat8x4_t);
// CHECK-NEXT: [[COERCE74_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[COERCE74_TUPLE]], 1
// CHECK-NEXT: [[COERCE74_EXTRACT2:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[COERCE74_TUPLE]], 2
// CHECK-NEXT: [[COERCE74_EXTRACT3:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[COERCE74_TUPLE]], 3
-// CHECK-NEXT: store { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } zeroinitializer, ptr [[COERCE75]], align 2
-// CHECK-NEXT: [[COERCE75_TUPLE:%.*]] = load { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> }, ptr [[COERCE75]], align 2
-// CHECK-NEXT: [[COERCE75_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[COERCE75_TUPLE]], 0
-// CHECK-NEXT: [[COERCE75_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[COERCE75_TUPLE]], 1
-// CHECK-NEXT: [[COERCE75_EXTRACT2:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[COERCE75_TUPLE]], 2
-// CHECK-NEXT: [[COERCE75_EXTRACT3:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[COERCE75_TUPLE]], 3
-// CHECK-NEXT: call void @_Z1f10svboolx4_tS_(<vscale x 16 x i1> [[COERCE74_EXTRACT0]], <vscale x 16 x i1> [[COERCE74_EXTRACT1]], <vscale x 16 x i1> [[COERCE74_EXTRACT2]], <vscale x 16 x i1> [[COERCE74_EXTRACT3]], <vscale x 16 x i1> [[COERCE75_EXTRACT0]], <vscale x 16 x i1> [[COERCE75_EXTRACT1]], <vscale x 16 x i1> [[COERCE75_EXTRACT2]], <vscale x 16 x i1> [[COERCE75_EXTRACT3]])
+// CHECK-NEXT: store { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } zeroinitializer, ptr [[BYVAL_TEMP]], align 2
+// CHECK-NEXT: call void @_Z1f10svboolx4_tS_(<vscale x 16 x i1> [[COERCE74_EXTRACT0]], <vscale x 16 x i1> [[COERCE74_EXTRACT1]], <vscale x 16 x i1> [[COERCE74_EXTRACT2]], <vscale x 16 x i1> [[COERCE74_EXTRACT3]], ptr noundef [[BYVAL_TEMP]])
+// CHECK-NEXT: store { <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[COERCE75]], align 16
+// CHECK-NEXT: [[COERCE75_TUPLE:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[COERCE75]], align 16
+// CHECK-NEXT: [[COERCE75_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE75_TUPLE]], 0
+// CHECK-NEXT: [[COERCE75_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE75_TUPLE]], 1
// CHECK-NEXT: store { <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[COERCE76]], align 16
// CHECK-NEXT: [[COERCE76_TUPLE:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[COERCE76]], align 16
// CHECK-NEXT: [[COERCE76_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE76_TUPLE]], 0
// CHECK-NEXT: [[COERCE76_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE76_TUPLE]], 1
-// CHECK-NEXT: store { <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[COERCE77]], align 16
-// CHECK-NEXT: [[COERCE77_TUPLE:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[COERCE77]], align 16
-// CHECK-NEXT: [[COERCE77_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE77_TUPLE]], 0
-// CHECK-NEXT: [[COERCE77_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE77_TUPLE]], 1
-// CHECK-NEXT: call void @_Z1f13svmfloat8x2_tS_(<vscale x 16 x i8> [[COERCE76_EXTRACT0]], <vscale x 16 x i8> [[COERCE76_EXTRACT1]], <vscale x 16 x i8> [[COERCE77_EXTRACT0]], <vscale x 16 x i8> [[COERCE77_EXTRACT1]])
+// CHECK-NEXT: call void @_Z1f13svmfloat8x2_tS_(<vscale x 16 x i8> [[COERCE75_EXTRACT0]], <vscale x 16 x i8> [[COERCE75_EXTRACT1]], <vscale x 16 x i8> [[COERCE76_EXTRACT0]], <vscale x 16 x i8> [[COERCE76_EXTRACT1]])
+// CHECK-NEXT: store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[COERCE77]], align 16
+// CHECK-NEXT: [[COERCE77_TUPLE:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[COERCE77]], align 16
+// CHECK-NEXT: [[COERCE77_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE77_TUPLE]], 0
+// CHECK-NEXT: [[COERCE77_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE77_TUPLE]], 1
+// CHECK-NEXT: [[COERCE77_EXTRACT2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE77_TUPLE]], 2
// CHECK-NEXT: store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[COERCE78]], align 16
// CHECK-NEXT: [[COERCE78_TUPLE:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[COERCE78]], align 16
// CHECK-NEXT: [[COERCE78_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE78_TUPLE]], 0
// CHECK-NEXT: [[COERCE78_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE78_TUPLE]], 1
// CHECK-NEXT: [[COERCE78_EXTRACT2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE78_TUPLE]], 2
-// CHECK-NEXT: store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[COERCE79]], align 16
-// CHECK-NEXT: [[COERCE79_TUPLE:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[COERCE79]], align 16
-// CHECK-NEXT: [[COERCE79_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE79_TUPLE]], 0
-// CHECK-NEXT: [[COERCE79_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE79_TUPLE]], 1
-// CHECK-NEXT: [[COERCE79_EXTRACT2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE79_TUPLE]], 2
-// CHECK-NEXT: call void @_Z1f13svmfloat8x3_tS_(<vscale x 16 x i8> [[COERCE78_EXTRACT0]], <vscale x 16 x i8> [[COERCE78_EXTRACT1]], <vscale x 16 x i8> [[COERCE78_EXTRACT2]], <vscale x 16 x i8> [[COERCE79_EXTRACT0]], <vscale x 16 x i8> [[COERCE79_EXTRACT1]], <vscale x 16 x i8> [[COERCE79_EXTRACT2]])
+// CHECK-NEXT: call void @_Z1f13svmfloat8x3_tS_(<vscale x 16 x i8> [[COERCE77_EXTRACT0]], <vscale x 16 x i8> [[COERCE77_EXTRACT1]], <vscale x 16 x i8> [[COERCE77_EXTRACT2]], <vscale x 16 x i8> [[COERCE78_EXTRACT0]], <vscale x 16 x i8> [[COERCE78_EXTRACT1]], <vscale x 16 x i8> [[COERCE78_EXTRACT2]])
+// CHECK-NEXT: store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[COERCE79]], align 16
+// CHECK-NEXT: [[COERCE79_TUPLE:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[COERCE79]], align 16
+// CHECK-NEXT: [[COERCE79_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE79_TUPLE]], 0
+// CHECK-NEXT: [[COERCE79_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE79_TUPLE]], 1
+// CHECK-NEXT: [[COERCE79_EXTRACT2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE79_TUPLE]], 2
+// CHECK-NEXT: [[COERCE79_EXTRACT3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE79_TUPLE]], 3
// CHECK-NEXT: store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[COERCE80]], align 16
// CHECK-NEXT: [[COERCE80_TUPLE:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[COERCE80]], align 16
// CHECK-NEXT: [[COERCE80_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE80_TUPLE]], 0
// CHECK-NEXT: [[COERCE80_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE80_TUPLE]], 1
// CHECK-NEXT: [[COERCE80_EXTRACT2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE80_TUPLE]], 2
// CHECK-NEXT: [[COERCE80_EXTRACT3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE80_TUPLE]], 3
-// CHECK-NEXT: store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[COERCE81]], align 16
-// CHECK-NEXT: [[COERCE81_TUPLE:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[COERCE81]], align 16
-// CHECK-NEXT: [[COERCE81_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE81_TUPLE]], 0
-// CHECK-NEXT: [[COERCE81_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE81_TUPLE]], 1
-// CHECK-NEXT: [[COERCE81_EXTRACT2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE81_TUPLE]], 2
-// CHECK-NEXT: [[COERCE81_EXTRACT3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE81_TUPLE]], 3
-// CHECK-NEXT: call void @_Z1f13svmfloat8x4_tS_(<vscale x 16 x i8> [[COERCE80_EXTRACT0]], <vscale x 16 x i8> [[COERCE80_EXTRACT1]], <vscale x 16 x i8> [[COERCE80_EXTRACT2]], <vscale x 16 x i8> [[COERCE80_EXTRACT3]], <vscale x 16 x i8> [[COERCE81_EXTRACT0]], <vscale x 16 x i8> [[COERCE81_EXTRACT1]], <vscale x 16 x i8> [[COERCE81_EXTRACT2]], <vscale x 16 x i8> [[COERCE81_EXTRACT3]])
+// CHECK-NEXT: call void @_Z1f13svmfloat8x4_tS_(<vscale x 16 x i8> [[COERCE79_EXTRACT0]], <vscale x 16 x i8> [[COERCE79_EXTRACT1]], <vscale x 16 x i8> [[COERCE79_EXTRACT2]], <vscale x 16 x i8> [[COERCE79_EXTRACT3]], <vscale x 16 x i8> [[COERCE80_EXTRACT0]], <vscale x 16 x i8> [[COERCE80_EXTRACT1]], <vscale x 16 x i8> [[COERCE80_EXTRACT2]], <vscale x 16 x i8> [[COERCE80_EXTRACT3]])
// CHECK-NEXT: ret void
//
// COMPAT_17-LABEL: define dso_local void @_Z3foov(
@@ -695,13 +690,13 @@ void f(__clang_svmfloat8x4_t, __clang_svmfloat8x4_t);
// COMPAT_17-NEXT: [[COERCE72:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
// COMPAT_17-NEXT: [[COERCE73:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
// COMPAT_17-NEXT: [[COERCE74:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
-// COMPAT_17-NEXT: [[COERCE75:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
+// COMPAT_17-NEXT: [[BYVAL_TEMP:%.*]] = alloca { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> }, align 2
+// COMPAT_17-NEXT: [[COERCE75:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// COMPAT_17-NEXT: [[COERCE76:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
-// COMPAT_17-NEXT: [[COERCE77:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
+// COMPAT_17-NEXT: [[COERCE77:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// COMPAT_17-NEXT: [[COERCE78:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
-// COMPAT_17-NEXT: [[COERCE79:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
+// COMPAT_17-NEXT: [[COERCE79:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// COMPAT_17-NEXT: [[COERCE80:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
-// COMPAT_17-NEXT: [[COERCE81:%.*]] = alloca { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, align 16
// COMPAT_17-NEXT: call void @_Z1fu10__SVInt8_tu10__SVInt8_t(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i8> zeroinitializer)
// COMPAT_17-NEXT: call void @_Z1fu11__SVInt16_tu11__SVInt16_t(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer)
// COMPAT_17-NEXT: call void @_Z1fu11__SVInt16_tu11__SVInt16_t(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i16> zeroinitializer)
@@ -1129,46 +1124,41 @@ void f(__clang_svmfloat8x4_t, __clang_svmfloat8x4_t);
// COMPAT_17-NEXT: [[COERCE74_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[COERCE74_TUPLE]], 1
// COMPAT_17-NEXT: [[COERCE74_EXTRACT2:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[COERCE74_TUPLE]], 2
// COMPAT_17-NEXT: [[COERCE74_EXTRACT3:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[COERCE74_TUPLE]], 3
-// COMPAT_17-NEXT: store { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } zeroinitializer, ptr [[COERCE75]], align 2
-// COMPAT_17-NEXT: [[COERCE75_TUPLE:%.*]] = load { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> }, ptr [[COERCE75]], align 2
-// COMPAT_17-NEXT: [[COERCE75_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[COERCE75_TUPLE]], 0
-// COMPAT_17-NEXT: [[COERCE75_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[COERCE75_TUPLE]], 1
-// COMPAT_17-NEXT: [[COERCE75_EXTRACT2:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[COERCE75_TUPLE]], 2
-// COMPAT_17-NEXT: [[COERCE75_EXTRACT3:%.*]] = extractvalue { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } [[COERCE75_TUPLE]], 3
-// COMPAT_17-NEXT: call void @_Z1f10svboolx4_t10svboolx4_t(<vscale x 16 x i1> [[COERCE74_EXTRACT0]], <vscale x 16 x i1> [[COERCE74_EXTRACT1]], <vscale x 16 x i1> [[COERCE74_EXTRACT2]], <vscale x 16 x i1> [[COERCE74_EXTRACT3]], <vscale x 16 x i1> [[COERCE75_EXTRACT0]], <vscale x 16 x i1> [[COERCE75_EXTRACT1]], <vscale x 16 x i1> [[COERCE75_EXTRACT2]], <vscale x 16 x i1> [[COERCE75_EXTRACT3]])
+// COMPAT_17-NEXT: store { <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1>, <vscale x 16 x i1> } zeroinitializer, ptr [[BYVAL_TEMP]], align 2
+// COMPAT_17-NEXT: call void @_Z1f10svboolx4_t10svboolx4_t(<vscale x 16 x i1> [[COERCE74_EXTRACT0]], <vscale x 16 x i1> [[COERCE74_EXTRACT1]], <vscale x 16 x i1> [[COERCE74_EXTRACT2]], <vscale x 16 x i1> [[COERCE74_EXTRACT3]], ptr noundef [[BYVAL_TEMP]])
+// COMPAT_17-NEXT: store { <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[COERCE75]], align 16
+// COMPAT_17-NEXT: [[COERCE75_TUPLE:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[COERCE75]], align 16
+// COMPAT_17-NEXT: [[COERCE75_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE75_TUPLE]], 0
+// COMPAT_17-NEXT: [[COERCE75_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE75_TUPLE]], 1
// COMPAT_17-NEXT: store { <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[COERCE76]], align 16
// COMPAT_17-NEXT: [[COERCE76_TUPLE:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[COERCE76]], align 16
// COMPAT_17-NEXT: [[COERCE76_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE76_TUPLE]], 0
// COMPAT_17-NEXT: [[COERCE76_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE76_TUPLE]], 1
-// COMPAT_17-NEXT: store { <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[COERCE77]], align 16
-// COMPAT_17-NEXT: [[COERCE77_TUPLE:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[COERCE77]], align 16
-// COMPAT_17-NEXT: [[COERCE77_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE77_TUPLE]], 0
-// COMPAT_17-NEXT: [[COERCE77_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE77_TUPLE]], 1
-// COMPAT_17-NEXT: call void @_Z1f13svmfloat8x2_t13svmfloat8x2_t(<vscale x 16 x i8> [[COERCE76_EXTRACT0]], <vscale x 16 x i8> [[COERCE76_EXTRACT1]], <vscale x 16 x i8> [[COERCE77_EXTRACT0]], <vscale x 16 x i8> [[COERCE77_EXTRACT1]])
+// COMPAT_17-NEXT: call void @_Z1f13svmfloat8x2_t13svmfloat8x2_t(<vscale x 16 x i8> [[COERCE75_EXTRACT0]], <vscale x 16 x i8> [[COERCE75_EXTRACT1]], <vscale x 16 x i8> [[COERCE76_EXTRACT0]], <vscale x 16 x i8> [[COERCE76_EXTRACT1]])
+// COMPAT_17-NEXT: store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[COERCE77]], align 16
+// COMPAT_17-NEXT: [[COERCE77_TUPLE:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[COERCE77]], align 16
+// COMPAT_17-NEXT: [[COERCE77_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE77_TUPLE]], 0
+// COMPAT_17-NEXT: [[COERCE77_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE77_TUPLE]], 1
+// COMPAT_17-NEXT: [[COERCE77_EXTRACT2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE77_TUPLE]], 2
// COMPAT_17-NEXT: store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[COERCE78]], align 16
// COMPAT_17-NEXT: [[COERCE78_TUPLE:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[COERCE78]], align 16
// COMPAT_17-NEXT: [[COERCE78_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE78_TUPLE]], 0
// COMPAT_17-NEXT: [[COERCE78_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE78_TUPLE]], 1
// COMPAT_17-NEXT: [[COERCE78_EXTRACT2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE78_TUPLE]], 2
-// COMPAT_17-NEXT: store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[COERCE79]], align 16
-// COMPAT_17-NEXT: [[COERCE79_TUPLE:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[COERCE79]], align 16
-// COMPAT_17-NEXT: [[COERCE79_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE79_TUPLE]], 0
-// COMPAT_17-NEXT: [[COERCE79_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE79_TUPLE]], 1
-// COMPAT_17-NEXT: [[COERCE79_EXTRACT2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE79_TUPLE]], 2
-// COMPAT_17-NEXT: call void @_Z1f13svmfloat8x3_t13svmfloat8x3_t(<vscale x 16 x i8> [[COERCE78_EXTRACT0]], <vscale x 16 x i8> [[COERCE78_EXTRACT1]], <vscale x 16 x i8> [[COERCE78_EXTRACT2]], <vscale x 16 x i8> [[COERCE79_EXTRACT0]], <vscale x 16 x i8> [[COERCE79_EXTRACT1]], <vscale x 16 x i8> [[COERCE79_EXTRACT2]])
+// COMPAT_17-NEXT: call void @_Z1f13svmfloat8x3_t13svmfloat8x3_t(<vscale x 16 x i8> [[COERCE77_EXTRACT0]], <vscale x 16 x i8> [[COERCE77_EXTRACT1]], <vscale x 16 x i8> [[COERCE77_EXTRACT2]], <vscale x 16 x i8> [[COERCE78_EXTRACT0]], <vscale x 16 x i8> [[COERCE78_EXTRACT1]], <vscale x 16 x i8> [[COERCE78_EXTRACT2]])
+// COMPAT_17-NEXT: store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[COERCE79]], align 16
+// COMPAT_17-NEXT: [[COERCE79_TUPLE:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[COERCE79]], align 16
+// COMPAT_17-NEXT: [[COERCE79_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE79_TUPLE]], 0
+// COMPAT_17-NEXT: [[COERCE79_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE79_TUPLE]], 1
+// COMPAT_17-NEXT: [[COERCE79_EXTRACT2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE79_TUPLE]], 2
+// COMPAT_17-NEXT: [[COERCE79_EXTRACT3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE79_TUPLE]], 3
// COMPAT_17-NEXT: store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[COERCE80]], align 16
// COMPAT_17-NEXT: [[COERCE80_TUPLE:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[COERCE80]], align 16
// COMPAT_17-NEXT: [[COERCE80_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE80_TUPLE]], 0
// COMPAT_17-NEXT: [[COERCE80_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE80_TUPLE]], 1
// COMPAT_17-NEXT: [[COERCE80_EXTRACT2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE80_TUPLE]], 2
// COMPAT_17-NEXT: [[COERCE80_EXTRACT3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE80_TUPLE]], 3
-// COMPAT_17-NEXT: store { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } zeroinitializer, ptr [[COERCE81]], align 16
-// COMPAT_17-NEXT: [[COERCE81_TUPLE:%.*]] = load { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }, ptr [[COERCE81]], align 16
-// COMPAT_17-NEXT: [[COERCE81_EXTRACT0:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE81_TUPLE]], 0
-// COMPAT_17-NEXT: [[COERCE81_EXTRACT1:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE81_TUPLE]], 1
-// COMPAT_17-NEXT: [[COERCE81_EXTRACT2:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE81_TUPLE]], 2
-// COMPAT_17-NEXT: [[COERCE81_EXTRACT3:%.*]] = extractvalue { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } [[COERCE81_TUPLE]], 3
-// COMPAT_17-NEXT: call void @_Z1f13svmfloat8x4_t13svmfloat8x4_t(<vscale x 16 x i8> [[COERCE80_EXTRACT0]], <vscale x 16 x i8> [[COERCE80_EXTRACT1]], <vscale x 16 x i8> [[COERCE80_EXTRACT2]], <vscale x 16 x i8> [[COERCE80_EXTRACT3]], <vscale x 16 x i8> [[COERCE81_EXTRACT0]], <vscale x 16 x i8> [[COERCE81_EXTRACT1]], <vscale x 16 x i8> [[COERCE81_EXTRACT2]], <vscale x 16 x i8> [[COERCE81_EXTRACT3]])
+// COMPAT_17-NEXT: call void @_Z1f13svmfloat8x4_t13svmfloat8x4_t(<vscale x 16 x i8> [[COERCE79_EXTRACT0]], <vscale x 16 x i8> [[COERCE79_EXTRACT1]], <vscale x 16 x i8> [[COERCE79_EXTRACT2]], <vscale x 16 x i8> [[COERCE79_EXTRACT3]], <vscale x 16 x i8> [[COERCE80_EXTRACT0]], <vscale x 16 x i8> [[COERCE80_EXTRACT1]], <vscale x 16 x i8> [[COERCE80_EXTRACT2]], <vscale x 16 x i8> [[COERCE80_EXTRACT3]])
// COMPAT_17-NEXT: ret void
//
void foo() {
More information about the cfe-commits
mailing list