[llvm] 5431bf2 - [AArch64] Remove svget/svset/svcreate from llvm

Caroline Concatto via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 23 02:48:59 PDT 2022


Author: Caroline Concatto
Date: 2022-09-23T10:48:43+01:00
New Revision: 5431bf27bd22a02ea6ea75ed564f52699a21c375

URL: https://github.com/llvm/llvm-project/commit/5431bf27bd22a02ea6ea75ed564f52699a21c375
DIFF: https://github.com/llvm/llvm-project/commit/5431bf27bd22a02ea6ea75ed564f52699a21c375.diff

LOG: [AArch64] Remove svget/svset/svcreate from llvm

This patch removes the AArch64 intrinsics svget/svset/svcreate from LLVM.
It also implements for vector.extract the InstCombine fold that previously existed for svget.

Depends on: D131547

Differential Revision: https://reviews.llvm.org/D131548
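
For reference, existing IR that uses the removed intrinsics is auto-upgraded by mapping the
tuple intrinsics onto the generic llvm.vector.insert/llvm.vector.extract intrinsics, rescaling
the tuple index by the minimum element count of the subvector type. A minimal sketch of the
mapping, taken from the nxv8i32/nxv4i32 case in the upgrade test below:

  ; before the upgrade (removed intrinsic, tuple element index 1)
  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %tuple, i32 1)
  ; after the upgrade (lane index = 1 * 4 minimum elements = i64 4)
  %ext = call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> %tuple, i64 4)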

Added: 
    

Modified: 
    llvm/include/llvm/IR/IntrinsicsAArch64.td
    llvm/lib/IR/AutoUpgrade.cpp
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
    llvm/test/Bitcode/upgrade-aarch64-sve-intrinsics.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll

Removed: 
    llvm/test/CodeGen/AArch64/sve-calling-convention-tuple-types.ll
    llvm/test/CodeGen/AArch64/sve-extract-subvector.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll
    llvm/test/CodeGen/AArch64/sve-intrinsics-insert-extract-tuple.ll
    llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-tuple-get.ll


################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 264cab077d2e6..3110abddcc1f4 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -946,31 +946,6 @@ def llvm_nxv2f64_ty : LLVMType<nxv2f64>;
 
 let TargetPrefix = "aarch64" in {  // All intrinsics start with "llvm.aarch64.".
 
-  class AdvSIMD_SVE_Create_2Vector_Tuple
-    : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
-                [llvm_anyvector_ty, LLVMMatchType<1>],
-                [IntrReadMem]>;
-
-  class AdvSIMD_SVE_Create_3Vector_Tuple
-    : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
-                [llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>],
-                [IntrReadMem]>;
-
-  class AdvSIMD_SVE_Create_4Vector_Tuple
-    : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
-                [llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>,
-                 LLVMMatchType<1>],
-                [IntrReadMem]>;
-
-  class AdvSIMD_SVE_Set_Vector_Tuple
-    : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
-                [LLVMMatchType<0>, llvm_i32_ty, llvm_anyvector_ty],
-                [IntrReadMem, ImmArg<ArgIndex<1>>]>;
-
-  class AdvSIMD_SVE_Get_Vector_Tuple
-    : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i32_ty],
-                [IntrReadMem, IntrArgMemOnly, ImmArg<ArgIndex<1>>]>;
-
   class AdvSIMD_1Vec_PredLoad_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
                 [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
@@ -1544,21 +1519,6 @@ class SVE_4Vec_BF16_Indexed
                 [llvm_nxv4f32_ty, llvm_nxv8bf16_ty, llvm_nxv8bf16_ty, llvm_i64_ty],
                 [IntrNoMem, ImmArg<ArgIndex<3>>]>;
 
-//
-// Vector tuple creation intrinsics (ACLE)
-//
-
-def int_aarch64_sve_tuple_create2 : AdvSIMD_SVE_Create_2Vector_Tuple;
-def int_aarch64_sve_tuple_create3 : AdvSIMD_SVE_Create_3Vector_Tuple;
-def int_aarch64_sve_tuple_create4 : AdvSIMD_SVE_Create_4Vector_Tuple;
-
-//
-// Vector tuple insertion/extraction intrinsics (ACLE)
-//
-
-def int_aarch64_sve_tuple_get : AdvSIMD_SVE_Get_Vector_Tuple;
-def int_aarch64_sve_tuple_set : AdvSIMD_SVE_Set_Vector_Tuple;
-
 //
 // Loads
 //

diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index cbe27f603600a..a4a33efb055a4 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -574,6 +574,28 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
       NewFn = Intrinsic::getDeclaration(F->getParent(), ID, Ty);
       return true;
     }
+    if (Name.startswith("aarch64.sve.tuple.get")) {
+      Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
+      NewFn = Intrinsic::getDeclaration(F->getParent(),
+                                        Intrinsic::vector_extract, Tys);
+      return true;
+    }
+    if (Name.startswith("aarch64.sve.tuple.set")) {
+      auto Args = F->getFunctionType()->params();
+      Type *Tys[] = {Args[0], Args[2], Args[1]};
+      NewFn = Intrinsic::getDeclaration(F->getParent(),
+                                        Intrinsic::vector_insert, Tys);
+      return true;
+    }
+    static const Regex CreateTupleRegex(
+        "^aarch64\\.sve\\.tuple\\.create[234](.nxv[a-z0-9]+|$)");
+    if (CreateTupleRegex.match(Name)) {
+      auto Args = F->getFunctionType()->params();
+      Type *Tys[] = {F->getReturnType(), Args[1]};
+      NewFn = Intrinsic::getDeclaration(F->getParent(),
+                                        Intrinsic::vector_insert, Tys);
+      return true;
+    }
     if (Name.startswith("arm.neon.vclz")) {
       Type* args[2] = {
         F->arg_begin()->getType(),
@@ -3898,6 +3920,61 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
     NewCall = dyn_cast<CallInst>(Ret);
     break;
   }
+
+  case Intrinsic::vector_extract: {
+    StringRef Name = F->getName();
+    Name = Name.substr(5); // Strip llvm
+    if (!Name.startswith("aarch64.sve.tuple.get")) {
+      DefaultCase();
+      return;
+    }
+    ScalableVectorType *RetTy =
+        dyn_cast<ScalableVectorType>(F->getReturnType());
+    unsigned MinElts = RetTy->getMinNumElements();
+    unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
+    Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
+    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
+    break;
+  }
+
+  case Intrinsic::vector_insert: {
+    StringRef Name = F->getName();
+    Name = Name.substr(5);
+    if (!Name.startswith("aarch64.sve.tuple")) {
+      DefaultCase();
+      return;
+    }
+    if (Name.startswith("aarch64.sve.tuple.set")) {
+      unsigned I = dyn_cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
+      ScalableVectorType *Ty =
+          dyn_cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
+      Value *NewIdx =
+          ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
+      NewCall = Builder.CreateCall(
+          NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
+      break;
+    }
+    if (Name.startswith("aarch64.sve.tuple.create")) {
+      unsigned N = StringSwitch<unsigned>(Name)
+                       .StartsWith("aarch64.sve.tuple.create2", 2)
+                       .StartsWith("aarch64.sve.tuple.create3", 3)
+                       .StartsWith("aarch64.sve.tuple.create4", 4)
+                       .Default(0);
+      assert(N > 1 && "Create is expected to be between 2-4");
+      ScalableVectorType *RetTy =
+          dyn_cast<ScalableVectorType>(F->getReturnType());
+      Value *Ret = llvm::PoisonValue::get(RetTy);
+      unsigned MinElts = RetTy->getMinNumElements() / N;
+      for (unsigned I = 0; I < N; I++) {
+        Value *Idx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
+        Value *V = CI->getArgOperand(I);
+        Ret = Builder.CreateInsertVector(RetTy, Ret, V, Idx);
+      }
+      NewCall = dyn_cast<CallInst>(Ret);
+    }
+    break;
+  }
+
   case Intrinsic::arm_neon_bfdot:
   case Intrinsic::arm_neon_bfmmla:
   case Intrinsic::arm_neon_bfmlalb:

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a63d0382dd903..28751afd70bd5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -20339,71 +20339,6 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
                                         /*OnlyPackedOffsets=*/false);
     case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
       return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_IMM_PRED);
-    case Intrinsic::aarch64_sve_tuple_get: {
-      SDLoc DL(N);
-      SDValue Chain = N->getOperand(0);
-      SDValue Src1 = N->getOperand(2);
-      SDValue Idx = N->getOperand(3);
-
-      uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
-      EVT ResVT = N->getValueType(0);
-      uint64_t NumLanes = ResVT.getVectorElementCount().getKnownMinValue();
-      SDValue ExtIdx = DAG.getVectorIdxConstant(IdxConst * NumLanes, DL);
-      SDValue Val =
-          DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, Src1, ExtIdx);
-      return DAG.getMergeValues({Val, Chain}, DL);
-    }
-    case Intrinsic::aarch64_sve_tuple_set: {
-      SDLoc DL(N);
-      SDValue Chain = N->getOperand(0);
-      SDValue Tuple = N->getOperand(2);
-      SDValue Idx = N->getOperand(3);
-      SDValue Vec = N->getOperand(4);
-
-      EVT TupleVT = Tuple.getValueType();
-      uint64_t TupleLanes = TupleVT.getVectorElementCount().getKnownMinValue();
-
-      uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
-      uint64_t NumLanes =
-          Vec.getValueType().getVectorElementCount().getKnownMinValue();
-
-      if ((TupleLanes % NumLanes) != 0)
-        report_fatal_error("invalid tuple vector!");
-
-      uint64_t NumVecs = TupleLanes / NumLanes;
-
-      SmallVector<SDValue, 4> Opnds;
-      for (unsigned I = 0; I < NumVecs; ++I) {
-        if (I == IdxConst)
-          Opnds.push_back(Vec);
-        else {
-          SDValue ExtIdx = DAG.getVectorIdxConstant(I * NumLanes, DL);
-          Opnds.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
-                                      Vec.getValueType(), Tuple, ExtIdx));
-        }
-      }
-      SDValue Concat =
-          DAG.getNode(ISD::CONCAT_VECTORS, DL, Tuple.getValueType(), Opnds);
-      return DAG.getMergeValues({Concat, Chain}, DL);
-    }
-    case Intrinsic::aarch64_sve_tuple_create2:
-    case Intrinsic::aarch64_sve_tuple_create3:
-    case Intrinsic::aarch64_sve_tuple_create4: {
-      SDLoc DL(N);
-      SDValue Chain = N->getOperand(0);
-
-      SmallVector<SDValue, 4> Opnds;
-      for (unsigned I = 2; I < N->getNumOperands(); ++I)
-        Opnds.push_back(N->getOperand(I));
-
-      EVT VT = Opnds[0].getValueType();
-      EVT EltVT = VT.getVectorElementType();
-      EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
-                                    VT.getVectorElementCount() *
-                                        (N->getNumOperands() - 2));
-      SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, DestVT, Opnds);
-      return DAG.getMergeValues({Concat, Chain}, DL);
-    }
     case Intrinsic::aarch64_rndr:
     case Intrinsic::aarch64_rndrrs: {
       unsigned IntrinsicID =

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index ebceac3b0ac94..e3c208653b7a8 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1208,32 +1208,6 @@ static Optional<Instruction *> instCombineSVETBL(InstCombiner &IC,
   return IC.replaceInstUsesWith(II, VectorSplat);
 }
 
-static Optional<Instruction *> instCombineSVETupleGet(InstCombiner &IC,
-                                                      IntrinsicInst &II) {
-  // Try to remove sequences of tuple get/set.
-  Value *SetTuple, *SetIndex, *SetValue;
-  auto *GetTuple = II.getArgOperand(0);
-  auto *GetIndex = II.getArgOperand(1);
-  // Check that we have tuple_get(GetTuple, GetIndex) where GetTuple is a
-  // call to tuple_set i.e. tuple_set(SetTuple, SetIndex, SetValue).
-  // Make sure that the types of the current intrinsic and SetValue match
-  // in order to safely remove the sequence.
-  if (!match(GetTuple,
-             m_Intrinsic<Intrinsic::aarch64_sve_tuple_set>(
-                 m_Value(SetTuple), m_Value(SetIndex), m_Value(SetValue))) ||
-      SetValue->getType() != II.getType())
-    return None;
-  // Case where we get the same index right after setting it.
-  // tuple_get(tuple_set(SetTuple, SetIndex, SetValue), GetIndex) --> SetValue
-  if (GetIndex == SetIndex)
-    return IC.replaceInstUsesWith(II, SetValue);
-  // If we are getting a different index than what was set in the tuple_set
-  // intrinsic. We can just set the input tuple to the one up in the chain.
-  // tuple_get(tuple_set(SetTuple, SetIndex, SetValue), GetIndex)
-  // --> tuple_get(SetTuple, GetIndex)
-  return IC.replaceOperand(II, 0, SetTuple);
-}
-
 static Optional<Instruction *> instCombineSVEZip(InstCombiner &IC,
                                                  IntrinsicInst &II) {
   // zip1(uzp1(A, B), uzp2(A, B)) --> A
@@ -1448,8 +1422,6 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
   case Intrinsic::aarch64_sve_sunpkhi:
   case Intrinsic::aarch64_sve_sunpklo:
     return instCombineSVEUnpack(IC, II);
-  case Intrinsic::aarch64_sve_tuple_get:
-    return instCombineSVETupleGet(IC, II);
   case Intrinsic::aarch64_sve_zip1:
   case Intrinsic::aarch64_sve_zip2:
     return instCombineSVEZip(IC, II);

diff --git a/llvm/test/Bitcode/upgrade-aarch64-sve-intrinsics.ll b/llvm/test/Bitcode/upgrade-aarch64-sve-intrinsics.ll
index f9715cbd7f1ef..c21a6b7f1539a 100644
--- a/llvm/test/Bitcode/upgrade-aarch64-sve-intrinsics.ll
+++ b/llvm/test/Bitcode/upgrade-aarch64-sve-intrinsics.ll
@@ -72,3 +72,99 @@ declare <vscale x 48 x i8> @llvm.aarch64.sve.ld3.nxv48i8.nxv16i1.p0i8(<vscale x
 declare <vscale x 64 x i8> @llvm.aarch64.sve.ld4.nxv64i8.nxv16i1.p0i8(<vscale x 16 x i1>, i8*)
 declare <vscale x 32 x i8> @llvm.aarch64.sve.ld2(<vscale x 16 x i1>, i8 *)
 declare <vscale x 32 x i8> @llvm.aarch64.sve.ld2.nxv32i8(<vscale x 16 x i1>, i8 *)
+
+; aarch64.sve.tuple.create.N
+define <vscale x 32 x i8> @create2_nxv32i8_nxv16i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2) {
+; CHECK-LABEL: @create2_nxv32i8_nxv16i8
+; CHECK:  %1 = call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> %z1, i64 0)
+; CHECK-NEXT:  %tuple = call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> %1, <vscale x 16 x i8> %z2, i64 16)
+; CHECK-NEXT:  ret <vscale x 32 x i8> %tuple
+
+  %tuple = tail call <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2)
+  ret <vscale x 32 x i8> %tuple
+}
+
+define <vscale x 24 x i16> @create3_nxv24i8_nxv16i8(<vscale x 8 x i16> %unused_z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3) {
+; CHECK-LABEL: @create3_nxv24i8_nxv16i8
+; CHECK:  %1 = call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> poison, <vscale x 8 x i16> %z1, i64 0)
+; CHECK-NEXT:  %2 = call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> %1, <vscale x 8 x i16> %z2, i64 8)
+; CHECK-NEXT:  %tuple = call <vscale x 24 x i16> @llvm.vector.insert.nxv24i16.nxv8i16(<vscale x 24 x i16> %2, <vscale x 8 x i16> %z3, i64 16)
+; CHECK-NEXT:  ret <vscale x 24 x i16> %tuple
+
+  %tuple = tail call <vscale x 24 x i16> @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(<vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3)
+  ret <vscale x 24 x i16> %tuple
+}
+
+define <vscale x 64 x i8> @create4_nxv64i8_nxv16i8(<vscale x 16 x i8> %unused_z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3, <vscale x 16 x i8> %z4) {
+; CHECK-LABEL: @create4_nxv64i8_nxv16i8
+; CHECK:  %1 = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> poison, <vscale x 16 x i8> %z1, i64 0)
+; CHECK-NEXT:  %2 = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> %1, <vscale x 16 x i8> %z2, i64 16)
+; CHECK-NEXT:  %3 = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> %2, <vscale x 16 x i8> %z3, i64 32)
+; CHECK-NEXT:  %tuple = call <vscale x 64 x i8> @llvm.vector.insert.nxv64i8.nxv16i8(<vscale x 64 x i8> %3, <vscale x 16 x i8> %z4, i64 48)
+; CHECK-NEXT:  ret <vscale x 64 x i8> %tuple
+
+  %tuple = tail call <vscale x 64 x i8> @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3, <vscale x 16 x i8> %z4)
+  ret <vscale x 64 x i8> %tuple
+}
+
+; Accept short mangling name
+define <vscale x 32 x i8> @create2_nxv32i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2) {
+; CHECK-LABEL: @create2_nxv32i8
+; CHECK:  %1 = call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> %z1, i64 0)
+; CHECK-NEXT:  %tuple = call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> %1, <vscale x 16 x i8> %z2, i64 16)
+; CHECK-NEXT:  ret <vscale x 32 x i8> %tuple
+
+  %tuple = tail call <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2.nxv32i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2)
+  ret <vscale x 32 x i8> %tuple
+}
+
+define <vscale x 32 x i8> @create2(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2) {
+; CHECK-LABEL: @create2
+; CHECK:  %1 = call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> poison, <vscale x 16 x i8> %z1, i64 0)
+; CHECK-NEXT:  %tuple = call <vscale x 32 x i8> @llvm.vector.insert.nxv32i8.nxv16i8(<vscale x 32 x i8> %1, <vscale x 16 x i8> %z2, i64 16)
+; CHECK-NEXT:  ret <vscale x 32 x i8> %tuple
+
+  %tuple = tail call <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2)
+  ret <vscale x 32 x i8> %tuple
+}
+
+; Negative test for create
+; Should not upgrade when create is not 2,3 or 4
+define <vscale x 16 x i8> @sve_tuple_create1(<vscale x 16 x i8> %z0) {
+; CHECK-LABEL: @sve_tuple_create1
+; CHECK: %tuple = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.create1.nxv16i8.nxv16i8(<vscale x 16 x i8> %z0)
+; CHECK-NEXT:  ret <vscale x 16 x i8> %tuple
+
+  %tuple  = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.create1.nxv16i8.nxv16i8(<vscale x 16 x i8> %z0);
+  ret <vscale x 16 x i8> %tuple;
+}
+
+; aarch64.sve.tuple.set
+
+define void  @set_tuple2_nxv8i32_elt1(<vscale x 8 x i32> %z0, <vscale x 4 x i32> %z1) {
+; CHECK-LABEL: @set_tuple2_nxv8i32_elt1
+; CHECK:  %ins = call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> %z0, <vscale x 4 x i32> %z1, i64 4)
+; CHECK-NEXT: ret void
+
+  %ins = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32> %z0, i32 1, <vscale x 4 x i32> %z1)
+  ret void
+}
+
+; aarch64.sve.tuple.get
+define <vscale x 4 x i32> @get_tuple2_nxv8i32_elt1(<vscale x 8 x i32> %tuple) {
+; CHECK-LABEL: @get_tuple2_nxv8i32_elt1
+; CHECK:  %ext = call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> %tuple, i64 4)
+; CHECK-NEXT:  ret <vscale x 4 x i32> %ext
+
+  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %tuple, i32 1)
+  ret <vscale x 4 x i32> %ext
+}
+
+declare  <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare  <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2.nxv32i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare  <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare  <vscale x 24 x i16> @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 64 x i8> @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.tuple.create1.nxv16i8.nxv16i8(<vscale x 16 x i8>)
+declare <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32>, i32, <vscale x 4 x i32>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32>, i32)

diff --git a/llvm/test/CodeGen/AArch64/sve-calling-convention-tuple-types.ll b/llvm/test/CodeGen/AArch64/sve-calling-convention-tuple-types.ll
deleted file mode 100644
index 6c702b64a4554..0000000000000
--- a/llvm/test/CodeGen/AArch64/sve-calling-convention-tuple-types.ll
+++ /dev/null
@@ -1,499 +0,0 @@
-; RUN: llc -mtriple aarch64 -mattr=+sve -asm-verbose=0 < %s | FileCheck %s
-
-;
-; svint8x2_t
-;
-
-define <vscale x 32 x i8> @ret_svint8x2_t(<vscale x 16 x i8> %unused_z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2) #0 {
-; CHECK-LABEL: ret_svint8x2_t
-; CHECK:      mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: ret
-  %tuple = tail call <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2)
-  ret <vscale x 32 x i8> %tuple
-}
-
-define void @call_svint8x2_t(<vscale x 16 x i8> %dummy_z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %dummy_z2, <vscale x 16 x i8> %z3) #0 {
-; CHECK-LABEL: call_svint8x2_t
-; CHECK:      mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z3.d
-; CHECK-NEXT: bl callee_svint8x2_t
-  %tuple = tail call <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z3)
-  call void @callee_svint8x2_t(<vscale x 32 x i8> %tuple)
-  ret void
-}
-
-;
-; svint16x2_t
-;
-
-define <vscale x 16 x i16> @ret_svint16x2_t(<vscale x 8 x i16> %unused_z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2) #0 {
-; CHECK-LABEL: ret_svint16x2_t
-; CHECK:      mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: ret
-  %tuple = tail call <vscale x 16 x i16> @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(<vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2)
-  ret <vscale x 16 x i16> %tuple
-}
-
-define void @call_svint16x2_t(<vscale x 8 x i16> %dummy_z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %dummy_z2, <vscale x 8 x i16> %z3) #0 {
-; CHECK-LABEL: call_svint16x2_t
-; CHECK:      mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z3.d
-; CHECK-NEXT: bl callee_svint16x2_t
-  %tuple = tail call <vscale x 16 x i16> @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(<vscale x 8 x i16> %z1, <vscale x 8 x i16> %z3)
-  call void @callee_svint16x2_t(<vscale x 16 x i16> %tuple)
-  ret void
-}
-
-;
-; svint32x2_t
-;
-
-define <vscale x 8 x i32> @ret_svint32x2_t(<vscale x 4 x i32> %unused_z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2) #0 {
-; CHECK-LABEL: ret_svint32x2_t
-; CHECK:      mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: ret
-  %tuple = tail call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
-  ret <vscale x 8 x i32> %tuple
-}
-
-define void @call_svint32x2_t(<vscale x 4 x i32> %dummy_z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %dummy_z2, <vscale x 4 x i32> %z3) #0 {
-; CHECK-LABEL: call_svint32x2_t
-; CHECK:      mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z3.d
-; CHECK-NEXT: bl callee_svint32x2_t
-  %tuple = tail call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z1, <vscale x 4 x i32> %z3)
-  call void @callee_svint32x2_t(<vscale x 8 x i32> %tuple)
-  ret void
-}
-
-;
-; svint64x2_t
-;
-
-define <vscale x 4 x i64> @ret_svint64x2_t(<vscale x 2 x i64> %unused_z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2) #0 {
-; CHECK-LABEL: ret_svint64x2_t
-; CHECK:      mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: ret
-  %tuple = tail call <vscale x 4 x i64> @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64(<vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2)
-  ret <vscale x 4 x i64> %tuple
-}
-
-define void @call_svint64x2_t(<vscale x 2 x i64> %dummy_z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %dummy_z2, <vscale x 2 x i64> %z3) #0 {
-; CHECK-LABEL: call_svint64x2_t
-; CHECK:      mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z3.d
-; CHECK-NEXT: bl callee_svint64x2_t
-  %tuple = tail call <vscale x 4 x i64> @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64(<vscale x 2 x i64> %z1, <vscale x 2 x i64> %z3)
-  call void @callee_svint64x2_t(<vscale x 4 x i64> %tuple)
-  ret void
-}
-
-;
-; svfloatx2_t
-;
-
-define <vscale x 8 x float> @ret_svfloatx2_t(<vscale x 4 x float> %unused_z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2) #0 {
-; CHECK-LABEL: ret_svfloatx2_t
-; CHECK:      mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: ret
-  %tuple = tail call <vscale x 8 x float> @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32(<vscale x 4 x float> %z1, <vscale x 4 x float> %z2)
-  ret <vscale x 8 x float> %tuple
-}
-
-define void @call_svfloatx2_t(<vscale x 4 x float> %dummy_z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %dummy_z2, <vscale x 4 x float> %z3) #0 {
-; CHECK-LABEL: call_svfloatx2_t
-; CHECK:      mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z3.d
-; CHECK-NEXT: bl callee_svfloatx2_t
-  %tuple = tail call <vscale x 8 x float> @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32(<vscale x 4 x float> %z1, <vscale x 4 x float> %z3)
-  call void @callee_svfloatx2_t(<vscale x 8 x float> %tuple)
-  ret void
-}
-
-;
-; svdoublex2_t
-;
-
-define <vscale x 4 x double> @ret_svdoublex2_t(<vscale x 2 x double> %unused_z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2) #0 {
-; CHECK-LABEL: ret_svdoublex2_t
-; CHECK:      mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: ret
-  %tuple = tail call <vscale x 4 x double> @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64(<vscale x 2 x double> %z1, <vscale x 2 x double> %z2)
-  ret <vscale x 4 x double> %tuple
-}
-
-define void @call_svdoublex2_t(<vscale x 2 x double> %dummy_z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %dummy_z2, <vscale x 2 x double> %z3) #0 {
-; CHECK-LABEL: call_svdoublex2_t
-; CHECK:      mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z3.d
-; CHECK-NEXT: bl callee_svdoublex2_t
-  %tuple = tail call <vscale x 4 x double> @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64(<vscale x 2 x double> %z1, <vscale x 2 x double> %z3)
-  call void @callee_svdoublex2_t(<vscale x 4 x double> %tuple)
-  ret void
-}
-
-;
-; svint8x3_t
-;
-
-define <vscale x 48 x i8> @ret_svint8x3_t(<vscale x 16 x i8> %unused_z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3) #0 {
-; CHECK-LABEL: ret_svint8x3_t
-; CHECK:      mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: mov z2.d, z3.d
-; CHECK-NEXT: ret
-  %tuple = tail call <vscale x 48 x i8> @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3)
-  ret <vscale x 48 x i8> %tuple
-}
-
-define void @call_svint8x3_t(<vscale x 16 x i8> %dummy_z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %dummy_z3, <vscale x 16 x i8> %z4) #0 {
-; CHECK-LABEL: call_svint8x3_t
-; CHECK:      mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: mov z2.d, z4.d
-; CHECK-NEXT: bl callee_svint8x3_t
-  %tuple = tail call <vscale x 48 x i8> @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z4)
-  call void @callee_svint8x3_t(<vscale x 48 x i8> %tuple)
-  ret void
-}
-
-;
-; svint16x3_t
-;
-
-define <vscale x 24 x i16> @ret_svint16x3_t(<vscale x 8 x i16> %unused_z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3) #0 {
-; CHECK-LABEL: ret_svint16x3_t
-; CHECK:      mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: mov z2.d, z3.d
-; CHECK-NEXT: ret
-  %tuple = tail call <vscale x 24 x i16> @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(<vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3)
-  ret <vscale x 24 x i16> %tuple
-}
-
-define void @call_svint16x3_t(<vscale x 8 x i16> %dummy_z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %dummy_z3, <vscale x 8 x i16> %z4) #0 {
-; CHECK-LABEL: call_svint16x3_t
-; CHECK:      mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: mov z2.d, z4.d
-; CHECK-NEXT: bl callee_svint16x3_t
-  %tuple = tail call <vscale x 24 x i16> @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(<vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z4)
-  call void @callee_svint16x3_t(<vscale x 24 x i16> %tuple)
-  ret void
-}
-
-;
-; svint32x3_t
-;
-
-define <vscale x 12 x i32> @ret_svint32x3_t(<vscale x 4 x i32> %unused_z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3) #0 {
-; CHECK-LABEL: ret_svint32x3_t
-; CHECK:      mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: mov z2.d, z3.d
-; CHECK-NEXT: ret
-  %tuple = tail call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
-  ret <vscale x 12 x i32> %tuple
-}
-
-define void @call_svint32x3_t(<vscale x 4 x i32> %dummy_z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %dummy_z3, <vscale x 4 x i32> %z4) #0 {
-; CHECK-LABEL: call_svint32x3_t
-; CHECK:      mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: mov z2.d, z4.d
-; CHECK-NEXT: bl callee_svint32x3_t
-  %tuple = tail call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z4)
-  call void @callee_svint32x3_t(<vscale x 12 x i32> %tuple)
-  ret void
-}
-
-;
-; svint64x3_t
-;
-
-define <vscale x 6 x i64> @ret_svint64x3_t(<vscale x 2 x i64> %unused_z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3) #0 {
-; CHECK-LABEL: ret_svint64x3_t
-; CHECK:      mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: mov z2.d, z3.d
-; CHECK-NEXT: ret
-  %tuple = tail call <vscale x 6 x i64> @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(<vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3)
-  ret <vscale x 6 x i64> %tuple
-}
-
-define void @call_svint64x3_t(<vscale x 2 x i64> %dummy_z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %dummy_z3, <vscale x 2 x i64> %z4) #0 {
-; CHECK-LABEL: call_svint64x3_t
-; CHECK:      mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: mov z2.d, z4.d
-; CHECK-NEXT: bl callee_svint64x3_t
-  %tuple = tail call <vscale x 6 x i64> @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(<vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z4)
-  call void @callee_svint64x3_t(<vscale x 6 x i64> %tuple)
-  ret void
-}
-
-;
-; svfloatx3_t
-;
-
-define <vscale x 12 x float> @ret_svfloatx3_t(<vscale x 4 x float> %unused_z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3) #0 {
-; CHECK-LABEL: ret_svfloatx3_t
-; CHECK:      mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: mov z2.d, z3.d
-; CHECK-NEXT: ret
-  %tuple = tail call <vscale x 12 x float> @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(<vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3)
-  ret <vscale x 12 x float> %tuple
-}
-
-define void @call_svfloatx3_t(<vscale x 4 x float> %dummy_z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %dummy_z3, <vscale x 4 x float> %z4) #0 {
-; CHECK-LABEL: call_svfloatx3_t
-; CHECK:      mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: mov z2.d, z4.d
-; CHECK-NEXT: bl callee_svfloatx3_t
-  %tuple = tail call <vscale x 12 x float> @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(<vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z4)
-  call void @callee_svfloatx3_t(<vscale x 12 x float> %tuple)
-  ret void
-}
-
-;
-; svdoublex3_t
-;
-
-define <vscale x 6 x double> @ret_svdoublex3_t(<vscale x 2 x double> %unused_z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z3) #0 {
-; CHECK-LABEL: ret_svdoublex3_t
-; CHECK:      mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: mov z2.d, z3.d
-; CHECK-NEXT: ret
-  %tuple = tail call <vscale x 6 x double> @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64(<vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z3)
-  ret <vscale x 6 x double> %tuple
-}
-
-define void @call_svdoublex3_t(<vscale x 2 x double> %dummy_z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %dummy_z3, <vscale x 2 x double> %z4) #0 {
-; CHECK-LABEL: call_svdoublex3_t
-; CHECK:      mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: mov z2.d, z4.d
-; CHECK-NEXT: bl callee_svdoublex3_t
-  %tuple = tail call <vscale x 6 x double> @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64(<vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z4)
-  call void @callee_svdoublex3_t(<vscale x 6 x double> %tuple)
-  ret void
-}
-
-;
-; svint8x4_t
-;
-
-define <vscale x 64 x i8> @ret_svint8x4_t(<vscale x 16 x i8> %unused_z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3, <vscale x 16 x i8> %z4) #0 {
-; CHECK-LABEL: ret_svint8x4_t
-; CHECK:      mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: mov z2.d, z3.d
-; CHECK-NEXT: mov z3.d, z4.d
-; CHECK-NEXT: ret
-  %tuple = tail call <vscale x 64 x i8> @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3, <vscale x 16 x i8> %z4)
-  ret <vscale x 64 x i8> %tuple
-}
-
-define void @call_svint8x4_t(<vscale x 16 x i8> %dummy_z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %dummy_z3, <vscale x 16 x i8> %z4, <vscale x 16 x i8> %z5) #0 {
-; CHECK-LABEL: call_svint8x4_t
-; CHECK:      mov z3.d, z5.d
-; CHECK-NEXT: mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: mov z2.d, z4.d
-; CHECK-NEXT: bl callee_svint8x4_t
-  %tuple = tail call <vscale x 64 x i8> @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8(<vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z4, <vscale x 16 x i8> %z5)
-  call void @callee_svint8x4_t(<vscale x 64 x i8> %tuple)
-  ret void
-}
-
-;
-; svint16x4_t
-;
-
-define <vscale x 32 x i16> @ret_svint16x4_t(<vscale x 8 x i16> %unused_z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3, <vscale x 8 x i16> %z4) #0 {
-; CHECK-LABEL: ret_svint16x4_t
-; CHECK:      mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: mov z2.d, z3.d
-; CHECK-NEXT: mov z3.d, z4.d
-; CHECK-NEXT: ret
-  %tuple = tail call <vscale x 32 x i16> @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16(<vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3, <vscale x 8 x i16> %z4)
-  ret <vscale x 32 x i16> %tuple
-}
-
-define void @call_svint16x4_t(<vscale x 8 x i16> %dummy_z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %dummy_z3, <vscale x 8 x i16> %z4, <vscale x 8 x i16> %z5) #0 {
-; CHECK-LABEL: call_svint16x4_t
-; CHECK:      mov z3.d, z5.d
-; CHECK-NEXT: mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: mov z2.d, z4.d
-; CHECK-NEXT: bl callee_svint16x4_t
-  %tuple = tail call <vscale x 32 x i16> @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16(<vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z4, <vscale x 8 x i16> %z5)
-  call void @callee_svint16x4_t(<vscale x 32 x i16> %tuple)
-  ret void
-}
-
-;
-; svint32x4_t
-;
-
-define <vscale x 16 x i32> @ret_svint32x4_t(<vscale x 4 x i32> %unused_z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4) #0 {
-; CHECK-LABEL: ret_svint32x4_t
-; CHECK:      mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: mov z2.d, z3.d
-; CHECK-NEXT: mov z3.d, z4.d
-; CHECK-NEXT: ret
-  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4)
-  ret <vscale x 16 x i32> %tuple
-}
-
-define void @call_svint32x4_t(<vscale x 4 x i32> %dummy_z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %dummy_z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) #0 {
-; CHECK-LABEL: call_svint32x4_t
-; CHECK:      mov z3.d, z5.d
-; CHECK-NEXT: mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: mov z2.d, z4.d
-; CHECK-NEXT: bl callee_svint32x4_t
-  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5)
-  call void @callee_svint32x4_t(<vscale x 16 x i32> %tuple)
-  ret void
-}
-
-;
-; svint64x4_t
-;
-
-define <vscale x 8 x i64> @ret_svint64x4_t(<vscale x 2 x i64> %unused_z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3, <vscale x 2 x i64> %z4) #0 {
-; CHECK-LABEL: ret_svint64x4_t
-; CHECK:      mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: mov z2.d, z3.d
-; CHECK-NEXT: mov z3.d, z4.d
-; CHECK-NEXT: ret
-  %tuple = tail call <vscale x 8 x i64> @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(<vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3, <vscale x 2 x i64> %z4)
-  ret <vscale x 8 x i64> %tuple
-}
-
-define void @call_svint64x4_t(<vscale x 2 x i64> %dummy_z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %dummy_z3, <vscale x 2 x i64> %z4, <vscale x 2 x i64> %z5) #0 {
-; CHECK-LABEL: call_svint64x4_t
-; CHECK:      mov z3.d, z5.d
-; CHECK-NEXT: mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: mov z2.d, z4.d
-; CHECK-NEXT: bl callee_svint64x4_t
-  %tuple = tail call <vscale x 8 x i64> @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(<vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z4, <vscale x 2 x i64> %z5)
-  call void @callee_svint64x4_t(<vscale x 8 x i64> %tuple)
-  ret void
-}
-
-;
-; svfloatx4_t
-;
-
-define <vscale x 16 x float> @ret_svfloatx4_t(<vscale x 4 x float> %unused_z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3, <vscale x 4 x float> %z4) #0 {
-; CHECK-LABEL: ret_svfloatx4_t
-; CHECK:      mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: mov z2.d, z3.d
-; CHECK-NEXT: mov z3.d, z4.d
-; CHECK-NEXT: ret
-  %tuple = tail call <vscale x 16 x float> @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(<vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3, <vscale x 4 x float> %z4)
-  ret <vscale x 16 x float> %tuple
-}
-
-define void @call_svfloatx4_t(<vscale x 4 x float> %dummy_z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %dummy_z3, <vscale x 4 x float> %z4, <vscale x 4 x float> %z5) #0 {
-; CHECK-LABEL: call_svfloatx4_t
-; CHECK:      mov z3.d, z5.d
-; CHECK-NEXT: mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: mov z2.d, z4.d
-; CHECK-NEXT: bl callee_svfloatx4_t
-  %tuple = tail call <vscale x 16 x float> @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(<vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z4, <vscale x 4 x float> %z5)
-  call void @callee_svfloatx4_t(<vscale x 16 x float> %tuple)
-  ret void
-}
-
-;
-; svdoublex4_t
-;
-
-define <vscale x 8 x double> @ret_svdoublex4_t(<vscale x 2 x double> %unused_z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z3, <vscale x 2 x double> %z4) #0 {
-; CHECK-LABEL: ret_svdoublex4_t
-; CHECK:      mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: mov z2.d, z3.d
-; CHECK-NEXT: mov z3.d, z4.d
-; CHECK-NEXT: ret
-  %tuple = tail call <vscale x 8 x double> @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64(<vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z3, <vscale x 2 x double> %z4)
-  ret <vscale x 8 x double> %tuple
-}
-
-define void @call_svdoublex4_t(<vscale x 2 x double> %dummy_z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %dummy_z3, <vscale x 2 x double> %z4, <vscale x 2 x double> %z5) #0 {
-; CHECK-LABEL: call_svdoublex4_t
-; CHECK:      mov z3.d, z5.d
-; CHECK-NEXT: mov z0.d, z1.d
-; CHECK-NEXT: mov z1.d, z2.d
-; CHECK-NEXT: mov z2.d, z4.d
-; CHECK-NEXT: bl callee_svdoublex4_t
-  %tuple = tail call <vscale x 8 x double> @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64(<vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z4, <vscale x 2 x double> %z5)
-  call void @callee_svdoublex4_t(<vscale x 8 x double> %tuple)
-  ret void
-}
-
-attributes #0 = { nounwind "target-features"="+sve" }
-
-declare void @callee_svint8x2_t(<vscale x 32 x i8>)
-declare void @callee_svint16x2_t(<vscale x 16 x i16>)
-declare void @callee_svint32x2_t(<vscale x 8 x i32>)
-declare void @callee_svint64x2_t(<vscale x 4 x i64>)
-declare void @callee_svfloatx2_t(<vscale x 8 x float>)
-declare void @callee_svdoublex2_t(<vscale x 4 x double>)
-
-declare void @callee_svint8x3_t(<vscale x 48 x i8>)
-declare void @callee_svint16x3_t(<vscale x 24 x i16>)
-declare void @callee_svint32x3_t(<vscale x 12 x i32>)
-declare void @callee_svint64x3_t(<vscale x 6 x i64>)
-declare void @callee_svfloatx3_t(<vscale x 12 x float>)
-declare void @callee_svdoublex3_t(<vscale x 6 x double>)
-
-declare void @callee_svint8x4_t(<vscale x 64 x i8>)
-declare void @callee_svint16x4_t(<vscale x 32 x i16>)
-declare void @callee_svint32x4_t(<vscale x 16 x i32>)
-declare void @callee_svint64x4_t(<vscale x 8 x i64>)
-declare void @callee_svfloatx4_t(<vscale x 16 x float>)
-declare void @callee_svdoublex4_t(<vscale x 8 x double>)
-
-
-; x2
-declare <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
-declare <vscale x 16 x i16> @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 4 x i64> @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
-declare <vscale x 8 x float> @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
-declare <vscale x 4 x double> @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
-
-; x3
-declare <vscale x 48 x i8> @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare <vscale x 24 x i16> @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 6 x i64> @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-declare <vscale x 12 x float> @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
-declare <vscale x 6 x double> @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
-
-; x4
-declare <vscale x 64 x i8> @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare <vscale x 32 x i16> @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 8 x i64> @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-declare <vscale x 16 x float> @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
-declare <vscale x 8 x double> @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)

diff --git a/llvm/test/CodeGen/AArch64/sve-extract-subvector.ll b/llvm/test/CodeGen/AArch64/sve-extract-subvector.ll
deleted file mode 100644
index d3ca9cb731b31..0000000000000
--- a/llvm/test/CodeGen/AArch64/sve-extract-subvector.ll
+++ /dev/null
@@ -1,78 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
-
-; Test that DAGCombiner doesn't drop the scalable flag when it tries to fold:
-;   extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
-define <vscale x 16 x i8> @extract_nxv16i8_nxv4i64(<vscale x 4 x i64> %z0_z1) {
-; CHECK-LABEL: extract_nxv16i8_nxv4i64:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z0.d, z1.d
-; CHECK-NEXT:    ret
-  %z0_z1_bc = bitcast <vscale x 4 x i64> %z0_z1 to <vscale x 32 x i8>
-  %ext = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv32i8(<vscale x 32 x i8> %z0_z1_bc, i32 1)
-  ret <vscale x 16 x i8> %ext
-}
-
-
-define <vscale x 2 x i64> @extract_nxv2i64_nxv32i8(<vscale x 32 x i8> %z0_z1) {
-; CHECK-LABEL: extract_nxv2i64_nxv32i8:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z0.d, z1.d
-; CHECK-NEXT:    ret
-  %z0_z1_bc = bitcast <vscale x 32 x i8> %z0_z1 to <vscale x 4 x i64>
-  %ext = call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv4i64(<vscale x 4 x i64> %z0_z1_bc, i32 1)
-  ret <vscale x 2 x i64> %ext
-}
-
-define <vscale x 4 x half> @extract_lo_nxv4f16_nxv8f16(<vscale x 8 x half> %z0) {
-; CHECK-LABEL: extract_lo_nxv4f16_nxv8f16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpklo z0.s, z0.h
-; CHECK-NEXT:    ret
-  %ext = call <vscale x 4 x half> @llvm.aarch64.sve.tuple.get.nxv8f16(<vscale x 8 x half> %z0, i32 0)
-  ret <vscale x 4 x half> %ext
-}
-
-define <vscale x 4 x half> @extract_hi_nxv4f16_nxv8f16(<vscale x 8 x half> %z0) {
-; CHECK-LABEL: extract_hi_nxv4f16_nxv8f16:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpkhi z0.s, z0.h
-; CHECK-NEXT:    ret
-  %ext = call <vscale x 4 x half> @llvm.aarch64.sve.tuple.get.nxv8f16(<vscale x 8 x half> %z0, i32 1)
-  ret <vscale x 4 x half> %ext
-}
-
-define <vscale x 2 x float> @extract_lo_nxv2f32_nxv4f32(<vscale x 4 x float> %z0) {
-; CHECK-LABEL: extract_lo_nxv2f32_nxv4f32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpklo z0.d, z0.s
-; CHECK-NEXT:    ret
-  %ext = call <vscale x 2 x float> @llvm.aarch64.sve.tuple.get.nxv4f32(<vscale x 4 x float> %z0, i32 0)
-  ret <vscale x 2 x float> %ext
-}
-
-define <vscale x 2 x float> @extract_hi_nxv2f32_nxv4f32(<vscale x 4 x float> %z0) {
-; CHECK-LABEL: extract_hi_nxv2f32_nxv4f32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    uunpkhi z0.d, z0.s
-; CHECK-NEXT:    ret
-  %ext = call <vscale x 2 x float> @llvm.aarch64.sve.tuple.get.nxv4f32(<vscale x 4 x float> %z0, i32 1)
-  ret <vscale x 2 x float> %ext
-}
-
-define <vscale x 4 x float> @load_extract_nxv4f32_nxv8f32(<vscale x 8 x float>* %p) {
-; CHECK-LABEL: load_extract_nxv4f32_nxv8f32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, #1, mul vl]
-; CHECK-NEXT:    ret
-  %tmp1 = load <vscale x 8 x float>, <vscale x 8 x float>* %p, align 16
-  %tmp2 = call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv8f32(<vscale x 8 x float> %tmp1, i32 1)
-  ret <vscale x 4 x float> %tmp2
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv4i64(<vscale x 4 x i64>, i32)
-declare <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv32i8(<vscale x 32 x i8>, i32)
-declare <vscale x 2 x float> @llvm.aarch64.sve.tuple.get.nxv4f32(<vscale x 4 x float>, i32)
-declare <vscale x 4 x half> @llvm.aarch64.sve.tuple.get.nxv8f16(<vscale x 8 x half>, i32)
-declare <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv8f32(<vscale x 8 x float>, i32)

diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll
deleted file mode 100644
index 549cbd49cd4c5..0000000000000
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-create-tuple.ll
+++ /dev/null
@@ -1,1002 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple aarch64 -mattr=+sve < %s | FileCheck %s
-; RUN: llc -mtriple aarch64 -mattr=+sme < %s | FileCheck %s
-
-;
-; SVCREATE2 (i8)
-;
-
-define <vscale x 16 x i8> @test_svcreate2_s8_vec0(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1) #0 {
-; CHECK-LABEL: test_svcreate2_s8_vec0:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB0_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    // implicit-def: $z0
-; CHECK-NEXT:  .LBB0_2: // %L2
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(<vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 16 x i8> undef
-L2:
-  %extract = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv32i8(<vscale x 32 x i8> %tuple, i32 0)
-  ret <vscale x 16 x i8> %extract
-}
-
-define <vscale x 16 x i8> @test_svcreate2_s8_vec1(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1) #0 {
-; CHECK-LABEL: test_svcreate2_s8_vec1:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB1_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB1_2: // %L2
-; CHECK-NEXT:    mov z0.d, z1.d
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 32 x i8> @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(<vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 16 x i8> undef
-L2:
-  %extract = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv32i8(<vscale x 32 x i8> %tuple, i32 1)
-  ret <vscale x 16 x i8> %extract
-}
-
-;
-; SVCREATE2 (i16)
-;
-
-define <vscale x 8 x i16> @test_svcreate2_s16_vec0(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1) #0 {
-; CHECK-LABEL: test_svcreate2_s16_vec0:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB2_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    // implicit-def: $z0
-; CHECK-NEXT:  .LBB2_2: // %L2
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 16 x i16> @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(<vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 8 x i16> undef
-L2:
-  %extract = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv16i16(<vscale x 16 x i16> %tuple, i32 0)
-  ret <vscale x 8 x i16> %extract
-}
-
-define <vscale x 8 x i16> @test_svcreate2_s16_vec1(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1) #0 {
-; CHECK-LABEL: test_svcreate2_s16_vec1:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB3_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB3_2: // %L2
-; CHECK-NEXT:    mov z0.d, z1.d
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 16 x i16> @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(<vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 8 x i16> undef
-L2:
-  %extract = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv16i16(<vscale x 16 x i16> %tuple, i32 1)
-  ret <vscale x 8 x i16> %extract
-}
-
-;
-; SVCREATE2 (half)
-;
-
-define <vscale x 8 x half> @test_svcreate2_f16_vec0(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1) #0 {
-; CHECK-LABEL: test_svcreate2_f16_vec0:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB4_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    // implicit-def: $z0
-; CHECK-NEXT:  .LBB4_2: // %L2
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 16 x half> @llvm.aarch64.sve.tuple.create2.nxv16f16.nxv8f16(<vscale x 8 x half> %z0, <vscale x 8 x half> %z1)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 8 x half> undef
-L2:
-  %extract = tail call <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv16f16(<vscale x 16 x half> %tuple, i32 0)
-  ret <vscale x 8 x half> %extract
-}
-
-define <vscale x 8 x half> @test_svcreate2_f16_vec1(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1) #0 {
-; CHECK-LABEL: test_svcreate2_f16_vec1:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB5_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB5_2: // %L2
-; CHECK-NEXT:    mov z0.d, z1.d
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 16 x half> @llvm.aarch64.sve.tuple.create2.nxv16f16.nxv8f16(<vscale x 8 x half> %z0, <vscale x 8 x half> %z1)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 8 x half> undef
-L2:
-  %extract = tail call <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv16f16(<vscale x 16 x half> %tuple, i32 1)
-  ret <vscale x 8 x half> %extract
-}
-
-;
-; SVCREATE2 (bfloat)
-;
-
-define <vscale x 8 x bfloat> @test_svcreate2_bf16_vec0(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1) #1 {
-; CHECK-LABEL: test_svcreate2_bf16_vec0:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB6_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    // implicit-def: $z0
-; CHECK-NEXT:  .LBB6_2: // %L2
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 16 x bfloat> @llvm.aarch64.sve.tuple.create2.nxv16bf16.nxv8bf16(<vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 8 x bfloat> undef
-L2:
-  %extract = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> %tuple, i32 0)
-  ret <vscale x 8 x bfloat> %extract
-}
-
-define <vscale x 8 x bfloat> @test_svcreate2_bf16_vec1(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1) #1 {
-; CHECK-LABEL: test_svcreate2_bf16_vec1:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB7_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB7_2: // %L2
-; CHECK-NEXT:    mov z0.d, z1.d
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 16 x bfloat> @llvm.aarch64.sve.tuple.create2.nxv16bf16.nxv8bf16(<vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 8 x bfloat> undef
-L2:
-  %extract = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> %tuple, i32 1)
-  ret <vscale x 8 x bfloat> %extract
-}
-
-;
-; SVCREATE2 (i32)
-;
-
-define <vscale x 4 x i32> @test_svcreate2_s32_vec0(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1) #0 {
-; CHECK-LABEL: test_svcreate2_s32_vec0:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB8_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    // implicit-def: $z0
-; CHECK-NEXT:  .LBB8_2: // %L2
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 4 x i32> undef
-L2:
-  %extract = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv8i32(<vscale x 8 x i32> %tuple, i32 0)
-  ret <vscale x 4 x i32> %extract
-}
-
-define <vscale x 4 x i32> @test_svcreate2_s32_vec1(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1) #0 {
-; CHECK-LABEL: test_svcreate2_s32_vec1:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB9_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB9_2: // %L2
-; CHECK-NEXT:    mov z0.d, z1.d
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 4 x i32> undef
-L2:
-  %extract = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv8i32(<vscale x 8 x i32> %tuple, i32 1)
-  ret <vscale x 4 x i32> %extract
-}
-
-;
-; SVCREATE2 (float)
-;
-
-define <vscale x 4 x float> @test_svcreate2_f32_vec0(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1) #0 {
-; CHECK-LABEL: test_svcreate2_f32_vec0:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB10_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    // implicit-def: $z0
-; CHECK-NEXT:  .LBB10_2: // %L2
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 8 x float> @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32(<vscale x 4 x float> %z0, <vscale x 4 x float> %z1)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 4 x float> undef
-L2:
-  %extract = tail call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv8f32(<vscale x 8 x float> %tuple, i32 0)
-  ret <vscale x 4 x float> %extract
-}
-
-define <vscale x 4 x float> @test_svcreate2_f32_vec1(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1) #0 {
-; CHECK-LABEL: test_svcreate2_f32_vec1:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB11_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB11_2: // %L2
-; CHECK-NEXT:    mov z0.d, z1.d
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 8 x float> @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32(<vscale x 4 x float> %z0, <vscale x 4 x float> %z1)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 4 x float> undef
-L2:
-  %extract = tail call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv8f32(<vscale x 8 x float> %tuple, i32 1)
-  ret <vscale x 4 x float> %extract
-}
-
-;
-; SVCREATE2 (i64)
-;
-
-define <vscale x 2 x i64> @test_svcreate2_s64_vec0(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1) #0 {
-; CHECK-LABEL: test_svcreate2_s64_vec0:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB12_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    // implicit-def: $z0
-; CHECK-NEXT:  .LBB12_2: // %L2
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 4 x i64> @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64(<vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 2 x i64> undef
-L2:
-  %extract = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv4i64(<vscale x 4 x i64> %tuple, i32 0)
-  ret <vscale x 2 x i64> %extract
-}
-
-define <vscale x 2 x i64> @test_svcreate2_s64_vec1(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1) #0 {
-; CHECK-LABEL: test_svcreate2_s64_vec1:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB13_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB13_2: // %L2
-; CHECK-NEXT:    mov z0.d, z1.d
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 4 x i64> @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64(<vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 2 x i64> undef
-L2:
-  %extract = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv4i64(<vscale x 4 x i64> %tuple, i32 1)
-  ret <vscale x 2 x i64> %extract
-}
-
-;
-; SVCREATE2 (double)
-;
-
-define <vscale x 2 x double> @test_svcreate2_f64_vec0(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1) #0 {
-; CHECK-LABEL: test_svcreate2_f64_vec0:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB14_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    // implicit-def: $z0
-; CHECK-NEXT:  .LBB14_2: // %L2
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 4 x double> @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64(<vscale x 2 x double> %z0, <vscale x 2 x double> %z1)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 2 x double> undef
-L2:
-  %extract = tail call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv4f64(<vscale x 4 x double> %tuple, i32 0)
-  ret <vscale x 2 x double> %extract
-}
-
-define <vscale x 2 x double> @test_svcreate2_f64_vec1(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1) #0 {
-; CHECK-LABEL: test_svcreate2_f64_vec1:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB15_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB15_2: // %L2
-; CHECK-NEXT:    mov z0.d, z1.d
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 4 x double> @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64(<vscale x 2 x double> %z0, <vscale x 2 x double> %z1)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 2 x double> undef
-L2:
-  %extract = tail call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv4f64(<vscale x 4 x double> %tuple, i32 1)
-  ret <vscale x 2 x double> %extract
-}
-
-;
-; SVCREATE3 (i8)
-;
-
-define <vscale x 16 x i8> @test_svcreate3_s8_vec0(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2) #0 {
-; CHECK-LABEL: test_svcreate3_s8_vec0:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB16_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    // implicit-def: $z0
-; CHECK-NEXT:  .LBB16_2: // %L2
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 48 x i8> @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8(<vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 16 x i8> undef
-L2:
-  %extract = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8(<vscale x 48 x i8> %tuple, i32 0)
-  ret <vscale x 16 x i8> %extract
-}
-
-define <vscale x 16 x i8> @test_svcreate3_s8_vec2(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2) #0 {
-; CHECK-LABEL: test_svcreate3_s8_vec2:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB17_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB17_2: // %L2
-; CHECK-NEXT:    mov z0.d, z2.d
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 48 x i8> @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8(<vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 16 x i8> undef
-L2:
-  %extract = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8(<vscale x 48 x i8> %tuple, i32 2)
-  ret <vscale x 16 x i8> %extract
-}
-
-;
-; SVCREATE3 (i16)
-;
-
-define <vscale x 8 x i16> @test_svcreate3_s16_vec0(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2) #0 {
-; CHECK-LABEL: test_svcreate3_s16_vec0:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB18_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    // implicit-def: $z0
-; CHECK-NEXT:  .LBB18_2: // %L2
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 24 x i16> @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(<vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 8 x i16> undef
-L2:
-  %extract = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16(<vscale x 24 x i16> %tuple, i32 0)
-  ret <vscale x 8 x i16> %extract
-}
-
-define <vscale x 8 x i16> @test_svcreate3_s16_vec2(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2) #0 {
-; CHECK-LABEL: test_svcreate3_s16_vec2:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB19_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB19_2: // %L2
-; CHECK-NEXT:    mov z0.d, z2.d
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 24 x i16> @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(<vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 8 x i16> undef
-L2:
-  %extract = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16(<vscale x 24 x i16> %tuple, i32 2)
-  ret <vscale x 8 x i16> %extract
-}
-
-;
-; SVCREATE3 (half)
-;
-
-define <vscale x 8 x half> @test_svcreate3_f16_vec0(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2) #0 {
-; CHECK-LABEL: test_svcreate3_f16_vec0:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB20_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    // implicit-def: $z0
-; CHECK-NEXT:  .LBB20_2: // %L2
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 24 x half> @llvm.aarch64.sve.tuple.create3.nxv24f16.nxv8f16(<vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 8 x half> undef
-L2:
-  %extract = tail call <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv24f16(<vscale x 24 x half> %tuple, i32 0)
-  ret <vscale x 8 x half> %extract
-}
-
-define <vscale x 8 x half> @test_svcreate3_f16_vec2(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2) #0 {
-; CHECK-LABEL: test_svcreate3_f16_vec2:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB21_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB21_2: // %L2
-; CHECK-NEXT:    mov z0.d, z2.d
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 24 x half> @llvm.aarch64.sve.tuple.create3.nxv24f16.nxv8f16(<vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 8 x half> undef
-L2:
-  %extract = tail call <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv24f16(<vscale x 24 x half> %tuple, i32 2)
-  ret <vscale x 8 x half> %extract
-}
-
-;
-; SVCREATE3 (bfloat)
-;
-
-define <vscale x 8 x bfloat> @test_svcreate3_bf16_vec0(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2) #1 {
-; CHECK-LABEL: test_svcreate3_bf16_vec0:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB22_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    // implicit-def: $z0
-; CHECK-NEXT:  .LBB22_2: // %L2
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 24 x bfloat> @llvm.aarch64.sve.tuple.create3.nxv24bf16.nxv8bf16(<vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 8 x bfloat> undef
-L2:
-  %extract = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv24bf16(<vscale x 24 x bfloat> %tuple, i32 0)
-  ret <vscale x 8 x bfloat> %extract
-}
-
-define <vscale x 8 x bfloat> @test_svcreate3_bf16_vec2(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2) #1 {
-; CHECK-LABEL: test_svcreate3_bf16_vec2:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB23_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB23_2: // %L2
-; CHECK-NEXT:    mov z0.d, z2.d
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 24 x bfloat> @llvm.aarch64.sve.tuple.create3.nxv24bf16.nxv8bf16(<vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 8 x bfloat> undef
-L2:
-  %extract = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv24bf16(<vscale x 24 x bfloat> %tuple, i32 2)
-  ret <vscale x 8 x bfloat> %extract
-}
-
-;
-; SVCREATE3 (i32)
-;
-
-define <vscale x 4 x i32> @test_svcreate3_s32_vec0(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2) #0 {
-; CHECK-LABEL: test_svcreate3_s32_vec0:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB24_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    // implicit-def: $z0
-; CHECK-NEXT:  .LBB24_2: // %L2
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 4 x i32> undef
-L2:
-  %extract = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32(<vscale x 12 x i32> %tuple, i32 0)
-  ret <vscale x 4 x i32> %extract
-}
-
-define <vscale x 4 x i32> @test_svcreate3_s32_vec2(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2) #0 {
-; CHECK-LABEL: test_svcreate3_s32_vec2:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB25_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB25_2: // %L2
-; CHECK-NEXT:    mov z0.d, z2.d
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 4 x i32> undef
-L2:
-  %extract = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32(<vscale x 12 x i32> %tuple, i32 2)
-  ret <vscale x 4 x i32> %extract
-}
-
-;
-; SVCREATE3 (float)
-;
-
-define <vscale x 4 x float> @test_svcreate3_f32_vec0(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2) #0 {
-; CHECK-LABEL: test_svcreate3_f32_vec0:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB26_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    // implicit-def: $z0
-; CHECK-NEXT:  .LBB26_2: // %L2
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 12 x float> @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(<vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 4 x float> undef
-L2:
-  %extract = tail call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv12f32(<vscale x 12 x float> %tuple, i32 0)
-  ret <vscale x 4 x float> %extract
-}
-
-define <vscale x 4 x float> @test_svcreate3_f32_vec2(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2) #0 {
-; CHECK-LABEL: test_svcreate3_f32_vec2:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB27_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB27_2: // %L2
-; CHECK-NEXT:    mov z0.d, z2.d
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 12 x float> @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(<vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 4 x float> undef
-L2:
-  %extract = tail call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv12f32(<vscale x 12 x float> %tuple, i32 2)
-  ret <vscale x 4 x float> %extract
-}
-
-;
-; SVCREATE3 (i64)
-;
-
-define <vscale x 2 x i64> @test_svcreate3_s64_vec0(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2) #0 {
-; CHECK-LABEL: test_svcreate3_s64_vec0:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB28_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    // implicit-def: $z0
-; CHECK-NEXT:  .LBB28_2: // %L2
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 6 x i64> @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(<vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 2 x i64> undef
-L2:
-  %extract = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64(<vscale x 6 x i64> %tuple, i32 0)
-  ret <vscale x 2 x i64> %extract
-}
-
-define <vscale x 2 x i64> @test_svcreate3_s64_vec2(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2) #0 {
-; CHECK-LABEL: test_svcreate3_s64_vec2:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB29_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB29_2: // %L2
-; CHECK-NEXT:    mov z0.d, z2.d
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 6 x i64> @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(<vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 2 x i64> undef
-L2:
-  %extract = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64(<vscale x 6 x i64> %tuple, i32 2)
-  ret <vscale x 2 x i64> %extract
-}
-
-;
-; SVCREATE3 (double)
-;
-
-define <vscale x 2 x double> @test_svcreate3_f64_vec0(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2) #0 {
-; CHECK-LABEL: test_svcreate3_f64_vec0:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB30_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    // implicit-def: $z0
-; CHECK-NEXT:  .LBB30_2: // %L2
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 6 x double> @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64(<vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 2 x double> undef
-L2:
-  %extract = tail call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv6f64(<vscale x 6 x double> %tuple, i32 0)
-  ret <vscale x 2 x double> %extract
-}
-
-define <vscale x 2 x double> @test_svcreate3_f64_vec2(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2) #0 {
-; CHECK-LABEL: test_svcreate3_f64_vec2:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB31_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB31_2: // %L2
-; CHECK-NEXT:    mov z0.d, z2.d
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 6 x double> @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64(<vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 2 x double> undef
-L2:
-  %extract = tail call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv6f64(<vscale x 6 x double> %tuple, i32 2)
-  ret <vscale x 2 x double> %extract
-}
-
-;
-; SVCREATE4 (i8)
-;
-
-define <vscale x 16 x i8> @test_svcreate4_s8_vec0(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3) #0 {
-; CHECK-LABEL: test_svcreate4_s8_vec0:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB32_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    // implicit-def: $z0
-; CHECK-NEXT:  .LBB32_2: // %L2
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 64 x i8> @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8(<vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 16 x i8> undef
-L2:
-  %extract = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %tuple, i32 0)
-  ret <vscale x 16 x i8> %extract
-}
-
-define <vscale x 16 x i8> @test_svcreate4_s8_vec3(i1 %p, <vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3) #0 {
-; CHECK-LABEL: test_svcreate4_s8_vec3:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB33_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB33_2: // %L2
-; CHECK-NEXT:    mov z0.d, z3.d
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 64 x i8> @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8(<vscale x 16 x i8> %z0, <vscale x 16 x i8> %z1, <vscale x 16 x i8> %z2, <vscale x 16 x i8> %z3)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 16 x i8> undef
-L2:
-  %extract = tail call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %tuple, i32 3)
-  ret <vscale x 16 x i8> %extract
-}
-
-;
-; SVCREATE4 (i16)
-;
-
-define <vscale x 8 x i16> @test_svcreate4_s16_vec0(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3) #0 {
-; CHECK-LABEL: test_svcreate4_s16_vec0:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB34_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    // implicit-def: $z0
-; CHECK-NEXT:  .LBB34_2: // %L2
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 32 x i16> @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16(<vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 8 x i16> undef
-L2:
-  %extract = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(<vscale x 32 x i16> %tuple, i32 0)
-  ret <vscale x 8 x i16> %extract
-}
-
-define <vscale x 8 x i16> @test_svcreate4_s16_vec3(i1 %p, <vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3) #0 {
-; CHECK-LABEL: test_svcreate4_s16_vec3:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB35_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB35_2: // %L2
-; CHECK-NEXT:    mov z0.d, z3.d
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 32 x i16> @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16(<vscale x 8 x i16> %z0, <vscale x 8 x i16> %z1, <vscale x 8 x i16> %z2, <vscale x 8 x i16> %z3)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 8 x i16> undef
-L2:
-  %extract = tail call <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(<vscale x 32 x i16> %tuple, i32 3)
-  ret <vscale x 8 x i16> %extract
-}
-
-;
-; SVCREATE4 (half)
-;
-
-define <vscale x 8 x half> @test_svcreate4_f16_vec0(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2, <vscale x 8 x half> %z3) #0 {
-; CHECK-LABEL: test_svcreate4_f16_vec0:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB36_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    // implicit-def: $z0
-; CHECK-NEXT:  .LBB36_2: // %L2
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 32 x half> @llvm.aarch64.sve.tuple.create4.nxv32f16.nxv8f16(<vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2, <vscale x 8 x half> %z3)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 8 x half> undef
-L2:
-  %extract = tail call <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16(<vscale x 32 x half> %tuple, i32 0)
-  ret <vscale x 8 x half> %extract
-}
-
-define <vscale x 8 x half> @test_svcreate4_f16_vec3(i1 %p, <vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2, <vscale x 8 x half> %z3) #0 {
-; CHECK-LABEL: test_svcreate4_f16_vec3:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB37_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB37_2: // %L2
-; CHECK-NEXT:    mov z0.d, z3.d
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 32 x half> @llvm.aarch64.sve.tuple.create4.nxv32f16.nxv8f16(<vscale x 8 x half> %z0, <vscale x 8 x half> %z1, <vscale x 8 x half> %z2, <vscale x 8 x half> %z3)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 8 x half> undef
-L2:
-  %extract = tail call <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16(<vscale x 32 x half> %tuple, i32 3)
-  ret <vscale x 8 x half> %extract
-}
-
-;
-; SVCREATE4 (bfloat)
-;
-
-define <vscale x 8 x bfloat> @test_svcreate4_bf16_vec0(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2, <vscale x 8 x bfloat> %z3) #1 {
-; CHECK-LABEL: test_svcreate4_bf16_vec0:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB38_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    // implicit-def: $z0
-; CHECK-NEXT:  .LBB38_2: // %L2
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 32 x bfloat> @llvm.aarch64.sve.tuple.create4.nxv32bf16.nxv8bf16(<vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2, <vscale x 8 x bfloat> %z3)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 8 x bfloat> undef
-L2:
-  %extract = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> %tuple, i32 0)
-  ret <vscale x 8 x bfloat> %extract
-}
-
-define <vscale x 8 x bfloat> @test_svcreate4_bf16_vec3(i1 %p, <vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2, <vscale x 8 x bfloat> %z3) #1 {
-; CHECK-LABEL: test_svcreate4_bf16_vec3:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB39_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB39_2: // %L2
-; CHECK-NEXT:    mov z0.d, z3.d
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 32 x bfloat> @llvm.aarch64.sve.tuple.create4.nxv32bf16.nxv8bf16(<vscale x 8 x bfloat> %z0, <vscale x 8 x bfloat> %z1, <vscale x 8 x bfloat> %z2, <vscale x 8 x bfloat> %z3)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 8 x bfloat> undef
-L2:
-  %extract = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat> %tuple, i32 3)
-  ret <vscale x 8 x bfloat> %extract
-}
-
-;
-; SVCREATE4 (i32)
-;
-
-define <vscale x 4 x i32> @test_svcreate4_s32_vec0(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3) #0 {
-; CHECK-LABEL: test_svcreate4_s32_vec0:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB40_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    // implicit-def: $z0
-; CHECK-NEXT:  .LBB40_2: // %L2
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 4 x i32> undef
-L2:
-  %extract = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(<vscale x 16 x i32> %tuple, i32 0)
-  ret <vscale x 4 x i32> %extract
-}
-
-define <vscale x 4 x i32> @test_svcreate4_s32_vec3(i1 %p, <vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3) #0 {
-; CHECK-LABEL: test_svcreate4_s32_vec3:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB41_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB41_2: // %L2
-; CHECK-NEXT:    mov z0.d, z3.d
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 4 x i32> undef
-L2:
-  %extract = tail call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(<vscale x 16 x i32> %tuple, i32 3)
-  ret <vscale x 4 x i32> %extract
-}
-
-;
-; SVCREATE4 (float)
-;
-
-define <vscale x 4 x float> @test_svcreate4_f32_vec0(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3) #0 {
-; CHECK-LABEL: test_svcreate4_f32_vec0:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB42_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    // implicit-def: $z0
-; CHECK-NEXT:  .LBB42_2: // %L2
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 16 x float> @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(<vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 4 x float> undef
-L2:
-  %extract = tail call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32(<vscale x 16 x float> %tuple, i32 0)
-  ret <vscale x 4 x float> %extract
-}
-
-define <vscale x 4 x float> @test_svcreate4_f32_vec3(i1 %p, <vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3) #0 {
-; CHECK-LABEL: test_svcreate4_f32_vec3:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB43_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB43_2: // %L2
-; CHECK-NEXT:    mov z0.d, z3.d
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 16 x float> @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(<vscale x 4 x float> %z0, <vscale x 4 x float> %z1, <vscale x 4 x float> %z2, <vscale x 4 x float> %z3)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 4 x float> undef
-L2:
-  %extract = tail call <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32(<vscale x 16 x float> %tuple, i32 3)
-  ret <vscale x 4 x float> %extract
-}
-
-;
-; SVCREATE4 (i64)
-;
-
-define <vscale x 2 x i64> @test_svcreate4_s64_vec0(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3) #0 {
-; CHECK-LABEL: test_svcreate4_s64_vec0:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB44_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    // implicit-def: $z0
-; CHECK-NEXT:  .LBB44_2: // %L2
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 8 x i64> @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(<vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 2 x i64> undef
-L2:
-  %extract = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(<vscale x 8 x i64> %tuple, i32 0)
-  ret <vscale x 2 x i64> %extract
-}
-
-define <vscale x 2 x i64> @test_svcreate4_s64_vec3(i1 %p, <vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3) #0 {
-; CHECK-LABEL: test_svcreate4_s64_vec3:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB45_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB45_2: // %L2
-; CHECK-NEXT:    mov z0.d, z3.d
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 8 x i64> @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(<vscale x 2 x i64> %z0, <vscale x 2 x i64> %z1, <vscale x 2 x i64> %z2, <vscale x 2 x i64> %z3)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 2 x i64> undef
-L2:
-  %extract = tail call <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(<vscale x 8 x i64> %tuple, i32 3)
-  ret <vscale x 2 x i64> %extract
-}
-
-;
-; SVCREATE4 (double)
-;
-
-define <vscale x 2 x double> @test_svcreate4_f64_vec0(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z3) #0 {
-; CHECK-LABEL: test_svcreate4_f64_vec0:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB46_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    // implicit-def: $z0
-; CHECK-NEXT:  .LBB46_2: // %L2
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 8 x double> @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64(<vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z3)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 2 x double> undef
-L2:
-  %extract = tail call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64(<vscale x 8 x double> %tuple, i32 0)
-  ret <vscale x 2 x double> %extract
-}
-
-define <vscale x 2 x double> @test_svcreate4_f64_vec3(i1 %p, <vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z3) #0 {
-; CHECK-LABEL: test_svcreate4_f64_vec3:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    tbz w0, #0, .LBB47_2
-; CHECK-NEXT:  // %bb.1: // %common.ret
-; CHECK-NEXT:    ret
-; CHECK-NEXT:  .LBB47_2: // %L2
-; CHECK-NEXT:    mov z0.d, z3.d
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 8 x double> @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64(<vscale x 2 x double> %z0, <vscale x 2 x double> %z1, <vscale x 2 x double> %z2, <vscale x 2 x double> %z3)
-  br i1 %p, label %L1, label %L2
-L1:
-  ret <vscale x 2 x double> undef
-L2:
-  %extract = tail call <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64(<vscale x 8 x double> %tuple, i32 3)
-  ret <vscale x 2 x double> %extract
-}
-
-attributes #0 = { nounwind }
-; +bf16 is required for the bfloat version.
-attributes #1 = { nounwind "target-features"="+bf16" }
-
-declare <vscale x 4 x double>  @llvm.aarch64.sve.tuple.create2.nxv4f64.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
-declare <vscale x 8 x float>  @llvm.aarch64.sve.tuple.create2.nxv8f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
-declare <vscale x 16 x half>  @llvm.aarch64.sve.tuple.create2.nxv16f16.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
-declare <vscale x 16 x bfloat>  @llvm.aarch64.sve.tuple.create2.nxv16bf16.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
-declare <vscale x 4 x i64>  @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
-declare <vscale x 8 x i32>  @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 16 x i16> @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 32 x i8>  @llvm.aarch64.sve.tuple.create2.nxv32i8.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
-
-declare <vscale x 6 x double>  @llvm.aarch64.sve.tuple.create3.nxv6f64.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
-declare <vscale x 12 x float> @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
-declare <vscale x 24 x half> @llvm.aarch64.sve.tuple.create3.nxv24f16.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
-declare <vscale x 24 x bfloat> @llvm.aarch64.sve.tuple.create3.nxv24bf16.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
-declare <vscale x 6 x i64>  @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-declare <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 24 x i16> @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 48 x i8>  @llvm.aarch64.sve.tuple.create3.nxv48i8.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-
-declare <vscale x 8 x double>  @llvm.aarch64.sve.tuple.create4.nxv8f64.nxv2f64 (<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
-declare <vscale x 16 x float>  @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
-declare <vscale x 32 x half>  @llvm.aarch64.sve.tuple.create4.nxv32f16.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
-declare <vscale x 32 x bfloat>  @llvm.aarch64.sve.tuple.create4.nxv32bf16.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
-declare <vscale x  8 x i64> @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-declare <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 32 x i16> @llvm.aarch64.sve.tuple.create4.nxv32i16.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 64 x i8>  @llvm.aarch64.sve.tuple.create4.nxv64i8.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv32i8(<vscale x 32 x i8>, i32 immarg)
-declare <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv48i8(<vscale x 48 x i8>, i32 immarg)
-declare <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8>, i32 immarg)
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv16i16(<vscale x 16 x i16>, i32 immarg)
-declare <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv24i16(<vscale x 24 x i16>, i32 immarg)
-declare <vscale x 8 x i16> @llvm.aarch64.sve.tuple.get.nxv8i16.nxv32i16(<vscale x 32 x i16>, i32 immarg)
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv8i32(<vscale x 8 x i32>, i32 immarg)
-declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv12i32(<vscale x 12 x i32>, i32 immarg)
-declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv4i32.nxv16i32(<vscale x 16 x i32>, i32 immarg)
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv4i64(<vscale x 4 x i64>, i32 immarg)
-declare <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv6i64(<vscale x 6 x i64>, i32 immarg)
-declare <vscale x 2 x i64> @llvm.aarch64.sve.tuple.get.nxv2i64.nxv8i64(<vscale x 8 x i64>, i32 immarg)
-
-declare <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat>, i32 immarg)
-declare <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv24bf16(<vscale x 24 x bfloat>, i32 immarg)
-declare <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv32bf16(<vscale x 32 x bfloat>, i32 immarg)
-
-declare <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv16f16(<vscale x 16 x half>, i32 immarg)
-declare <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv24f16(<vscale x 24 x half>, i32 immarg)
-declare <vscale x 8 x half> @llvm.aarch64.sve.tuple.get.nxv8f16.nxv32f16(<vscale x 32 x half>, i32 immarg)
-
-declare <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv8f32(<vscale x 8 x float>, i32 immarg)
-declare <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv12f32(<vscale x 12 x float>, i32 immarg)
-declare <vscale x 4 x float> @llvm.aarch64.sve.tuple.get.nxv4f32.nxv16f32(<vscale x 16 x float>, i32 immarg)
-
-declare <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv4f64(<vscale x 4 x double>, i32 immarg)
-declare <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv6f64(<vscale x 6 x double>, i32 immarg)
-declare <vscale x 2 x double> @llvm.aarch64.sve.tuple.get.nxv2f64.nxv8f64(<vscale x 8 x double>, i32 immarg)
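With the tuple.get/tuple.create intrinsics removed, the extract pattern exercised
by the tests above is expressible with the generic llvm.vector.extract intrinsic
instead. A minimal sketch (not code from this patch; the function name is
illustrative), assuming a two-element tuple of <vscale x 4 x i32> in which
element 1 occupies lanes 4..7 of the wide value:

; Return element 1 of a two-vector tuple held in a single <vscale x 8 x i32>.
define <vscale x 4 x i32> @sketch_get_elt1(<vscale x 8 x i32> %tuple) {
  ; The index is the starting lane of the subvector within the wide vector.
  %ext = call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> %tuple, i64 4)
  ret <vscale x 4 x i32> %ext
}

declare <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32>, i64 immarg)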

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-insert-extract-tuple.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-insert-extract-tuple.ll
deleted file mode 100644
index f576d6079e821..0000000000000
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-insert-extract-tuple.ll
+++ /dev/null
@@ -1,234 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple aarch64 -mattr=+sve < %s | FileCheck %s
-; RUN: llc -mtriple aarch64 -mattr=+sme < %s | FileCheck %s
-
-; All these tests create a vector tuple, insert z5 into one of the elements,
-; and finally extract that element from the wide vector to return it. These
-; checks ensure that z5 is always the value that is returned.
-
-;
-; Insert into two element tuples
-;
-
-; tuple:      { tuple2.res0, tuple2.res1 }
-; insert z5:  {     z5     , tuple2.res1 }
-; extract z5:       ^^
-define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
-; CHECK-LABEL: set_tuple2_nxv8i32_elt0:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z0.d, z5.d
-; CHECK-NEXT:    ret
-  %tuple = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
-  %ins = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32> %tuple, i32 0, <vscale x 4 x i32> %z5)
-  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %ins, i32 0)
-  ret <vscale x 4 x i32> %ext
-}
-
-; tuple:       { tuple2.res0, tuple2.res1 }
-; insert z5:   { tuple2.res0,     z5      }
-; extract z5:                     ^^
-define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt1(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
-; CHECK-LABEL: set_tuple2_nxv8i32_elt1:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z0.d, z5.d
-; CHECK-NEXT:    ret
-  %tuple = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
-  %ins = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
-  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %ins, i32 1)
-  ret <vscale x 4 x i32> %ext
-}
-
-; This test checks that the elements _not_ being set aren't changed.
-
-; tuple:       { tuple2.res0, tuple2.res1 }
-; insert z5:   { tuple2.res0,     z5      }
-; extract z0:         ^^
-define <vscale x 4 x i32> @set_tuple2_nxv8i32_elt1_ret_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
-; CHECK-LABEL: set_tuple2_nxv8i32_elt1_ret_elt0:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ret
-  %tuple = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1)
-  %ins = call <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
-  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %ins, i32 0)
-  ret <vscale x 4 x i32> %ext
-}
-
-; Test extract of tuple passed into function
-define <vscale x 4 x i32> @get_tuple2_nxv8i32_elt1(<vscale x 8 x i32> %tuple) {
-; CHECK-LABEL: get_tuple2_nxv8i32_elt1:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z0.d, z1.d
-; CHECK-NEXT:    ret
-  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32> %tuple, i32 1)
-  ret <vscale x 4 x i32> %ext
-}
-
-;
-; Insert into three element tuples
-;
-
-; tuple:       { tuple3.res0, tuple3.res1, tuple3.res2 }
-; insert z5:   {     z5     , tuple3.res1, tuple3.res2 }
-; extract z5:        ^^
-define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
-; CHECK-LABEL: set_tuple3_nxv12i32_elt0:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z0.d, z5.d
-; CHECK-NEXT:    ret
-  %tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
-  %ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 0, <vscale x 4 x i32> %z5)
-  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 0)
-  ret <vscale x 4 x i32> %ext
-}
-
-; tuple:       { tuple3.res0, tuple3.res1, tuple3.res2 }
-; insert z5:   { tuple3.res0,     z5     , tuple3.res2 }
-; extract z5:                     ^^
-define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt1(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
-; CHECK-LABEL: set_tuple3_nxv12i32_elt1:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z0.d, z5.d
-; CHECK-NEXT:    ret
-  %tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
-  %ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
-  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 1)
-  ret <vscale x 4 x i32> %ext
-}
-
-; tuple:       { tuple3.res0, tuple3.res1, tuple3.res2 }
-; insert z5:   { tuple3.res0, tuple3.res1,     z5      }
-; extract z5:                                  ^^
-define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
-; CHECK-LABEL: set_tuple3_nxv12i32_elt2:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z0.d, z5.d
-; CHECK-NEXT:    ret
-  %tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
-  %ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 2, <vscale x 4 x i32> %z5)
-  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 2)
-  ret <vscale x 4 x i32> %ext
-}
-
-; This test checks that the elements _not_ being set aren't changed.
-
-; tuple:       { tuple3.res0, tuple3.res1, tuple3.res2 }
-; insert z5:   { tuple3.res0,     z5     , tuple3.res2 }
-; extract z2:                                  ^^
-define <vscale x 4 x i32> @set_tuple3_nxv12i32_elt1_ret_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
-; CHECK-LABEL: set_tuple3_nxv12i32_elt1_ret_elt2:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z0.d, z2.d
-; CHECK-NEXT:    ret
-  %tuple = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2)
-  %ins = call <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
-  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %ins, i32 2)
-  ret <vscale x 4 x i32> %ext
-}
-
-; Test extract of tuple passed into function
-define <vscale x 4 x i32> @get_tuple3_nxv12i32_elt2(<vscale x 4 x i32> %z0, <vscale x 12 x i32> %tuple) {
-; CHECK-LABEL: get_tuple3_nxv12i32_elt2:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z0.d, z3.d
-; CHECK-NEXT:    ret
-  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32> %tuple, i32 2)
-  ret <vscale x 4 x i32> %ext
-}
-
-;
-; Insert into four element tuples
-;
-
-; tuple:       { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
-; insert z5:   {     z5     , tuple4.res1, tuple4.res2, tuple4.res3 }
-; extract z5:        ^^
-define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt0(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
-; CHECK-LABEL: set_tuple4_nxv16i32_elt0:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z0.d, z5.d
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
-  %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 0, <vscale x 4 x i32> %z5)
-  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 0)
-  ret <vscale x 4 x i32> %ext
-}
-
-; tuple:       { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
-; insert z5:   { tuple4.res0,     z5     , tuple4.res2, tuple4.res3 }
-; extract z5:                     ^^
-define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt1(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
-; CHECK-LABEL: set_tuple4_nxv16i32_elt1:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z0.d, z5.d
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
-  %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 1, <vscale x 4 x i32> %z5)
-  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 1)
-  ret <vscale x 4 x i32> %ext
-}
-
-; tuple:       { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
-; insert z5:   { tuple4.res0, tuple4.res1,     z5     , tuple4.res3 }
-; extract z5:                                  ^^
-define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
-; CHECK-LABEL: set_tuple4_nxv16i32_elt2:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z0.d, z5.d
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
-  %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 2, <vscale x 4 x i32> %z5)
-  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 2)
-  ret <vscale x 4 x i32> %ext
-}
-
-; tuple:       { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
-; insert z5:   { tuple4.res0, tuple4.res1, tuple4.res2,     z5      }
-; extract z5:                                               ^^
-define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt3(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
-; CHECK-LABEL: set_tuple4_nxv16i32_elt3:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z0.d, z5.d
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
-  %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 3, <vscale x 4 x i32> %z5)
-  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 3)
-  ret <vscale x 4 x i32> %ext
-}
-
-; This test checks that the elements _not_ being set aren't changed.
-
-; tuple:       { tuple4.res0, tuple4.res1, tuple4.res2, tuple4.res3 }
-; insert z5:   { tuple4.res0, tuple4.res1, tuple4.res2,     z5      }
-; extract z2:                                               ^^
-define <vscale x 4 x i32> @set_tuple4_nxv16i32_elt3_ret_elt2(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3, <vscale x 4 x i32> %z4, <vscale x 4 x i32> %z5) {
-; CHECK-LABEL: set_tuple4_nxv16i32_elt3_ret_elt2:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z0.d, z2.d
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32> %z0, <vscale x 4 x i32> %z1, <vscale x 4 x i32> %z2, <vscale x 4 x i32> %z3)
-  %ins = call <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32> %tuple, i32 3, <vscale x 4 x i32> %z5)
-  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %ins, i32 2)
-  ret <vscale x 4 x i32> %ext
-}
-
-; Test extract of tuple passed into function
-define <vscale x 4 x i32> @get_tuple4_nxv16i32_elt3(<vscale x 16 x i32> %tuple) {
-; CHECK-LABEL: get_tuple4_nxv16i32_elt3:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov z0.d, z3.d
-; CHECK-NEXT:    ret
-  %ext = call <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32> %tuple, i32 3)
-  ret <vscale x 4 x i32> %ext
-}
-
-declare <vscale x 8 x i32>  @llvm.aarch64.sve.tuple.create2.nxv8i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 8 x i32> @llvm.aarch64.sve.tuple.set.nxv8i32.nxv4i32(<vscale x 8 x i32>, i32, <vscale x 4 x i32>)
-declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv8i32(<vscale x 8 x i32>, i32)
-
-declare <vscale x 12 x i32> @llvm.aarch64.sve.tuple.create3.nxv12i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 12 x i32> @llvm.aarch64.sve.tuple.set.nxv12i32.nxv4i32(<vscale x 12 x i32>, i32, <vscale x 4 x i32>)
-declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv12i32(<vscale x 12 x i32>, i32)
-
-declare <vscale x 16 x i32> @llvm.aarch64.sve.tuple.create4.nxv16i32.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 16 x i32> @llvm.aarch64.sve.tuple.set.nxv16i32.nxv4i32(<vscale x 16 x i32>, i32, <vscale x 4 x i32>)
-declare <vscale x 4 x i32> @llvm.aarch64.sve.tuple.get.nxv16i32(<vscale x 16 x i32>, i32)
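The insert-then-extract pattern covered by the deleted tests maps onto the
generic llvm.vector.insert/llvm.vector.extract intrinsics. A rough equivalent of
set_tuple2_nxv8i32_elt1, given as a sketch rather than code taken from this
patch:

define <vscale x 4 x i32> @sketch_set_tuple2_elt1(<vscale x 8 x i32> %tuple, <vscale x 4 x i32> %z5) {
  ; Overwrite element 1 (lanes 4..7) of the wide value with %z5 ...
  %ins = call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> %tuple, <vscale x 4 x i32> %z5, i64 4)
  ; ... and read the same element back, so %z5 is what gets returned.
  %ext = call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> %ins, i64 4)
  ret <vscale x 4 x i32> %ext
}

declare <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32>, <vscale x 4 x i32>, i64 immarg)
declare <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32>, i64 immarg)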

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith.ll
index fe94b0274c624..9267ab29bde5a 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith.ll
@@ -397,45 +397,6 @@ define <vscale x 2 x i64> @uqsub_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %
   ret <vscale x 2 x i64> %out
 }
 
-; ADD (tuples)
-
-define <vscale x 4 x i64> @add_i64_tuple2(<vscale x 4 x i64>* %out, <vscale x 2 x i64> %in1, <vscale x 2 x i64> %in2) {
-; CHECK-LABEL: add_i64_tuple2:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    add z0.d, z0.d, z0.d
-; CHECK-NEXT:    add z1.d, z1.d, z1.d
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 4 x i64> @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64(<vscale x 2 x i64> %in1, <vscale x 2 x i64> %in2)
-  %res = add <vscale x 4 x i64> %tuple, %tuple
-  ret <vscale x 4 x i64> %res
-}
-
-define <vscale x 6 x i64> @add_i64_tuple3(<vscale x 6 x i64>* %out, <vscale x 2 x i64> %in1, <vscale x 2 x i64> %in2, <vscale x 2 x i64> %in3) {
-; CHECK-LABEL: add_i64_tuple3:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    add z1.d, z1.d, z1.d
-; CHECK-NEXT:    add z0.d, z0.d, z0.d
-; CHECK-NEXT:    add z2.d, z2.d, z2.d
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 6 x i64> @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(<vscale x 2 x i64> %in1, <vscale x 2 x i64> %in2, <vscale x 2 x i64> %in3)
-  %res = add <vscale x 6 x i64> %tuple, %tuple
-  ret <vscale x 6 x i64> %res
-}
-
-define <vscale x 8 x i64> @add_i64_tuple4(<vscale x 8 x i64>* %out, <vscale x 2 x i64> %in1, <vscale x 2 x i64> %in2, <vscale x 2 x i64> %in3, <vscale x 2 x i64> %in4) {
-; CHECK-LABEL: add_i64_tuple4:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    add z2.d, z2.d, z2.d
-; CHECK-NEXT:    add z0.d, z0.d, z0.d
-; CHECK-NEXT:    add z1.d, z1.d, z1.d
-; CHECK-NEXT:    add z3.d, z3.d, z3.d
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 8 x i64> @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(<vscale x 2 x i64> %in1, <vscale x 2 x i64> %in2, <vscale x 2 x i64> %in3, <vscale x 2 x i64> %in4)
-  %res = add <vscale x 8 x i64> %tuple, %tuple
-  ret <vscale x 8 x i64> %res
-}
-
-
 declare <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.abs.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
@@ -478,6 +439,3 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.x.nxv8i16(<vscale x 8 x i16>,
 declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.x.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.x.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
 
-declare <vscale x 4 x i64> @llvm.aarch64.sve.tuple.create2.nxv4i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
-declare <vscale x 6 x i64> @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-declare <vscale x 8 x i64> @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
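With the create intrinsics gone, a test such as add_i64_tuple2 can assemble the
wide value with llvm.vector.insert instead; a sketch under that assumption (not
code from this patch):

define <vscale x 4 x i64> @sketch_add_i64_tuple2(<vscale x 2 x i64> %in1, <vscale x 2 x i64> %in2) {
  ; Build the two-element "tuple" by inserting each part at its starting lane.
  %t0 = call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> poison, <vscale x 2 x i64> %in1, i64 0)
  %t1 = call <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64> %t0, <vscale x 2 x i64> %in2, i64 2)
  %res = add <vscale x 4 x i64> %t1, %t1
  ret <vscale x 4 x i64> %res
}

declare <vscale x 4 x i64> @llvm.vector.insert.nxv4i64.nxv2i64(<vscale x 4 x i64>, <vscale x 2 x i64>, i64 immarg)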

diff  --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
index 49c1153135bfc..556ecfe866f5e 100644
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll
@@ -586,87 +586,6 @@ define void @stnt1d_f64(<vscale x 2 x double> %data, <vscale x 2 x i1> %pred, do
 }
 
 
-; Stores (tuples)
-
-define void @store_i64_tuple3(<vscale x 6 x i64>* %out, <vscale x 2 x i64> %in1, <vscale x 2 x i64> %in2, <vscale x 2 x i64> %in3) {
-; CHECK-LABEL: store_i64_tuple3:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    st1d { z2.d }, p0, [x0, #2, mul vl]
-; CHECK-NEXT:    st1d { z1.d }, p0, [x0, #1, mul vl]
-; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 6 x i64> @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(<vscale x 2 x i64> %in1, <vscale x 2 x i64> %in2, <vscale x 2 x i64> %in3)
-  store <vscale x 6 x i64> %tuple, <vscale x 6 x i64>* %out
-  ret void
-}
-
-define void @store_i64_tuple4(<vscale x 8 x i64>* %out, <vscale x 2 x i64> %in1, <vscale x 2 x i64> %in2, <vscale x 2 x i64> %in3, <vscale x 2 x i64> %in4) {
-; CHECK-LABEL: store_i64_tuple4:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    st1d { z3.d }, p0, [x0, #3, mul vl]
-; CHECK-NEXT:    st1d { z2.d }, p0, [x0, #2, mul vl]
-; CHECK-NEXT:    st1d { z1.d }, p0, [x0, #1, mul vl]
-; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 8 x i64> @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(<vscale x 2 x i64> %in1, <vscale x 2 x i64> %in2, <vscale x 2 x i64> %in3, <vscale x 2 x i64> %in4)
-  store <vscale x 8 x i64> %tuple, <vscale x 8 x i64>* %out
-  ret void
-}
-
-define void @store_i16_tuple2(<vscale x 16 x i16>* %out, <vscale x 8 x i16> %in1, <vscale x 8 x i16> %in2) {
-; CHECK-LABEL: store_i16_tuple2:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    st1h { z1.h }, p0, [x0, #1, mul vl]
-; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 16 x i16> @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(<vscale x 8 x i16> %in1, <vscale x 8 x i16> %in2)
-  store <vscale x 16 x i16> %tuple, <vscale x 16 x i16>* %out
-  ret void
-}
-
-define void @store_i16_tuple3(<vscale x 24 x i16>* %out, <vscale x 8 x i16> %in1, <vscale x 8 x i16> %in2, <vscale x 8 x i16> %in3) {
-; CHECK-LABEL: store_i16_tuple3:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.h
-; CHECK-NEXT:    st1h { z2.h }, p0, [x0, #2, mul vl]
-; CHECK-NEXT:    st1h { z1.h }, p0, [x0, #1, mul vl]
-; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 24 x i16> @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(<vscale x 8 x i16> %in1, <vscale x 8 x i16> %in2, <vscale x 8 x i16> %in3)
-  store <vscale x 24 x i16> %tuple, <vscale x 24 x i16>* %out
-  ret void
-}
-
-define void @store_f32_tuple3(<vscale x 12 x float>* %out, <vscale x 4 x float> %in1, <vscale x 4 x float> %in2, <vscale x 4 x float> %in3) {
-; CHECK-LABEL: store_f32_tuple3:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    st1w { z2.s }, p0, [x0, #2, mul vl]
-; CHECK-NEXT:    st1w { z1.s }, p0, [x0, #1, mul vl]
-; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 12 x float> @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(<vscale x 4 x float> %in1, <vscale x 4 x float> %in2, <vscale x 4 x float> %in3)
-  store <vscale x 12 x float> %tuple, <vscale x 12 x float>* %out
-  ret void
-}
-
-define void @store_f32_tuple4(<vscale x 16 x float>* %out, <vscale x 4 x float> %in1, <vscale x 4 x float> %in2, <vscale x 4 x float> %in3, <vscale x 4 x float> %in4) {
-; CHECK-LABEL: store_f32_tuple4:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    st1w { z3.s }, p0, [x0, #3, mul vl]
-; CHECK-NEXT:    st1w { z2.s }, p0, [x0, #2, mul vl]
-; CHECK-NEXT:    st1w { z1.s }, p0, [x0, #1, mul vl]
-; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
-; CHECK-NEXT:    ret
-  %tuple = tail call <vscale x 16 x float> @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(<vscale x 4 x float> %in1, <vscale x 4 x float> %in2, <vscale x 4 x float> %in3, <vscale x 4 x float> %in4)
-  store <vscale x 16 x float> %tuple, <vscale x 16 x float>* %out
-  ret void
-}
-
 declare void @llvm.aarch64.sve.st2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
 declare void @llvm.aarch64.sve.st2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i1>, i16*)
 declare void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i1>, i32*)
@@ -706,14 +625,5 @@ declare void @llvm.aarch64.sve.stnt1.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8
 declare void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, float*)
 declare void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, double*)
 
-declare <vscale x 6 x i64> @llvm.aarch64.sve.tuple.create3.nxv6i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-declare <vscale x 8 x i64> @llvm.aarch64.sve.tuple.create4.nxv8i64.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-
-declare <vscale x 16 x i16> @llvm.aarch64.sve.tuple.create2.nxv16i16.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 24 x i16> @llvm.aarch64.sve.tuple.create3.nxv24i16.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-
-declare <vscale x 12 x float> @llvm.aarch64.sve.tuple.create3.nxv12f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
-declare <vscale x 16 x float> @llvm.aarch64.sve.tuple.create4.nxv16f32.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
-
 ; +bf16 is required for the bfloat version.
 attributes #0 = { "target-features"="+bf16" }
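
With the tuple.createN intrinsics removed, a wide value like the ones stored in the deleted tests above can instead be assembled with the generic llvm.vector.insert intrinsic before the store. The snippet below is only a minimal sketch (value names are illustrative and the indices are element offsets, i.e. multiples of 2 for <vscale x 2 x i64> slices of a <vscale x 6 x i64> value), not code taken from this patch:

  ; Build the 3-part value from %in1..%in3, then store it as one wide vector.
  %t0 = call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> poison, <vscale x 2 x i64> %in1, i64 0)
  %t1 = call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> %t0, <vscale x 2 x i64> %in2, i64 2)
  %t2 = call <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64> %t1, <vscale x 2 x i64> %in3, i64 4)
  store <vscale x 6 x i64> %t2, <vscale x 6 x i64>* %out

declare <vscale x 6 x i64> @llvm.vector.insert.nxv6i64.nxv2i64(<vscale x 6 x i64>, <vscale x 2 x i64>, i64)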

diff  --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-tuple-get.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-tuple-get.ll
deleted file mode 100644
index 5d8700bab19ba..0000000000000
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-tuple-get.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -S -passes=instcombine < %s | FileCheck %s
-
-target triple = "aarch64-unknown-linux-gnu"
-
-; This stores %a using st4 after reversing the 4 tuples. Check that the
-; redundant sequences of get/set are eliminated.
-define void @redundant_tuple_get_set(<vscale x 64 x i8> %a, i8* %ptr) #0 {
-; CHECK-LABEL: @redundant_tuple_get_set(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> [[A:%.*]], i32 3)
-; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> [[A]], i32 0)
-; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> [[A]], i32 2)
-; CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> [[A]], i32 1)
-; CHECK-NEXT:    call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> [[TMP1]], <vscale x 16 x i8> [[TMP3]], <vscale x 16 x i8> [[TMP4]], <vscale x 16 x i8> [[TMP2]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i32 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i8* [[PTR:%.*]])
-; CHECK-NEXT:    ret void
-;
-  %1 = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %a, i32 3)
-  %2 = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %a, i32 0)
-  %3 = call <vscale x 64 x i8> @llvm.aarch64.sve.tuple.set.nxv64i8.nxv16i8(<vscale x 64 x i8> %a, i32 3, <vscale x 16 x i8> %2)
-  %4 = call <vscale x 64 x i8> @llvm.aarch64.sve.tuple.set.nxv64i8.nxv16i8(<vscale x 64 x i8> %3, i32 0, <vscale x 16 x i8> %1)
-  %5 = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %4, i32 2)
-  %6 = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %4, i32 1)
-  %7 = call <vscale x 64 x i8> @llvm.aarch64.sve.tuple.set.nxv64i8.nxv16i8(<vscale x 64 x i8> %4, i32 2, <vscale x 16 x i8> %6)
-  %8 = call <vscale x 64 x i8> @llvm.aarch64.sve.tuple.set.nxv64i8.nxv16i8(<vscale x 64 x i8> %7, i32 1, <vscale x 16 x i8> %5)
-  %9 = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %8, i32 0)
-  %10 = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %8, i32 1)
-  %11 = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %8, i32 2)
-  %12 = call <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8> %8, i32 3)
-  call void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8> %9, <vscale x 16 x i8> %10, <vscale x 16 x i8> %11, <vscale x 16 x i8> %12, <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i32 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i8* %ptr)
-  ret void
-}
-
-declare <vscale x 64 x i8> @llvm.aarch64.sve.tuple.set.nxv64i8.nxv16i8(<vscale x 64 x i8>, i32, <vscale x 16 x i8>)
-declare <vscale x 16 x i8> @llvm.aarch64.sve.tuple.get.nxv16i8.nxv64i8(<vscale x 64 x i8>, i32)
-declare void @llvm.aarch64.sve.st4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i1>, i8*)
-
-attributes #0 = { "target-features"="+sve" }


        

