[llvm] [AArch64][SVE] Added optimisation for SVE intrinsics for no active lanes (PR #73943)

Mark Harley via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 30 06:22:12 PST 2023


https://github.com/MarkAHarley created https://github.com/llvm/llvm-project/pull/73943

Closes #56573 

This patch introduces optimisations for SVE intrinsic function calls which have all false predicates.

>From 29f83fd26cdcb65eb65cc7c0b6dd181fae9fb2cd Mon Sep 17 00:00:00 2001
From: Mark Harley <mark.harley at arm.com>
Date: Tue, 28 Nov 2023 14:55:43 +0000
Subject: [PATCH] [AArch64][SVE] Added optimisation for SVE intrinsics when
 there are no active lanes

---
 .../AArch64/AArch64TargetTransformInfo.cpp    |  166 ++-
 ...-intrinsic-comb-m-forms-no-active-lanes.ll | 1324 +++++++++++++++++
 ...-intrinsic-comb-u-forms-no-active-lanes.ll | 1318 ++++++++++++++++
 3 files changed, 2764 insertions(+), 44 deletions(-)
 create mode 100644 llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-m-forms-no-active-lanes.ll
 create mode 100644 llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-u-forms-no-active-lanes.ll

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index b5b8b68291786dc..a0e42a183f1d90d 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1406,9 +1406,30 @@ static std::optional<Instruction *> instCombineSVEAllActive(IntrinsicInst &II,
   return &II;
 }
 
+// Optimize operations that take an all false predicate or send them for
+// canonicalization.
+static std::optional<Instruction *>
+instCombineSVEAllOrNoActive(InstCombiner &IC, IntrinsicInst &II,
+                            Intrinsic::ID IID) {
+  if (match(II.getOperand(0), m_ZeroInt())) {
+    if (II.getIntrinsicID() != IID)
+      // llvm_ir, pred(0), op1, op2 - Spec says to return op1 when all lanes are
+      // inactive for sv[func]_m or sv[func]_z
+      return IC.replaceInstUsesWith(II, II.getOperand(1));
+    else
+      // llvm_ir_u, pred(0), op1, op2 - Spec says to return undef when all lanes
+      // are inactive for sv[func]_x
+      return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
+  }
+  if (II.getIntrinsicID() != IID)
+    return instCombineSVEAllActive(II, IID);
+  return std::nullopt;
+}
+
 static std::optional<Instruction *> instCombineSVEVectorAdd(InstCombiner &IC,
                                                             IntrinsicInst &II) {
-  if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_add_u))
+  if (auto II_U =
+          instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_add_u))
     return II_U;
   if (auto MLA = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul,
                                                    Intrinsic::aarch64_sve_mla>(
@@ -1421,9 +1442,22 @@ static std::optional<Instruction *> instCombineSVEVectorAdd(InstCombiner &IC,
   return std::nullopt;
 }
 
+static std::optional<Instruction *>
+instCombineSVEVectorAddU(InstCombiner &IC, IntrinsicInst &II) {
+  if (auto II_U =
+          instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_add_u))
+    return II_U;
+  else {
+    return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul_u,
+                                             Intrinsic::aarch64_sve_mla_u>(
+        IC, II, true);
+  }
+}
+
 static std::optional<Instruction *>
 instCombineSVEVectorFAdd(InstCombiner &IC, IntrinsicInst &II) {
-  if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fadd_u))
+  if (auto II_U =
+          instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fadd_u))
     return II_U;
   if (auto FMLA =
           instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
@@ -1445,6 +1479,9 @@ instCombineSVEVectorFAdd(InstCombiner &IC, IntrinsicInst &II) {
 
 static std::optional<Instruction *>
 instCombineSVEVectorFAddU(InstCombiner &IC, IntrinsicInst &II) {
+  if (auto II_U =
+          instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fadd_u))
+    return II_U;
   if (auto FMLA =
           instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
                                             Intrinsic::aarch64_sve_fmla>(IC, II,
@@ -1465,7 +1502,8 @@ instCombineSVEVectorFAddU(InstCombiner &IC, IntrinsicInst &II) {
 
 static std::optional<Instruction *>
 instCombineSVEVectorFSub(InstCombiner &IC, IntrinsicInst &II) {
-  if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fsub_u))
+  if (auto II_U =
+          instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fsub_u))
     return II_U;
   if (auto FMLS =
           instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
@@ -1487,6 +1525,9 @@ instCombineSVEVectorFSub(InstCombiner &IC, IntrinsicInst &II) {
 
 static std::optional<Instruction *>
 instCombineSVEVectorFSubU(InstCombiner &IC, IntrinsicInst &II) {
+  if (auto II_U =
+          instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fsub_u))
+    return II_U;
   if (auto FMLS =
           instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_fmul,
                                             Intrinsic::aarch64_sve_fmls>(IC, II,
@@ -1507,7 +1548,8 @@ instCombineSVEVectorFSubU(InstCombiner &IC, IntrinsicInst &II) {
 
 static std::optional<Instruction *> instCombineSVEVectorSub(InstCombiner &IC,
                                                             IntrinsicInst &II) {
-  if (auto II_U = instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sub_u))
+  if (auto II_U =
+          instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sub_u))
     return II_U;
   if (auto MLS = instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul,
                                                    Intrinsic::aarch64_sve_mls>(
@@ -1516,6 +1558,18 @@ static std::optional<Instruction *> instCombineSVEVectorSub(InstCombiner &IC,
   return std::nullopt;
 }
 
+static std::optional<Instruction *>
+instCombineSVEVectorSubU(InstCombiner &IC, IntrinsicInst &II) {
+  if (auto II_U =
+          instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sub_u))
+    return II_U;
+  else {
+    return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul_u,
+                                             Intrinsic::aarch64_sve_mls_u>(
+        IC, II, true);
+  }
+}
+
 static std::optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
                                                             IntrinsicInst &II,
                                                             Intrinsic::ID IID) {
@@ -1523,10 +1577,8 @@ static std::optional<Instruction *> instCombineSVEVectorMul(InstCombiner &IC,
   auto *OpMultiplicand = II.getOperand(1);
   auto *OpMultiplier = II.getOperand(2);
 
-  // Canonicalise a non _u intrinsic only.
-  if (II.getIntrinsicID() != IID)
-    if (auto II_U = instCombineSVEAllActive(II, IID))
-      return II_U;
+  if (auto II_U = instCombineSVEAllOrNoActive(IC, II, IID))
+    return II_U;
 
   // Return true if a given instruction is a unit splat value, false otherwise.
   auto IsUnitSplat = [](auto *I) {
@@ -1891,34 +1943,45 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
   case Intrinsic::aarch64_sve_ptest_last:
     return instCombineSVEPTest(IC, II);
   case Intrinsic::aarch64_sve_fabd:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fabd_u);
+  case Intrinsic::aarch64_sve_fabd_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fabd_u);
   case Intrinsic::aarch64_sve_fadd:
     return instCombineSVEVectorFAdd(IC, II);
   case Intrinsic::aarch64_sve_fadd_u:
     return instCombineSVEVectorFAddU(IC, II);
   case Intrinsic::aarch64_sve_fdiv:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fdiv_u);
+  case Intrinsic::aarch64_sve_fdiv_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fdiv_u);
   case Intrinsic::aarch64_sve_fmax:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmax_u);
+  case Intrinsic::aarch64_sve_fmax_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmax_u);
   case Intrinsic::aarch64_sve_fmaxnm:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmaxnm_u);
+  case Intrinsic::aarch64_sve_fmaxnm_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmaxnm_u);
   case Intrinsic::aarch64_sve_fmin:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmin_u);
+  case Intrinsic::aarch64_sve_fmin_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmin_u);
   case Intrinsic::aarch64_sve_fminnm:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fminnm_u);
+  case Intrinsic::aarch64_sve_fminnm_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fminnm_u);
   case Intrinsic::aarch64_sve_fmla:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmla_u);
+  case Intrinsic::aarch64_sve_fmla_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmla_u);
   case Intrinsic::aarch64_sve_fmls:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmls_u);
+  case Intrinsic::aarch64_sve_fmls_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmls_u);
   case Intrinsic::aarch64_sve_fmul:
   case Intrinsic::aarch64_sve_fmul_u:
     return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_fmul_u);
   case Intrinsic::aarch64_sve_fmulx:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fmulx_u);
+  case Intrinsic::aarch64_sve_fmulx_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fmulx_u);
   case Intrinsic::aarch64_sve_fnmla:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fnmla_u);
+  case Intrinsic::aarch64_sve_fnmla_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fnmla_u);
   case Intrinsic::aarch64_sve_fnmls:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_fnmls_u);
+  case Intrinsic::aarch64_sve_fnmls_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_fnmls_u);
   case Intrinsic::aarch64_sve_fsub:
     return instCombineSVEVectorFSub(IC, II);
   case Intrinsic::aarch64_sve_fsub_u:
@@ -1926,56 +1989,71 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
   case Intrinsic::aarch64_sve_add:
     return instCombineSVEVectorAdd(IC, II);
   case Intrinsic::aarch64_sve_add_u:
-    return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul_u,
-                                             Intrinsic::aarch64_sve_mla_u>(
-        IC, II, true);
+    return instCombineSVEVectorAddU(IC, II);
   case Intrinsic::aarch64_sve_mla:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_mla_u);
+  case Intrinsic::aarch64_sve_mla_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_mla_u);
   case Intrinsic::aarch64_sve_mls:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_mls_u);
+  case Intrinsic::aarch64_sve_mls_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_mls_u);
   case Intrinsic::aarch64_sve_mul:
   case Intrinsic::aarch64_sve_mul_u:
     return instCombineSVEVectorMul(IC, II, Intrinsic::aarch64_sve_mul_u);
   case Intrinsic::aarch64_sve_sabd:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sabd_u);
+  case Intrinsic::aarch64_sve_sabd_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sabd_u);
   case Intrinsic::aarch64_sve_smax:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smax_u);
+  case Intrinsic::aarch64_sve_smax_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smax_u);
   case Intrinsic::aarch64_sve_smin:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smin_u);
+  case Intrinsic::aarch64_sve_smin_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smin_u);
   case Intrinsic::aarch64_sve_smulh:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_smulh_u);
+  case Intrinsic::aarch64_sve_smulh_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_smulh_u);
   case Intrinsic::aarch64_sve_sub:
     return instCombineSVEVectorSub(IC, II);
   case Intrinsic::aarch64_sve_sub_u:
-    return instCombineSVEVectorFuseMulAddSub<Intrinsic::aarch64_sve_mul_u,
-                                             Intrinsic::aarch64_sve_mls_u>(
-        IC, II, true);
+    return instCombineSVEVectorSubU(IC, II);
   case Intrinsic::aarch64_sve_uabd:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_uabd_u);
+  case Intrinsic::aarch64_sve_uabd_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_uabd_u);
   case Intrinsic::aarch64_sve_umax:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umax_u);
+  case Intrinsic::aarch64_sve_umax_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umax_u);
   case Intrinsic::aarch64_sve_umin:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umin_u);
+  case Intrinsic::aarch64_sve_umin_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umin_u);
   case Intrinsic::aarch64_sve_umulh:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_umulh_u);
+  case Intrinsic::aarch64_sve_umulh_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_umulh_u);
   case Intrinsic::aarch64_sve_asr:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_asr_u);
+  case Intrinsic::aarch64_sve_asr_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_asr_u);
   case Intrinsic::aarch64_sve_lsl:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_lsl_u);
+  case Intrinsic::aarch64_sve_lsl_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_lsl_u);
   case Intrinsic::aarch64_sve_lsr:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_lsr_u);
+  case Intrinsic::aarch64_sve_lsr_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_lsr_u);
   case Intrinsic::aarch64_sve_and:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_and_u);
+  case Intrinsic::aarch64_sve_and_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_and_u);
   case Intrinsic::aarch64_sve_bic:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_bic_u);
+  case Intrinsic::aarch64_sve_bic_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_bic_u);
   case Intrinsic::aarch64_sve_eor:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_eor_u);
+  case Intrinsic::aarch64_sve_eor_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_eor_u);
   case Intrinsic::aarch64_sve_orr:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_orr_u);
+  case Intrinsic::aarch64_sve_orr_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_orr_u);
   case Intrinsic::aarch64_sve_sqsub:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_sqsub_u);
+  case Intrinsic::aarch64_sve_sqsub_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_sqsub_u);
   case Intrinsic::aarch64_sve_uqsub:
-    return instCombineSVEAllActive(II, Intrinsic::aarch64_sve_uqsub_u);
+  case Intrinsic::aarch64_sve_uqsub_u:
+    return instCombineSVEAllOrNoActive(IC, II, Intrinsic::aarch64_sve_uqsub_u);
   case Intrinsic::aarch64_sve_tbl:
     return instCombineSVETBL(IC, II);
   case Intrinsic::aarch64_sve_uunpkhi:
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-m-forms-no-active-lanes.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-m-forms-no-active-lanes.ll
new file mode 100644
index 000000000000000..56fe0a18f65c1d8
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-m-forms-no-active-lanes.ll
@@ -0,0 +1,1324 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Replace SVE _m intrinsics with their first operand when the predicate is all false.
+
+; Float arithmetic
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fabd.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+define <vscale x 8 x half> @replace_fabd_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x half> @replace_fabd_intrinsic_half
+; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:    ret <vscale x 8 x half> [[A]]
+;
+  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fabd.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %1
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fabd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fabd_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fabd_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
+;
+  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fabd.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %1
+}
+
+declare <vscale x 2 x double> @llvm.aarch64.sve.fabd.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 2 x double> @replace_fabd_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @replace_fabd_intrinsic_double
+; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x double> [[A]]
+;
+  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fabd.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %1
+}
+
+; aarch64_sve_fadd intrinsic combines to a LLVM instruction fadd.
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+define <vscale x 8 x half> @replace_fadd_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x half> @replace_fadd_intrinsic_half
+; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x half> [[A]]
+;
+  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %1
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fadd_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fadd_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
+;
+  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %1
+}
+
+declare <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 2 x double> @replace_fadd_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @replace_fadd_intrinsic_double
+; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x double> [[A]]
+;
+  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %1
+}
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fdiv.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+define <vscale x 8 x half> @replace_fdiv_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x half> @replace_fdiv_intrinsic_half
+; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x half> [[A]]
+;
+  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fdiv.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %1
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fdiv.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fdiv_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fdiv_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
+;
+  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fdiv.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %1
+}
+
+declare <vscale x 2 x double> @llvm.aarch64.sve.fdiv.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 2 x double> @replace_fdiv_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @replace_fdiv_intrinsic_double
+; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x double> [[A]]
+;
+  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fdiv.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %1
+}
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmax.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+define <vscale x 8 x half> @replace_fmax_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x half> @replace_fmax_intrinsic_half
+; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x half> [[A]]
+;
+  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmax.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %1
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fmax_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fmax_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
+;
+  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %1
+}
+
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmax.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 2 x double> @replace_fmax_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @replace_fmax_intrinsic_double
+; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x double> [[A]]
+;
+  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmax.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %1
+}
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmaxnm.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+define <vscale x 8 x half> @replace_fmaxnm_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x half> @replace_fmaxnm_intrinsic_half
+; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x half> [[A]]
+;
+  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmaxnm.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %1
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fmaxnm_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fmaxnm_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
+;
+  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %1
+}
+
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 2 x double> @replace_fmaxnm_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @replace_fmaxnm_intrinsic_double
+; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x double> [[A]]
+;
+  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %1
+}
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmin.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+define <vscale x 8 x half> @replace_fmin_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x half> @replace_fmin_intrinsic_half
+; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x half> [[A]]
+;
+  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmin.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %1
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmin.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fmin_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fmin_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
+;
+  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmin.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %1
+}
+
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmin.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 2 x double> @replace_fmin_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @replace_fmin_intrinsic_double
+; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x double> [[A]]
+;
+  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmin.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %1
+}
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fminnm.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+define <vscale x 8 x half> @replace_fminnm_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x half> @replace_fminnm_intrinsic_half
+; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x half> [[A]]
+;
+  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fminnm.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %1
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fminnm_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fminnm_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
+;
+  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %1
+}
+
+declare <vscale x 2 x double> @llvm.aarch64.sve.fminnm.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 2 x double> @replace_fminnm_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @replace_fminnm_intrinsic_double
+; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x double> [[A]]
+;
+  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fminnm.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %1
+}
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmla.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
+define <vscale x 8 x half> @replace_fmla_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) #0 {
+; CHECK-LABEL: define <vscale x 8 x half> @replace_fmla_intrinsic_half
+; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]], <vscale x 8 x half> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x half> [[A]]
+;
+  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmla.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c)
+  ret <vscale x 8 x half> %1
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmla.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fmla_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fmla_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]], <vscale x 4 x float> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
+;
+  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmla.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c)
+  ret <vscale x 4 x float> %1
+}
+
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmla.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 2 x double> @replace_fmla_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @replace_fmla_intrinsic_double
+; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]], <vscale x 2 x double> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x double> [[A]]
+;
+  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmla.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c)
+  ret <vscale x 2 x double> %1
+}
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmls.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
+define <vscale x 8 x half> @replace_fmls_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) #0 {
+; CHECK-LABEL: define <vscale x 8 x half> @replace_fmls_intrinsic_half
+; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]], <vscale x 8 x half> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x half> [[A]]
+;
+  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmls.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c)
+  ret <vscale x 8 x half> %1
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmls.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fmls_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fmls_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]], <vscale x 4 x float> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
+;
+  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmls.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c)
+  ret <vscale x 4 x float> %1
+}
+
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmls.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 2 x double> @replace_fmls_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @replace_fmls_intrinsic_double
+; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]], <vscale x 2 x double> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x double> [[A]]
+;
+  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmls.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c)
+  ret <vscale x 2 x double> %1
+}
+
+; aarch64_sve_fmul intrinsic combines to a LLVM instruction fmul.
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+define <vscale x 8 x half> @replace_fmul_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x half> @replace_fmul_intrinsic_half
+; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x half> [[A]]
+;
+  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %1
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fmul_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fmul_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
+;
+  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %1
+}
+
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 2 x double> @replace_fmul_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @replace_fmul_intrinsic_double
+; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x double> [[A]]
+;
+  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %1
+}
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmulx.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+define <vscale x 8 x half> @replace_fmulx_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x half> @replace_fmulx_intrinsic_half
+; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x half> [[A]]
+;
+  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmulx.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %1
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmulx.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fmulx_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fmulx_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
+;
+  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmulx.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %1
+}
+
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmulx.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 2 x double> @replace_fmulx_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @replace_fmulx_intrinsic_double
+; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x double> [[A]]
+;
+  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmulx.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %1
+}
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fnmla.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
+define <vscale x 8 x half> @replace_fnmla_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) #0 {
+; CHECK-LABEL: define <vscale x 8 x half> @replace_fnmla_intrinsic_half
+; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]], <vscale x 8 x half> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x half> [[A]]
+;
+  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fnmla.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c)
+  ret <vscale x 8 x half> %1
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fnmla.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fnmla_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fnmla_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]], <vscale x 4 x float> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
+;
+  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fnmla.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c)
+  ret <vscale x 4 x float> %1
+}
+
+declare <vscale x 2 x double> @llvm.aarch64.sve.fnmla.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 2 x double> @replace_fnmla_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @replace_fnmla_intrinsic_double
+; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]], <vscale x 2 x double> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x double> [[A]]
+;
+  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fnmla.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c)
+  ret <vscale x 2 x double> %1
+}
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fnmls.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
+define <vscale x 8 x half> @replace_fnmls_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) #0 {
+; CHECK-LABEL: define <vscale x 8 x half> @replace_fnmls_intrinsic_half
+; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]], <vscale x 8 x half> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x half> [[A]]
+;
+  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fnmls.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c)
+  ret <vscale x 8 x half> %1
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fnmls.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fnmls_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fnmls_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]], <vscale x 4 x float> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
+;
+  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fnmls.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c)
+  ret <vscale x 4 x float> %1
+}
+
+declare <vscale x 2 x double> @llvm.aarch64.sve.fnmls.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 2 x double> @replace_fnmls_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @replace_fnmls_intrinsic_double
+; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]], <vscale x 2 x double> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x double> [[A]]
+;
+  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fnmls.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c)
+  ret <vscale x 2 x double> %1
+}
+
+; aarch64_sve_fsub intrinsic combines to a LLVM instruction fsub.
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+define <vscale x 8 x half> @replace_fsub_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x half> @replace_fsub_intrinsic_half
+; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x half> [[A]]
+;
+  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %1
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fsub_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fsub_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
+;
+  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %1
+}
+
+declare <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 2 x double> @replace_fsub_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @replace_fsub_intrinsic_double
+; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x double> [[A]]
+;
+  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %1
+}
+
+; Integer arithmetic
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_add_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_add_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_add_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_add_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_add_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_add_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.add.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_add_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_add_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.add.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %1
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.mla.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_mla_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_mla_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]], <vscale x 16 x i8> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mla.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.mla.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_mla_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_mla_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i16> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mla.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.mla.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_mla_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_mla_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]], <vscale x 4 x i32> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mla.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.mla.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_mla_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_mla_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]], <vscale x 2 x i64> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mla.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
+  ret <vscale x 2 x i64> %1
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.mls.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_mls_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_mls_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]], <vscale x 16 x i8> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mls.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.mls.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_mls_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_mls_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i16> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mls.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.mls.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_mls_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_mls_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]], <vscale x 4 x i32> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mls.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.mls.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_mls_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_mls_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]], <vscale x 2 x i64> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mls.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
+  ret <vscale x 2 x i64> %1
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.mul.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_mul_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_mul_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mul.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_mul_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_mul_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_mul_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_mul_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_mul_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_mul_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %1
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sabd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_sabd_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_sabd_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sabd.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sabd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_sabd_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_sabd_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sabd.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sabd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_sabd_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_sabd_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sabd.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_sabd_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_sabd_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %1
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.smax.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_smax_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_smax_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smax.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.smax.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_smax_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_smax_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smax.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_smax_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_smax_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.smax.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_smax_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_smax_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smax.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %1
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.smin.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_smin_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_smin_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smin.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.smin.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_smin_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_smin_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smin.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_smin_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_smin_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.smin.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_smin_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_smin_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smin.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %1
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.smulh.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_smulh_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_smulh_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smulh.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.smulh.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_smulh_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_smulh_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smulh.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_smulh_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_smulh_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_smulh_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_smulh_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %1
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_sub_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_sub_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sub.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_sub_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_sub_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sub.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_sub_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_sub_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_sub_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_sub_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sub.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %1
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.uabd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_uabd_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_uabd_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uabd.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uabd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_uabd_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_uabd_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uabd.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uabd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_uabd_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_uabd_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uabd.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uabd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_uabd_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_uabd_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uabd.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %1
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.umax.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_umax_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_umax_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umax.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.umax.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_umax_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_umax_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umax.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_umax_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_umax_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.umax.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_umax_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_umax_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umax.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %1
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.umin.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_umin_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_umin_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umin.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.umin.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_umin_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_umin_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umin.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_umin_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_umin_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.umin.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_umin_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_umin_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umin.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %1
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.umulh.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_umulh_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_umulh_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umulh.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.umulh.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_umulh_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_umulh_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umulh.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_umulh_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_umulh_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_umulh_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_umulh_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %1
+}
+
+; Shifts
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_asr_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_asr_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_asr_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_asr_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_asr_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_asr_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_asr_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_asr_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %1
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_lsl_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_lsl_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_lsl_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_lsl_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_lsl_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_lsl_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_lsl_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_lsl_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %1
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_lsr_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_lsr_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_lsr_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_lsr_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_lsr_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_lsr_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_lsr_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_lsr_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %1
+}
+
+; Logical operations
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.and.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_and_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_and_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.and.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.and.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_and_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_and_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.and.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_and_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_and_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_and_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_and_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %1
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_bic_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_bic_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_bic_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_bic_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_bic_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_bic_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_bic_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_bic_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %1
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_eor_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_eor_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_eor_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_eor_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_eor_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_eor_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_eor_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_eor_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %1
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.orr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_orr_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_orr_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.orr.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.orr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_orr_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_orr_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.orr.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_orr_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_orr_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_orr_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_orr_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %1
+}
+
+; SVE2 - Uniform DSP operations
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_sqsub_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_sqsub_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_sqsub_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_sqsub_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_sqsub_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_sqsub_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_sqsub_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_sqsub_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %1
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_uqsub_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_uqsub_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_uqsub_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_uqsub_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_uqsub_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_uqsub_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_uqsub_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_uqsub_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %1
+}
+
+attributes #0 = { "target-features"="+sve,+sve2" }
\ No newline at end of file
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-u-forms-no-active-lanes.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-u-forms-no-active-lanes.ll
new file mode 100644
index 000000000000000..02ac82fa5c36c28
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-u-forms-no-active-lanes.ll
@@ -0,0 +1,1318 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Replace SVE _u intrinsics with undef if the predicate is all false.
+
+; Float arithmetic
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fabd.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+define <vscale x 8 x half> @replace_fabd_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x half> @replace_fabd_intrinsic_half
+; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:    ret <vscale x 8 x half> undef
+;
+  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fabd.u.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %1
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fabd.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fabd_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fabd_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> undef
+;
+  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fabd.u.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %1
+}
+
+declare <vscale x 2 x double> @llvm.aarch64.sve.fabd.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 2 x double> @replace_fabd_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @replace_fabd_intrinsic_double
+; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x double> undef
+;
+  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fabd.u.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %1
+}
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fadd.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+define <vscale x 8 x half> @replace_fadd_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x half> @replace_fadd_intrinsic_half
+; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x half> undef
+;
+  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fadd.u.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %1
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fadd_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fadd_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> undef
+;
+  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fadd.u.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %1
+}
+
+declare <vscale x 2 x double> @llvm.aarch64.sve.fadd.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 2 x double> @replace_fadd_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @replace_fadd_intrinsic_double
+; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x double> undef
+;
+  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fadd.u.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %1
+}
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fdiv.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+define <vscale x 8 x half> @replace_fdiv_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x half> @replace_fdiv_intrinsic_half
+; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x half> undef
+;
+  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fdiv.u.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %1
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fdiv.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fdiv_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fdiv_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> undef
+;
+  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fdiv.u.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %1
+}
+
+declare <vscale x 2 x double> @llvm.aarch64.sve.fdiv.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 2 x double> @replace_fdiv_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @replace_fdiv_intrinsic_double
+; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x double> undef
+;
+  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fdiv.u.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %1
+}
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmax.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+define <vscale x 8 x half> @replace_fmax_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x half> @replace_fmax_intrinsic_half
+; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x half> undef
+;
+  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmax.u.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %1
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmax.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fmax_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fmax_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> undef
+;
+  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmax.u.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %1
+}
+
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmax.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 2 x double> @replace_fmax_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @replace_fmax_intrinsic_double
+; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x double> undef
+;
+  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmax.u.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %1
+}
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmaxnm.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+define <vscale x 8 x half> @replace_fmaxnm_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x half> @replace_fmaxnm_intrinsic_half
+; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x half> undef
+;
+  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmaxnm.u.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %1
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fmaxnm_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fmaxnm_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> undef
+;
+  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.u.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %1
+}
+
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 2 x double> @replace_fmaxnm_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @replace_fmaxnm_intrinsic_double
+; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x double> undef
+;
+  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.u.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %1
+}
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmin.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+define <vscale x 8 x half> @replace_fmin_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x half> @replace_fmin_intrinsic_half
+; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x half> undef
+;
+  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmin.u.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %1
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmin.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fmin_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fmin_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> undef
+;
+  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmin.u.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %1
+}
+
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmin.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 2 x double> @replace_fmin_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @replace_fmin_intrinsic_double
+; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x double> undef
+;
+  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmin.u.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %1
+}
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fminnm.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+define <vscale x 8 x half> @replace_fminnm_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x half> @replace_fminnm_intrinsic_half
+; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x half> undef
+;
+  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fminnm.u.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %1
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fminnm.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fminnm_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fminnm_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> undef
+;
+  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fminnm.u.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %1
+}
+
+declare <vscale x 2 x double> @llvm.aarch64.sve.fminnm.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 2 x double> @replace_fminnm_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @replace_fminnm_intrinsic_double
+; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x double> undef
+;
+  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fminnm.u.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %1
+}
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmla.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
+define <vscale x 8 x half> @replace_fmla_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) #0 {
+; CHECK-LABEL: define <vscale x 8 x half> @replace_fmla_intrinsic_half
+; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]], <vscale x 8 x half> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x half> undef
+;
+  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmla.u.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c)
+  ret <vscale x 8 x half> %1
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmla.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fmla_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fmla_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]], <vscale x 4 x float> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> undef
+;
+  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmla.u.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c)
+  ret <vscale x 4 x float> %1
+}
+
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmla.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 2 x double> @replace_fmla_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @replace_fmla_intrinsic_double
+; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]], <vscale x 2 x double> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x double> undef
+;
+  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmla.u.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c)
+  ret <vscale x 2 x double> %1
+}
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmls.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
+define <vscale x 8 x half> @replace_fmls_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) #0 {
+; CHECK-LABEL: define <vscale x 8 x half> @replace_fmls_intrinsic_half
+; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]], <vscale x 8 x half> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x half> undef
+;
+  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmls.u.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c)
+  ret <vscale x 8 x half> %1
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmls.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fmls_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fmls_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]], <vscale x 4 x float> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> undef
+;
+  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmls.u.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c)
+  ret <vscale x 4 x float> %1
+}
+
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmls.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 2 x double> @replace_fmls_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @replace_fmls_intrinsic_double
+; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]], <vscale x 2 x double> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x double> undef
+;
+  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmls.u.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c)
+  ret <vscale x 2 x double> %1
+}
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmul.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+define <vscale x 8 x half> @replace_fmul_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x half> @replace_fmul_intrinsic_half
+; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x half> undef
+;
+  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmul.u.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %1
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fmul_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fmul_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> undef
+;
+  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmul.u.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %1
+}
+
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 2 x double> @replace_fmul_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @replace_fmul_intrinsic_double
+; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x double> undef
+;
+  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmul.u.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %1
+}
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmulx.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+define <vscale x 8 x half> @replace_fmulx_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x half> @replace_fmulx_intrinsic_half
+; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x half> undef
+;
+  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmulx.u.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %1
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmulx.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fmulx_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fmulx_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> undef
+;
+  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmulx.u.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %1
+}
+
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmulx.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 2 x double> @replace_fmulx_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @replace_fmulx_intrinsic_double
+; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x double> undef
+;
+  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmulx.u.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %1
+}
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fnmla.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
+define <vscale x 8 x half> @replace_fnmla_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) #0 {
+; CHECK-LABEL: define <vscale x 8 x half> @replace_fnmla_intrinsic_half
+; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]], <vscale x 8 x half> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x half> undef
+;
+  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fnmla.u.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c)
+  ret <vscale x 8 x half> %1
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fnmla.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fnmla_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fnmla_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]], <vscale x 4 x float> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> undef
+;
+  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fnmla.u.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c)
+  ret <vscale x 4 x float> %1
+}
+
+declare <vscale x 2 x double> @llvm.aarch64.sve.fnmla.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 2 x double> @replace_fnmla_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @replace_fnmla_intrinsic_double
+; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]], <vscale x 2 x double> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x double> undef
+;
+  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fnmla.u.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c)
+  ret <vscale x 2 x double> %1
+}
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fnmls.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
+define <vscale x 8 x half> @replace_fnmls_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) #0 {
+; CHECK-LABEL: define <vscale x 8 x half> @replace_fnmls_intrinsic_half
+; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]], <vscale x 8 x half> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x half> undef
+;
+  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fnmls.u.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c)
+  ret <vscale x 8 x half> %1
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fnmls.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fnmls_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fnmls_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]], <vscale x 4 x float> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> undef
+;
+  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fnmls.u.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c)
+  ret <vscale x 4 x float> %1
+}
+
+declare <vscale x 2 x double> @llvm.aarch64.sve.fnmls.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 2 x double> @replace_fnmls_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @replace_fnmls_intrinsic_double
+; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]], <vscale x 2 x double> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x double> undef
+;
+  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fnmls.u.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c)
+  ret <vscale x 2 x double> %1
+}
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fsub.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+define <vscale x 8 x half> @replace_fsub_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x half> @replace_fsub_intrinsic_half
+; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x half> undef
+;
+  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fsub.u.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+  ret <vscale x 8 x half> %1
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fsub.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fsub_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fsub_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> undef
+;
+  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fsub.u.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %1
+}
+
+declare <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+define <vscale x 2 x double> @replace_fsub_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @replace_fsub_intrinsic_double
+; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x double> undef
+;
+  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fsub.u.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+  ret <vscale x 2 x double> %1
+}
+
+; Integer arithmetic
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.add.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_add_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_add_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> undef
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.add.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.add.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_add_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_add_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> undef
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.add.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_add_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_add_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> undef
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.add.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_add_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_add_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> undef
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.add.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %1
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.mla.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_mla_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_mla_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]], <vscale x 16 x i8> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> undef
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mla.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.mla.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_mla_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_mla_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i16> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> undef
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mla.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.mla.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_mla_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_mla_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]], <vscale x 4 x i32> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> undef
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mla.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.mla.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_mla_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_mla_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]], <vscale x 2 x i64> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> undef
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mla.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
+  ret <vscale x 2 x i64> %1
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.mls.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_mls_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_mls_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]], <vscale x 16 x i8> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> undef
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mls.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.mls.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_mls_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_mls_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i16> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> undef
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mls.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.mls.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_mls_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_mls_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]], <vscale x 4 x i32> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> undef
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mls.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.mls.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_mls_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_mls_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]], <vscale x 2 x i64> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> undef
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mls.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
+  ret <vscale x 2 x i64> %1
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.mul.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_mul_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_mul_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> undef
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mul.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_mul_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_mul_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> undef
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_mul_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_mul_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> undef
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_mul_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_mul_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> undef
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %1
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sabd.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_sabd_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_sabd_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> undef
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sabd.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sabd.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_sabd_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_sabd_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> undef
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sabd.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sabd.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_sabd_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_sabd_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> undef
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sabd.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sabd.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_sabd_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_sabd_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> undef
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sabd.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %1
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.smax.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_smax_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_smax_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> undef
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smax.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.smax.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_smax_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_smax_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> undef
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smax.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.smax.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_smax_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_smax_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> undef
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.smax.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_smax_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_smax_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> undef
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smax.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %1
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.smin.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_smin_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_smin_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> undef
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smin.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.smin.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_smin_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_smin_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> undef
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smin.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.smin.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_smin_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_smin_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> undef
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smin.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.smin.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_smin_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_smin_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> undef
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smin.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %1
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.smulh.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_smulh_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_smulh_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> undef
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smulh.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.smulh.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_smulh_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_smulh_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> undef
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smulh.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.smulh.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_smulh_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_smulh_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> undef
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.smulh.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_smulh_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_smulh_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> undef
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %1
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sub.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_sub_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_sub_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> undef
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sub.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sub.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_sub_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_sub_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> undef
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sub.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_sub_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_sub_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> undef
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sub.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_sub_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_sub_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> undef
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sub.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %1
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.uabd.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_uabd_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_uabd_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> undef
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uabd.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uabd.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_uabd_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_uabd_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> undef
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uabd.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uabd.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_uabd_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_uabd_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> undef
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uabd.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uabd.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_uabd_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_uabd_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> undef
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uabd.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %1
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.umax.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_umax_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_umax_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> undef
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umax.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.umax.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_umax_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_umax_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> undef
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umax.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.umax.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_umax_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_umax_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> undef
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umax.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.umax.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_umax_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_umax_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> undef
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umax.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %1
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.umin.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_umin_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_umin_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> undef
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umin.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.umin.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_umin_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_umin_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> undef
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umin.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.umin.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_umin_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_umin_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> undef
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umin.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.umin.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_umin_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_umin_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> undef
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umin.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %1
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.umulh.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_umulh_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_umulh_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> undef
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umulh.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.umulh.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_umulh_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_umulh_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> undef
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umulh.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.umulh.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_umulh_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_umulh_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> undef
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.umulh.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_umulh_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_umulh_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> undef
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %1
+}
+
+; Shifts
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_asr_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_asr_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> undef
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asr.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_asr_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_asr_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> undef
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.asr.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_asr_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_asr_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> undef
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.asr.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.asr.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_asr_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_asr_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> undef
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.asr.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %1
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_lsl_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_lsl_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> undef
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_lsl_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_lsl_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> undef
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_lsl_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_lsl_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> undef
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.lsl.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_lsl_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_lsl_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> undef
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %1
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_lsr_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_lsr_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> undef
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_lsr_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_lsr_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> undef
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_lsr_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_lsr_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> undef
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.lsr.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_lsr_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_lsr_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> undef
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %1
+}
+
+; Logical operations
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.and.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_and_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_and_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> undef
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.and.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.and.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_and_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_and_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> undef
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.and.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_and_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_and_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> undef
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_and_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_and_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> undef
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %1
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.bic.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_bic_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_bic_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> undef
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.bic.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.bic.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_bic_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_bic_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> undef
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.bic.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.bic.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_bic_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_bic_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> undef
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.bic.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.bic.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_bic_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_bic_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> undef
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.bic.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %1
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.eor.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_eor_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_eor_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> undef
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.eor.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.eor.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_eor_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_eor_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> undef
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.eor.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.eor.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_eor_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_eor_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> undef
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.eor.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.eor.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_eor_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_eor_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> undef
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.eor.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %1
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.orr.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_orr_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_orr_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> undef
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.orr.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.orr.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_orr_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_orr_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> undef
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.orr.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_orr_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_orr_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> undef
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.orr.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_orr_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_orr_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> undef
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.orr.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %1
+}
+
+; SVE2 - Uniform DSP operations
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_sqsub_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_sqsub_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> undef
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_sqsub_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_sqsub_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> undef
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_sqsub_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_sqsub_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> undef
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_sqsub_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_sqsub_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> undef
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %1
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.u.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+define <vscale x 16 x i8> @replace_uqsub_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
+; CHECK-LABEL: define <vscale x 16 x i8> @replace_uqsub_intrinsic_i8
+; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 16 x i8> undef
+;
+  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.u.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %1
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.u.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+define <vscale x 8 x i16> @replace_uqsub_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
+; CHECK-LABEL: define <vscale x 8 x i16> @replace_uqsub_intrinsic_i16
+; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 8 x i16> undef
+;
+  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.u.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+  ret <vscale x 8 x i16> %1
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.u.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_uqsub_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_uqsub_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> undef
+;
+  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.u.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %1
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.u.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @replace_uqsub_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
+; CHECK-LABEL: define <vscale x 2 x i64> @replace_uqsub_intrinsic_i64
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 2 x i64> undef
+;
+  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.u.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %1
+}
+
+attributes #0 = { "target-features"="+sve,+sve2" }
\ No newline at end of file



More information about the llvm-commits mailing list