[llvm] 50d3feb - [NFC][InstCombine][AArch64] Refactor SVE intrinsics tests.

Paul Walker via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 29 03:48:01 PDT 2025


Author: Paul Walker
Date: 2025-04-29T10:44:08Z
New Revision: 50d3febf1700e5b829a596cc66afe24ae8e4c6c9

URL: https://github.com/llvm/llvm-project/commit/50d3febf1700e5b829a596cc66afe24ae8e4c6c9
DIFF: https://github.com/llvm/llvm-project/commit/50d3febf1700e5b829a596cc66afe24ae8e4c6c9.diff

LOG: [NFC][InstCombine][AArch64] Refactor SVE intrinsics tests.

Only test a single element type because the combines are not sensitive
to them. Extend coverage to cover the majority of SVE merging
intrinsics.

Added: 
    llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes.ll
    llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-to-u-form.ll

Modified: 
    llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-cmp.ll

Removed: 
    llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-m-forms-no-active-lanes.ll
    llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-combine-to-u-forms.ll


################################################################################
diff  --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-m-forms-no-active-lanes.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-m-forms-no-active-lanes.ll
deleted file mode 100644
index 463a5f5d2cfb5..0000000000000
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-m-forms-no-active-lanes.ll
+++ /dev/null
@@ -1,1324 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
-; RUN: opt -S -passes=instcombine < %s | FileCheck %s
-
-target triple = "aarch64-unknown-linux-gnu"
-
-; Replace SVE _m intrinsics with their first operand when the predicate is all false.
-
-; Float arithmetic
-
-declare <vscale x 8 x half> @llvm.aarch64.sve.fabd.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
-define <vscale x 8 x half> @replace_fabd_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x half> @replace_fabd_intrinsic_half
-; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1:[0-9]+]] {
-; CHECK-NEXT:    ret <vscale x 8 x half> [[A]]
-;
-  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fabd.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
-  ret <vscale x 8 x half> %1
-}
-
-declare <vscale x 4 x float> @llvm.aarch64.sve.fabd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
-define <vscale x 4 x float> @replace_fabd_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x float> @replace_fabd_intrinsic_float
-; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
-;
-  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fabd.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
-  ret <vscale x 4 x float> %1
-}
-
-declare <vscale x 2 x double> @llvm.aarch64.sve.fabd.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
-define <vscale x 2 x double> @replace_fabd_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @replace_fabd_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 2 x double> [[A]]
-;
-  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fabd.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
-  ret <vscale x 2 x double> %1
-}
-
-; aarch64_sve_fadd intrinsic combines to a LLVM instruction fadd.
-
-declare <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
-define <vscale x 8 x half> @replace_fadd_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x half> @replace_fadd_intrinsic_half
-; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 8 x half> [[A]]
-;
-  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
-  ret <vscale x 8 x half> %1
-}
-
-declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
-define <vscale x 4 x float> @replace_fadd_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x float> @replace_fadd_intrinsic_float
-; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
-;
-  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
-  ret <vscale x 4 x float> %1
-}
-
-declare <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
-define <vscale x 2 x double> @replace_fadd_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @replace_fadd_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 2 x double> [[A]]
-;
-  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
-  ret <vscale x 2 x double> %1
-}
-
-declare <vscale x 8 x half> @llvm.aarch64.sve.fdiv.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
-define <vscale x 8 x half> @replace_fdiv_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x half> @replace_fdiv_intrinsic_half
-; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 8 x half> [[A]]
-;
-  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fdiv.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
-  ret <vscale x 8 x half> %1
-}
-
-declare <vscale x 4 x float> @llvm.aarch64.sve.fdiv.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
-define <vscale x 4 x float> @replace_fdiv_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x float> @replace_fdiv_intrinsic_float
-; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
-;
-  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fdiv.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
-  ret <vscale x 4 x float> %1
-}
-
-declare <vscale x 2 x double> @llvm.aarch64.sve.fdiv.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
-define <vscale x 2 x double> @replace_fdiv_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @replace_fdiv_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 2 x double> [[A]]
-;
-  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fdiv.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
-  ret <vscale x 2 x double> %1
-}
-
-declare <vscale x 8 x half> @llvm.aarch64.sve.fmax.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
-define <vscale x 8 x half> @replace_fmax_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x half> @replace_fmax_intrinsic_half
-; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 8 x half> [[A]]
-;
-  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmax.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
-  ret <vscale x 8 x half> %1
-}
-
-declare <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
-define <vscale x 4 x float> @replace_fmax_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x float> @replace_fmax_intrinsic_float
-; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
-;
-  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
-  ret <vscale x 4 x float> %1
-}
-
-declare <vscale x 2 x double> @llvm.aarch64.sve.fmax.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
-define <vscale x 2 x double> @replace_fmax_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @replace_fmax_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 2 x double> [[A]]
-;
-  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmax.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
-  ret <vscale x 2 x double> %1
-}
-
-declare <vscale x 8 x half> @llvm.aarch64.sve.fmaxnm.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
-define <vscale x 8 x half> @replace_fmaxnm_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x half> @replace_fmaxnm_intrinsic_half
-; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 8 x half> [[A]]
-;
-  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmaxnm.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
-  ret <vscale x 8 x half> %1
-}
-
-declare <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
-define <vscale x 4 x float> @replace_fmaxnm_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x float> @replace_fmaxnm_intrinsic_float
-; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
-;
-  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
-  ret <vscale x 4 x float> %1
-}
-
-declare <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
-define <vscale x 2 x double> @replace_fmaxnm_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @replace_fmaxnm_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 2 x double> [[A]]
-;
-  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
-  ret <vscale x 2 x double> %1
-}
-
-declare <vscale x 8 x half> @llvm.aarch64.sve.fmin.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
-define <vscale x 8 x half> @replace_fmin_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x half> @replace_fmin_intrinsic_half
-; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 8 x half> [[A]]
-;
-  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmin.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
-  ret <vscale x 8 x half> %1
-}
-
-declare <vscale x 4 x float> @llvm.aarch64.sve.fmin.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
-define <vscale x 4 x float> @replace_fmin_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x float> @replace_fmin_intrinsic_float
-; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
-;
-  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmin.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
-  ret <vscale x 4 x float> %1
-}
-
-declare <vscale x 2 x double> @llvm.aarch64.sve.fmin.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
-define <vscale x 2 x double> @replace_fmin_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @replace_fmin_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 2 x double> [[A]]
-;
-  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmin.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
-  ret <vscale x 2 x double> %1
-}
-
-declare <vscale x 8 x half> @llvm.aarch64.sve.fminnm.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
-define <vscale x 8 x half> @replace_fminnm_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x half> @replace_fminnm_intrinsic_half
-; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 8 x half> [[A]]
-;
-  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fminnm.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
-  ret <vscale x 8 x half> %1
-}
-
-declare <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
-define <vscale x 4 x float> @replace_fminnm_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x float> @replace_fminnm_intrinsic_float
-; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
-;
-  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
-  ret <vscale x 4 x float> %1
-}
-
-declare <vscale x 2 x double> @llvm.aarch64.sve.fminnm.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
-define <vscale x 2 x double> @replace_fminnm_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @replace_fminnm_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 2 x double> [[A]]
-;
-  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fminnm.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
-  ret <vscale x 2 x double> %1
-}
-
-declare <vscale x 8 x half> @llvm.aarch64.sve.fmla.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
-define <vscale x 8 x half> @replace_fmla_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) #0 {
-; CHECK-LABEL: define <vscale x 8 x half> @replace_fmla_intrinsic_half
-; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]], <vscale x 8 x half> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 8 x half> [[A]]
-;
-  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmla.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c)
-  ret <vscale x 8 x half> %1
-}
-
-declare <vscale x 4 x float> @llvm.aarch64.sve.fmla.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
-define <vscale x 4 x float> @replace_fmla_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 {
-; CHECK-LABEL: define <vscale x 4 x float> @replace_fmla_intrinsic_float
-; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]], <vscale x 4 x float> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
-;
-  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmla.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c)
-  ret <vscale x 4 x float> %1
-}
-
-declare <vscale x 2 x double> @llvm.aarch64.sve.fmla.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
-define <vscale x 2 x double> @replace_fmla_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @replace_fmla_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]], <vscale x 2 x double> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 2 x double> [[A]]
-;
-  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmla.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c)
-  ret <vscale x 2 x double> %1
-}
-
-declare <vscale x 8 x half> @llvm.aarch64.sve.fmls.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
-define <vscale x 8 x half> @replace_fmls_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) #0 {
-; CHECK-LABEL: define <vscale x 8 x half> @replace_fmls_intrinsic_half
-; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]], <vscale x 8 x half> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 8 x half> [[A]]
-;
-  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmls.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c)
-  ret <vscale x 8 x half> %1
-}
-
-declare <vscale x 4 x float> @llvm.aarch64.sve.fmls.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
-define <vscale x 4 x float> @replace_fmls_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 {
-; CHECK-LABEL: define <vscale x 4 x float> @replace_fmls_intrinsic_float
-; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]], <vscale x 4 x float> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
-;
-  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmls.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c)
-  ret <vscale x 4 x float> %1
-}
-
-declare <vscale x 2 x double> @llvm.aarch64.sve.fmls.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
-define <vscale x 2 x double> @replace_fmls_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @replace_fmls_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]], <vscale x 2 x double> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 2 x double> [[A]]
-;
-  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmls.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c)
-  ret <vscale x 2 x double> %1
-}
-
-; aarch64_sve_fmul intrinsic combines to a LLVM instruction fmul.
-
-declare <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
-define <vscale x 8 x half> @replace_fmul_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x half> @replace_fmul_intrinsic_half
-; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 8 x half> [[A]]
-;
-  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
-  ret <vscale x 8 x half> %1
-}
-
-declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
-define <vscale x 4 x float> @replace_fmul_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x float> @replace_fmul_intrinsic_float
-; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
-;
-  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
-  ret <vscale x 4 x float> %1
-}
-
-declare <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
-define <vscale x 2 x double> @replace_fmul_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @replace_fmul_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 2 x double> [[A]]
-;
-  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
-  ret <vscale x 2 x double> %1
-}
-
-declare <vscale x 8 x half> @llvm.aarch64.sve.fmulx.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
-define <vscale x 8 x half> @replace_fmulx_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x half> @replace_fmulx_intrinsic_half
-; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 8 x half> [[A]]
-;
-  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmulx.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
-  ret <vscale x 8 x half> %1
-}
-
-declare <vscale x 4 x float> @llvm.aarch64.sve.fmulx.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
-define <vscale x 4 x float> @replace_fmulx_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x float> @replace_fmulx_intrinsic_float
-; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
-;
-  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmulx.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
-  ret <vscale x 4 x float> %1
-}
-
-declare <vscale x 2 x double> @llvm.aarch64.sve.fmulx.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
-define <vscale x 2 x double> @replace_fmulx_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @replace_fmulx_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 2 x double> [[A]]
-;
-  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmulx.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
-  ret <vscale x 2 x double> %1
-}
-
-declare <vscale x 8 x half> @llvm.aarch64.sve.fnmla.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
-define <vscale x 8 x half> @replace_fnmla_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) #0 {
-; CHECK-LABEL: define <vscale x 8 x half> @replace_fnmla_intrinsic_half
-; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]], <vscale x 8 x half> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 8 x half> [[A]]
-;
-  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fnmla.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c)
-  ret <vscale x 8 x half> %1
-}
-
-declare <vscale x 4 x float> @llvm.aarch64.sve.fnmla.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
-define <vscale x 4 x float> @replace_fnmla_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 {
-; CHECK-LABEL: define <vscale x 4 x float> @replace_fnmla_intrinsic_float
-; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]], <vscale x 4 x float> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
-;
-  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fnmla.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c)
-  ret <vscale x 4 x float> %1
-}
-
-declare <vscale x 2 x double> @llvm.aarch64.sve.fnmla.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
-define <vscale x 2 x double> @replace_fnmla_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @replace_fnmla_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]], <vscale x 2 x double> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 2 x double> [[A]]
-;
-  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fnmla.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c)
-  ret <vscale x 2 x double> %1
-}
-
-declare <vscale x 8 x half> @llvm.aarch64.sve.fnmls.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
-define <vscale x 8 x half> @replace_fnmls_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) #0 {
-; CHECK-LABEL: define <vscale x 8 x half> @replace_fnmls_intrinsic_half
-; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]], <vscale x 8 x half> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 8 x half> [[A]]
-;
-  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fnmls.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c)
-  ret <vscale x 8 x half> %1
-}
-
-declare <vscale x 4 x float> @llvm.aarch64.sve.fnmls.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
-define <vscale x 4 x float> @replace_fnmls_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 {
-; CHECK-LABEL: define <vscale x 4 x float> @replace_fnmls_intrinsic_float
-; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]], <vscale x 4 x float> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
-;
-  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fnmls.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c)
-  ret <vscale x 4 x float> %1
-}
-
-declare <vscale x 2 x double> @llvm.aarch64.sve.fnmls.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
-define <vscale x 2 x double> @replace_fnmls_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @replace_fnmls_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]], <vscale x 2 x double> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 2 x double> [[A]]
-;
-  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fnmls.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c)
-  ret <vscale x 2 x double> %1
-}
-
-; aarch64_sve_fsub intrinsic combines to a LLVM instruction fsub.
-
-declare <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
-define <vscale x 8 x half> @replace_fsub_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x half> @replace_fsub_intrinsic_half
-; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 8 x half> [[A]]
-;
-  %1 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
-  ret <vscale x 8 x half> %1
-}
-
-declare <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
-define <vscale x 4 x float> @replace_fsub_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x float> @replace_fsub_intrinsic_float
-; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
-;
-  %1 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
-  ret <vscale x 4 x float> %1
-}
-
-declare <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
-define <vscale x 2 x double> @replace_fsub_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @replace_fsub_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 2 x double> [[A]]
-;
-  %1 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
-  ret <vscale x 2 x double> %1
-}
-
-; Integer arithmetic
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_add_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_add_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
-;
-  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
-  ret <vscale x 16 x i8> %1
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_add_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_add_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
-;
-  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
-  ret <vscale x 8 x i16> %1
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_add_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_add_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
-;
-  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
-  ret <vscale x 4 x i32> %1
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.add.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_add_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_add_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
-;
-  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.add.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %1
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.mla.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_mla_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_mla_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]], <vscale x 16 x i8> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
-;
-  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mla.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
-  ret <vscale x 16 x i8> %1
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.mla.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_mla_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_mla_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i16> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
-;
-  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mla.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
-  ret <vscale x 8 x i16> %1
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.mla.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_mla_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_mla_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]], <vscale x 4 x i32> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
-;
-  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mla.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
-  ret <vscale x 4 x i32> %1
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.mla.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_mla_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_mla_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]], <vscale x 2 x i64> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
-;
-  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mla.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
-  ret <vscale x 2 x i64> %1
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.mls.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_mls_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_mls_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]], <vscale x 16 x i8> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
-;
-  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mls.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
-  ret <vscale x 16 x i8> %1
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.mls.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_mls_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_mls_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i16> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
-;
-  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mls.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
-  ret <vscale x 8 x i16> %1
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.mls.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_mls_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_mls_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]], <vscale x 4 x i32> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
-;
-  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mls.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
-  ret <vscale x 4 x i32> %1
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.mls.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_mls_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_mls_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]], <vscale x 2 x i64> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
-;
-  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mls.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
-  ret <vscale x 2 x i64> %1
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.mul.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_mul_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_mul_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
-;
-  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mul.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
-  ret <vscale x 16 x i8> %1
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_mul_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_mul_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
-;
-  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
-  ret <vscale x 8 x i16> %1
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_mul_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_mul_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
-;
-  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
-  ret <vscale x 4 x i32> %1
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_mul_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_mul_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
-;
-  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %1
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.sabd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_sabd_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_sabd_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
-;
-  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sabd.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
-  ret <vscale x 16 x i8> %1
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.sabd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_sabd_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_sabd_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
-;
-  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sabd.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
-  ret <vscale x 8 x i16> %1
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.sabd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_sabd_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_sabd_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
-;
-  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sabd.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
-  ret <vscale x 4 x i32> %1
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_sabd_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_sabd_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
-;
-  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %1
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.smax.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_smax_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_smax_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
-;
-  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smax.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
-  ret <vscale x 16 x i8> %1
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.smax.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_smax_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_smax_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
-;
-  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smax.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
-  ret <vscale x 8 x i16> %1
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_smax_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_smax_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
-;
-  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
-  ret <vscale x 4 x i32> %1
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.smax.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_smax_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_smax_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
-;
-  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smax.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %1
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.smin.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_smin_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_smin_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
-;
-  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smin.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
-  ret <vscale x 16 x i8> %1
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.smin.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_smin_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_smin_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
-;
-  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smin.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
-  ret <vscale x 8 x i16> %1
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_smin_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_smin_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
-;
-  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
-  ret <vscale x 4 x i32> %1
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.smin.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_smin_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_smin_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
-;
-  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smin.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %1
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.smulh.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_smulh_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_smulh_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
-;
-  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smulh.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
-  ret <vscale x 16 x i8> %1
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.smulh.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_smulh_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_smulh_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
-;
-  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smulh.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
-  ret <vscale x 8 x i16> %1
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_smulh_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_smulh_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
-;
-  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
-  ret <vscale x 4 x i32> %1
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_smulh_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_smulh_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
-;
-  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %1
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.sub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_sub_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_sub_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
-;
-  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sub.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
-  ret <vscale x 16 x i8> %1
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.sub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_sub_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_sub_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
-;
-  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sub.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
-  ret <vscale x 8 x i16> %1
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_sub_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_sub_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
-;
-  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
-  ret <vscale x 4 x i32> %1
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.sub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_sub_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_sub_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
-;
-  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sub.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %1
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.uabd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_uabd_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_uabd_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
-;
-  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uabd.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
-  ret <vscale x 16 x i8> %1
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.uabd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_uabd_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_uabd_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
-;
-  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uabd.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
-  ret <vscale x 8 x i16> %1
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.uabd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_uabd_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_uabd_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
-;
-  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uabd.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
-  ret <vscale x 4 x i32> %1
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.uabd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_uabd_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_uabd_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
-;
-  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uabd.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %1
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.umax.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_umax_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_umax_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
-;
-  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umax.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
-  ret <vscale x 16 x i8> %1
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.umax.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_umax_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_umax_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
-;
-  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umax.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
-  ret <vscale x 8 x i16> %1
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_umax_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_umax_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
-;
-  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
-  ret <vscale x 4 x i32> %1
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.umax.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_umax_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_umax_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
-;
-  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umax.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %1
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.umin.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_umin_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_umin_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
-;
-  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umin.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
-  ret <vscale x 16 x i8> %1
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.umin.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_umin_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_umin_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
-;
-  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umin.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
-  ret <vscale x 8 x i16> %1
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_umin_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_umin_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
-;
-  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
-  ret <vscale x 4 x i32> %1
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.umin.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_umin_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_umin_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
-;
-  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umin.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %1
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.umulh.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_umulh_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_umulh_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
-;
-  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umulh.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
-  ret <vscale x 16 x i8> %1
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.umulh.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_umulh_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_umulh_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
-;
-  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umulh.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
-  ret <vscale x 8 x i16> %1
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_umulh_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_umulh_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
-;
-  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
-  ret <vscale x 4 x i32> %1
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_umulh_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_umulh_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
-;
-  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %1
-}
-
-; Shifts
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_asr_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_asr_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
-;
-  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
-  ret <vscale x 16 x i8> %1
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_asr_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_asr_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
-;
-  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
-  ret <vscale x 8 x i16> %1
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_asr_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_asr_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
-;
-  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
-  ret <vscale x 4 x i32> %1
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_asr_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_asr_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
-;
-  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %1
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_lsl_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_lsl_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
-;
-  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
-  ret <vscale x 16 x i8> %1
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_lsl_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_lsl_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
-;
-  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
-  ret <vscale x 8 x i16> %1
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_lsl_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_lsl_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
-;
-  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
-  ret <vscale x 4 x i32> %1
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_lsl_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_lsl_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
-;
-  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %1
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_lsr_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_lsr_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
-;
-  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
-  ret <vscale x 16 x i8> %1
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_lsr_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_lsr_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
-;
-  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
-  ret <vscale x 8 x i16> %1
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_lsr_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_lsr_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
-;
-  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
-  ret <vscale x 4 x i32> %1
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_lsr_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_lsr_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
-;
-  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %1
-}
-
-; Logical operations
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.and.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_and_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_and_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
-;
-  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.and.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
-  ret <vscale x 16 x i8> %1
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.and.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_and_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_and_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
-;
-  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.and.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
-  ret <vscale x 8 x i16> %1
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_and_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_and_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
-;
-  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
-  ret <vscale x 4 x i32> %1
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_and_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_and_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
-;
-  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %1
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_bic_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_bic_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
-;
-  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
-  ret <vscale x 16 x i8> %1
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_bic_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_bic_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
-;
-  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
-  ret <vscale x 8 x i16> %1
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_bic_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_bic_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
-;
-  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
-  ret <vscale x 4 x i32> %1
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_bic_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_bic_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
-;
-  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %1
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_eor_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_eor_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
-;
-  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
-  ret <vscale x 16 x i8> %1
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_eor_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_eor_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
-;
-  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
-  ret <vscale x 8 x i16> %1
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_eor_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_eor_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
-;
-  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
-  ret <vscale x 4 x i32> %1
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_eor_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_eor_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
-;
-  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %1
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.orr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_orr_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_orr_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
-;
-  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.orr.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
-  ret <vscale x 16 x i8> %1
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.orr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_orr_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_orr_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
-;
-  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.orr.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
-  ret <vscale x 8 x i16> %1
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_orr_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_orr_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
-;
-  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
-  ret <vscale x 4 x i32> %1
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_orr_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_orr_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
-;
-  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %1
-}
-
-; SVE2 - Uniform DSP operations
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_sqsub_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_sqsub_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
-;
-  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
-  ret <vscale x 16 x i8> %1
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_sqsub_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_sqsub_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
-;
-  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
-  ret <vscale x 8 x i16> %1
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_sqsub_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_sqsub_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
-;
-  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
-  ret <vscale x 4 x i32> %1
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_sqsub_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_sqsub_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
-;
-  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %1
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_uqsub_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_uqsub_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[A]]
-;
-  %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
-  ret <vscale x 16 x i8> %1
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_uqsub_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_uqsub_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[A]]
-;
-  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
-  ret <vscale x 8 x i16> %1
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_uqsub_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_uqsub_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
-;
-  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
-  ret <vscale x 4 x i32> %1
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_uqsub_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_uqsub_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[A]]
-;
-  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.nxv2i64(<vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %1
-}
-
-attributes #0 = { "target-features"="+sve,+sve2" }

diff  --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-cmp.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-cmp.ll
index 1833bb6db248d..3d5fab800aaee 100644
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-cmp.ll
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-cmp.ll
@@ -179,11 +179,21 @@ entry:
 }
 
 define <vscale x 8 x i1> @test_facge(<vscale x 8 x half> %a, <vscale x 8 x half> %b){ry:
+; CHECK-LABEL: define <vscale x 8 x i1> @test_facge(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT:  [[RY:.*:]]
+; CHECK-NEXT:    ret <vscale x 8 x i1> zeroinitializer
+;
   %0 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.facge.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
   ret <vscale x 8 x i1> %0
 }
 
 define <vscale x 8 x i1> @test_facgt(<vscale x 8 x half> %a, <vscale x 8 x half> %b){ry:
+; CHECK-LABEL: define <vscale x 8 x i1> @test_facgt(
+; CHECK-SAME: <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) {
+; CHECK-NEXT:  [[RY:.*:]]
+; CHECK-NEXT:    ret <vscale x 8 x i1> zeroinitializer
+;
   %0 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.facgt.nxv8f16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
   ret <vscale x 8 x i1> %0
 }

diff  --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes.ll
new file mode 100644
index 0000000000000..1b5c6a2d4c3fd
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes.ll
@@ -0,0 +1,1237 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Replace SVE merging intrinsics with their first operand when they take an all false predicate.
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.abs.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_abs_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_abs_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.abs.nxv4i32(<vscale x 4 x i32> [[A]], <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.abs.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_add_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_add_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.addp.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_addp_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_addp_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.addp.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.addp.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_and_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_and_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_asr_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_asr_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_bic_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_bic_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.cls.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_cls_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_cls_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cls.nxv4i32(<vscale x 4 x i32> [[A]], <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cls.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.clz.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_clz_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_clz_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.clz.nxv4i32(<vscale x 4 x i32> [[A]], <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.clz.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.cnot.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_cnot_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_cnot_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnot.nxv4i32(<vscale x 4 x i32> [[A]], <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnot.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_cnt_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_cnt_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4i32(<vscale x 4 x i32> [[A]], <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_eor_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_eor_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fabd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @simplify_fabd_intrinsic(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @simplify_fabd_intrinsic
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fabd.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fabs.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>)
+define <vscale x 4 x float> @simplify_fabs_intrinsic(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @simplify_fabs_intrinsic
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fabs.nxv4f32(<vscale x 4 x float> [[A]], <vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = tail call <vscale x 4 x float> @llvm.aarch64.sve.fabs.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @simplify_fadd_intrinsic(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @simplify_fadd_intrinsic
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.faddp.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @simplify_faddp_intrinsic(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @simplify_faddp_intrinsic
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.faddp.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.faddp.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @simplify_famax_intrinsic(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @simplify_famax_intrinsic
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @simplify_famin_intrinsic(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @simplify_famin_intrinsic
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fdiv.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @simplify_fdiv_intrinsic(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @simplify_fdiv_intrinsic
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fdiv.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fdivr.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @simplify_fdivr_intrinsic(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @simplify_fdivr_intrinsic
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fdivr.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fdivr.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.flogb.nxv4f32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x float>)
+define <vscale x 4 x i32> @simplify_flogb_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_flogb_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.flogb.nxv4f32(<vscale x 4 x i32> [[A]], <vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.flogb.nxv4f32(<vscale x 4 x i32> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmad.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @simplify_fmad_intrinsic(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @simplify_fmad_intrinsic
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]], <vscale x 4 x float> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmad.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]], <vscale x 4 x float> [[C]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmad.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @simplify_fmax_intrinsic(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @simplify_fmax_intrinsic
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmaxp.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @simplify_fmaxp_intrinsic(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @simplify_fmaxp_intrinsic
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmaxp.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmaxp.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @simplify_fmaxnm_intrinsic(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @simplify_fmaxnm_intrinsic
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmaxnmp.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @simplify_fmaxnmp_intrinsic(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @simplify_fmaxnmp_intrinsic
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmaxnmp.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmaxnmp.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmin.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @simplify_fmin_intrinsic(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @simplify_fmin_intrinsic
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmin.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fminp.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @simplify_fminp_intrinsic(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @simplify_fminp_intrinsic
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fminp.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fminp.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @simplify_fminnm_intrinsic(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @simplify_fminnm_intrinsic
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fminnmp.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @simplify_fminnmp_intrinsic(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @simplify_fminnmp_intrinsic
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fminnmp.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fminnmp.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmla.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @simplify_fmla_intrinsic(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @simplify_fmla_intrinsic
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]], <vscale x 4 x float> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmla.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmls.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @simplify_fmls_intrinsic(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @simplify_fmls_intrinsic
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]], <vscale x 4 x float> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmls.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmsb.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @simplify_fmsb_intrinsic(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @simplify_fmsb_intrinsic
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]], <vscale x 4 x float> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmsb.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]], <vscale x 4 x float> [[C]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmsb.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @simplify_fmul_intrinsic(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @simplify_fmul_intrinsic
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmulx.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @simplify_fmulx_intrinsic(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @simplify_fmulx_intrinsic
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmulx.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fneg.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>)
+define <vscale x 4 x float> @simplify_fneg_intrinsic(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @simplify_fneg_intrinsic
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fneg.nxv4f32(<vscale x 4 x float> [[A]], <vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = tail call <vscale x 4 x float> @llvm.aarch64.sve.fneg.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fnmad.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @simplify_fnmad_intrinsic(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @simplify_fnmad_intrinsic
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]], <vscale x 4 x float> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fnmad.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]], <vscale x 4 x float> [[C]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fnmad.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fnmla.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @simplify_fnmla_intrinsic(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @simplify_fnmla_intrinsic
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]], <vscale x 4 x float> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fnmla.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fnmls.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @simplify_fnmls_intrinsic(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @simplify_fnmls_intrinsic
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]], <vscale x 4 x float> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fnmls.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fnmsb.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @simplify_fnmsb_intrinsic(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @simplify_fnmsb_intrinsic
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]], <vscale x 4 x float> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fnmsb.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]], <vscale x 4 x float> [[C]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fnmsb.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.frecpx.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>)
+define <vscale x 4 x float> @simplify_frecpx_intrinsic(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @simplify_frecpx_intrinsic
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.frecpx.nxv4f32(<vscale x 4 x float> [[A]], <vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = tail call <vscale x 4 x float> @llvm.aarch64.sve.frecpx.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.frinta.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>)
+define <vscale x 4 x float> @simplify_frinta_intrinsic(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @simplify_frinta_intrinsic
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.frinta.nxv4f32(<vscale x 4 x float> [[A]], <vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = tail call <vscale x 4 x float> @llvm.aarch64.sve.frinta.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.frinti.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>)
+define <vscale x 4 x float> @simplify_frinti_intrinsic(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @simplify_frinti_intrinsic
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.frinti.nxv4f32(<vscale x 4 x float> [[A]], <vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = tail call <vscale x 4 x float> @llvm.aarch64.sve.frinti.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.frintm.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>)
+define <vscale x 4 x float> @simplify_frintm_intrinsic(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @simplify_frintm_intrinsic
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.frintm.nxv4f32(<vscale x 4 x float> [[A]], <vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = tail call <vscale x 4 x float> @llvm.aarch64.sve.frintm.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.frintn.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>)
+define <vscale x 4 x float> @simplify_frintn_intrinsic(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @simplify_frintn_intrinsic
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.frintn.nxv4f32(<vscale x 4 x float> [[A]], <vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = tail call <vscale x 4 x float> @llvm.aarch64.sve.frintn.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.frintp.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>)
+define <vscale x 4 x float> @simplify_frintp_intrinsic(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @simplify_frintp_intrinsic
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.frintp.nxv4f32(<vscale x 4 x float> [[A]], <vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = tail call <vscale x 4 x float> @llvm.aarch64.sve.frintp.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.frintx.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>)
+define <vscale x 4 x float> @simplify_frintx_intrinsic(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @simplify_frintx_intrinsic
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.frintx.nxv4f32(<vscale x 4 x float> [[A]], <vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = tail call <vscale x 4 x float> @llvm.aarch64.sve.frintx.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.frintz.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>)
+define <vscale x 4 x float> @simplify_frintz_intrinsic(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @simplify_frintz_intrinsic
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.frintz.nxv4f32(<vscale x 4 x float> [[A]], <vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = tail call <vscale x 4 x float> @llvm.aarch64.sve.frintz.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fsqrt.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 4 x float>)
+define <vscale x 4 x float> @simplify_fsqrt_intrinsic(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: define <vscale x 4 x float> @simplify_fsqrt_intrinsic
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.fsqrt.nxv4f32(<vscale x 4 x float> [[A]], <vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = tail call <vscale x 4 x float> @llvm.aarch64.sve.fsqrt.nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @simplify_fsub_intrinsic(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @simplify_fsub_intrinsic
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x float> [[A]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fsubr.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @simplify_fsubr_intrinsic(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @simplify_fsubr_intrinsic
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fsubr.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fsubr.nxv4f32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_lsl_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_lsl_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_lsr_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_lsr_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.mad.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_mad_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_mad_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]], <vscale x 4 x i32> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mad.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]], <vscale x 4 x i32> [[C]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mad.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.mla.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_mla_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_mla_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]], <vscale x 4 x i32> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mla.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.mls.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_mls_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_mls_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]], <vscale x 4 x i32> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mls.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.msb.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_msb_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_msb_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]], <vscale x 4 x i32> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.msb.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]], <vscale x 4 x i32> [[C]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.msb.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_mul_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_mul_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.neg.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_neg_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_neg_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.neg.nxv4i32(<vscale x 4 x i32> [[A]], <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.neg.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.not.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_not_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_not_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.not.nxv4i32(<vscale x 4 x i32> [[A]], <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.not.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_orr_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_orr_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.rbit.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_rbit_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_rbit_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.rbit.nxv4i32(<vscale x 4 x i32> [[A]], <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.rbit.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_revb_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_revb_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> [[A]], <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.revb.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_revh_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_revh_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> [[A]], <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.revh.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @simplify_revw_intrinsic(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: define <vscale x 2 x i64> @simplify_revw_intrinsic
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> [[A]], <vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[R]]
+;
+  %r = tail call <vscale x 2 x i64> @llvm.aarch64.sve.revw.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.revd.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_revd_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_revd_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.revd.nxv4i32(<vscale x 4 x i32> [[A]], <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.revd.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sabd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_sabd_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_sabd_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sabd.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_sdiv_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_sdiv_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sdivr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_sdivr_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_sdivr_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sdivr.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sdivr.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.shadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_shadd_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_shadd_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shadd.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shadd.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.shsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_shsub_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_shsub_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.shsubr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_shsubr_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_shsubr_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsubr.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsubr.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_smax_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_smax_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.smaxp.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_smaxp_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_smaxp_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smaxp.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smaxp.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_smin_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_smin_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sminp.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_sminp_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_sminp_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sminp.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sminp.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_smulh_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_smulh_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_sqabs_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_sqabs_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32> [[A]], <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_sqadd_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_sqadd_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqneg.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_sqneg_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_sqneg_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqneg.nxv4i32(<vscale x 4 x i32> [[A]], <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqneg.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrshl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_sqrshl_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_sqrshl_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshl.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshl.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_sqshl_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_sqshl_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_sqsub_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_sqsub_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqsubr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_sqsubr_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_sqsubr_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqsubr.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqsubr.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.srhadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_srhadd_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_srhadd_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.srhadd.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.srhadd.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.srshl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_srshl_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_srshl_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.srshl.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.srshl.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_sub_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_sub_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_subr_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_subr_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.subr.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.suqadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_suqadd_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_suqadd_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.suqadd.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.suqadd.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sxtb.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_sxtb_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_sxtb_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sxtb.nxv4i32(<vscale x 4 x i32> [[A]], <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sxtb.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sxth.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_sxth_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_sxth_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sxth.nxv4i32(<vscale x 4 x i32> [[A]], <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sxth.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @simplify_sxtw_intrinsic(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: define <vscale x 2 x i64> @simplify_sxtw_intrinsic
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> [[A]], <vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[R]]
+;
+  %r = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uabd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_uabd_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_uabd_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uabd.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.udiv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_udiv_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_udiv_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.udiv.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.udiv.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.udivr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_udivr_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_udivr_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.udivr.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.udivr.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uhadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_uhadd_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_uhadd_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhadd.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhadd.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_uhsub_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_uhsub_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uhsubr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_uhsubr_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_uhsubr_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsubr.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsubr.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_umax_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_umax_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.umaxp.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_umaxp_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_umaxp_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umaxp.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umaxp.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_umin_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_umin_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uminp.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_uminp_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_uminp_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uminp.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uminp.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_umulh_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_umulh_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_uqadd_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_uqadd_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uqrshl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_uqrshl_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_uqrshl_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshl.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshl.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_uqshl_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_uqshl_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_uqsub_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_uqsub_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[A]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsubr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_uqsubr_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_uqsubr_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsubr.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsubr.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_urecpe_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_urecpe_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32> [[A]], <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.urhadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_urhadd_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_urhadd_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urhadd.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urhadd.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.urshl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_urshl_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_urshl_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urshl.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.urshl.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.ursqrte.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_ursqrte_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_ursqrte_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ursqrte.nxv4i32(<vscale x 4 x i32> [[A]], <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ursqrte.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.usqadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_usqadd_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_usqadd_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.usqadd.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.usqadd.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_uxtb_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_uxtb_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32> [[A]], <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxtb.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uxth.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @simplify_uxth_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: define <vscale x 4 x i32> @simplify_uxth_intrinsic
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxth.nxv4i32(<vscale x 4 x i32> [[A]], <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uxth.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @simplify_uxtw_intrinsic(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: define <vscale x 2 x i64> @simplify_uxtw_intrinsic
+; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> [[A]], <vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> [[B]])
+; CHECK-NEXT:    ret <vscale x 2 x i64> [[R]]
+;
+  %r = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> zeroinitializer, <vscale x 2 x i64> %b)
+  ret <vscale x 2 x i64> %r
+}
+
+attributes #0 = { "target-features"="+sve" }

diff  --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-to-u-form.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-to-u-form.ll
new file mode 100644
index 0000000000000..59f5e56cfd38a
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-to-u-form.ll
@@ -0,0 +1,448 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -S -passes=instcombine < %s | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Simplify SVE merging intrinics into their equivalent undef (_u) variants when they take an all true predicate.
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_add_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_add_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_and_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_and_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_asr_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_asr_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.asr.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_bic_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_bic_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.bic.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_eor_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_eor_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.eor.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fabd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fabd_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fabd_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fabd.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fabd.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fadd_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fadd_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = fadd fast <vscale x 4 x float> [[A]], [[B]]
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_famax_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_famax_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_famin_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_famin_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fdiv.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fdiv_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fdiv_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fdiv.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fdiv.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fmax_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fmax_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmax.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fmaxnm_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fmaxnm_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmin.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fmin_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fmin_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmin.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmin.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fminnm_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fminnm_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fminnm.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmla.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fmla_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fmla_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]], <vscale x 4 x float> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmla.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]], <vscale x 4 x float> [[C]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmla.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmls.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fmls_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fmls_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]], <vscale x 4 x float> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmls.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]], <vscale x 4 x float> [[C]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmls.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fmul_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fmul_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = fmul fast <vscale x 4 x float> [[A]], [[B]]
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmulx.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fmulx_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fmulx_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmulx.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmulx.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fnmla.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fnmla_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fnmla_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]], <vscale x 4 x float> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fnmla.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]], <vscale x 4 x float> [[C]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fnmla.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fnmls.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fnmls_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fnmls_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]], <vscale x 4 x float> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fnmls.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]], <vscale x 4 x float> [[C]])
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fnmls.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+define <vscale x 4 x float> @replace_fsub_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @replace_fsub_intrinsic_float
+; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = fsub fast <vscale x 4 x float> [[A]], [[B]]
+; CHECK-NEXT:    ret <vscale x 4 x float> [[R]]
+;
+  %r = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+  ret <vscale x 4 x float> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_lsl_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_lsl_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_lsr_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_lsr_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.mla.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_mla_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_mla_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]], <vscale x 4 x i32> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mla.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]], <vscale x 4 x i32> [[C]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mla.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.mls.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_mls_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_mls_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]], <vscale x 4 x i32> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mls.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]], <vscale x 4 x i32> [[C]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mls.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_mul_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_mul_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_orr_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_orr_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sabd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_sabd_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_sabd_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sabd.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sabd.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_sdiv_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_sdiv_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sdiv.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_smax_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_smax_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_smin_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_smin_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smin.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_smulh_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_smulh_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_sqsub_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_sqsub_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_sub_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_sub_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uabd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_uabd_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_uabd_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uabd.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uabd.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.udiv.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_udiv_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_udiv_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.udiv.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.udiv.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_umax_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_umax_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umax.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_umin_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_umin_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umin.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_umulh_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_umulh_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+define <vscale x 4 x i32> @replace_uqsub_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
+; CHECK-LABEL: define <vscale x 4 x i32> @replace_uqsub_intrinsic_i32
+; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
+; CHECK-NEXT:    ret <vscale x 4 x i32> [[R]]
+;
+  %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+  ret <vscale x 4 x i32> %r
+}
+
+attributes #0 = { "target-features"="+sve,+sve2" }

diff  --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-combine-to-u-forms.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-combine-to-u-forms.ll
deleted file mode 100644
index bbf4b3c65c30a..0000000000000
--- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsics-combine-to-u-forms.ll
+++ /dev/null
@@ -1,2021 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
-; RUN: opt -S -passes=instcombine < %s | FileCheck %s
-
-target triple = "aarch64-unknown-linux-gnu"
-
-declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
-declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
-declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
-declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)
-
-; Replace with SVE merging intrinsics to their equivalent undef (_u) variants when they take an all active predicate.
-
-; Float arithmetic
-
-declare <vscale x 8 x half> @llvm.aarch64.sve.fabd.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
-define <vscale x 8 x half> @replace_fabd_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x half> @replace_fabd_intrinsic_half
-; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1:[0-9]+]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fabd.u.nxv8f16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
-; CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
-;
-  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fabd.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
-  ret <vscale x 8 x half> %2
-}
-
-declare <vscale x 4 x float> @llvm.aarch64.sve.fabd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
-define <vscale x 4 x float> @replace_fabd_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x float> @replace_fabd_intrinsic_float
-; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fabd.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
-; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
-;
-  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fabd.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
-  ret <vscale x 4 x float> %2
-}
-
-declare <vscale x 2 x double> @llvm.aarch64.sve.fabd.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
-define <vscale x 2 x double> @replace_fabd_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @replace_fabd_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fabd.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fabd.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
-  ret <vscale x 2 x double> %2
-}
-
-define <vscale x 2 x double> @no_replace_fabd_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @no_replace_fabd_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fabd.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fabd.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
-  ret <vscale x 2 x double> %2
-}
-
-; aarch64_sve_fadd intrinsic combines to a LLVM instruction fadd.
-
-declare <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
-define <vscale x 8 x half> @replace_fadd_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x half> @replace_fadd_intrinsic_half
-; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast <vscale x 8 x half> [[A]], [[B]]
-; CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
-;
-  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
-  ret <vscale x 8 x half> %2
-}
-
-declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
-define <vscale x 4 x float> @replace_fadd_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x float> @replace_fadd_intrinsic_float
-; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast <vscale x 4 x float> [[A]], [[B]]
-; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
-;
-  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
-  ret <vscale x 4 x float> %2
-}
-
-declare <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
-define <vscale x 2 x double> @replace_fadd_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @replace_fadd_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast <vscale x 2 x double> [[A]], [[B]]
-; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
-  ret <vscale x 2 x double> %2
-}
-
-define <vscale x 2 x double> @no_replace_fadd_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @no_replace_fadd_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
-  ret <vscale x 2 x double> %2
-}
-
-declare <vscale x 8 x half> @llvm.aarch64.sve.fdiv.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
-define <vscale x 8 x half> @replace_fdiv_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x half> @replace_fdiv_intrinsic_half
-; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fdiv.u.nxv8f16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
-; CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
-;
-  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fdiv.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
-  ret <vscale x 8 x half> %2
-}
-
-declare <vscale x 4 x float> @llvm.aarch64.sve.fdiv.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
-define <vscale x 4 x float> @replace_fdiv_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x float> @replace_fdiv_intrinsic_float
-; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fdiv.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
-; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
-;
-  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fdiv.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
-  ret <vscale x 4 x float> %2
-}
-
-declare <vscale x 2 x double> @llvm.aarch64.sve.fdiv.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
-define <vscale x 2 x double> @replace_fdiv_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @replace_fdiv_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fdiv.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fdiv.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
-  ret <vscale x 2 x double> %2
-}
-
-define <vscale x 2 x double> @no_replace_fdiv_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @no_replace_fdiv_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fdiv.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fdiv.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
-  ret <vscale x 2 x double> %2
-}
-
-declare <vscale x 8 x half> @llvm.aarch64.sve.fmax.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
-define <vscale x 8 x half> @replace_fmax_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x half> @replace_fmax_intrinsic_half
-; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmax.u.nxv8f16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
-; CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
-;
-  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmax.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
-  ret <vscale x 8 x half> %2
-}
-
-declare <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
-define <vscale x 4 x float> @replace_fmax_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x float> @replace_fmax_intrinsic_float
-; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmax.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
-; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
-;
-  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
-  ret <vscale x 4 x float> %2
-}
-
-declare <vscale x 2 x double> @llvm.aarch64.sve.fmax.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
-define <vscale x 2 x double> @replace_fmax_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @replace_fmax_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmax.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmax.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
-  ret <vscale x 2 x double> %2
-}
-
-define <vscale x 2 x double> @no_replace_fmax_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @no_replace_fmax_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmax.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmax.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
-  ret <vscale x 2 x double> %2
-}
-
-declare <vscale x 8 x half> @llvm.aarch64.sve.fmaxnm.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
-define <vscale x 8 x half> @replace_fmaxnm_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x half> @replace_fmaxnm_intrinsic_half
-; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmaxnm.u.nxv8f16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
-; CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
-;
-  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmaxnm.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
-  ret <vscale x 8 x half> %2
-}
-
-declare <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
-define <vscale x 4 x float> @replace_fmaxnm_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x float> @replace_fmaxnm_intrinsic_float
-; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
-; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
-;
-  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
-  ret <vscale x 4 x float> %2
-}
-
-declare <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
-define <vscale x 2 x double> @replace_fmaxnm_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @replace_fmaxnm_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
-  ret <vscale x 2 x double> %2
-}
-
-define <vscale x 2 x double> @no_replace_fmaxnm_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @no_replace_fmaxnm_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
-  ret <vscale x 2 x double> %2
-}
-
-declare <vscale x 8 x half> @llvm.aarch64.sve.fmin.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
-define <vscale x 8 x half> @replace_fmin_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x half> @replace_fmin_intrinsic_half
-; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmin.u.nxv8f16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
-; CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
-;
-  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmin.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
-  ret <vscale x 8 x half> %2
-}
-
-declare <vscale x 4 x float> @llvm.aarch64.sve.fmin.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
-define <vscale x 4 x float> @replace_fmin_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x float> @replace_fmin_intrinsic_float
-; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmin.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
-; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
-;
-  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmin.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
-  ret <vscale x 4 x float> %2
-}
-
-declare <vscale x 2 x double> @llvm.aarch64.sve.fmin.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
-define <vscale x 2 x double> @replace_fmin_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @replace_fmin_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmin.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmin.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
-  ret <vscale x 2 x double> %2
-}
-
-define <vscale x 2 x double> @no_replace_fmin_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @no_replace_fmin_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmin.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmin.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
-  ret <vscale x 2 x double> %2
-}
-
-declare <vscale x 8 x half> @llvm.aarch64.sve.fminnm.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
-define <vscale x 8 x half> @replace_fminnm_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x half> @replace_fminnm_intrinsic_half
-; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fminnm.u.nxv8f16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
-; CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
-;
-  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fminnm.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
-  ret <vscale x 8 x half> %2
-}
-
-declare <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
-define <vscale x 4 x float> @replace_fminnm_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x float> @replace_fminnm_intrinsic_float
-; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fminnm.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
-; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
-;
-  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
-  ret <vscale x 4 x float> %2
-}
-
-declare <vscale x 2 x double> @llvm.aarch64.sve.fminnm.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
-define <vscale x 2 x double> @replace_fminnm_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @replace_fminnm_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fminnm.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fminnm.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
-  ret <vscale x 2 x double> %2
-}
-
-define <vscale x 2 x double> @no_replace_fminnm_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @no_replace_fminnm_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fminnm.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fminnm.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
-  ret <vscale x 2 x double> %2
-}
-
-declare <vscale x 8 x half> @llvm.aarch64.sve.fmla.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
-define <vscale x 8 x half> @replace_fmla_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) #0 {
-; CHECK-LABEL: define <vscale x 8 x half> @replace_fmla_intrinsic_half
-; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]], <vscale x 8 x half> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmla.u.nxv8f16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]], <vscale x 8 x half> [[C]])
-; CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
-;
-  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmla.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c)
-  ret <vscale x 8 x half> %2
-}
-
-declare <vscale x 4 x float> @llvm.aarch64.sve.fmla.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
-define <vscale x 4 x float> @replace_fmla_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 {
-; CHECK-LABEL: define <vscale x 4 x float> @replace_fmla_intrinsic_float
-; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]], <vscale x 4 x float> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmla.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]], <vscale x 4 x float> [[C]])
-; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
-;
-  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmla.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c)
-  ret <vscale x 4 x float> %2
-}
-
-declare <vscale x 2 x double> @llvm.aarch64.sve.fmla.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
-define <vscale x 2 x double> @replace_fmla_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @replace_fmla_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]], <vscale x 2 x double> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmla.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]], <vscale x 2 x double> [[C]])
-; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmla.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c)
-  ret <vscale x 2 x double> %2
-}
-
-define <vscale x 2 x double> @no_replace_fmla_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @no_replace_fmla_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]], <vscale x 2 x double> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmla.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]], <vscale x 2 x double> [[C]])
-; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmla.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c)
-  ret <vscale x 2 x double> %2
-}
-
-declare <vscale x 8 x half> @llvm.aarch64.sve.fmls.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
-define <vscale x 8 x half> @replace_fmls_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) #0 {
-; CHECK-LABEL: define <vscale x 8 x half> @replace_fmls_intrinsic_half
-; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]], <vscale x 8 x half> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmls.u.nxv8f16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]], <vscale x 8 x half> [[C]])
-; CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
-;
-  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmls.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c)
-  ret <vscale x 8 x half> %2
-}
-
-declare <vscale x 4 x float> @llvm.aarch64.sve.fmls.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
-define <vscale x 4 x float> @replace_fmls_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 {
-; CHECK-LABEL: define <vscale x 4 x float> @replace_fmls_intrinsic_float
-; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]], <vscale x 4 x float> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmls.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]], <vscale x 4 x float> [[C]])
-; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
-;
-  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmls.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c)
-  ret <vscale x 4 x float> %2
-}
-
-declare <vscale x 2 x double> @llvm.aarch64.sve.fmls.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
-define <vscale x 2 x double> @replace_fmls_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @replace_fmls_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]], <vscale x 2 x double> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmls.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]], <vscale x 2 x double> [[C]])
-; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmls.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c)
-  ret <vscale x 2 x double> %2
-}
-
-define <vscale x 2 x double> @no_replace_fmls_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @no_replace_fmls_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]], <vscale x 2 x double> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmls.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]], <vscale x 2 x double> [[C]])
-; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmls.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c)
-  ret <vscale x 2 x double> %2
-}
-
-; aarch64_sve_fmul intrinsic combines to a LLVM instruction fmul.
-
-declare <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
-define <vscale x 8 x half> @replace_fmul_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x half> @replace_fmul_intrinsic_half
-; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast <vscale x 8 x half> [[A]], [[B]]
-; CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
-;
-  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
-  ret <vscale x 8 x half> %2
-}
-
-declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
-define <vscale x 4 x float> @replace_fmul_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x float> @replace_fmul_intrinsic_float
-; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast <vscale x 4 x float> [[A]], [[B]]
-; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
-;
-  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
-  ret <vscale x 4 x float> %2
-}
-
-declare <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
-define <vscale x 2 x double> @replace_fmul_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @replace_fmul_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast <vscale x 2 x double> [[A]], [[B]]
-; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
-  ret <vscale x 2 x double> %2
-}
-
-define <vscale x 2 x double> @no_replace_fmul_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @no_replace_fmul_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
-  ret <vscale x 2 x double> %2
-}
-
-declare <vscale x 8 x half> @llvm.aarch64.sve.fmulx.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
-define <vscale x 8 x half> @replace_fmulx_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x half> @replace_fmulx_intrinsic_half
-; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmulx.u.nxv8f16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
-; CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
-;
-  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmulx.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
-  ret <vscale x 8 x half> %2
-}
-
-declare <vscale x 4 x float> @llvm.aarch64.sve.fmulx.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
-define <vscale x 4 x float> @replace_fmulx_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x float> @replace_fmulx_intrinsic_float
-; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmulx.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
-; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
-;
-  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmulx.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
-  ret <vscale x 4 x float> %2
-}
-
-declare <vscale x 2 x double> @llvm.aarch64.sve.fmulx.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
-define <vscale x 2 x double> @replace_fmulx_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @replace_fmulx_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmulx.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmulx.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
-  ret <vscale x 2 x double> %2
-}
-
-define <vscale x 2 x double> @no_replace_fmulx_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @no_replace_fmulx_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmulx.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmulx.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
-  ret <vscale x 2 x double> %2
-}
-
-declare <vscale x 8 x half> @llvm.aarch64.sve.fnmla.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
-define <vscale x 8 x half> @replace_fnmla_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) #0 {
-; CHECK-LABEL: define <vscale x 8 x half> @replace_fnmla_intrinsic_half
-; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]], <vscale x 8 x half> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fnmla.u.nxv8f16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]], <vscale x 8 x half> [[C]])
-; CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
-;
-  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fnmla.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c)
-  ret <vscale x 8 x half> %2
-}
-
-declare <vscale x 4 x float> @llvm.aarch64.sve.fnmla.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
-define <vscale x 4 x float> @replace_fnmla_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 {
-; CHECK-LABEL: define <vscale x 4 x float> @replace_fnmla_intrinsic_float
-; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]], <vscale x 4 x float> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fnmla.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]], <vscale x 4 x float> [[C]])
-; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
-;
-  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fnmla.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c)
-  ret <vscale x 4 x float> %2
-}
-
-declare <vscale x 2 x double> @llvm.aarch64.sve.fnmla.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
-define <vscale x 2 x double> @replace_fnmla_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @replace_fnmla_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]], <vscale x 2 x double> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fnmla.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]], <vscale x 2 x double> [[C]])
-; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fnmla.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c)
-  ret <vscale x 2 x double> %2
-}
-
-define <vscale x 2 x double> @no_replace_fnmla_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @no_replace_fnmla_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]], <vscale x 2 x double> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fnmla.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]], <vscale x 2 x double> [[C]])
-; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fnmla.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c)
-  ret <vscale x 2 x double> %2
-}
-
-declare <vscale x 8 x half> @llvm.aarch64.sve.fnmls.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>, <vscale x 8 x half>)
-define <vscale x 8 x half> @replace_fnmls_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) #0 {
-; CHECK-LABEL: define <vscale x 8 x half> @replace_fnmls_intrinsic_half
-; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]], <vscale x 8 x half> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fnmls.u.nxv8f16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]], <vscale x 8 x half> [[C]])
-; CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
-;
-  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fnmls.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c)
-  ret <vscale x 8 x half> %2
-}
-
-declare <vscale x 4 x float> @llvm.aarch64.sve.fnmls.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
-define <vscale x 4 x float> @replace_fnmls_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #0 {
-; CHECK-LABEL: define <vscale x 4 x float> @replace_fnmls_intrinsic_float
-; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]], <vscale x 4 x float> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fnmls.u.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]], <vscale x 4 x float> [[C]])
-; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
-;
-  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fnmls.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c)
-  ret <vscale x 4 x float> %2
-}
-
-declare <vscale x 2 x double> @llvm.aarch64.sve.fnmls.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)
-define <vscale x 2 x double> @replace_fnmls_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @replace_fnmls_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]], <vscale x 2 x double> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fnmls.u.nxv2f64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]], <vscale x 2 x double> [[C]])
-; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fnmls.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c)
-  ret <vscale x 2 x double> %2
-}
-
-define <vscale x 2 x double> @no_replace_fnmls_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @no_replace_fnmls_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]], <vscale x 2 x double> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fnmls.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]], <vscale x 2 x double> [[C]])
-; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fnmls.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c)
-  ret <vscale x 2 x double> %2
-}
-
-; aarch64_sve_fsub intrinsic combines to a LLVM instruction fsub.
-
-declare <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
-define <vscale x 8 x half> @replace_fsub_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x half> @replace_fsub_intrinsic_half
-; CHECK-SAME: (<vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast <vscale x 8 x half> [[A]], [[B]]
-; CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
-;
-  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
-  ret <vscale x 8 x half> %2
-}
-
-declare <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
-define <vscale x 4 x float> @replace_fsub_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x float> @replace_fsub_intrinsic_float
-; CHECK-SAME: (<vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast <vscale x 4 x float> [[A]], [[B]]
-; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
-;
-  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
-  ret <vscale x 4 x float> %2
-}
-
-declare <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
-define <vscale x 2 x double> @replace_fsub_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @replace_fsub_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast <vscale x 2 x double> [[A]], [[B]]
-; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
-  ret <vscale x 2 x double> %2
-}
-
-define <vscale x 2 x double> @no_replace_fsub_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x double> @no_replace_fsub_intrinsic_double
-; CHECK-SAME: (<vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
-  ret <vscale x 2 x double> %2
-}
-
-; Integer arithmetic
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_add_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_add_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.add.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
-;
-  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.add.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
-  ret <vscale x 16 x i8> %2
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_add_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_add_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.add.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
-;
-  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.add.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
-  ret <vscale x 8 x i16> %2
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_add_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_add_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.add.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
-;
-  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.add.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
-  ret <vscale x 4 x i32> %2
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.add.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_add_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_add_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.add.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.add.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %2
-}
-
-define <vscale x 2 x i64> @no_replace_add_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_add_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.add.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.add.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %2
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.mla.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_mla_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_mla_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]], <vscale x 16 x i8> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mla.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]], <vscale x 16 x i8> [[C]])
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
-;
-  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mla.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
-  ret <vscale x 16 x i8> %2
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.mla.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_mla_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_mla_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i16> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mla.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]], <vscale x 8 x i16> [[C]])
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
-;
-  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mla.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
-  ret <vscale x 8 x i16> %2
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.mla.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_mla_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_mla_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]], <vscale x 4 x i32> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mla.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]], <vscale x 4 x i32> [[C]])
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
-;
-  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mla.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
-  ret <vscale x 4 x i32> %2
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.mla.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_mla_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_mla_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]], <vscale x 2 x i64> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mla.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]], <vscale x 2 x i64> [[C]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mla.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
-  ret <vscale x 2 x i64> %2
-}
-
-define <vscale x 2 x i64> @no_replace_mla_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_mla_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]], <vscale x 2 x i64> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mla.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]], <vscale x 2 x i64> [[C]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mla.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
-  ret <vscale x 2 x i64> %2
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.mls.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_mls_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_mls_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]], <vscale x 16 x i8> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mls.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]], <vscale x 16 x i8> [[C]])
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
-;
-  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mls.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c)
-  ret <vscale x 16 x i8> %2
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.mls.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_mls_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_mls_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]], <vscale x 8 x i16> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mls.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]], <vscale x 8 x i16> [[C]])
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
-;
-  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mls.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c)
-  ret <vscale x 8 x i16> %2
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.mls.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_mls_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_mls_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]], <vscale x 4 x i32> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mls.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]], <vscale x 4 x i32> [[C]])
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
-;
-  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mls.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c)
-  ret <vscale x 4 x i32> %2
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.mls.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_mls_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_mls_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]], <vscale x 2 x i64> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mls.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]], <vscale x 2 x i64> [[C]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mls.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
-  ret <vscale x 2 x i64> %2
-}
-
-define <vscale x 2 x i64> @no_replace_mls_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_mls_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]], <vscale x 2 x i64> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mls.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]], <vscale x 2 x i64> [[C]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mls.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c)
-  ret <vscale x 2 x i64> %2
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.mul.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_mul_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_mul_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mul.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
-;
-  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.mul.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
-  ret <vscale x 16 x i8> %2
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_mul_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_mul_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mul.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
-;
-  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.mul.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
-  ret <vscale x 8 x i16> %2
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_mul_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_mul_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
-;
-  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.mul.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
-  ret <vscale x 4 x i32> %2
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_mul_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_mul_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mul.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %2
-}
-
-define <vscale x 2 x i64> @no_replace_mul_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_mul_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.mul.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %2
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.sabd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_sabd_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_sabd_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sabd.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
-;
-  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sabd.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
-  ret <vscale x 16 x i8> %2
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.sabd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_sabd_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_sabd_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sabd.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
-;
-  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sabd.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
-  ret <vscale x 8 x i16> %2
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.sabd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_sabd_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_sabd_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sabd.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
-;
-  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sabd.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
-  ret <vscale x 4 x i32> %2
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_sabd_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_sabd_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sabd.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %2
-}
-
-define <vscale x 2 x i64> @no_replace_sabd_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_sabd_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sabd.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %2
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.smax.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_smax_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_smax_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smax.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
-;
-  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smax.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
-  ret <vscale x 16 x i8> %2
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.smax.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_smax_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_smax_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smax.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
-;
-  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smax.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
-  ret <vscale x 8 x i16> %2
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_smax_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_smax_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
-;
-  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
-  ret <vscale x 4 x i32> %2
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.smax.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_smax_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_smax_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smax.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smax.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %2
-}
-
-define <vscale x 2 x i64> @no_replace_smax_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_smax_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smax.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smax.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %2
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.smin.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_smin_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_smin_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smin.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
-;
-  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smin.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
-  ret <vscale x 16 x i8> %2
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.smin.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_smin_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_smin_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smin.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
-;
-  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smin.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
-  ret <vscale x 8 x i16> %2
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_smin_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_smin_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smin.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
-;
-  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
-  ret <vscale x 4 x i32> %2
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.smin.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_smin_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_smin_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smin.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smin.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %2
-}
-
-define <vscale x 2 x i64> @no_replace_smin_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_smin_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smin.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smin.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %2
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.smulh.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_smulh_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_smulh_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smulh.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
-;
-  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.smulh.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
-  ret <vscale x 16 x i8> %2
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.smulh.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_smulh_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_smulh_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smulh.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
-;
-  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.smulh.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
-  ret <vscale x 8 x i16> %2
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_smulh_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_smulh_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
-;
-  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.smulh.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
-  ret <vscale x 4 x i32> %2
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_smulh_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_smulh_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %2
-}
-
-define <vscale x 2 x i64> @no_replace_smulh_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_smulh_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.smulh.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %2
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.sub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_sub_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_sub_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sub.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
-;
-  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sub.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
-  ret <vscale x 16 x i8> %2
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.sub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_sub_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_sub_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sub.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
-;
-  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sub.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
-  ret <vscale x 8 x i16> %2
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_sub_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_sub_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
-;
-  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sub.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
-  ret <vscale x 4 x i32> %2
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.sub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_sub_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_sub_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sub.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sub.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %2
-}
-
-define <vscale x 2 x i64> @no_replace_sub_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_sub_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sub.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sub.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %2
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.uabd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_uabd_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_uabd_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uabd.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
-;
-  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uabd.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
-  ret <vscale x 16 x i8> %2
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.uabd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_uabd_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_uabd_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uabd.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
-;
-  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uabd.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
-  ret <vscale x 8 x i16> %2
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.uabd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_uabd_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_uabd_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uabd.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
-;
-  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uabd.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
-  ret <vscale x 4 x i32> %2
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.uabd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_uabd_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_uabd_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uabd.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uabd.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %2
-}
-
-define <vscale x 2 x i64> @no_replace_uabd_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_uabd_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uabd.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uabd.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %2
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.umax.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_umax_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_umax_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umax.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
-;
-  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umax.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
-  ret <vscale x 16 x i8> %2
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.umax.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_umax_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_umax_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umax.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
-;
-  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umax.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
-  ret <vscale x 8 x i16> %2
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_umax_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_umax_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umax.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
-;
-  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
-  ret <vscale x 4 x i32> %2
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.umax.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_umax_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_umax_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umax.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umax.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %2
-}
-
-define <vscale x 2 x i64> @no_replace_umax_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_umax_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umax.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umax.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %2
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.umin.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_umin_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_umin_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umin.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
-;
-  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umin.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
-  ret <vscale x 16 x i8> %2
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.umin.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_umin_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_umin_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umin.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
-;
-  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umin.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
-  ret <vscale x 8 x i16> %2
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_umin_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_umin_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umin.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
-;
-  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umin.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
-  ret <vscale x 4 x i32> %2
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.umin.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_umin_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_umin_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umin.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umin.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %2
-}
-
-define <vscale x 2 x i64> @no_replace_umin_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_umin_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umin.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umin.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %2
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.umulh.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_umulh_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_umulh_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umulh.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
-;
-  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.umulh.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
-  ret <vscale x 16 x i8> %2
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.umulh.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_umulh_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_umulh_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umulh.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
-;
-  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.umulh.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
-  ret <vscale x 8 x i16> %2
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_umulh_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_umulh_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
-;
-  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.umulh.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
-  ret <vscale x 4 x i32> %2
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_umulh_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_umulh_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %2
-}
-
-define <vscale x 2 x i64> @no_replace_umulh_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_umulh_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.umulh.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %2
-}
-
-; Shifts
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_asr_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_asr_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asr.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
-;
-  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
-  ret <vscale x 16 x i8> %2
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_asr_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_asr_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.asr.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
-;
-  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
-  ret <vscale x 8 x i16> %2
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_asr_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_asr_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.asr.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
-;
-  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
-  ret <vscale x 4 x i32> %2
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_asr_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_asr_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.asr.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %2
-}
-
-define <vscale x 2 x i64> @no_replace_asr_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_asr_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %2
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_lsl_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_lsl_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
-;
-  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
-  ret <vscale x 16 x i8> %2
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_lsl_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_lsl_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
-;
-  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
-  ret <vscale x 8 x i16> %2
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_lsl_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_lsl_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
-;
-  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
-  ret <vscale x 4 x i32> %2
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_lsl_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_lsl_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %2
-}
-
-define <vscale x 2 x i64> @no_replace_lsl_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_lsl_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %2
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_lsr_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_lsr_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
-;
-  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
-  ret <vscale x 16 x i8> %2
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_lsr_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_lsr_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
-;
-  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
-  ret <vscale x 8 x i16> %2
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_lsr_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_lsr_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
-;
-  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
-  ret <vscale x 4 x i32> %2
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_lsr_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_lsr_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %2
-}
-
-define <vscale x 2 x i64> @no_replace_lsr_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_lsr_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %2
-}
-
-; Logical operations
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.and.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_and_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_and_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.and.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
-;
-  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.and.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
-  ret <vscale x 16 x i8> %2
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.and.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_and_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_and_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.and.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
-;
-  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.and.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
-  ret <vscale x 8 x i16> %2
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_and_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_and_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.and.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
-;
-  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.and.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
-  ret <vscale x 4 x i32> %2
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_and_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_and_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.and.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %2
-}
-
-define <vscale x 2 x i64> @no_replace_and_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_and_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.and.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %2
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_bic_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_bic_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.bic.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
-;
-  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.bic.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
-  ret <vscale x 16 x i8> %2
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_bic_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_bic_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.bic.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
-;
-  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.bic.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
-  ret <vscale x 8 x i16> %2
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_bic_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_bic_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.bic.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
-;
-  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.bic.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
-  ret <vscale x 4 x i32> %2
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_bic_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_bic_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.bic.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %2
-}
-
-define <vscale x 2 x i64> @no_replace_bic_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_bic_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.bic.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %2
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_eor_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_eor_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.eor.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
-;
-  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.eor.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
-  ret <vscale x 16 x i8> %2
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_eor_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_eor_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.eor.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
-;
-  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.eor.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
-  ret <vscale x 8 x i16> %2
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_eor_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_eor_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.eor.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
-;
-  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.eor.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
-  ret <vscale x 4 x i32> %2
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_eor_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_eor_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.eor.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %2
-}
-
-define <vscale x 2 x i64> @no_replace_eor_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_eor_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.eor.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %2
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.orr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_orr_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_orr_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.orr.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
-;
-  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.orr.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
-  ret <vscale x 16 x i8> %2
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.orr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_orr_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_orr_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.orr.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
-;
-  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.orr.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
-  ret <vscale x 8 x i16> %2
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_orr_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_orr_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
-;
-  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.orr.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
-  ret <vscale x 4 x i32> %2
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_orr_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_orr_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.orr.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %2
-}
-
-define <vscale x 2 x i64> @no_replace_orr_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_orr_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.orr.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %2
-}
-
-; SVE2 - Uniform DSP operations
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_sqsub_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_sqsub_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
-;
-  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
-  ret <vscale x 16 x i8> %2
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_sqsub_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_sqsub_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
-;
-  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
-  ret <vscale x 8 x i16> %2
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_sqsub_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_sqsub_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
-;
-  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
-  ret <vscale x 4 x i32> %2
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_sqsub_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_sqsub_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %2
-}
-
-define <vscale x 2 x i64> @no_replace_sqsub_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_sqsub_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %2
-}
-
-declare <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-define <vscale x 16 x i8> @replace_uqsub_intrinsic_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) #0 {
-; CHECK-LABEL: define <vscale x 16 x i8> @replace_uqsub_intrinsic_i8
-; CHECK-SAME: (<vscale x 16 x i8> [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.u.nxv16i8(<vscale x 16 x i1> splat (i1 true), <vscale x 16 x i8> [[A]], <vscale x 16 x i8> [[B]])
-; CHECK-NEXT:    ret <vscale x 16 x i8> [[TMP1]]
-;
-  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
-  %2 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.nxv16i8(<vscale x 16 x i1> %1, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
-  ret <vscale x 16 x i8> %2
-}
-
-declare <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-define <vscale x 8 x i16> @replace_uqsub_intrinsic_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) #0 {
-; CHECK-LABEL: define <vscale x 8 x i16> @replace_uqsub_intrinsic_i16
-; CHECK-SAME: (<vscale x 8 x i16> [[A:%.*]], <vscale x 8 x i16> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.u.nxv8i16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x i16> [[A]], <vscale x 8 x i16> [[B]])
-; CHECK-NEXT:    ret <vscale x 8 x i16> [[TMP1]]
-;
-  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-  %2 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.nxv8i16(<vscale x 8 x i1> %1, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
-  ret <vscale x 8 x i16> %2
-}
-
-declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-define <vscale x 4 x i32> @replace_uqsub_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 {
-; CHECK-LABEL: define <vscale x 4 x i32> @replace_uqsub_intrinsic_i32
-; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]])
-; CHECK-NEXT:    ret <vscale x 4 x i32> [[TMP1]]
-;
-  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-  %2 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1> %1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
-  ret <vscale x 4 x i32> %2
-}
-
-declare <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
-define <vscale x 2 x i64> @replace_uqsub_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @replace_uqsub_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.u.nxv2i64(<vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP1]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %2
-}
-
-define <vscale x 2 x i64> @no_replace_uqsub_intrinsic_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) #0 {
-; CHECK-LABEL: define <vscale x 2 x i64> @no_replace_uqsub_intrinsic_i64
-; CHECK-SAME: (<vscale x 2 x i64> [[A:%.*]], <vscale x 2 x i64> [[B:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.nxv2i64(<vscale x 2 x i1> [[TMP1]], <vscale x 2 x i64> [[A]], <vscale x 2 x i64> [[B]])
-; CHECK-NEXT:    ret <vscale x 2 x i64> [[TMP2]]
-;
-  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
-  %2 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.nxv2i64(<vscale x 2 x i1> %1, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
-  ret <vscale x 2 x i64> %2
-}
-
-attributes #0 = { "target-features"="+sve,+sve2" }


        


More information about the llvm-commits mailing list