[clang] [llvm] [AArch64] Implement intrinsics for SVE FAMIN/FAMAX (PR #99042)

Thu Aug 29 09:32:32 PDT 2024

https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/99042

>From aa74d04751558f3ab47d566c91fb8ad178df0dce Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Tue, 16 Jul 2024 13:37:34 +0100
Subject: [PATCH 1/3] [AArch64] Implement intrinsics for SVE FAMIN/FAMAX

This patch implements the following intrinsics:

* Floating-point absolute maximum (predicated)

  svfloat16_t svamax[_f16]_m(svbool_t, svfloat16_t, svfloat16_t);
  svfloat16_t svamax[_f16]_x(svbool_t, svfloat16_t, svfloat16_t);
  svfloat16_t svamax[_f16]_z(svbool_t, svfloat16_t, svfloat16_t);

  svfloat16_t svamax[_n_f16]_m(svbool_t, svfloat16_t, float16_t);
  svfloat16_t svamax[_n_f16]_x(svbool_t, svfloat16_t, float16_t);
  svfloat16_t svamax[_n_f16]_z(svbool_t, svfloat16_t, float16_t);

* Floating-point absolute minimum (predicated)

  svfloat16_t svmin[_f16]_m(svbool_t, svfloat16_t, svfloat16_t);
  svfloat16_t svmin[_f16]_x(svbool_t, svfloat16_t, svfloat16_t);
  svfloat16_t svmin[_f16]_z(svbool_t, svfloat16_t, svfloat16_t);

  svfloat16_t svmin[_n_f16]_m(svbool_t, svfloat16_t, float16_t);
  svfloat16_t svmin[_n_f16]_x(svbool_t, svfloat16_t, float16_t);
  svfloat16_t svmin[_n_f16]_z(svbool_t, svfloat16_t, float16_t);

All the intrinsics have also variants for `f32` and `f64`, and have
the `__arm_streaming` attribute.

(cf. https://github.com/ARM-software/acle/pull/324)
---
 clang/include/clang/Basic/arm_sve.td          |   5 +
 .../acle_sve2_faminmax.c                      | 775 ++++++++++++++++++
 llvm/include/llvm/IR/IntrinsicsAArch64.td     |   7 +
 .../Target/AArch64/AArch64ISelLowering.cpp    |   8 +
 llvm/lib/Target/AArch64/AArch64ISelLowering.h |   2 +
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |  14 +-
 .../AArch64/sve2-intrinsics-faminmax.ll       | 266 ++++++
 7 files changed, 1075 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_faminmax.c
 create mode 100644 llvm/test/CodeGen/AArch64/sve2-intrinsics-faminmax.ll

diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 078373823a3b6f..b40ce9b4d11b56 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2401,3 +2401,8 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
   def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslb_lane", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_7>]>;
   def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslt_lane", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_7>]>;
 }
+
+let SVETargetGuard = "sve2,faminmax", SMETargetGuard = "sme2,faminmax" in {
+  defm SVAMIN : SInstZPZZ<"svamin", "hfd", "aarch64_sve_famin", "aarch64_sve_famin_u">;
+  defm SVAMAX : SInstZPZZ<"svamax", "hfd", "aarch64_sve_famax", "aarch64_sve_famax_u">;
+}
diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_faminmax.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_faminmax.c
new file mode 100644
index 00000000000000..3cf7d99d606f32
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_faminmax.c
@@ -0,0 +1,775 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +faminmax -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +faminmax -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +faminmax -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +faminmax -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CPP
+// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +faminmax -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CPP
+// RUN: %clang_cc1 -x c++ -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +faminmax -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CPP
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +faminmax -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +faminmax -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+// REQUIRES: aarch64-registered-target
+
+#ifdef __ARM_FEATURE_SME
+#include "arm_sme.h"
+#else
+#include "arm_sve.h"
+#endif
+
+#ifdef SVE_OVERLOADED_FORMS
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3
+#endif
+
+#ifdef __ARM_FEATURE_SME
+#define STREAMING __arm_streaming
+#else
+#define STREAMING
+#endif
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famin_f16_m(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z16test_famin_f16_mu10__SVBool_tu13__SVFloat16_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
+// CHECK-CPP-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_famin_f16_m(svbool_t pg, svfloat16_t a, svfloat16_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamin, _f16, _m)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famin_f16_x(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z16test_famin_f16_xu10__SVBool_tu13__SVFloat16_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
+// CHECK-CPP-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_famin_f16_x(svbool_t pg, svfloat16_t a, svfloat16_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamin, _f16, _x)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famin_f16_z(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> zeroinitializer
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[B]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z16test_famin_f16_zu10__SVBool_tu13__SVFloat16_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[B]])
+// CHECK-CPP-NEXT:    ret <vscale x 8 x half> [[TMP2]]
+//
+svfloat16_t test_famin_f16_z(svbool_t pg, svfloat16_t a, svfloat16_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamin, _f16, _z)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famin_n_f16_m(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z18test_famin_n_f16_mu10__SVBool_tu13__SVFloat16_tDh(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-CPP-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-CPP-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_famin_n_f16_m(svbool_t pg, svfloat16_t a, float16_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamin, _n_f16, _m)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famin_n_f16_x(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z18test_famin_n_f16_xu10__SVBool_tu13__SVFloat16_tDh(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-CPP-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-CPP-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_famin_n_f16_x(svbool_t pg, svfloat16_t a, float16_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamin, _n_f16, _x)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famin_n_f16_z(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> zeroinitializer
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z18test_famin_n_f16_zu10__SVBool_tu13__SVFloat16_tDh(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-CPP-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-CPP-NEXT:    ret <vscale x 8 x half> [[TMP2]]
+//
+svfloat16_t test_famin_n_f16_z(svbool_t pg, svfloat16_t a, float16_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamin, _n_f16, _z)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famin_f32_m(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z16test_famin_f32_mu10__SVBool_tu13__SVFloat32_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+// CHECK-CPP-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_famin_f32_m(svbool_t pg, svfloat32_t a, svfloat32_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamin, _f32, _m)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famin_f32_x(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z16test_famin_f32_xu10__SVBool_tu13__SVFloat32_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+// CHECK-CPP-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_famin_f32_x(svbool_t pg, svfloat32_t a, svfloat32_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamin, _f32, _x)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famin_f32_z(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = select <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> zeroinitializer
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[B]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z16test_famin_f32_zu10__SVBool_tu13__SVFloat32_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = select <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[B]])
+// CHECK-CPP-NEXT:    ret <vscale x 4 x float> [[TMP2]]
+//
+svfloat32_t test_famin_f32_z(svbool_t pg, svfloat32_t a, svfloat32_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamin, _f32, _z)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famin_n_f32_m(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0
+// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[DOTSPLAT]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z18test_famin_n_f32_mu10__SVBool_tu13__SVFloat32_tf(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0
+// CHECK-CPP-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[DOTSPLAT]])
+// CHECK-CPP-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_famin_n_f32_m(svbool_t pg, svfloat32_t a, float32_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamin, _n_f32, _m)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famin_n_f32_x(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0
+// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[DOTSPLAT]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z18test_famin_n_f32_xu10__SVBool_tu13__SVFloat32_tf(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0
+// CHECK-CPP-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[DOTSPLAT]])
+// CHECK-CPP-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_famin_n_f32_x(svbool_t pg, svfloat32_t a, float32_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamin, _n_f32, _x)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famin_n_f32_z(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0
+// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = select <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> zeroinitializer
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[DOTSPLAT]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z18test_famin_n_f32_zu10__SVBool_tu13__SVFloat32_tf(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0
+// CHECK-CPP-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = select <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[DOTSPLAT]])
+// CHECK-CPP-NEXT:    ret <vscale x 4 x float> [[TMP2]]
+//
+svfloat32_t test_famin_n_f32_z(svbool_t pg, svfloat32_t a, float32_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamin, _n_f32, _z)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famin_f64_m(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z16test_famin_f64_mu10__SVBool_tu13__SVFloat64_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
+// CHECK-CPP-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+svfloat64_t test_famin_f64_m(svbool_t pg, svfloat64_t a, svfloat64_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamin, _f64, _m)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famin_f64_x(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z16test_famin_f64_xu10__SVBool_tu13__SVFloat64_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
+// CHECK-CPP-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+svfloat64_t test_famin_f64_x(svbool_t pg, svfloat64_t a, svfloat64_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamin, _f64, _x)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famin_f64_z(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = select <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> zeroinitializer
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[B]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z16test_famin_f64_zu10__SVBool_tu13__SVFloat64_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = select <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[B]])
+// CHECK-CPP-NEXT:    ret <vscale x 2 x double> [[TMP2]]
+//
+svfloat64_t test_famin_f64_z(svbool_t pg, svfloat64_t a, svfloat64_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamin, _f64, _z)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famin_n_f64_m(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0
+// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[DOTSPLAT]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z18test_famin_n_f64_mu10__SVBool_tu13__SVFloat64_td(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0
+// CHECK-CPP-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[DOTSPLAT]])
+// CHECK-CPP-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+svfloat64_t test_famin_n_f64_m(svbool_t pg, svfloat64_t a, float64_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamin, _n_f64, _m)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famin_n_f64_x(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0
+// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[DOTSPLAT]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z18test_famin_n_f64_xu10__SVBool_tu13__SVFloat64_td(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0
+// CHECK-CPP-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[DOTSPLAT]])
+// CHECK-CPP-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+svfloat64_t test_famin_n_f64_x(svbool_t pg, svfloat64_t a, float64_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamin, _n_f64, _x)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famin_n_f64_z(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0
+// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = select <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> zeroinitializer
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[DOTSPLAT]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z18test_famin_n_f64_zu10__SVBool_tu13__SVFloat64_td(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0
+// CHECK-CPP-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = select <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[DOTSPLAT]])
+// CHECK-CPP-NEXT:    ret <vscale x 2 x double> [[TMP2]]
+//
+svfloat64_t test_famin_n_f64_z(svbool_t pg, svfloat64_t a, float64_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamin, _n_f64, _z)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famax_f16_m(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z16test_famax_f16_mu10__SVBool_tu13__SVFloat16_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
+// CHECK-CPP-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_famax_f16_m(svbool_t pg, svfloat16_t a, svfloat16_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamax, _f16, _m)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famax_f16_x(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z16test_famax_f16_xu10__SVBool_tu13__SVFloat16_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
+// CHECK-CPP-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_famax_f16_x(svbool_t pg, svfloat16_t a, svfloat16_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamax, _f16, _x)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famax_f16_z(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> zeroinitializer
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[B]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z16test_famax_f16_zu10__SVBool_tu13__SVFloat16_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[B]])
+// CHECK-CPP-NEXT:    ret <vscale x 8 x half> [[TMP2]]
+//
+svfloat16_t test_famax_f16_z(svbool_t pg, svfloat16_t a, svfloat16_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamax, _f16, _z)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famax_n_f16_m(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z18test_famax_n_f16_mu10__SVBool_tu13__SVFloat16_tDh(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-CPP-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-CPP-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_famax_n_f16_m(svbool_t pg, svfloat16_t a, float16_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamax, _n_f16, _m)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famax_n_f16_x(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z18test_famax_n_f16_xu10__SVBool_tu13__SVFloat16_tDh(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-CPP-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-CPP-NEXT:    ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_famax_n_f16_x(svbool_t pg, svfloat16_t a, float16_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamax, _n_f16, _x)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famax_n_f16_z(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> zeroinitializer
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-NEXT:    ret <vscale x 8 x half> [[TMP2]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z18test_famax_n_f16_zu10__SVBool_tu13__SVFloat16_tDh(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-CPP-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-CPP-NEXT:    ret <vscale x 8 x half> [[TMP2]]
+//
+svfloat16_t test_famax_n_f16_z(svbool_t pg, svfloat16_t a, float16_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamax, _n_f16, _z)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famax_f32_m(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z16test_famax_f32_mu10__SVBool_tu13__SVFloat32_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+// CHECK-CPP-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_famax_f32_m(svbool_t pg, svfloat32_t a, svfloat32_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamax, _f32, _m)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famax_f32_x(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z16test_famax_f32_xu10__SVBool_tu13__SVFloat32_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+// CHECK-CPP-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_famax_f32_x(svbool_t pg, svfloat32_t a, svfloat32_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamax, _f32, _x)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famax_f32_z(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = select <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> zeroinitializer
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[B]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z16test_famax_f32_zu10__SVBool_tu13__SVFloat32_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = select <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[B]])
+// CHECK-CPP-NEXT:    ret <vscale x 4 x float> [[TMP2]]
+//
+svfloat32_t test_famax_f32_z(svbool_t pg, svfloat32_t a, svfloat32_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamax, _f32, _z)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famax_n_f32_m(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0
+// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[DOTSPLAT]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z18test_famax_n_f32_mu10__SVBool_tu13__SVFloat32_tf(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0
+// CHECK-CPP-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[DOTSPLAT]])
+// CHECK-CPP-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_famax_n_f32_m(svbool_t pg, svfloat32_t a, float32_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamax, _n_f32, _m)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famax_n_f32_x(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0
+// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[DOTSPLAT]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z18test_famax_n_f32_xu10__SVBool_tu13__SVFloat32_tf(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0
+// CHECK-CPP-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[DOTSPLAT]])
+// CHECK-CPP-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_famax_n_f32_x(svbool_t pg, svfloat32_t a, float32_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamax, _n_f32, _x)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famax_n_f32_z(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0
+// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = select <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> zeroinitializer
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[DOTSPLAT]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP2]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z18test_famax_n_f32_zu10__SVBool_tu13__SVFloat32_tf(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0
+// CHECK-CPP-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = select <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[DOTSPLAT]])
+// CHECK-CPP-NEXT:    ret <vscale x 4 x float> [[TMP2]]
+//
+svfloat32_t test_famax_n_f32_z(svbool_t pg, svfloat32_t a, float32_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamax, _n_f32, _z)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famax_f64_m(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z16test_famax_f64_mu10__SVBool_tu13__SVFloat64_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
+// CHECK-CPP-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+svfloat64_t test_famax_f64_m(svbool_t pg, svfloat64_t a, svfloat64_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamax, _f64, _m)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famax_f64_x(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z16test_famax_f64_xu10__SVBool_tu13__SVFloat64_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
+// CHECK-CPP-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+svfloat64_t test_famax_f64_x(svbool_t pg, svfloat64_t a, svfloat64_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamax, _f64, _x)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famax_f64_z(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[TMP1:%.*]] = select <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> zeroinitializer
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[B]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z16test_famax_f64_zu10__SVBool_tu13__SVFloat64_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = select <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[B]])
+// CHECK-CPP-NEXT:    ret <vscale x 2 x double> [[TMP2]]
+//
+svfloat64_t test_famax_f64_z(svbool_t pg, svfloat64_t a, svfloat64_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamax, _f64, _z)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famax_n_f64_m(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0
+// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[DOTSPLAT]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z18test_famax_n_f64_mu10__SVBool_tu13__SVFloat64_td(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0
+// CHECK-CPP-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[DOTSPLAT]])
+// CHECK-CPP-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+svfloat64_t test_famax_n_f64_m(svbool_t pg, svfloat64_t a, float64_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamax, _n_f64, _m)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famax_n_f64_x(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0
+// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[DOTSPLAT]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z18test_famax_n_f64_xu10__SVBool_tu13__SVFloat64_td(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0
+// CHECK-CPP-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[DOTSPLAT]])
+// CHECK-CPP-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+//
+svfloat64_t test_famax_n_f64_x(svbool_t pg, svfloat64_t a, float64_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamax, _n_f64, _x)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famax_n_f64_z(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0
+// CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT:    [[TMP1:%.*]] = select <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> zeroinitializer
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[DOTSPLAT]])
+// CHECK-NEXT:    ret <vscale x 2 x double> [[TMP2]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z18test_famax_n_f64_zu10__SVBool_tu13__SVFloat64_td(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT:  [[ENTRY:.*:]]
+// CHECK-CPP-NEXT:    [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0
+// CHECK-CPP-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP1:%.*]] = select <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> zeroinitializer
+// CHECK-CPP-NEXT:    [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[DOTSPLAT]])
+// CHECK-CPP-NEXT:    ret <vscale x 2 x double> [[TMP2]]
+//
+svfloat64_t test_famax_n_f64_z(svbool_t pg, svfloat64_t a, float64_t b) STREAMING {
+  return SVE_ACLE_FUNC(svamax, _n_f64, _z)(pg, a, b);
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 3735bf5222fce3..ca4af6c2603ea6 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -3730,3 +3730,10 @@ def int_aarch64_sve_pmov_to_vector_lane_zeroing : SVE2_Pred_1VectorArg_Intrinsic
 def int_aarch64_sme_mopa_nonwide : SME_OuterProduct_Intrinsic;
 def int_aarch64_sme_mops_nonwide : SME_OuterProduct_Intrinsic;
 
+// SVE2/SME2 - Floating point absolute maximum and minimum
+
+def int_aarch64_sve_famax   : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_famax_u : AdvSIMD_Pred2VectorArg_Intrinsic;
+
+def int_aarch64_sve_famin   : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_famin_u : AdvSIMD_Pred2VectorArg_Intrinsic;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 215f30128e7038..b1aa3c65745161 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2726,6 +2726,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
     MAKE_CASE(AArch64ISD::FMINNMV_PRED)
     MAKE_CASE(AArch64ISD::FMUL_PRED)
     MAKE_CASE(AArch64ISD::FSUB_PRED)
+    MAKE_CASE(AArch64ISD::FAMAX_PRED)
+    MAKE_CASE(AArch64ISD::FAMIN_PRED)
     MAKE_CASE(AArch64ISD::RDSVL)
     MAKE_CASE(AArch64ISD::BIC)
     MAKE_CASE(AArch64ISD::CBZ)
@@ -22056,6 +22058,12 @@ static SDValue performIntrinsicCombine(SDNode *N,
                     AArch64CC::LAST_ACTIVE);
   case Intrinsic::aarch64_sve_whilelo:
     return tryCombineWhileLo(N, DCI, Subtarget);
+  case Intrinsic::aarch64_sve_famax_u:
+    return DAG.getNode(AArch64ISD::FAMAX_PRED, SDLoc(N), N->getValueType(0),
+                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
+  case Intrinsic::aarch64_sve_famin_u:
+    return DAG.getNode(AArch64ISD::FAMIN_PRED, SDLoc(N), N->getValueType(0),
+                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
   }
   return SDValue();
 }
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 39d5df0de0eec7..cdeb988c3966e5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -135,6 +135,8 @@ enum NodeType : unsigned {
   UDIV_PRED,
   UMAX_PRED,
   UMIN_PRED,
+  FAMAX_PRED,
+  FAMIN_PRED,
 
   // Unpredicated vector instructions
   BIC,
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 694b7fb2068a29..e733b4c02f7d47 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -218,6 +218,9 @@ def AArch64fsub_p_contract : PatFrag<(ops node:$op1, node:$op2, node:$op3),
   return N->getFlags().hasAllowContract();
 }]>;
 
+def AArch64famax_p : SDNode<"AArch64ISD::FAMAX_PRED", SDT_AArch64Arith>;
+def AArch64famin_p : SDNode<"AArch64ISD::FAMIN_PRED", SDT_AArch64Arith>;
+
 def SDT_AArch64Arith_Imm : SDTypeProfile<1, 3, [
   SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVT<3,i32>,
   SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>
@@ -483,6 +486,8 @@ def AArch64fminnm_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fminnm,
 def AArch64fmaxnm_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fmaxnm, AArch64fmaxnm_p>;
 def AArch64fmin_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fmin, AArch64fmin_p>;
 def AArch64fmax_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fmax, AArch64fmax_p>;
+def AArch64famax_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_famax, AArch64famax_p>;
+def AArch64famin_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_famin, AArch64famin_p>;
 
 def AArch64fadd : PatFrags<(ops node:$op1, node:$op2),
                             [(fadd node:$op1, node:$op2),
@@ -717,6 +722,11 @@ let Predicates = [HasSVEorSME] in {
   defm FDIV_ZPZZ   : sve_fp_bin_pred_hfd<AArch64fdiv_p>;
 } // End HasSVEorSME
 
+let Predicates = [HasSVE2orSME2, HasFAMINMAX] in {
+  defm FAMAX_ZPZZ : sve_fp_bin_pred_hfd<AArch64famax_p>;
+  defm FAMIN_ZPZZ : sve_fp_bin_pred_hfd<AArch64famin_p>;
+}
+
 let Predicates = [HasSVEorSME, UseExperimentalZeroingPseudos] in {
   defm FADD_ZPZZ   : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fadd>;
   defm FSUB_ZPZZ   : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fsub>;
@@ -4168,8 +4178,8 @@ defm FCVTNT_Z2Z_StoB : sve2_fp8_down_cvt_single<0b11, "fcvtnt", ZZ_s_mul_r>;
 
 let Predicates = [HasSVE2orSME2, HasFAMINMAX] in {
 // FP8 Arithmetic - Predicated Group
-defm FAMIN_ZPmZ : sve_fp_2op_p_zds<0b1111, "famin", "", null_frag, DestructiveOther>;
-defm FAMAX_ZPmZ : sve_fp_2op_p_zds<0b1110, "famax", "", null_frag, DestructiveOther>;
+defm FAMIN_ZPmZ : sve_fp_2op_p_zds<0b1111, "famin", "FAMIN_ZPZZ", AArch64famin_m1, DestructiveBinaryComm>;
+defm FAMAX_ZPmZ : sve_fp_2op_p_zds<0b1110, "famax", "FAMAX_ZPZZ", AArch64famax_m1, DestructiveBinaryComm>;
 } // End HasSVE2orSME2, HasFAMINMAX
 
 let Predicates = [HasSSVE_FP8FMA] in {
diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-faminmax.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-faminmax.ll
new file mode 100644
index 00000000000000..123756e88f6347
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-faminmax.ll
@@ -0,0 +1,266 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mattr=+sve2 < %s | FileCheck %s
+; RUN: llc -mattr=+sme2 -force-streaming < %s | FileCheck %s
+
+target triple = "aarch64-linux"
+
+define <vscale x 8 x half> @famin_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: famin_f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    famin z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+    %r = call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+    ret <vscale x 8 x half> %r
+}
+
+define <vscale x 4 x float> @famin_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: famin_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    famin z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+    %r = call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+    ret <vscale x 4 x float> %r
+}
+
+define <vscale x 2 x double> @famin_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: famin_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    famin z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+    %r = call <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+    ret <vscale x 2 x double> %r
+}
+
+define <vscale x 8 x half> @famin_u_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: famin_u_f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    famin z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+    %r = call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+    ret <vscale x 8 x half> %r
+}
+
+define <vscale x 4 x float> @famin_u_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: famin_u_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    famin z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+    %r = call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+    ret <vscale x 4 x float> %r
+}
+
+define <vscale x 2 x double> @famin_u_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: famin_u_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    famin z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+    %r = call <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+    ret <vscale x 2 x double> %r
+}
+
+define <vscale x 8 x half> @famax_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: famax_f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    famax z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+    %r = call <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+    ret <vscale x 8 x half> %r
+}
+
+define <vscale x 4 x float> @famax_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: famax_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    famax z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+    %r = call <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+    ret <vscale x 4 x float> %r
+}
+
+define <vscale x 2 x double> @famax_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: famax_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    famax z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+    %r = call <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+    ret <vscale x 2 x double> %r
+}
+
+define <vscale x 8 x half> @famax_u_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: famax_u_f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    famax z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+    %r = call <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+    ret <vscale x 8 x half> %r
+}
+
+define <vscale x 4 x float> @famax_u_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: famax_u_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    famax z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+    %r = call <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+    ret <vscale x 4 x float> %r
+}
+
+define <vscale x 2 x double> @famax_u_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: famax_u_f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    famax z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+    %r = call <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+    ret <vscale x 2 x double> %r
+}
+
+define <vscale x 8 x half> @select_famin_f16a(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: select_famin_f16a:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    famin z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+    %all.true = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+    %m = call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> %all.true, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+    %r = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %m, <vscale x 8 x half> %a
+    ret <vscale x 8 x half> %r
+}
+
+define <vscale x 8 x half> @select_famin_f16b(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: select_famin_f16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    famin z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+    %all.true = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+    %m = call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> %all.true, <vscale x 8 x half> %b, <vscale x 8 x half> %a)
+    %r = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %m, <vscale x 8 x half> %a
+    ret <vscale x 8 x half> %r
+}
+
+define <vscale x 4 x float> @select_famin_f32a(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: select_famin_f32a:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    famin z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+    %all.true = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+    %m = call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> %all.true, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+    %r = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %m, <vscale x 4 x float> %a
+    ret <vscale x 4 x float> %r
+}
+
+define <vscale x 4 x float> @select_famin_f32b(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: select_famin_f32b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    famin z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+    %all.true = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+    %m = call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> %all.true, <vscale x 4 x float> %b, <vscale x 4 x float> %a)
+    %r = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %m, <vscale x 4 x float> %a
+    ret <vscale x 4 x float> %r
+}
+
+define <vscale x 2 x double> @select_famin_f64a(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: select_famin_f64a:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    famin z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+    %all.true = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+    %m = call <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1> %all.true, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+    %r = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %m, <vscale x 2 x double> %a
+    ret <vscale x 2 x double> %r
+}
+
+define <vscale x 2 x double> @select_famin_f64b(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: select_famin_f64b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    famin z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+    %all.true = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+    %m = call <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1> %all.true, <vscale x 2 x double> %b, <vscale x 2 x double> %a)
+    %r = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %m, <vscale x 2 x double> %a
+    ret <vscale x 2 x double> %r
+}
+
+
+define <vscale x 8 x half> @select_famax_f16a(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: select_famax_f16a:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    famax z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+    %all.true = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+    %m = call <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1> %all.true, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+    %r = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %m, <vscale x 8 x half> %a
+    ret <vscale x 8 x half> %r
+}
+
+define <vscale x 8 x half> @select_famax_f16b(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: select_famax_f16b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    famax z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT:    ret
+    %all.true = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+    %m = call <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1> %all.true, <vscale x 8 x half> %b, <vscale x 8 x half> %a)
+    %r = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %m, <vscale x 8 x half> %a
+    ret <vscale x 8 x half> %r
+}
+
+define <vscale x 4 x float> @select_famax_f32a(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: select_famax_f32a:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    famax z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+    %all.true = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+    %m = call <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1> %all.true, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+    %r = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %m, <vscale x 4 x float> %a
+    ret <vscale x 4 x float> %r
+}
+
+define <vscale x 4 x float> @select_famax_f32b(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: select_famax_f32b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    famax z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT:    ret
+    %all.true = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+    %m = call <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1> %all.true, <vscale x 4 x float> %b, <vscale x 4 x float> %a)
+    %r = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %m, <vscale x 4 x float> %a
+    ret <vscale x 4 x float> %r
+}
+
+define <vscale x 2 x double> @select_famax_f64a(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: select_famax_f64a:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    famax z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+    %all.true = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+    %m = call <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1> %all.true, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+    %r = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %m, <vscale x 2 x double> %a
+    ret <vscale x 2 x double> %r
+}
+
+define <vscale x 2 x double> @select_famax_f64b(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: select_famax_f64b:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    famax z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+    %all.true = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+    %m = call <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1> %all.true, <vscale x 2 x double> %b, <vscale x 2 x double> %a)
+    %r = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %m, <vscale x 2 x double> %a
+    ret <vscale x 2 x double> %r
+}
+
+declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+declare <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+declare <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+attributes #0 = { nounwind "target-features" = "+faminmax" }

>From 7dcd5a7cb5558226f41acabc8d84b166dee74f64 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Thu, 29 Aug 2024 10:41:44 +0100
Subject: [PATCH 2/3] [fixup] Remove AArch64ISD::FA{MIN,MAX}_PRED

---
 .../Target/AArch64/AArch64ISelLowering.cpp    |   8 -
 llvm/lib/Target/AArch64/AArch64ISelLowering.h |   2 -
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |  13 +-
 .../AArch64/sve2-intrinsics-faminmax.ll       | 151 ------------------
 4 files changed, 4 insertions(+), 170 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index b1aa3c65745161..215f30128e7038 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2726,8 +2726,6 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
     MAKE_CASE(AArch64ISD::FMINNMV_PRED)
     MAKE_CASE(AArch64ISD::FMUL_PRED)
     MAKE_CASE(AArch64ISD::FSUB_PRED)
-    MAKE_CASE(AArch64ISD::FAMAX_PRED)
-    MAKE_CASE(AArch64ISD::FAMIN_PRED)
     MAKE_CASE(AArch64ISD::RDSVL)
     MAKE_CASE(AArch64ISD::BIC)
     MAKE_CASE(AArch64ISD::CBZ)
@@ -22058,12 +22056,6 @@ static SDValue performIntrinsicCombine(SDNode *N,
                     AArch64CC::LAST_ACTIVE);
   case Intrinsic::aarch64_sve_whilelo:
     return tryCombineWhileLo(N, DCI, Subtarget);
-  case Intrinsic::aarch64_sve_famax_u:
-    return DAG.getNode(AArch64ISD::FAMAX_PRED, SDLoc(N), N->getValueType(0),
-                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
-  case Intrinsic::aarch64_sve_famin_u:
-    return DAG.getNode(AArch64ISD::FAMIN_PRED, SDLoc(N), N->getValueType(0),
-                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
   }
   return SDValue();
 }
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index cdeb988c3966e5..39d5df0de0eec7 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -135,8 +135,6 @@ enum NodeType : unsigned {
   UDIV_PRED,
   UMAX_PRED,
   UMIN_PRED,
-  FAMAX_PRED,
-  FAMIN_PRED,
 
   // Unpredicated vector instructions
   BIC,
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index e733b4c02f7d47..0573bd717caa5b 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -218,9 +218,6 @@ def AArch64fsub_p_contract : PatFrag<(ops node:$op1, node:$op2, node:$op3),
   return N->getFlags().hasAllowContract();
 }]>;
 
-def AArch64famax_p : SDNode<"AArch64ISD::FAMAX_PRED", SDT_AArch64Arith>;
-def AArch64famin_p : SDNode<"AArch64ISD::FAMIN_PRED", SDT_AArch64Arith>;
-
 def SDT_AArch64Arith_Imm : SDTypeProfile<1, 3, [
   SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVT<3,i32>,
   SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>
@@ -486,8 +483,6 @@ def AArch64fminnm_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fminnm,
 def AArch64fmaxnm_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fmaxnm, AArch64fmaxnm_p>;
 def AArch64fmin_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fmin, AArch64fmin_p>;
 def AArch64fmax_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fmax, AArch64fmax_p>;
-def AArch64famax_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_famax, AArch64famax_p>;
-def AArch64famin_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_famin, AArch64famin_p>;
 
 def AArch64fadd : PatFrags<(ops node:$op1, node:$op2),
                             [(fadd node:$op1, node:$op2),
@@ -723,8 +718,8 @@ let Predicates = [HasSVEorSME] in {
 } // End HasSVEorSME
 
 let Predicates = [HasSVE2orSME2, HasFAMINMAX] in {
-  defm FAMAX_ZPZZ : sve_fp_bin_pred_hfd<AArch64famax_p>;
-  defm FAMIN_ZPZZ : sve_fp_bin_pred_hfd<AArch64famin_p>;
+  defm FAMAX_ZPZZ : sve_fp_bin_pred_hfd<int_aarch64_sve_famax_u>;
+  defm FAMIN_ZPZZ : sve_fp_bin_pred_hfd<int_aarch64_sve_famin_u>;
 }
 
 let Predicates = [HasSVEorSME, UseExperimentalZeroingPseudos] in {
@@ -4178,8 +4173,8 @@ defm FCVTNT_Z2Z_StoB : sve2_fp8_down_cvt_single<0b11, "fcvtnt", ZZ_s_mul_r>;
 
 let Predicates = [HasSVE2orSME2, HasFAMINMAX] in {
 // FP8 Arithmetic - Predicated Group
-defm FAMIN_ZPmZ : sve_fp_2op_p_zds<0b1111, "famin", "FAMIN_ZPZZ", AArch64famin_m1, DestructiveBinaryComm>;
-defm FAMAX_ZPmZ : sve_fp_2op_p_zds<0b1110, "famax", "FAMAX_ZPZZ", AArch64famax_m1, DestructiveBinaryComm>;
+defm FAMIN_ZPmZ : sve_fp_2op_p_zds<0b1111, "famin", "FAMIN_ZPZZ", int_aarch64_sve_famin, DestructiveBinaryComm>;
+defm FAMAX_ZPmZ : sve_fp_2op_p_zds<0b1110, "famax", "FAMAX_ZPZZ", int_aarch64_sve_famax, DestructiveBinaryComm>;
 } // End HasSVE2orSME2, HasFAMINMAX
 
 let Predicates = [HasSSVE_FP8FMA] in {
diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-faminmax.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-faminmax.ll
index 123756e88f6347..0d247328a9475d 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-faminmax.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-faminmax.ll
@@ -112,155 +112,4 @@ define <vscale x 2 x double> @famax_u_f64(<vscale x 2 x i1> %pg, <vscale x 2 x d
     ret <vscale x 2 x double> %r
 }
 
-define <vscale x 8 x half> @select_famin_f16a(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
-; CHECK-LABEL: select_famin_f16a:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    famin z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    ret
-    %all.true = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-    %m = call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> %all.true, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
-    %r = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %m, <vscale x 8 x half> %a
-    ret <vscale x 8 x half> %r
-}
-
-define <vscale x 8 x half> @select_famin_f16b(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
-; CHECK-LABEL: select_famin_f16b:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    famin z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    ret
-    %all.true = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-    %m = call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> %all.true, <vscale x 8 x half> %b, <vscale x 8 x half> %a)
-    %r = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %m, <vscale x 8 x half> %a
-    ret <vscale x 8 x half> %r
-}
-
-define <vscale x 4 x float> @select_famin_f32a(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
-; CHECK-LABEL: select_famin_f32a:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    famin z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    ret
-    %all.true = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-    %m = call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> %all.true, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
-    %r = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %m, <vscale x 4 x float> %a
-    ret <vscale x 4 x float> %r
-}
-
-define <vscale x 4 x float> @select_famin_f32b(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
-; CHECK-LABEL: select_famin_f32b:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    famin z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    ret
-    %all.true = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-    %m = call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> %all.true, <vscale x 4 x float> %b, <vscale x 4 x float> %a)
-    %r = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %m, <vscale x 4 x float> %a
-    ret <vscale x 4 x float> %r
-}
-
-define <vscale x 2 x double> @select_famin_f64a(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-LABEL: select_famin_f64a:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    famin z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    ret
-    %all.true = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-    %m = call <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1> %all.true, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
-    %r = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %m, <vscale x 2 x double> %a
-    ret <vscale x 2 x double> %r
-}
-
-define <vscale x 2 x double> @select_famin_f64b(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-LABEL: select_famin_f64b:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    famin z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    ret
-    %all.true = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-    %m = call <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1> %all.true, <vscale x 2 x double> %b, <vscale x 2 x double> %a)
-    %r = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %m, <vscale x 2 x double> %a
-    ret <vscale x 2 x double> %r
-}
-
-
-define <vscale x 8 x half> @select_famax_f16a(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
-; CHECK-LABEL: select_famax_f16a:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    famax z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    ret
-    %all.true = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-    %m = call <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1> %all.true, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
-    %r = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %m, <vscale x 8 x half> %a
-    ret <vscale x 8 x half> %r
-}
-
-define <vscale x 8 x half> @select_famax_f16b(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
-; CHECK-LABEL: select_famax_f16b:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    famax z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT:    ret
-    %all.true = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
-    %m = call <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1> %all.true, <vscale x 8 x half> %b, <vscale x 8 x half> %a)
-    %r = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %m, <vscale x 8 x half> %a
-    ret <vscale x 8 x half> %r
-}
-
-define <vscale x 4 x float> @select_famax_f32a(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
-; CHECK-LABEL: select_famax_f32a:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    famax z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    ret
-    %all.true = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-    %m = call <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1> %all.true, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
-    %r = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %m, <vscale x 4 x float> %a
-    ret <vscale x 4 x float> %r
-}
-
-define <vscale x 4 x float> @select_famax_f32b(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
-; CHECK-LABEL: select_famax_f32b:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    famax z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    ret
-    %all.true = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
-    %m = call <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1> %all.true, <vscale x 4 x float> %b, <vscale x 4 x float> %a)
-    %r = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %m, <vscale x 4 x float> %a
-    ret <vscale x 4 x float> %r
-}
-
-define <vscale x 2 x double> @select_famax_f64a(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-LABEL: select_famax_f64a:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    famax z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    ret
-    %all.true = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-    %m = call <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1> %all.true, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
-    %r = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %m, <vscale x 2 x double> %a
-    ret <vscale x 2 x double> %r
-}
-
-define <vscale x 2 x double> @select_famax_f64b(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-LABEL: select_famax_f64b:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    famax z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT:    ret
-    %all.true = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
-    %m = call <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1> %all.true, <vscale x 2 x double> %b, <vscale x 2 x double> %a)
-    %r = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %m, <vscale x 2 x double> %a
-    ret <vscale x 2 x double> %r
-}
-
-declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
-declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
-declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)
-
-declare <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
-declare <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
-declare <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
-declare <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
-declare <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
-declare <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
-
-declare <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
-declare <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
-declare <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
-declare <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
-declare <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
-declare <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
-
 attributes #0 = { nounwind "target-features" = "+faminmax" }

>From 261e75c84200467514473f64b213ba31b761dec9 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Thu, 29 Aug 2024 17:29:42 +0100
Subject: [PATCH 3/3] [fixup] Stuff (NFC)

---
 llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td       |  9 +++------
 .../test/CodeGen/AArch64/sve2-intrinsics-faminmax.ll | 12 ++++++------
 2 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 0573bd717caa5b..ee2948c9b9f106 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -717,11 +717,6 @@ let Predicates = [HasSVEorSME] in {
   defm FDIV_ZPZZ   : sve_fp_bin_pred_hfd<AArch64fdiv_p>;
 } // End HasSVEorSME
 
-let Predicates = [HasSVE2orSME2, HasFAMINMAX] in {
-  defm FAMAX_ZPZZ : sve_fp_bin_pred_hfd<int_aarch64_sve_famax_u>;
-  defm FAMIN_ZPZZ : sve_fp_bin_pred_hfd<int_aarch64_sve_famin_u>;
-}
-
 let Predicates = [HasSVEorSME, UseExperimentalZeroingPseudos] in {
   defm FADD_ZPZZ   : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fadd>;
   defm FSUB_ZPZZ   : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fsub>;
@@ -4172,9 +4167,11 @@ defm FCVTNT_Z2Z_StoB : sve2_fp8_down_cvt_single<0b11, "fcvtnt", ZZ_s_mul_r>;
 } // End HasSVE2orSME2, HasFP8
 
 let Predicates = [HasSVE2orSME2, HasFAMINMAX] in {
-// FP8 Arithmetic - Predicated Group
 defm FAMIN_ZPmZ : sve_fp_2op_p_zds<0b1111, "famin", "FAMIN_ZPZZ", int_aarch64_sve_famin, DestructiveBinaryComm>;
 defm FAMAX_ZPmZ : sve_fp_2op_p_zds<0b1110, "famax", "FAMAX_ZPZZ", int_aarch64_sve_famax, DestructiveBinaryComm>;
+
+defm FAMAX_ZPZZ : sve_fp_bin_pred_hfd<int_aarch64_sve_famax_u>;
+defm FAMIN_ZPZZ : sve_fp_bin_pred_hfd<int_aarch64_sve_famin_u>;
 } // End HasSVE2orSME2, HasFAMINMAX
 
 let Predicates = [HasSSVE_FP8FMA] in {
diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-faminmax.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-faminmax.ll
index 0d247328a9475d..7d16f8383d9682 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-faminmax.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-faminmax.ll
@@ -36,7 +36,7 @@ define <vscale x 8 x half> @famin_u_f16(<vscale x 8 x i1> %pg, <vscale x 8 x hal
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    famin z0.h, p0/m, z0.h, z1.h
 ; CHECK-NEXT:    ret
-    %r = call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+    %r = call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %b, <vscale x 8 x half> %a)
     ret <vscale x 8 x half> %r
 }
 
@@ -45,7 +45,7 @@ define <vscale x 4 x float> @famin_u_f32(<vscale x 4 x i1> %pg, <vscale x 4 x fl
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    famin z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT:    ret
-    %r = call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+    %r = call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %b, <vscale x 4 x float> %a)
     ret <vscale x 4 x float> %r
 }
 
@@ -54,7 +54,7 @@ define <vscale x 2 x double> @famin_u_f64(<vscale x 2 x i1> %pg, <vscale x 2 x d
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    famin z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT:    ret
-    %r = call <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+    %r = call <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %b, <vscale x 2 x double> %a)
     ret <vscale x 2 x double> %r
 }
 
@@ -90,7 +90,7 @@ define <vscale x 8 x half> @famax_u_f16(<vscale x 8 x i1> %pg, <vscale x 8 x hal
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    famax z0.h, p0/m, z0.h, z1.h
 ; CHECK-NEXT:    ret
-    %r = call <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+    %r = call <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %b, <vscale x 8 x half> %a)
     ret <vscale x 8 x half> %r
 }
 
@@ -99,7 +99,7 @@ define <vscale x 4 x float> @famax_u_f32(<vscale x 4 x i1> %pg, <vscale x 4 x fl
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    famax z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT:    ret
-    %r = call <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+    %r = call <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %b, <vscale x 4 x float> %a)
     ret <vscale x 4 x float> %r
 }
 
@@ -108,7 +108,7 @@ define <vscale x 2 x double> @famax_u_f64(<vscale x 2 x i1> %pg, <vscale x 2 x d
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    famax z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT:    ret
-    %r = call <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+    %r = call <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %b, <vscale x 2 x double> %a)
     ret <vscale x 2 x double> %r
 }