[clang] [llvm] [AArch64] Implement intrinsics for SVE FAMIN/FAMAX (PR #99042)
Momchil Velikov via cfe-commits
cfe-commits at lists.llvm.org
Thu Aug 29 09:32:32 PDT 2024
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/99042
>From aa74d04751558f3ab47d566c91fb8ad178df0dce Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Tue, 16 Jul 2024 13:37:34 +0100
Subject: [PATCH 1/3] [AArch64] Implement intrinsics for SVE FAMIN/FAMAX
This patch implements the following intrinsics:
* Floating-point absolute maximum (predicated)
svfloat16_t svamax[_f16]_m(svbool_t, svfloat16_t, svfloat16_t);
svfloat16_t svamax[_f16]_x(svbool_t, svfloat16_t, svfloat16_t);
svfloat16_t svamax[_f16]_z(svbool_t, svfloat16_t, svfloat16_t);
svfloat16_t svamax[_n_f16]_m(svbool_t, svfloat16_t, float16_t);
svfloat16_t svamax[_n_f16]_x(svbool_t, svfloat16_t, float16_t);
svfloat16_t svamax[_n_f16]_z(svbool_t, svfloat16_t, float16_t);
* Floating-point absolute minimum (predicated)
svfloat16_t svmin[_f16]_m(svbool_t, svfloat16_t, svfloat16_t);
svfloat16_t svmin[_f16]_x(svbool_t, svfloat16_t, svfloat16_t);
svfloat16_t svmin[_f16]_z(svbool_t, svfloat16_t, svfloat16_t);
svfloat16_t svmin[_n_f16]_m(svbool_t, svfloat16_t, float16_t);
svfloat16_t svmin[_n_f16]_x(svbool_t, svfloat16_t, float16_t);
svfloat16_t svmin[_n_f16]_z(svbool_t, svfloat16_t, float16_t);
All the intrinsics have also variants for `f32` and `f64`, and have
the `__arm_streaming` attribute.
(cf. https://github.com/ARM-software/acle/pull/324)
---
clang/include/clang/Basic/arm_sve.td | 5 +
.../acle_sve2_faminmax.c | 775 ++++++++++++++++++
llvm/include/llvm/IR/IntrinsicsAArch64.td | 7 +
.../Target/AArch64/AArch64ISelLowering.cpp | 8 +
llvm/lib/Target/AArch64/AArch64ISelLowering.h | 2 +
.../lib/Target/AArch64/AArch64SVEInstrInfo.td | 14 +-
.../AArch64/sve2-intrinsics-faminmax.ll | 266 ++++++
7 files changed, 1075 insertions(+), 2 deletions(-)
create mode 100644 clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_faminmax.c
create mode 100644 llvm/test/CodeGen/AArch64/sve2-intrinsics-faminmax.ll
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
index 078373823a3b6f..b40ce9b4d11b56 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2401,3 +2401,8 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslb_lane", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_7>]>;
def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslt_lane", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_7>]>;
}
+
+let SVETargetGuard = "sve2,faminmax", SMETargetGuard = "sme2,faminmax" in {
+ defm SVAMIN : SInstZPZZ<"svamin", "hfd", "aarch64_sve_famin", "aarch64_sve_famin_u">;
+ defm SVAMAX : SInstZPZZ<"svamax", "hfd", "aarch64_sve_famax", "aarch64_sve_famax_u">;
+}
diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_faminmax.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_faminmax.c
new file mode 100644
index 00000000000000..3cf7d99d606f32
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_faminmax.c
@@ -0,0 +1,775 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +faminmax -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +faminmax -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +faminmax -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +faminmax -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CPP
+// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +faminmax -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CPP
+// RUN: %clang_cc1 -x c++ -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +faminmax -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CPP
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +faminmax -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +faminmax -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+// REQUIRES: aarch64-registered-target
+
+#ifdef __ARM_FEATURE_SME
+#include "arm_sme.h"
+#else
+#include "arm_sve.h"
+#endif
+
+#ifdef SVE_OVERLOADED_FORMS
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3
+#endif
+
+#ifdef __ARM_FEATURE_SME
+#define STREAMING __arm_streaming
+#else
+#define STREAMING
+#endif
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famin_f16_m(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z16test_famin_f16_mu10__SVBool_tu13__SVFloat16_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
+// CHECK-CPP-NEXT: ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_famin_f16_m(svbool_t pg, svfloat16_t a, svfloat16_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamin, _f16, _m)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famin_f16_x(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z16test_famin_f16_xu10__SVBool_tu13__SVFloat16_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
+// CHECK-CPP-NEXT: ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_famin_f16_x(svbool_t pg, svfloat16_t a, svfloat16_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamin, _f16, _x)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famin_f16_z(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> zeroinitializer
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[B]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP2]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z16test_famin_f16_zu10__SVBool_tu13__SVFloat16_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> zeroinitializer
+// CHECK-CPP-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[B]])
+// CHECK-CPP-NEXT: ret <vscale x 8 x half> [[TMP2]]
+//
+svfloat16_t test_famin_f16_z(svbool_t pg, svfloat16_t a, svfloat16_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamin, _f16, _z)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famin_n_f16_m(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z18test_famin_n_f16_mu10__SVBool_tu13__SVFloat16_tDh(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-CPP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-CPP-NEXT: ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_famin_n_f16_m(svbool_t pg, svfloat16_t a, float16_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamin, _n_f16, _m)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famin_n_f16_x(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z18test_famin_n_f16_xu10__SVBool_tu13__SVFloat16_tDh(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-CPP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-CPP-NEXT: ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_famin_n_f16_x(svbool_t pg, svfloat16_t a, float16_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamin, _n_f16, _x)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famin_n_f16_z(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> zeroinitializer
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP2]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z18test_famin_n_f16_zu10__SVBool_tu13__SVFloat16_tDh(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-CPP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> zeroinitializer
+// CHECK-CPP-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-CPP-NEXT: ret <vscale x 8 x half> [[TMP2]]
+//
+svfloat16_t test_famin_n_f16_z(svbool_t pg, svfloat16_t a, float16_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamin, _n_f16, _z)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famin_f32_m(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+// CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z16test_famin_f32_mu10__SVBool_tu13__SVFloat32_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+// CHECK-CPP-NEXT: ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_famin_f32_m(svbool_t pg, svfloat32_t a, svfloat32_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamin, _f32, _m)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famin_f32_x(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+// CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z16test_famin_f32_xu10__SVBool_tu13__SVFloat32_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+// CHECK-CPP-NEXT: ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_famin_f32_x(svbool_t pg, svfloat32_t a, svfloat32_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamin, _f32, _x)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famin_f32_z(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[TMP1:%.*]] = select <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> zeroinitializer
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[B]])
+// CHECK-NEXT: ret <vscale x 4 x float> [[TMP2]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z16test_famin_f32_zu10__SVBool_tu13__SVFloat32_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = select <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> zeroinitializer
+// CHECK-CPP-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[B]])
+// CHECK-CPP-NEXT: ret <vscale x 4 x float> [[TMP2]]
+//
+svfloat32_t test_famin_f32_z(svbool_t pg, svfloat32_t a, svfloat32_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamin, _f32, _z)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famin_n_f32_m(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0
+// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[DOTSPLAT]])
+// CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z18test_famin_n_f32_mu10__SVBool_tu13__SVFloat32_tf(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0
+// CHECK-CPP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[DOTSPLAT]])
+// CHECK-CPP-NEXT: ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_famin_n_f32_m(svbool_t pg, svfloat32_t a, float32_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamin, _n_f32, _m)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famin_n_f32_x(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0
+// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[DOTSPLAT]])
+// CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z18test_famin_n_f32_xu10__SVBool_tu13__SVFloat32_tf(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0
+// CHECK-CPP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[DOTSPLAT]])
+// CHECK-CPP-NEXT: ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_famin_n_f32_x(svbool_t pg, svfloat32_t a, float32_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamin, _n_f32, _x)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famin_n_f32_z(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0
+// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP1:%.*]] = select <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> zeroinitializer
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[DOTSPLAT]])
+// CHECK-NEXT: ret <vscale x 4 x float> [[TMP2]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z18test_famin_n_f32_zu10__SVBool_tu13__SVFloat32_tf(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0
+// CHECK-CPP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = select <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> zeroinitializer
+// CHECK-CPP-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[DOTSPLAT]])
+// CHECK-CPP-NEXT: ret <vscale x 4 x float> [[TMP2]]
+//
+svfloat32_t test_famin_n_f32_z(svbool_t pg, svfloat32_t a, float32_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamin, _n_f32, _z)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famin_f64_m(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
+// CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z16test_famin_f64_mu10__SVBool_tu13__SVFloat64_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
+// CHECK-CPP-NEXT: ret <vscale x 2 x double> [[TMP1]]
+//
+svfloat64_t test_famin_f64_m(svbool_t pg, svfloat64_t a, svfloat64_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamin, _f64, _m)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famin_f64_x(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
+// CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z16test_famin_f64_xu10__SVBool_tu13__SVFloat64_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
+// CHECK-CPP-NEXT: ret <vscale x 2 x double> [[TMP1]]
+//
+svfloat64_t test_famin_f64_x(svbool_t pg, svfloat64_t a, svfloat64_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamin, _f64, _x)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famin_f64_z(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[TMP1:%.*]] = select <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> zeroinitializer
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[B]])
+// CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z16test_famin_f64_zu10__SVBool_tu13__SVFloat64_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = select <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> zeroinitializer
+// CHECK-CPP-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[B]])
+// CHECK-CPP-NEXT: ret <vscale x 2 x double> [[TMP2]]
+//
+svfloat64_t test_famin_f64_z(svbool_t pg, svfloat64_t a, svfloat64_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamin, _f64, _z)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famin_n_f64_m(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0
+// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[DOTSPLAT]])
+// CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z18test_famin_n_f64_mu10__SVBool_tu13__SVFloat64_td(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0
+// CHECK-CPP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[DOTSPLAT]])
+// CHECK-CPP-NEXT: ret <vscale x 2 x double> [[TMP1]]
+//
+svfloat64_t test_famin_n_f64_m(svbool_t pg, svfloat64_t a, float64_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamin, _n_f64, _m)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famin_n_f64_x(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0
+// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[DOTSPLAT]])
+// CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z18test_famin_n_f64_xu10__SVBool_tu13__SVFloat64_td(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0
+// CHECK-CPP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[DOTSPLAT]])
+// CHECK-CPP-NEXT: ret <vscale x 2 x double> [[TMP1]]
+//
+svfloat64_t test_famin_n_f64_x(svbool_t pg, svfloat64_t a, float64_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamin, _n_f64, _x)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famin_n_f64_z(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0
+// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP1:%.*]] = select <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> zeroinitializer
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[DOTSPLAT]])
+// CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z18test_famin_n_f64_zu10__SVBool_tu13__SVFloat64_td(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0
+// CHECK-CPP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = select <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> zeroinitializer
+// CHECK-CPP-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[DOTSPLAT]])
+// CHECK-CPP-NEXT: ret <vscale x 2 x double> [[TMP2]]
+//
+svfloat64_t test_famin_n_f64_z(svbool_t pg, svfloat64_t a, float64_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamin, _n_f64, _z)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famax_f16_m(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z16test_famax_f16_mu10__SVBool_tu13__SVFloat16_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
+// CHECK-CPP-NEXT: ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_famax_f16_m(svbool_t pg, svfloat16_t a, svfloat16_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamax, _f16, _m)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famax_f16_x(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z16test_famax_f16_xu10__SVBool_tu13__SVFloat16_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[B]])
+// CHECK-CPP-NEXT: ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_famax_f16_x(svbool_t pg, svfloat16_t a, svfloat16_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamax, _f16, _x)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famax_f16_z(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> zeroinitializer
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[B]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP2]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z16test_famax_f16_zu10__SVBool_tu13__SVFloat16_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], <vscale x 8 x half> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> zeroinitializer
+// CHECK-CPP-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[B]])
+// CHECK-CPP-NEXT: ret <vscale x 8 x half> [[TMP2]]
+//
+svfloat16_t test_famax_f16_z(svbool_t pg, svfloat16_t a, svfloat16_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamax, _f16, _z)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famax_n_f16_m(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z18test_famax_n_f16_mu10__SVBool_tu13__SVFloat16_tDh(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-CPP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-CPP-NEXT: ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_famax_n_f16_m(svbool_t pg, svfloat16_t a, float16_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamax, _n_f16, _m)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famax_n_f16_x(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z18test_famax_n_f16_xu10__SVBool_tu13__SVFloat16_tDh(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-CPP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-CPP-NEXT: ret <vscale x 8 x half> [[TMP1]]
+//
+svfloat16_t test_famax_n_f16_x(svbool_t pg, svfloat16_t a, float16_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamax, _n_f16, _x)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 8 x half> @test_famax_n_f16_z(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> zeroinitializer
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-NEXT: ret <vscale x 8 x half> [[TMP2]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 8 x half> @_Z18test_famax_n_f16_zu10__SVBool_tu13__SVFloat16_tDh(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 8 x half> [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x half> poison, half [[B]], i64 0
+// CHECK-CPP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x half> [[DOTSPLATINSERT]], <vscale x 8 x half> poison, <vscale x 8 x i32> zeroinitializer
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = select <vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[A]], <vscale x 8 x half> zeroinitializer
+// CHECK-CPP-NEXT: [[TMP2:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x half> [[TMP1]], <vscale x 8 x half> [[DOTSPLAT]])
+// CHECK-CPP-NEXT: ret <vscale x 8 x half> [[TMP2]]
+//
+svfloat16_t test_famax_n_f16_z(svbool_t pg, svfloat16_t a, float16_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamax, _n_f16, _z)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famax_f32_m(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+// CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z16test_famax_f32_mu10__SVBool_tu13__SVFloat32_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+// CHECK-CPP-NEXT: ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_famax_f32_m(svbool_t pg, svfloat32_t a, svfloat32_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamax, _f32, _m)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famax_f32_x(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+// CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z16test_famax_f32_xu10__SVBool_tu13__SVFloat32_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[B]])
+// CHECK-CPP-NEXT: ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_famax_f32_x(svbool_t pg, svfloat32_t a, svfloat32_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamax, _f32, _x)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famax_f32_z(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[TMP1:%.*]] = select <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> zeroinitializer
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[B]])
+// CHECK-NEXT: ret <vscale x 4 x float> [[TMP2]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z16test_famax_f32_zu10__SVBool_tu13__SVFloat32_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], <vscale x 4 x float> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = select <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> zeroinitializer
+// CHECK-CPP-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[B]])
+// CHECK-CPP-NEXT: ret <vscale x 4 x float> [[TMP2]]
+//
+svfloat32_t test_famax_f32_z(svbool_t pg, svfloat32_t a, svfloat32_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamax, _f32, _z)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famax_n_f32_m(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0
+// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[DOTSPLAT]])
+// CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z18test_famax_n_f32_mu10__SVBool_tu13__SVFloat32_tf(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0
+// CHECK-CPP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[DOTSPLAT]])
+// CHECK-CPP-NEXT: ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_famax_n_f32_m(svbool_t pg, svfloat32_t a, float32_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamax, _n_f32, _m)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famax_n_f32_x(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0
+// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[DOTSPLAT]])
+// CHECK-NEXT: ret <vscale x 4 x float> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z18test_famax_n_f32_xu10__SVBool_tu13__SVFloat32_tf(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0
+// CHECK-CPP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> [[DOTSPLAT]])
+// CHECK-CPP-NEXT: ret <vscale x 4 x float> [[TMP1]]
+//
+svfloat32_t test_famax_n_f32_x(svbool_t pg, svfloat32_t a, float32_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamax, _n_f32, _x)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 4 x float> @test_famax_n_f32_z(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0
+// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP1:%.*]] = select <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> zeroinitializer
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[DOTSPLAT]])
+// CHECK-NEXT: ret <vscale x 4 x float> [[TMP2]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 4 x float> @_Z18test_famax_n_f32_zu10__SVBool_tu13__SVFloat32_tf(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 4 x float> [[A:%.*]], float noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[B]], i64 0
+// CHECK-CPP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = select <vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[A]], <vscale x 4 x float> zeroinitializer
+// CHECK-CPP-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[DOTSPLAT]])
+// CHECK-CPP-NEXT: ret <vscale x 4 x float> [[TMP2]]
+//
+svfloat32_t test_famax_n_f32_z(svbool_t pg, svfloat32_t a, float32_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamax, _n_f32, _z)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famax_f64_m(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
+// CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z16test_famax_f64_mu10__SVBool_tu13__SVFloat64_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
+// CHECK-CPP-NEXT: ret <vscale x 2 x double> [[TMP1]]
+//
+svfloat64_t test_famax_f64_m(svbool_t pg, svfloat64_t a, svfloat64_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamax, _f64, _m)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famax_f64_x(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
+// CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z16test_famax_f64_xu10__SVBool_tu13__SVFloat64_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[B]])
+// CHECK-CPP-NEXT: ret <vscale x 2 x double> [[TMP1]]
+//
+svfloat64_t test_famax_f64_x(svbool_t pg, svfloat64_t a, svfloat64_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamax, _f64, _x)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famax_f64_z(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[TMP1:%.*]] = select <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> zeroinitializer
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[B]])
+// CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z16test_famax_f64_zu10__SVBool_tu13__SVFloat64_tS0_(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], <vscale x 2 x double> [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = select <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> zeroinitializer
+// CHECK-CPP-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[B]])
+// CHECK-CPP-NEXT: ret <vscale x 2 x double> [[TMP2]]
+//
+svfloat64_t test_famax_f64_z(svbool_t pg, svfloat64_t a, svfloat64_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamax, _f64, _z)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famax_n_f64_m(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0
+// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[DOTSPLAT]])
+// CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z18test_famax_n_f64_mu10__SVBool_tu13__SVFloat64_td(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0
+// CHECK-CPP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[DOTSPLAT]])
+// CHECK-CPP-NEXT: ret <vscale x 2 x double> [[TMP1]]
+//
+svfloat64_t test_famax_n_f64_m(svbool_t pg, svfloat64_t a, float64_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamax, _n_f64, _m)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famax_n_f64_x(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0
+// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[DOTSPLAT]])
+// CHECK-NEXT: ret <vscale x 2 x double> [[TMP1]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z18test_famax_n_f64_xu10__SVBool_tu13__SVFloat64_td(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0
+// CHECK-CPP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> [[DOTSPLAT]])
+// CHECK-CPP-NEXT: ret <vscale x 2 x double> [[TMP1]]
+//
+svfloat64_t test_famax_n_f64_x(svbool_t pg, svfloat64_t a, float64_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamax, _n_f64, _x)(pg, a, b);
+}
+
+// CHECK-LABEL: define dso_local <vscale x 2 x double> @test_famax_n_f64_z(
+// CHECK-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0
+// CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP1:%.*]] = select <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> zeroinitializer
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[DOTSPLAT]])
+// CHECK-NEXT: ret <vscale x 2 x double> [[TMP2]]
+//
+// CHECK-CPP-LABEL: define dso_local <vscale x 2 x double> @_Z18test_famax_n_f64_zu10__SVBool_tu13__SVFloat64_td(
+// CHECK-CPP-SAME: <vscale x 16 x i1> [[PG:%.*]], <vscale x 2 x double> [[A:%.*]], double noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {
+// CHECK-CPP-NEXT: [[ENTRY:.*:]]
+// CHECK-CPP-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG]])
+// CHECK-CPP-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x double> poison, double [[B]], i64 0
+// CHECK-CPP-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x double> [[DOTSPLATINSERT]], <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+// CHECK-CPP-NEXT: [[TMP1:%.*]] = select <vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[A]], <vscale x 2 x double> zeroinitializer
+// CHECK-CPP-NEXT: [[TMP2:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x double> [[TMP1]], <vscale x 2 x double> [[DOTSPLAT]])
+// CHECK-CPP-NEXT: ret <vscale x 2 x double> [[TMP2]]
+//
+svfloat64_t test_famax_n_f64_z(svbool_t pg, svfloat64_t a, float64_t b) STREAMING {
+ return SVE_ACLE_FUNC(svamax, _n_f64, _z)(pg, a, b);
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 3735bf5222fce3..ca4af6c2603ea6 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -3730,3 +3730,10 @@ def int_aarch64_sve_pmov_to_vector_lane_zeroing : SVE2_Pred_1VectorArg_Intrinsic
def int_aarch64_sme_mopa_nonwide : SME_OuterProduct_Intrinsic;
def int_aarch64_sme_mops_nonwide : SME_OuterProduct_Intrinsic;
+// SVE2/SME2 - Floating point absolute maximum and minimum
+
+def int_aarch64_sve_famax : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_famax_u : AdvSIMD_Pred2VectorArg_Intrinsic;
+
+def int_aarch64_sve_famin : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_famin_u : AdvSIMD_Pred2VectorArg_Intrinsic;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 215f30128e7038..b1aa3c65745161 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2726,6 +2726,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::FMINNMV_PRED)
MAKE_CASE(AArch64ISD::FMUL_PRED)
MAKE_CASE(AArch64ISD::FSUB_PRED)
+ MAKE_CASE(AArch64ISD::FAMAX_PRED)
+ MAKE_CASE(AArch64ISD::FAMIN_PRED)
MAKE_CASE(AArch64ISD::RDSVL)
MAKE_CASE(AArch64ISD::BIC)
MAKE_CASE(AArch64ISD::CBZ)
@@ -22056,6 +22058,12 @@ static SDValue performIntrinsicCombine(SDNode *N,
AArch64CC::LAST_ACTIVE);
case Intrinsic::aarch64_sve_whilelo:
return tryCombineWhileLo(N, DCI, Subtarget);
+ case Intrinsic::aarch64_sve_famax_u:
+ return DAG.getNode(AArch64ISD::FAMAX_PRED, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2), N->getOperand(3));
+ case Intrinsic::aarch64_sve_famin_u:
+ return DAG.getNode(AArch64ISD::FAMIN_PRED, SDLoc(N), N->getValueType(0),
+ N->getOperand(1), N->getOperand(2), N->getOperand(3));
}
return SDValue();
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 39d5df0de0eec7..cdeb988c3966e5 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -135,6 +135,8 @@ enum NodeType : unsigned {
UDIV_PRED,
UMAX_PRED,
UMIN_PRED,
+ FAMAX_PRED,
+ FAMIN_PRED,
// Unpredicated vector instructions
BIC,
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 694b7fb2068a29..e733b4c02f7d47 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -218,6 +218,9 @@ def AArch64fsub_p_contract : PatFrag<(ops node:$op1, node:$op2, node:$op3),
return N->getFlags().hasAllowContract();
}]>;
+def AArch64famax_p : SDNode<"AArch64ISD::FAMAX_PRED", SDT_AArch64Arith>;
+def AArch64famin_p : SDNode<"AArch64ISD::FAMIN_PRED", SDT_AArch64Arith>;
+
def SDT_AArch64Arith_Imm : SDTypeProfile<1, 3, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVT<3,i32>,
SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>
@@ -483,6 +486,8 @@ def AArch64fminnm_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fminnm,
def AArch64fmaxnm_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fmaxnm, AArch64fmaxnm_p>;
def AArch64fmin_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fmin, AArch64fmin_p>;
def AArch64fmax_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fmax, AArch64fmax_p>;
+def AArch64famax_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_famax, AArch64famax_p>;
+def AArch64famin_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_famin, AArch64famin_p>;
def AArch64fadd : PatFrags<(ops node:$op1, node:$op2),
[(fadd node:$op1, node:$op2),
@@ -717,6 +722,11 @@ let Predicates = [HasSVEorSME] in {
defm FDIV_ZPZZ : sve_fp_bin_pred_hfd<AArch64fdiv_p>;
} // End HasSVEorSME
+let Predicates = [HasSVE2orSME2, HasFAMINMAX] in {
+ defm FAMAX_ZPZZ : sve_fp_bin_pred_hfd<AArch64famax_p>;
+ defm FAMIN_ZPZZ : sve_fp_bin_pred_hfd<AArch64famin_p>;
+}
+
let Predicates = [HasSVEorSME, UseExperimentalZeroingPseudos] in {
defm FADD_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fadd>;
defm FSUB_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fsub>;
@@ -4168,8 +4178,8 @@ defm FCVTNT_Z2Z_StoB : sve2_fp8_down_cvt_single<0b11, "fcvtnt", ZZ_s_mul_r>;
let Predicates = [HasSVE2orSME2, HasFAMINMAX] in {
// FP8 Arithmetic - Predicated Group
-defm FAMIN_ZPmZ : sve_fp_2op_p_zds<0b1111, "famin", "", null_frag, DestructiveOther>;
-defm FAMAX_ZPmZ : sve_fp_2op_p_zds<0b1110, "famax", "", null_frag, DestructiveOther>;
+defm FAMIN_ZPmZ : sve_fp_2op_p_zds<0b1111, "famin", "FAMIN_ZPZZ", AArch64famin_m1, DestructiveBinaryComm>;
+defm FAMAX_ZPmZ : sve_fp_2op_p_zds<0b1110, "famax", "FAMAX_ZPZZ", AArch64famax_m1, DestructiveBinaryComm>;
} // End HasSVE2orSME2, HasFAMINMAX
let Predicates = [HasSSVE_FP8FMA] in {
diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-faminmax.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-faminmax.ll
new file mode 100644
index 00000000000000..123756e88f6347
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-faminmax.ll
@@ -0,0 +1,266 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mattr=+sve2 < %s | FileCheck %s
+; RUN: llc -mattr=+sme2 -force-streaming < %s | FileCheck %s
+
+target triple = "aarch64-linux"
+
+define <vscale x 8 x half> @famin_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: famin_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famin z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %r = call <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+ ret <vscale x 8 x half> %r
+}
+
+define <vscale x 4 x float> @famin_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: famin_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famin z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %r = call <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+ ret <vscale x 4 x float> %r
+}
+
+define <vscale x 2 x double> @famin_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: famin_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famin z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %r = call <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+ ret <vscale x 2 x double> %r
+}
+
+define <vscale x 8 x half> @famin_u_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: famin_u_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famin z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %r = call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+ ret <vscale x 8 x half> %r
+}
+
+define <vscale x 4 x float> @famin_u_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: famin_u_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famin z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %r = call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+ ret <vscale x 4 x float> %r
+}
+
+define <vscale x 2 x double> @famin_u_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: famin_u_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famin z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %r = call <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+ ret <vscale x 2 x double> %r
+}
+
+define <vscale x 8 x half> @famax_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: famax_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famax z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %r = call <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+ ret <vscale x 8 x half> %r
+}
+
+define <vscale x 4 x float> @famax_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: famax_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famax z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %r = call <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+ ret <vscale x 4 x float> %r
+}
+
+define <vscale x 2 x double> @famax_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: famax_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famax z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %r = call <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+ ret <vscale x 2 x double> %r
+}
+
+define <vscale x 8 x half> @famax_u_f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: famax_u_f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famax z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %r = call <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+ ret <vscale x 8 x half> %r
+}
+
+define <vscale x 4 x float> @famax_u_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: famax_u_f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famax z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %r = call <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+ ret <vscale x 4 x float> %r
+}
+
+define <vscale x 2 x double> @famax_u_f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: famax_u_f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famax z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %r = call <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+ ret <vscale x 2 x double> %r
+}
+
+define <vscale x 8 x half> @select_famin_f16a(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: select_famin_f16a:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famin z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %all.true = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %m = call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> %all.true, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+ %r = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %m, <vscale x 8 x half> %a
+ ret <vscale x 8 x half> %r
+}
+
+define <vscale x 8 x half> @select_famin_f16b(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: select_famin_f16b:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famin z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %all.true = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %m = call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> %all.true, <vscale x 8 x half> %b, <vscale x 8 x half> %a)
+ %r = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %m, <vscale x 8 x half> %a
+ ret <vscale x 8 x half> %r
+}
+
+define <vscale x 4 x float> @select_famin_f32a(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: select_famin_f32a:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famin z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %all.true = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %m = call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> %all.true, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+ %r = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %m, <vscale x 4 x float> %a
+ ret <vscale x 4 x float> %r
+}
+
+define <vscale x 4 x float> @select_famin_f32b(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: select_famin_f32b:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famin z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %all.true = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %m = call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> %all.true, <vscale x 4 x float> %b, <vscale x 4 x float> %a)
+ %r = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %m, <vscale x 4 x float> %a
+ ret <vscale x 4 x float> %r
+}
+
+define <vscale x 2 x double> @select_famin_f64a(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: select_famin_f64a:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famin z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %all.true = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %m = call <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1> %all.true, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+ %r = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %m, <vscale x 2 x double> %a
+ ret <vscale x 2 x double> %r
+}
+
+define <vscale x 2 x double> @select_famin_f64b(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: select_famin_f64b:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famin z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %all.true = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %m = call <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1> %all.true, <vscale x 2 x double> %b, <vscale x 2 x double> %a)
+ %r = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %m, <vscale x 2 x double> %a
+ ret <vscale x 2 x double> %r
+}
+
+
+define <vscale x 8 x half> @select_famax_f16a(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: select_famax_f16a:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famax z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %all.true = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %m = call <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1> %all.true, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+ %r = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %m, <vscale x 8 x half> %a
+ ret <vscale x 8 x half> %r
+}
+
+define <vscale x 8 x half> @select_famax_f16b(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
+; CHECK-LABEL: select_famax_f16b:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famax z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %all.true = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %m = call <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1> %all.true, <vscale x 8 x half> %b, <vscale x 8 x half> %a)
+ %r = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %m, <vscale x 8 x half> %a
+ ret <vscale x 8 x half> %r
+}
+
+define <vscale x 4 x float> @select_famax_f32a(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: select_famax_f32a:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famax z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %all.true = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %m = call <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1> %all.true, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+ %r = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %m, <vscale x 4 x float> %a
+ ret <vscale x 4 x float> %r
+}
+
+define <vscale x 4 x float> @select_famax_f32b(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
+; CHECK-LABEL: select_famax_f32b:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famax z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %all.true = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %m = call <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1> %all.true, <vscale x 4 x float> %b, <vscale x 4 x float> %a)
+ %r = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %m, <vscale x 4 x float> %a
+ ret <vscale x 4 x float> %r
+}
+
+define <vscale x 2 x double> @select_famax_f64a(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: select_famax_f64a:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famax z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %all.true = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %m = call <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1> %all.true, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+ %r = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %m, <vscale x 2 x double> %a
+ ret <vscale x 2 x double> %r
+}
+
+define <vscale x 2 x double> @select_famax_f64b(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
+; CHECK-LABEL: select_famax_f64b:
+; CHECK: // %bb.0:
+; CHECK-NEXT: famax z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %all.true = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %m = call <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1> %all.true, <vscale x 2 x double> %b, <vscale x 2 x double> %a)
+ %r = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %m, <vscale x 2 x double> %a
+ ret <vscale x 2 x double> %r
+}
+
+declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+declare <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+declare <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+attributes #0 = { nounwind "target-features" = "+faminmax" }
>From 7dcd5a7cb5558226f41acabc8d84b166dee74f64 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Thu, 29 Aug 2024 10:41:44 +0100
Subject: [PATCH 2/3] [fixup] Remove AArch64ISD::FA{MIN,MAX}_PRED
---
.../Target/AArch64/AArch64ISelLowering.cpp | 8 -
llvm/lib/Target/AArch64/AArch64ISelLowering.h | 2 -
.../lib/Target/AArch64/AArch64SVEInstrInfo.td | 13 +-
.../AArch64/sve2-intrinsics-faminmax.ll | 151 ------------------
4 files changed, 4 insertions(+), 170 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index b1aa3c65745161..215f30128e7038 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2726,8 +2726,6 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::FMINNMV_PRED)
MAKE_CASE(AArch64ISD::FMUL_PRED)
MAKE_CASE(AArch64ISD::FSUB_PRED)
- MAKE_CASE(AArch64ISD::FAMAX_PRED)
- MAKE_CASE(AArch64ISD::FAMIN_PRED)
MAKE_CASE(AArch64ISD::RDSVL)
MAKE_CASE(AArch64ISD::BIC)
MAKE_CASE(AArch64ISD::CBZ)
@@ -22058,12 +22056,6 @@ static SDValue performIntrinsicCombine(SDNode *N,
AArch64CC::LAST_ACTIVE);
case Intrinsic::aarch64_sve_whilelo:
return tryCombineWhileLo(N, DCI, Subtarget);
- case Intrinsic::aarch64_sve_famax_u:
- return DAG.getNode(AArch64ISD::FAMAX_PRED, SDLoc(N), N->getValueType(0),
- N->getOperand(1), N->getOperand(2), N->getOperand(3));
- case Intrinsic::aarch64_sve_famin_u:
- return DAG.getNode(AArch64ISD::FAMIN_PRED, SDLoc(N), N->getValueType(0),
- N->getOperand(1), N->getOperand(2), N->getOperand(3));
}
return SDValue();
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index cdeb988c3966e5..39d5df0de0eec7 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -135,8 +135,6 @@ enum NodeType : unsigned {
UDIV_PRED,
UMAX_PRED,
UMIN_PRED,
- FAMAX_PRED,
- FAMIN_PRED,
// Unpredicated vector instructions
BIC,
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index e733b4c02f7d47..0573bd717caa5b 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -218,9 +218,6 @@ def AArch64fsub_p_contract : PatFrag<(ops node:$op1, node:$op2, node:$op3),
return N->getFlags().hasAllowContract();
}]>;
-def AArch64famax_p : SDNode<"AArch64ISD::FAMAX_PRED", SDT_AArch64Arith>;
-def AArch64famin_p : SDNode<"AArch64ISD::FAMIN_PRED", SDT_AArch64Arith>;
-
def SDT_AArch64Arith_Imm : SDTypeProfile<1, 3, [
SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisVT<3,i32>,
SDTCVecEltisVT<1,i1>, SDTCisSameAs<0,2>
@@ -486,8 +483,6 @@ def AArch64fminnm_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fminnm,
def AArch64fmaxnm_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fmaxnm, AArch64fmaxnm_p>;
def AArch64fmin_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fmin, AArch64fmin_p>;
def AArch64fmax_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_fmax, AArch64fmax_p>;
-def AArch64famax_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_famax, AArch64famax_p>;
-def AArch64famin_m1 : VSelectCommPredOrPassthruPatFrags<int_aarch64_sve_famin, AArch64famin_p>;
def AArch64fadd : PatFrags<(ops node:$op1, node:$op2),
[(fadd node:$op1, node:$op2),
@@ -723,8 +718,8 @@ let Predicates = [HasSVEorSME] in {
} // End HasSVEorSME
let Predicates = [HasSVE2orSME2, HasFAMINMAX] in {
- defm FAMAX_ZPZZ : sve_fp_bin_pred_hfd<AArch64famax_p>;
- defm FAMIN_ZPZZ : sve_fp_bin_pred_hfd<AArch64famin_p>;
+ defm FAMAX_ZPZZ : sve_fp_bin_pred_hfd<int_aarch64_sve_famax_u>;
+ defm FAMIN_ZPZZ : sve_fp_bin_pred_hfd<int_aarch64_sve_famin_u>;
}
let Predicates = [HasSVEorSME, UseExperimentalZeroingPseudos] in {
@@ -4178,8 +4173,8 @@ defm FCVTNT_Z2Z_StoB : sve2_fp8_down_cvt_single<0b11, "fcvtnt", ZZ_s_mul_r>;
let Predicates = [HasSVE2orSME2, HasFAMINMAX] in {
// FP8 Arithmetic - Predicated Group
-defm FAMIN_ZPmZ : sve_fp_2op_p_zds<0b1111, "famin", "FAMIN_ZPZZ", AArch64famin_m1, DestructiveBinaryComm>;
-defm FAMAX_ZPmZ : sve_fp_2op_p_zds<0b1110, "famax", "FAMAX_ZPZZ", AArch64famax_m1, DestructiveBinaryComm>;
+defm FAMIN_ZPmZ : sve_fp_2op_p_zds<0b1111, "famin", "FAMIN_ZPZZ", int_aarch64_sve_famin, DestructiveBinaryComm>;
+defm FAMAX_ZPmZ : sve_fp_2op_p_zds<0b1110, "famax", "FAMAX_ZPZZ", int_aarch64_sve_famax, DestructiveBinaryComm>;
} // End HasSVE2orSME2, HasFAMINMAX
let Predicates = [HasSSVE_FP8FMA] in {
diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-faminmax.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-faminmax.ll
index 123756e88f6347..0d247328a9475d 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-faminmax.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-faminmax.ll
@@ -112,155 +112,4 @@ define <vscale x 2 x double> @famax_u_f64(<vscale x 2 x i1> %pg, <vscale x 2 x d
ret <vscale x 2 x double> %r
}
-define <vscale x 8 x half> @select_famin_f16a(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
-; CHECK-LABEL: select_famin_f16a:
-; CHECK: // %bb.0:
-; CHECK-NEXT: famin z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
- %all.true = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
- %m = call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> %all.true, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
- %r = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %m, <vscale x 8 x half> %a
- ret <vscale x 8 x half> %r
-}
-
-define <vscale x 8 x half> @select_famin_f16b(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
-; CHECK-LABEL: select_famin_f16b:
-; CHECK: // %bb.0:
-; CHECK-NEXT: famin z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
- %all.true = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
- %m = call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> %all.true, <vscale x 8 x half> %b, <vscale x 8 x half> %a)
- %r = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %m, <vscale x 8 x half> %a
- ret <vscale x 8 x half> %r
-}
-
-define <vscale x 4 x float> @select_famin_f32a(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
-; CHECK-LABEL: select_famin_f32a:
-; CHECK: // %bb.0:
-; CHECK-NEXT: famin z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
- %all.true = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
- %m = call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> %all.true, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
- %r = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %m, <vscale x 4 x float> %a
- ret <vscale x 4 x float> %r
-}
-
-define <vscale x 4 x float> @select_famin_f32b(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
-; CHECK-LABEL: select_famin_f32b:
-; CHECK: // %bb.0:
-; CHECK-NEXT: famin z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
- %all.true = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
- %m = call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> %all.true, <vscale x 4 x float> %b, <vscale x 4 x float> %a)
- %r = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %m, <vscale x 4 x float> %a
- ret <vscale x 4 x float> %r
-}
-
-define <vscale x 2 x double> @select_famin_f64a(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-LABEL: select_famin_f64a:
-; CHECK: // %bb.0:
-; CHECK-NEXT: famin z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
- %all.true = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
- %m = call <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1> %all.true, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
- %r = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %m, <vscale x 2 x double> %a
- ret <vscale x 2 x double> %r
-}
-
-define <vscale x 2 x double> @select_famin_f64b(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-LABEL: select_famin_f64b:
-; CHECK: // %bb.0:
-; CHECK-NEXT: famin z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
- %all.true = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
- %m = call <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1> %all.true, <vscale x 2 x double> %b, <vscale x 2 x double> %a)
- %r = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %m, <vscale x 2 x double> %a
- ret <vscale x 2 x double> %r
-}
-
-
-define <vscale x 8 x half> @select_famax_f16a(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
-; CHECK-LABEL: select_famax_f16a:
-; CHECK: // %bb.0:
-; CHECK-NEXT: famax z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
- %all.true = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
- %m = call <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1> %all.true, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
- %r = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %m, <vscale x 8 x half> %a
- ret <vscale x 8 x half> %r
-}
-
-define <vscale x 8 x half> @select_famax_f16b(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
-; CHECK-LABEL: select_famax_f16b:
-; CHECK: // %bb.0:
-; CHECK-NEXT: famax z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: ret
- %all.true = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
- %m = call <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1> %all.true, <vscale x 8 x half> %b, <vscale x 8 x half> %a)
- %r = select <vscale x 8 x i1> %pg, <vscale x 8 x half> %m, <vscale x 8 x half> %a
- ret <vscale x 8 x half> %r
-}
-
-define <vscale x 4 x float> @select_famax_f32a(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
-; CHECK-LABEL: select_famax_f32a:
-; CHECK: // %bb.0:
-; CHECK-NEXT: famax z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
- %all.true = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
- %m = call <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1> %all.true, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
- %r = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %m, <vscale x 4 x float> %a
- ret <vscale x 4 x float> %r
-}
-
-define <vscale x 4 x float> @select_famax_f32b(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
-; CHECK-LABEL: select_famax_f32b:
-; CHECK: // %bb.0:
-; CHECK-NEXT: famax z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: ret
- %all.true = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
- %m = call <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1> %all.true, <vscale x 4 x float> %b, <vscale x 4 x float> %a)
- %r = select <vscale x 4 x i1> %pg, <vscale x 4 x float> %m, <vscale x 4 x float> %a
- ret <vscale x 4 x float> %r
-}
-
-define <vscale x 2 x double> @select_famax_f64a(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-LABEL: select_famax_f64a:
-; CHECK: // %bb.0:
-; CHECK-NEXT: famax z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
- %all.true = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
- %m = call <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1> %all.true, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
- %r = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %m, <vscale x 2 x double> %a
- ret <vscale x 2 x double> %r
-}
-
-define <vscale x 2 x double> @select_famax_f64b(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
-; CHECK-LABEL: select_famax_f64b:
-; CHECK: // %bb.0:
-; CHECK-NEXT: famax z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: ret
- %all.true = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
- %m = call <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1> %all.true, <vscale x 2 x double> %b, <vscale x 2 x double> %a)
- %r = select <vscale x 2 x i1> %pg, <vscale x 2 x double> %m, <vscale x 2 x double> %a
- ret <vscale x 2 x double> %r
-}
-
-declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
-declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
-declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)
-
-declare <vscale x 8 x half> @llvm.aarch64.sve.famin.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
-declare <vscale x 4 x float> @llvm.aarch64.sve.famin.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
-declare <vscale x 2 x double> @llvm.aarch64.sve.famin.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
-declare <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
-declare <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
-declare <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
-
-declare <vscale x 8 x half> @llvm.aarch64.sve.famax.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
-declare <vscale x 4 x float> @llvm.aarch64.sve.famax.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
-declare <vscale x 2 x double> @llvm.aarch64.sve.famax.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
-declare <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
-declare <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
-declare <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
-
attributes #0 = { nounwind "target-features" = "+faminmax" }
>From 261e75c84200467514473f64b213ba31b761dec9 Mon Sep 17 00:00:00 2001
From: Momchil Velikov <momchil.velikov at arm.com>
Date: Thu, 29 Aug 2024 17:29:42 +0100
Subject: [PATCH 3/3] [fixup] Stuff (NFC)
---
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td | 9 +++------
.../test/CodeGen/AArch64/sve2-intrinsics-faminmax.ll | 12 ++++++------
2 files changed, 9 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 0573bd717caa5b..ee2948c9b9f106 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -717,11 +717,6 @@ let Predicates = [HasSVEorSME] in {
defm FDIV_ZPZZ : sve_fp_bin_pred_hfd<AArch64fdiv_p>;
} // End HasSVEorSME
-let Predicates = [HasSVE2orSME2, HasFAMINMAX] in {
- defm FAMAX_ZPZZ : sve_fp_bin_pred_hfd<int_aarch64_sve_famax_u>;
- defm FAMIN_ZPZZ : sve_fp_bin_pred_hfd<int_aarch64_sve_famin_u>;
-}
-
let Predicates = [HasSVEorSME, UseExperimentalZeroingPseudos] in {
defm FADD_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fadd>;
defm FSUB_ZPZZ : sve_fp_2op_p_zds_zeroing_hsd<int_aarch64_sve_fsub>;
@@ -4172,9 +4167,11 @@ defm FCVTNT_Z2Z_StoB : sve2_fp8_down_cvt_single<0b11, "fcvtnt", ZZ_s_mul_r>;
} // End HasSVE2orSME2, HasFP8
let Predicates = [HasSVE2orSME2, HasFAMINMAX] in {
-// FP8 Arithmetic - Predicated Group
defm FAMIN_ZPmZ : sve_fp_2op_p_zds<0b1111, "famin", "FAMIN_ZPZZ", int_aarch64_sve_famin, DestructiveBinaryComm>;
defm FAMAX_ZPmZ : sve_fp_2op_p_zds<0b1110, "famax", "FAMAX_ZPZZ", int_aarch64_sve_famax, DestructiveBinaryComm>;
+
+defm FAMAX_ZPZZ : sve_fp_bin_pred_hfd<int_aarch64_sve_famax_u>;
+defm FAMIN_ZPZZ : sve_fp_bin_pred_hfd<int_aarch64_sve_famin_u>;
} // End HasSVE2orSME2, HasFAMINMAX
let Predicates = [HasSSVE_FP8FMA] in {
diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-faminmax.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-faminmax.ll
index 0d247328a9475d..7d16f8383d9682 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-faminmax.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-faminmax.ll
@@ -36,7 +36,7 @@ define <vscale x 8 x half> @famin_u_f16(<vscale x 8 x i1> %pg, <vscale x 8 x hal
; CHECK: // %bb.0:
; CHECK-NEXT: famin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
- %r = call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+ %r = call <vscale x 8 x half> @llvm.aarch64.sve.famin.u.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %b, <vscale x 8 x half> %a)
ret <vscale x 8 x half> %r
}
@@ -45,7 +45,7 @@ define <vscale x 4 x float> @famin_u_f32(<vscale x 4 x i1> %pg, <vscale x 4 x fl
; CHECK: // %bb.0:
; CHECK-NEXT: famin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
- %r = call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+ %r = call <vscale x 4 x float> @llvm.aarch64.sve.famin.u.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %b, <vscale x 4 x float> %a)
ret <vscale x 4 x float> %r
}
@@ -54,7 +54,7 @@ define <vscale x 2 x double> @famin_u_f64(<vscale x 2 x i1> %pg, <vscale x 2 x d
; CHECK: // %bb.0:
; CHECK-NEXT: famin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
- %r = call <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+ %r = call <vscale x 2 x double> @llvm.aarch64.sve.famin.u.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %b, <vscale x 2 x double> %a)
ret <vscale x 2 x double> %r
}
@@ -90,7 +90,7 @@ define <vscale x 8 x half> @famax_u_f16(<vscale x 8 x i1> %pg, <vscale x 8 x hal
; CHECK: // %bb.0:
; CHECK-NEXT: famax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
- %r = call <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
+ %r = call <vscale x 8 x half> @llvm.aarch64.sve.famax.u.nxv8f16(<vscale x 8 x i1> %pg, <vscale x 8 x half> %b, <vscale x 8 x half> %a)
ret <vscale x 8 x half> %r
}
@@ -99,7 +99,7 @@ define <vscale x 4 x float> @famax_u_f32(<vscale x 4 x i1> %pg, <vscale x 4 x fl
; CHECK: // %bb.0:
; CHECK-NEXT: famax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
- %r = call <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+ %r = call <vscale x 4 x float> @llvm.aarch64.sve.famax.u.nxv4f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %b, <vscale x 4 x float> %a)
ret <vscale x 4 x float> %r
}
@@ -108,7 +108,7 @@ define <vscale x 2 x double> @famax_u_f64(<vscale x 2 x i1> %pg, <vscale x 2 x d
; CHECK: // %bb.0:
; CHECK-NEXT: famax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
- %r = call <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+ %r = call <vscale x 2 x double> @llvm.aarch64.sve.famax.u.nxv2f64(<vscale x 2 x i1> %pg, <vscale x 2 x double> %b, <vscale x 2 x double> %a)
ret <vscale x 2 x double> %r
}
More information about the cfe-commits
mailing list