[llvm] 5ec34df - [AArch64][SVE] Implement several floating-point arithmetic intrinsics
Kerry McLaughlin via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 1 03:55:43 PDT 2019
Author: Kerry McLaughlin
Date: 2019-11-01T10:40:36Z
New Revision: 5ec34dfdf733a17f277ba87e3d30e90083359390
URL: https://github.com/llvm/llvm-project/commit/5ec34dfdf733a17f277ba87e3d30e90083359390
DIFF: https://github.com/llvm/llvm-project/commit/5ec34dfdf733a17f277ba87e3d30e90083359390.diff
LOG: [AArch64][SVE] Implement several floating-point arithmetic intrinsics
Summary:
Adds intrinsics for the following (an illustrative IR call is sketched after the list):
- fabd, fadd, fsub & fsubr
- fmul, fmulx, fdiv & fdivr
- fmax, fmaxnm, fmin & fminnm
- fscale & ftsmul
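
For illustration, a minimal LLVM IR sketch of calling one of the new predicated intrinsics, mirroring the added test file (the function name here is hypothetical):

define <vscale x 4 x float> @fadd_example(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
  ; Predicated add: active lanes compute %a + %b; inactive lanes are expected to
  ; keep the corresponding element of the first vector operand (merging form,
  ; matching the FADD_ZPmZ instruction the pattern selects).
  %res = call <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1> %pg,
                                                                  <vscale x 4 x float> %a,
                                                                  <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %res
}

declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)

With -mattr=+sve this lowers to "fadd z0.s, p0/m, z0.s, z1.s". Note that fscale and ftsmul_x differ from the other intrinsics in taking an integer vector as the second data operand, which is why they get the separate SCALE/TSMUL intrinsic classes and multiclasses in the patch below.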
Reviewers: huntergr, sdesmalen, dancgr
Reviewed By: sdesmalen
Subscribers: tschuett, kristof.beyls, hiraditya, rkruppe, psnobl, cameron.mcinally, cfe-commits, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D69657
Added:
llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll
Modified:
llvm/include/llvm/IR/IntrinsicsAArch64.td
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
llvm/lib/Target/AArch64/SVEInstrFormats.td
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index cfd37149e493..15030d0cd188 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -788,6 +788,19 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
[llvm_anyvector_ty],
[IntrNoMem]>;
+ class AdvSIMD_SVE_SCALE_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMMatchType<0>,
+ LLVMVectorOfBitcastsToInt<0>],
+ [IntrNoMem]>;
+
+ class AdvSIMD_SVE_TSMUL_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>,
+ LLVMVectorOfBitcastsToInt<0>],
+ [IntrNoMem]>;
+
class AdvSIMD_SVE_DOT_Intrinsic
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
@@ -877,6 +890,25 @@ def int_aarch64_sve_uxtb : AdvSIMD_Merged1VectorArg_Intrinsic;
def int_aarch64_sve_uxth : AdvSIMD_Merged1VectorArg_Intrinsic;
def int_aarch64_sve_uxtw : AdvSIMD_Merged1VectorArg_Intrinsic;
+//
+// Floating-point arithmetic
+//
+
+def int_aarch64_sve_fabd : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fadd : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fdiv : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fdivr : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fmax : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fmaxnm : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fmin : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fminnm : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fmul : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fmulx : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fscale : AdvSIMD_SVE_SCALE_Intrinsic;
+def int_aarch64_sve_fsub : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_fsubr : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_ftsmul_x : AdvSIMD_SVE_TSMUL_Intrinsic;
+
//
// Floating-point comparisons
//
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 4c8d7023c6d5..8d8ec199f8a5 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -124,25 +124,25 @@ let Predicates = [HasSVE] in {
defm FMAX_ZPmI : sve_fp_2op_i_p_zds<0b110, "fmax", sve_fpimm_zero_one>;
defm FMIN_ZPmI : sve_fp_2op_i_p_zds<0b111, "fmin", sve_fpimm_zero_one>;
- defm FADD_ZPmZ : sve_fp_2op_p_zds<0b0000, "fadd">;
- defm FSUB_ZPmZ : sve_fp_2op_p_zds<0b0001, "fsub">;
- defm FMUL_ZPmZ : sve_fp_2op_p_zds<0b0010, "fmul">;
- defm FSUBR_ZPmZ : sve_fp_2op_p_zds<0b0011, "fsubr">;
- defm FMAXNM_ZPmZ : sve_fp_2op_p_zds<0b0100, "fmaxnm">;
- defm FMINNM_ZPmZ : sve_fp_2op_p_zds<0b0101, "fminnm">;
- defm FMAX_ZPmZ : sve_fp_2op_p_zds<0b0110, "fmax">;
- defm FMIN_ZPmZ : sve_fp_2op_p_zds<0b0111, "fmin">;
- defm FABD_ZPmZ : sve_fp_2op_p_zds<0b1000, "fabd">;
- defm FSCALE_ZPmZ : sve_fp_2op_p_zds<0b1001, "fscale">;
- defm FMULX_ZPmZ : sve_fp_2op_p_zds<0b1010, "fmulx">;
- defm FDIVR_ZPmZ : sve_fp_2op_p_zds<0b1100, "fdivr">;
- defm FDIV_ZPmZ : sve_fp_2op_p_zds<0b1101, "fdiv">;
-
- defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd", fadd>;
- defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub", fsub>;
- defm FMUL_ZZZ : sve_fp_3op_u_zd<0b010, "fmul", fmul>;
- defm FTSMUL_ZZZ : sve_fp_3op_u_zd<0b011, "ftsmul", null_frag>;
- defm FRECPS_ZZZ : sve_fp_3op_u_zd<0b110, "frecps", int_aarch64_sve_frecps_x>;
+ defm FADD_ZPmZ : sve_fp_2op_p_zds<0b0000, "fadd", int_aarch64_sve_fadd>;
+ defm FSUB_ZPmZ : sve_fp_2op_p_zds<0b0001, "fsub", int_aarch64_sve_fsub>;
+ defm FMUL_ZPmZ : sve_fp_2op_p_zds<0b0010, "fmul", int_aarch64_sve_fmul>;
+ defm FSUBR_ZPmZ : sve_fp_2op_p_zds<0b0011, "fsubr", int_aarch64_sve_fsubr>;
+ defm FMAXNM_ZPmZ : sve_fp_2op_p_zds<0b0100, "fmaxnm", int_aarch64_sve_fmaxnm>;
+ defm FMINNM_ZPmZ : sve_fp_2op_p_zds<0b0101, "fminnm", int_aarch64_sve_fminnm>;
+ defm FMAX_ZPmZ : sve_fp_2op_p_zds<0b0110, "fmax", int_aarch64_sve_fmax>;
+ defm FMIN_ZPmZ : sve_fp_2op_p_zds<0b0111, "fmin", int_aarch64_sve_fmin>;
+ defm FABD_ZPmZ : sve_fp_2op_p_zds<0b1000, "fabd", int_aarch64_sve_fabd>;
+ defm FSCALE_ZPmZ : sve_fp_2op_p_zds_fscale<0b1001, "fscale", int_aarch64_sve_fscale>;
+ defm FMULX_ZPmZ : sve_fp_2op_p_zds<0b1010, "fmulx", int_aarch64_sve_fmulx>;
+ defm FDIVR_ZPmZ : sve_fp_2op_p_zds<0b1100, "fdivr", int_aarch64_sve_fdivr>;
+ defm FDIV_ZPmZ : sve_fp_2op_p_zds<0b1101, "fdiv", int_aarch64_sve_fdiv>;
+
+ defm FADD_ZZZ : sve_fp_3op_u_zd<0b000, "fadd", fadd>;
+ defm FSUB_ZZZ : sve_fp_3op_u_zd<0b001, "fsub", fsub>;
+ defm FMUL_ZZZ : sve_fp_3op_u_zd<0b010, "fmul", fmul>;
+ defm FTSMUL_ZZZ : sve_fp_3op_u_zd_ftsmul<0b011, "ftsmul", int_aarch64_sve_ftsmul_x>;
+ defm FRECPS_ZZZ : sve_fp_3op_u_zd<0b110, "frecps", int_aarch64_sve_frecps_x>;
defm FRSQRTS_ZZZ : sve_fp_3op_u_zd<0b111, "frsqrts", int_aarch64_sve_frsqrts_x>;
defm FTSSEL_ZZZ : sve_int_bin_cons_misc_0_b<"ftssel">;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 583e1430e72c..8dc4efc7c885 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -1197,10 +1197,26 @@ class sve_fp_2op_p_zds<bits<2> sz, bits<4> opc, string asm,
let ElementSize = zprty.ElementSize;
}
-multiclass sve_fp_2op_p_zds<bits<4> opc, string asm> {
+multiclass sve_fp_2op_p_zds<bits<4> opc, string asm,
+ SDPatternOperator op> {
def _H : sve_fp_2op_p_zds<0b01, opc, asm, ZPR16>;
def _S : sve_fp_2op_p_zds<0b10, opc, asm, ZPR32>;
def _D : sve_fp_2op_p_zds<0b11, opc, asm, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
+}
+
+multiclass sve_fp_2op_p_zds_fscale<bits<4> opc, string asm,
+ SDPatternOperator op> {
+ def _H : sve_fp_2op_p_zds<0b01, opc, asm, ZPR16>;
+ def _S : sve_fp_2op_p_zds<0b10, opc, asm, ZPR32>;
+ def _D : sve_fp_2op_p_zds<0b11, opc, asm, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
class sve_fp_ftmad<bits<2> sz, string asm, ZPRRegOp zprty>
@@ -1235,13 +1251,11 @@ multiclass sve_fp_ftmad<string asm> {
// SVE Floating Point Arithmetic - Unpredicated Group
//===----------------------------------------------------------------------===//
-class sve_fp_3op_u_zd<bits<2> sz, bits<3> opc, string asm,
- ZPRRegOp zprty,
- ValueType vt, ValueType vt2, SDPatternOperator op>
+class sve_fp_3op_u_zd<bits<2> sz, bits<3> opc, string asm, ZPRRegOp zprty>
: I<(outs zprty:$Zd), (ins zprty:$Zn, zprty:$Zm),
asm, "\t$Zd, $Zn, $Zm",
"",
- [(set (vt zprty:$Zd), (op (vt zprty:$Zn), (vt2 zprty:$Zm)))]>, Sched<[]> {
+ []>, Sched<[]> {
bits<5> Zd;
bits<5> Zm;
bits<5> Zn;
@@ -1256,9 +1270,24 @@ class sve_fp_3op_u_zd<bits<2> sz, bits<3> opc, string asm,
}
multiclass sve_fp_3op_u_zd<bits<3> opc, string asm, SDPatternOperator op> {
- def _H : sve_fp_3op_u_zd<0b01, opc, asm, ZPR16, nxv8f16, nxv8f16, op>;
- def _S : sve_fp_3op_u_zd<0b10, opc, asm, ZPR32, nxv4f32, nxv4f32, op>;
- def _D : sve_fp_3op_u_zd<0b11, opc, asm, ZPR64, nxv2f64, nxv2f64, op>;
+ def _H : sve_fp_3op_u_zd<0b01, opc, asm, ZPR16>;
+ def _S : sve_fp_3op_u_zd<0b10, opc, asm, ZPR32>;
+ def _D : sve_fp_3op_u_zd<0b11, opc, asm, ZPR64>;
+
+ def : SVE_2_Op_Pat<nxv8f16, op, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
+
+}
+
+multiclass sve_fp_3op_u_zd_ftsmul<bits<3> opc, string asm, SDPatternOperator op> {
+ def _H : sve_fp_3op_u_zd<0b01, opc, asm, ZPR16>;
+ def _S : sve_fp_3op_u_zd<0b10, opc, asm, ZPR32>;
+ def _D : sve_fp_3op_u_zd<0b11, opc, asm, ZPR64>;
+
+ def : SVE_2_Op_Pat<nxv8f16, op, nxv8f16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll
new file mode 100644
index 000000000000..47345ad7f0c4
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-fp-arith.ll
@@ -0,0 +1,530 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; FABD
+;
+
+define <vscale x 8 x half> @fabd_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: fabd_h:
+; CHECK: fabd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.fabd.nxv8f16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x half> %a,
+ <vscale x 8 x half> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fabd_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fabd_s:
+; CHECK: fabd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.fabd.nxv4f32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x float> %a,
+ <vscale x 4 x float> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fabd_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fabd_d:
+; CHECK: fabd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x double> @llvm.aarch64.sve.fabd.nxv2f64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x double> %a,
+ <vscale x 2 x double> %b)
+ ret <vscale x 2 x double> %out
+}
+
+;
+; FADD
+;
+
+define <vscale x 8 x half> @fadd_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: fadd_h:
+; CHECK: fadd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x half> %a,
+ <vscale x 8 x half> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fadd_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fadd_s:
+; CHECK: fadd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x float> %a,
+ <vscale x 4 x float> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fadd_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fadd_d:
+; CHECK: fadd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x double> %a,
+ <vscale x 2 x double> %b)
+ ret <vscale x 2 x double> %out
+}
+
+;
+; FDIV
+;
+
+define <vscale x 8 x half> @fdiv_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: fdiv_h:
+; CHECK: fdiv z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.fdiv.nxv8f16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x half> %a,
+ <vscale x 8 x half> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fdiv_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fdiv_s:
+; CHECK: fdiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.fdiv.nxv4f32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x float> %a,
+ <vscale x 4 x float> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fdiv_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fdiv_d:
+; CHECK: fdiv z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x double> @llvm.aarch64.sve.fdiv.nxv2f64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x double> %a,
+ <vscale x 2 x double> %b)
+ ret <vscale x 2 x double> %out
+}
+
+;
+; FDIVR
+;
+
+define <vscale x 8 x half> @fdivr_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: fdivr_h:
+; CHECK: fdivr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.fdivr.nxv8f16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x half> %a,
+ <vscale x 8 x half> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fdivr_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fdivr_s:
+; CHECK: fdivr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.fdivr.nxv4f32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x float> %a,
+ <vscale x 4 x float> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fdivr_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fdivr_d:
+; CHECK: fdivr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x double> @llvm.aarch64.sve.fdivr.nxv2f64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x double> %a,
+ <vscale x 2 x double> %b)
+ ret <vscale x 2 x double> %out
+}
+
+;
+; FMAX
+;
+
+define <vscale x 8 x half> @fmax_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: fmax_h:
+; CHECK: fmax z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmax.nxv8f16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x half> %a,
+ <vscale x 8 x half> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fmax_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fmax_s:
+; CHECK: fmax z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x float> %a,
+ <vscale x 4 x float> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fmax_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fmax_d:
+; CHECK: fmax z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmax.nxv2f64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x double> %a,
+ <vscale x 2 x double> %b)
+ ret <vscale x 2 x double> %out
+}
+
+;
+; FMAXNM
+;
+
+define <vscale x 8 x half> @fmaxnm_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: fmaxnm_h:
+; CHECK: fmaxnm z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmaxnm.nxv8f16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x half> %a,
+ <vscale x 8 x half> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fmaxnm_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fmaxnm_s:
+; CHECK: fmaxnm z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.nxv4f32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x float> %a,
+ <vscale x 4 x float> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fmaxnm_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fmaxnm_d:
+; CHECK: fmaxnm z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.nxv2f64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x double> %a,
+ <vscale x 2 x double> %b)
+ ret <vscale x 2 x double> %out
+}
+
+;
+; FMIN
+;
+
+define <vscale x 8 x half> @fmin_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: fmin_h:
+; CHECK: fmin z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmin.nxv8f16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x half> %a,
+ <vscale x 8 x half> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fmin_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fmin_s:
+; CHECK: fmin z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmin.nxv4f32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x float> %a,
+ <vscale x 4 x float> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fmin_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fmin_d:
+; CHECK: fmin z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmin.nxv2f64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x double> %a,
+ <vscale x 2 x double> %b)
+ ret <vscale x 2 x double> %out
+}
+
+;
+; FMINNM
+;
+
+define <vscale x 8 x half> @fminnm_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: fminnm_h:
+; CHECK: fminnm z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.fminnm.nxv8f16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x half> %a,
+ <vscale x 8 x half> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fminnm_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fminnm_s:
+; CHECK: fminnm z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x float> %a,
+ <vscale x 4 x float> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fminnm_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fminnm_d:
+; CHECK: fminnm z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x double> @llvm.aarch64.sve.fminnm.nxv2f64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x double> %a,
+ <vscale x 2 x double> %b)
+ ret <vscale x 2 x double> %out
+}
+
+;
+; FMUL
+;
+
+define <vscale x 8 x half> @fmul_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: fmul_h:
+; CHECK: fmul z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x half> %a,
+ <vscale x 8 x half> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fmul_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fmul_s:
+; CHECK: fmul z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x float> %a,
+ <vscale x 4 x float> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fmul_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fmul_d:
+; CHECK: fmul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x double> %a,
+ <vscale x 2 x double> %b)
+ ret <vscale x 2 x double> %out
+}
+
+;
+; FMULX
+;
+
+define <vscale x 8 x half> @fmulx_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: fmulx_h:
+; CHECK: fmulx z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.fmulx.nxv8f16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x half> %a,
+ <vscale x 8 x half> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fmulx_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fmulx_s:
+; CHECK: fmulx z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.fmulx.nxv4f32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x float> %a,
+ <vscale x 4 x float> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fmulx_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fmulx_d:
+; CHECK: fmulx z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x double> @llvm.aarch64.sve.fmulx.nxv2f64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x double> %a,
+ <vscale x 2 x double> %b)
+ ret <vscale x 2 x double> %out
+}
+
+;
+; FSCALE
+;
+
+define <vscale x 8 x half> @fscale_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: fscale_h:
+; CHECK: fscale z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.fscale.nxv8f16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x half> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fscale_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: fscale_s:
+; CHECK: fscale z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.fscale.nxv4f32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x float> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fscale_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: fscale_d:
+; CHECK: fscale z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x double> @llvm.aarch64.sve.fscale.nxv2f64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x double> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 2 x double> %out
+}
+
+;
+; FSUB
+;
+
+define <vscale x 8 x half> @fsub_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: fsub_h:
+; CHECK: fsub z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x half> %a,
+ <vscale x 8 x half> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fsub_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fsub_s:
+; CHECK: fsub z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x float> %a,
+ <vscale x 4 x float> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fsub_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fsub_d:
+; CHECK: fsub z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x double> %a,
+ <vscale x 2 x double> %b)
+ ret <vscale x 2 x double> %out
+}
+
+;
+; FSUBR
+;
+
+define <vscale x 8 x half> @fsubr_h(<vscale x 8 x i1> %pg, <vscale x 8 x half> %a, <vscale x 8 x half> %b) {
+; CHECK-LABEL: fsubr_h:
+; CHECK: fsubr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.fsubr.nxv8f16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x half> %a,
+ <vscale x 8 x half> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @fsubr_s(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: fsubr_s:
+; CHECK: fsubr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.fsubr.nxv4f32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x float> %a,
+ <vscale x 4 x float> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @fsubr_d(<vscale x 2 x i1> %pg, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; CHECK-LABEL: fsubr_d:
+; CHECK: fsubr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x double> @llvm.aarch64.sve.fsubr.nxv2f64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x double> %a,
+ <vscale x 2 x double> %b)
+ ret <vscale x 2 x double> %out
+}
+
+;
+; FTSMUL
+;
+
+define <vscale x 8 x half> @ftsmul_h(<vscale x 8 x half> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: ftsmul_h:
+; CHECK: ftsmul z0.h, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.ftsmul.x.nxv8f16(<vscale x 8 x half> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @ftsmul_s(<vscale x 4 x float> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: ftsmul_s:
+; CHECK: ftsmul z0.s, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.ftsmul.x.nxv4f32(<vscale x 4 x float> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @ftsmul_d(<vscale x 2 x double> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: ftsmul_d:
+; CHECK: ftsmul z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x double> @llvm.aarch64.sve.ftsmul.x.nxv2f64(<vscale x 2 x double> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 2 x double> %out
+}
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fabd.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fabd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fabd.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fdiv.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fdiv.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fdiv.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fdivr.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fdivr.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fdivr.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmax.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmax.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmax.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmaxnm.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmaxnm.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmaxnm.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmin.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmin.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmin.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fminnm.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fminnm.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fminnm.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fmulx.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fmulx.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fmulx.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fscale.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x i16>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fscale.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x i32>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fscale.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x i64>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.fsubr.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.fsubr.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.fsubr.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 8 x half> @llvm.aarch64.sve.ftsmul.x.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i16>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.ftsmul.x.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i32>)
+declare <vscale x 2 x double> @llvm.aarch64.sve.ftsmul.x.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i64>)